@@ -54,9 +54,11 @@ dslist = [
 #    {'name': 'PTC_MR', 'dataset': '../datasets/PTC/Train/MR.ds',},
     ]
 estimator = untilhpathkernel
-param_grid_precomputed = {'depth': np.linspace(1, 10, 10), # [2],
-                          'k_func': ['MinMax'], # ['MinMax', 'tanimoto'],
-                          'compute_method': ['trie']} # ['MinMax']}
+param_grid_precomputed = {'depth': np.linspace(3, 10, 8), # [2],
+                          'k_func': [None]} # ['MinMax', 'tanimoto'],
+#param_grid_precomputed = {'depth': np.linspace(1, 10, 10), # [2],
+#                          'k_func': ['MinMax'], # ['MinMax', 'tanimoto'],
+#                          'compute_method': ['trie']} # ['MinMax']}
 param_grid = [{'C': np.logspace(-10, 10, num=41, base=10)},
               {'alpha': np.logspace(-10, 10, num=41, base=10)}]
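
For reference, such a grid expands multiplicatively over its value lists, one kernel configuration (and one Gram matrix) per combination. A minimal sketch of the expansion; sklearn's ParameterGrid is used purely for illustration, since the repository's own model-selection helper is not shown in this diff, and note that np.linspace yields floats, so depth is presumably cast to an integer downstream:

import numpy as np
from sklearn.model_selection import ParameterGrid

param_grid_precomputed = {'depth': np.linspace(3, 10, 8),  # 3.0, 4.0, ..., 10.0
                          'k_func': [None]}
for params in ParameterGrid(param_grid_precomputed):
    print(params)  # e.g. {'depth': 3.0, 'k_func': None} -- one Gram matrix each
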
@@ -17,8 +17,11 @@ import multiprocessing
 from tqdm import tqdm
 import networkx as nx
 import matplotlib.pyplot as plt
+import random
-from iam import iam, test_iam_with_more_graphs_as_init, test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations
-import matplotlib.pyplot as plt
+from iam import iam, test_iam_with_more_graphs_as_init, iam_moreGraphsAsInit_tryAllPossibleBestGraphs
 sys.path.insert(0, "../")
 from pygraph.kernels.marginalizedKernel import marginalizedkernel
 from pygraph.kernels.untilHPathKernel import untilhpathkernel
@@ -67,7 +70,7 @@ def gk_iam(Gn, alpha):
 #        Gs_nearest = Gk + gihat_list
 #        g_tmp = iam(Gs_nearest)
 #
-#        # compute distance between phi and the new generated graph.
+#        # compute distance between \psi and the new generated graph.
 #        knew = marginalizedkernel([g_tmp, g1, g2], node_label='atom', edge_label=None,
 #                                  p_quit=lmbda, n_iteration=20, remove_totters=False,
 #                                  n_jobs=multiprocessing.cpu_count(), verbose=False)
@@ -142,7 +145,7 @@ def gk_iam_nearest(Gn, alpha, idx_gi, Kmatrix, k, r_max):
             print(g_tmp.nodes(data=True))
             print(g_tmp.edges(data=True))
-            # compute distance between phi and the new generated graph.
+            # compute distance between \psi and the new generated graph.
             gi_list = [Gn[i] for i in idx_gi]
             knew = compute_kernel([g_tmp] + gi_list, 'untilhpathkernel', False)
             dnew = dis_gstar(0, range(1, len(gi_list) + 1), alpha, knew)
@@ -236,7 +239,7 @@ def gk_iam_nearest(Gn, alpha, idx_gi, Kmatrix, k, r_max):
 #            print(g.nodes(data=True))
 #            print(g.edges(data=True))
 #
-#        # compute distance between phi and the new generated graphs.
+#        # compute distance between \psi and the new generated graphs.
 #        gi_list = [Gn[i] for i in idx_gi]
 #        knew = compute_kernel(g_tmp_list + gi_list, 'marginalizedkernel', False)
 #        dnew_list = []
@@ -278,7 +281,12 @@ def gk_iam_nearest(Gn, alpha, idx_gi, Kmatrix, k, r_max):
 def gk_iam_nearest_multi(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max,
-                         gkernel, c_ei=1, c_er=1, c_es=1, epsilon=0.001):
+                         gkernel, epsilon=0.001,
+                         params_iam={'c_ei': 1, 'c_er': 1, 'c_es': 1,
+                                     'ite_max': 50, 'epsilon': 0.001,
+                                     'removeNodes': True, 'connected': False},
+                         params_ged={'ged_cost': 'CHEM_1', 'ged_method': 'IPFP',
+                                     'saveGXL': 'benoit'}):
     """This function constructs graph pre-image by the iterative pre-image
     framework in reference [1], algorithm 1, where the step of generating new
     graphs randomly is replaced by the IAM algorithm in reference [2].
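
For orientation, a hypothetical call sketch. Everything here is an assumption for illustration: Gn is a list of labeled networkx graphs, idx_gi indexes the median graphs inside Gn, compute_kernel is assumed to return the Gram matrix, and the four return values follow the new signature introduced by this patch:

Gn_median = [Gn[0].copy(), Gn[1].copy()]  # graphs whose weighted mean is sought
alpha = [0.5, 0.5]                        # convex combination weights
Kmatrix = compute_kernel(Gn, 'untilhpathkernel', True)
dhat, ghat_list, dis, nb_updated = gk_iam_nearest_multi(
        Gn, Gn_median, alpha, idx_gi=[0, 1], Kmatrix=Kmatrix,
        k=5, r_max=10, gkernel='untilhpathkernel')
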
@@ -310,7 +318,7 @@ def gk_iam_nearest_multi(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max,
     g0hat_list = [Gn_init[idx] for idx in sort_idx[0:nb_best]] # the nearest neighbors of phi in DN
     if dis_gs[0] == 0: # the exact pre-image.
         print('The exact pre-image is found from the input dataset.')
-        return 0, g0hat_list
+        return 0, g0hat_list, 0, 0
     dhat = dis_gs[0] # the nearest distance
     ghat_list = [g.copy() for g in g0hat_list]
 #    for g in ghat_list:
@@ -320,31 +328,33 @@ def gk_iam_nearest_multi(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max,
 #        print(g.nodes(data=True))
 #        print(g.edges(data=True))
     Gk = [Gn_init[ig].copy() for ig in sort_idx[0:k]] # the k nearest neighbors
-#    for gi in Gk:
-##        nx.draw_networkx(gi)
-##        plt.show()
+    for gi in Gk:
+        nx.draw(gi, labels=nx.get_node_attributes(gi, 'atom'), with_labels=True)
+#        nx.draw_networkx(gi)
+        plt.show()
 #        draw_Letter_graph(g)
-#        print(gi.nodes(data=True))
-#        print(gi.edges(data=True))
-    Gs_nearest = Gk.copy()
+        print(gi.nodes(data=True))
+        print(gi.edges(data=True))
+    Gs_nearest = [g.copy() for g in Gk]
+    Gn_nearest_median = [g.copy() for g in Gs_nearest]
 #    gihat_list = []
 #    i = 1
     r = 0
-    itr = 0
-#    cur_sod = dhat
-#    old_sod = cur_sod * 2
-    sod_list = [dhat]
+    itr_total = 0
+#    cur_dis = dhat
+#    old_dis = cur_dis * 2
+    dis_list = [dhat]
     found = False
     nb_updated = 0
-    while r < r_max:# and not found: # @todo: if not found?# and np.abs(old_sod - cur_sod) > epsilon:
-        print('\nr =', r)
-        print('itr for gk =', itr, '\n')
+    while r < r_max:# and not found: # @todo: if not found?# and np.abs(old_dis - cur_dis) > epsilon:
+        print('\nCurrent preimage iteration =', r)
+        print('Total preimage iteration =', itr_total, '\n')
         found = False
 #        Gs_nearest = Gk + gihat_list
 #        g_tmp = iam(Gs_nearest)
-        g_tmp_list, _ = test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations(
-                Gn_median, Gs_nearest, c_ei=c_ei, c_er=c_er, c_es=c_es)
+        g_tmp_list, _ = iam_moreGraphsAsInit_tryAllPossibleBestGraphs(
+                Gn_nearest_median, Gs_nearest, params_ged=params_ged, **params_iam)
 #        for g in g_tmp_list:
 #            nx.draw_networkx(g)
 #            plt.show()
@@ -352,31 +362,73 @@ def gk_iam_nearest_multi(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max,
 #            print(g.nodes(data=True))
 #            print(g.edges(data=True))
-        # compute distance between phi and the new generated graphs.
+        # compute distance between \psi and the new generated graphs.
         knew = compute_kernel(g_tmp_list + Gn_median, gkernel, False)
         dnew_list = []
         for idx, g_tmp in enumerate(g_tmp_list):
+            # @todo: the term3 below could use the one at the beginning of the function.
             dnew_list.append(dis_gstar(idx, range(len(g_tmp_list),
-                    len(g_tmp_list) + len(Gn_median) + 1), alpha, knew,
-                    withterm3=False))
+                    len(g_tmp_list) + len(Gn_median) + 1),
+                    alpha, knew, withterm3=False))
 #            dnew = knew[0, 0] - 2 * (alpha[0] * knew[0, 1] + alpha[1] *
 #                   knew[0, 2]) + (alpha[0] * alpha[0] * k_list[0] + alpha[0] *
 #                   alpha[1] * k_g2_list[0] + alpha[1] * alpha[0] *
 #                   k_g1_list[1] + alpha[1] * alpha[1] * k_list[1])
+#        # find the new k nearest graphs.
+#        dnew_best = min(dnew_list)
+#        dis_gs = dnew_list + dis_gs # add the new nearest distances.
+#        Gs_nearest = [g.copy() for g in g_tmp_list] + Gs_nearest # add the corresponding graphs.
+#        sort_idx = np.argsort(dis_gs)
+#        if len([i for i in sort_idx[0:k] if i < len(dnew_list)]) > 0:
+#            print('We got new k nearest neighbors! Hurray!')
+#            dis_gs = [dis_gs[idx] for idx in sort_idx[0:k]] # the new k nearest distances.
+##            print(dis_gs[-1])
+#            Gs_nearest = [Gs_nearest[idx] for idx in sort_idx[0:k]]
+#            nb_best = len(np.argwhere(dis_gs == dis_gs[0]).flatten().tolist())
+#            if dnew_best < dhat and np.abs(dnew_best - dhat) > epsilon:
+#                print('I have smaller distance!')
+#                print(str(dhat) + '->' + str(dis_gs[0]))
+#                dhat = dis_gs[0]
+#                idx_best_list = np.argwhere(dnew_list == dhat).flatten().tolist()
+#                ghat_list = [g_tmp_list[idx].copy() for idx in idx_best_list]
+##                for g in ghat_list:
+###                    nx.draw_networkx(g)
+###                    plt.show()
+##                    draw_Letter_graph(g)
+##                    print(g.nodes(data=True))
+##                    print(g.edges(data=True))
+#                r = 0
+#                found = True
+#                nb_updated += 1
+#            elif np.abs(dnew_best - dhat) < epsilon:
+#                print('I have almost equal distance!')
+#                print(str(dhat) + '->' + str(dnew_best))
+#        else:
+#            dis_gs = [dis_gs[idx] for idx in sort_idx[0:k]]
+#            Gs_nearest = [Gs_nearest[idx] for idx in sort_idx[0:k]]
+#            Gn_nearest_median = [g.copy() for g in Gs_nearest]
+#        if not found:
+#            r += 1
         # find the new k nearest graphs.
         dnew_best = min(dnew_list)
-        dis_gs = dnew_list + dis_gs # add the new nearest distances.
-        Gs_nearest = [g.copy() for g in g_tmp_list] + Gs_nearest # add the corresponding graphs.
-        sort_idx = np.argsort(dis_gs)
+        if np.abs(dnew_best - dhat) >= epsilon:
+            dis_gs = dnew_list + dis_gs # add the new nearest distances.
+            Gs_nearest = [g.copy() for g in g_tmp_list] + Gs_nearest # add the corresponding graphs.
+            sort_idx = np.argsort(dis_gs)
+        else: # if the new distance is equal to the old one.
+            # @todo: works if only one graph is generated.
+            Gs_nearest[0] = g_tmp_list[0].copy()
+            sort_idx = np.argsort(dis_gs)
         if len([i for i in sort_idx[0:k] if i < len(dnew_list)]) > 0:
-            print('We got better k nearest neighbors! Hurray!')
+            print('We got new k nearest neighbors! Hurray!')
             dis_gs = [dis_gs[idx] for idx in sort_idx[0:k]] # the new k nearest distances.
-            print(dis_gs[-1])
+#            print(dis_gs[-1])
             Gs_nearest = [Gs_nearest[idx] for idx in sort_idx[0:k]]
             nb_best = len(np.argwhere(dis_gs == dis_gs[0]).flatten().tolist())
-            if dnew_best < dhat and np.abs(dnew_best - dhat) > epsilon:
+            if dnew_best < dhat and np.abs(dnew_best - dhat) >= epsilon:
                 print('I have smaller distance!')
                 print(str(dhat) + '->' + str(dis_gs[0]))
                 dhat = dis_gs[0]
@@ -394,19 +446,269 @@ def gk_iam_nearest_multi(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max,
             elif np.abs(dnew_best - dhat) < epsilon:
                 print('I have almost equal distance!')
                 print(str(dhat) + '->' + str(dnew_best))
+        else:
+            dis_gs = [dis_gs[idx] for idx in sort_idx[0:k]]
+            Gs_nearest = [Gs_nearest[idx] for idx in sort_idx[0:k]]
+            Gn_nearest_median = [g.copy() for g in Gs_nearest]
         if not found:
             r += 1
-#        old_sod = cur_sod
-#        cur_sod = dnew_best
-        sod_list.append(dhat)
-        itr += 1
+#        old_dis = cur_dis
+#        cur_dis = dnew_best
+        dis_list.append(dhat)
+        itr_total += 1
     print('\nthe graph is updated', nb_updated, 'times.')
-    print('sods in kernel space:', sod_list, '\n')
-    return dhat, ghat_list
+    print('distances in kernel space:', dis_list, '\n')
+    return dhat, ghat_list, dis_list[-1], nb_updated
+def preimage_iam_random_mix(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max,
+                            l_max, gkernel, epsilon=0.001,
+                            params_iam={'c_ei': 1, 'c_er': 1, 'c_es': 1,
+                                        'ite_max': 50, 'epsilon': 0.001,
+                                        'removeNodes': True, 'connected': False},
+                            params_ged={'ged_cost': 'CHEM_1', 'ged_method': 'IPFP',
+                                        'saveGXL': 'benoit'}):
+    """This function constructs graph pre-image by the iterative pre-image
+    framework in reference [1], algorithm 1, where new graphs are generated
+    randomly and by the IAM algorithm in reference [2].
+
+    notes
+    -----
+    Every time a set of n better graphs is acquired, their distances in kernel
+    space are compared with the k nearest ones, and the k nearest distances
+    from the k+n distances will be used as the new ones.
+    """
+    Gn_init = [nx.convert_node_labels_to_integers(g) for g in Gn_init]
+    # compute k nearest neighbors of phi in DN.
+    dis_list = [] # distance between g_star and each graph.
+    term3 = 0
+    for i1, a1 in enumerate(alpha):
+        for i2, a2 in enumerate(alpha):
+            term3 += a1 * a2 * Kmatrix[idx_gi[i1], idx_gi[i2]]
+    for ig, g in tqdm(enumerate(Gn_init), desc='computing distances', file=sys.stdout):
+        dtemp = dis_gstar(ig, idx_gi, alpha, Kmatrix, term3=term3)
+        dis_list.append(dtemp)
+
+    # sort
+    sort_idx = np.argsort(dis_list)
+    dis_gs = [dis_list[idis] for idis in sort_idx[0:k]] # the k shortest distances
+    nb_best = len(np.argwhere(dis_gs == dis_gs[0]).flatten().tolist())
+    g0hat_list = [Gn_init[idx] for idx in sort_idx[0:nb_best]] # the nearest neighbors of phi in DN
+    if dis_gs[0] == 0: # the exact pre-image.
+        print('The exact pre-image is found from the input dataset.')
+        return 0, g0hat_list, 0, 0
+    dhat = dis_gs[0] # the nearest distance
+    ghat_list = [g.copy() for g in g0hat_list]
+#    for g in ghat_list:
+#        draw_Letter_graph(g)
+#        nx.draw_networkx(g)
+#        plt.show()
+#        print(g.nodes(data=True))
+#        print(g.edges(data=True))
+    Gk = [Gn_init[ig].copy() for ig in sort_idx[0:k]] # the k nearest neighbors
+    for gi in Gk:
+        nx.draw(gi, labels=nx.get_node_attributes(gi, 'atom'), with_labels=True)
+#        nx.draw_networkx(gi)
+        plt.show()
+#        draw_Letter_graph(g)
+        print(gi.nodes(data=True))
+        print(gi.edges(data=True))
+    Gs_nearest = [g.copy() for g in Gk]
+    Gn_nearest_median = [g.copy() for g in Gs_nearest]
+#    gihat_list = []
+#    i = 1
+    r = 0
+    itr_total = 0
+#    cur_dis = dhat
+#    old_dis = cur_dis * 2
+    dis_list = [dhat]
+    found = False
+    nb_updated_iam = 0
+    nb_updated_random = 0
+    while r < r_max: # and not found: # @todo: if not found?# and np.abs(old_dis - cur_dis) > epsilon:
+        print('\n-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-')
+        print('Current preimage iteration =', r)
+        print('Total preimage iteration =', itr_total, '\n')
+        found = False
+#        Gs_nearest = Gk + gihat_list
+#        g_tmp = iam(Gs_nearest)
+        g_tmp_list, _ = iam_moreGraphsAsInit_tryAllPossibleBestGraphs(
+                Gn_nearest_median, Gs_nearest, params_ged=params_ged, **params_iam)
+#        for g in g_tmp_list:
+#            nx.draw_networkx(g)
+#            plt.show()
+#            draw_Letter_graph(g)
+#            print(g.nodes(data=True))
+#            print(g.edges(data=True))
+        # compute distance between \psi and the new generated graphs.
+        knew = compute_kernel(g_tmp_list + Gn_median, gkernel, False)
+        dnew_list = []
+        for idx, g_tmp in enumerate(g_tmp_list):
+            # @todo: the term3 below could use the one at the beginning of the function.
+            dnew_list.append(dis_gstar(idx, range(len(g_tmp_list),
+                    len(g_tmp_list) + len(Gn_median) + 1),
+                    alpha, knew, withterm3=False))
+
+        # find the new k nearest graphs.
+        # @todo: for now only consider the situation when only one graph is generated by IAM.
+        dnew_best = min(dnew_list)
+        gnew_best = g_tmp_list[0].copy()
+        # when new distance is equal to the old one, use random generation.
+        if np.abs(dnew_best - dhat) < epsilon or dhat < dnew_best:
+#            Gs_nearest[0] = g_tmp_list[0].copy()
+#            sort_idx = np.argsort(dis_gs)
+            print('Distance almost equal or worse, switching to random generation now.')
+            print(str(dhat) + '->' + str(dnew_best))
+            if dnew_best > dhat and np.abs(dnew_best - dhat) >= epsilon:
+                dnew_best = dhat
+                gnew_best = Gs_nearest[0].copy()
+            # number of edges to be changed.
+            # @todo: what if the log is negative? how to choose alpha (scalar)? seems fdgs is always 1.
+#            fdgs = dnew_best
+            fdgs = nb_updated_random + 1
+            if fdgs < 1:
+                fdgs = 1
+            fdgs = int(np.ceil(np.log(fdgs)))
+            if fdgs < 1:
+                fdgs += 1
+#            fdgs = nb_updated_random + 1 # @todo:
+            # @todo: should we use just half of the adjacency matrix for undirected graphs?
+            nb_vpairs = nx.number_of_nodes(gnew_best) * (nx.number_of_nodes(gnew_best) - 1)
+            l = 0
+            while l < l_max:
+                # add and delete edges.
+                gtemp = gnew_best.copy()
+                np.random.seed()
+                # which edges to change.
+                # @todo: what if fdgs is bigger than nb_vpairs?
+                idx_change = random.sample(range(nb_vpairs), fdgs if
+                                           fdgs < nb_vpairs else nb_vpairs)
+#                idx_change = np.random.randint(0, nx.number_of_nodes(gs) *
+#                        (nx.number_of_nodes(gs) - 1), fdgs)
+                for item in idx_change:
+                    node1 = int(item / (nx.number_of_nodes(gtemp) - 1))
+                    node2 = (item - node1 * (nx.number_of_nodes(gtemp) - 1))
+                    if node2 >= node1: # skip the self pair.
+                        node2 += 1
+                    # @todo: is the randomness correct?
+                    if not gtemp.has_edge(node1, node2):
+                        gtemp.add_edge(node1, node2)
+#                        nx.draw_networkx(gs)
+#                        plt.show()
+#                        nx.draw_networkx(gtemp)
+#                        plt.show()
+                    else:
+                        gtemp.remove_edge(node1, node2)
+#                        nx.draw_networkx(gs)
+#                        plt.show()
+#                        nx.draw_networkx(gtemp)
+#                        plt.show()
+#                nx.draw_networkx(gtemp)
+#                plt.show()
+                # compute distance between \psi and the new generated graph.
+                knew = compute_kernel([gtemp] + Gn_median, gkernel, verbose=False)
+                dnew = dis_gstar(0, [1, 2], alpha, knew, withterm3=False)
+                # @todo: the new distance is smaller or also equal?
+                if dnew < dnew_best or np.abs(dnew_best - dnew) < epsilon:
+                    if np.abs(dnew_best - dnew) < epsilon:
+                        print('I am equal!')
+                        dnew_best = dnew
+                        gnew_best = gtemp.copy()
+                    else:
+                        print('\nI am smaller!')
+                        print('l =', str(l))
+                        print(dnew_best, '->', dnew)
+                        dis_gs = [dnew] + dis_gs # add the new nearest distances.
+                        Gs_nearest = [gtemp.copy()] + Gs_nearest # add the corresponding graphs.
+                        sort_idx = np.argsort(dis_gs)
+                        dis_gs = [dis_gs[idx] for idx in sort_idx[0:k]] # the new k nearest distances.
+                        Gs_nearest = [Gs_nearest[idx] for idx in sort_idx[0:k]]
+                        Gn_nearest_median = [g.copy() for g in Gs_nearest]
+                        dhat = dnew
+                        nb_updated_random += 1
+                        found = True # found better graph.
+                        r = 0
+                        print('the graph is updated by random generation',
+                              nb_updated_random, 'times.')
+                        nx.draw(gtemp, labels=nx.get_node_attributes(gtemp, 'atom'),
+                                with_labels=True)
+##                        plt.savefig("results/gk_iam/simple_two/xx" + str(i) + ".png", format="PNG")
+                        plt.show()
+                        break
+#                nx.draw_networkx(gtemp)
+#                plt.show()
+#                print(gtemp.nodes(data=True))
+#                print(gtemp.edges(data=True))
+                l += 1
+            if l == l_max:
+                r += 1
+        else: # if the new distance is not equal to the old one.
+            dis_gs = dnew_list + dis_gs # add the new nearest distances.
+            Gs_nearest = [nx.convert_node_labels_to_integers(g).copy() for g
+                          in g_tmp_list] + Gs_nearest # add the corresponding graphs.
+            sort_idx = np.argsort(dis_gs)
+            if len([i for i in sort_idx[0:k] if i < len(dnew_list)]) > 0:
+                print('We got new k nearest neighbors! Hurray!')
+                dis_gs = [dis_gs[idx] for idx in sort_idx[0:k]] # the new k nearest distances.
+#                print(dis_gs[-1])
+                Gs_nearest = [Gs_nearest[idx] for idx in sort_idx[0:k]]
+                nb_best = len(np.argwhere(dis_gs == dis_gs[0]).flatten().tolist())
+                if dnew_best < dhat:
+                    print('I have smaller distance!')
+                    print(str(dhat) + '->' + str(dis_gs[0]))
+                    dhat = dis_gs[0]
+                    idx_best_list = np.argwhere(dnew_list == dhat).flatten().tolist()
+                    ghat_list = [g_tmp_list[idx].copy() for idx in idx_best_list]
+#                    for g in ghat_list:
+##                        nx.draw_networkx(g)
+##                        plt.show()
+#                        draw_Letter_graph(g)
+#                        print(g.nodes(data=True))
+#                        print(g.edges(data=True))
+                    r = 0
+                    found = True
+                    nb_updated_iam += 1
+                    print('the graph is updated by IAM', nb_updated_iam, 'times.')
+                    nx.draw(ghat_list[0], labels=nx.get_node_attributes(ghat_list[0], 'atom'),
+                            with_labels=True)
+##                    plt.savefig("results/gk_iam/simple_two/xx" + str(i) + ".png", format="PNG")
+                    plt.show()
+            else:
+                dis_gs = [dis_gs[idx] for idx in sort_idx[0:k]]
+                Gs_nearest = [Gs_nearest[idx] for idx in sort_idx[0:k]]
+                Gn_nearest_median = [g.copy() for g in Gs_nearest]
+        if not found:
+            r += 1
+#        old_dis = cur_dis
+#        cur_dis = dnew_best
+        dis_list.append(dhat)
+        itr_total += 1
+        print('\nthe k shortest distances are', dis_gs)
+        print('the shortest distances for previous iterations are', dis_list)
+
+    print('\nthe graph is updated by IAM', nb_updated_iam, 'times, and by random generation',
+          nb_updated_random, 'times.')
+    print('distances in kernel space:', dis_list, '\n')
+
+    return dhat, ghat_list, dis_list[-1], nb_updated_iam, nb_updated_random
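
The random-generation branch above draws fdgs linear indices over the nb_vpairs = n * (n - 1) ordered node pairs and decodes each index into a pair, skipping the diagonal. A small self-check of that decoding, extracted into a hypothetical helper (illustrative only):

def index_to_node_pair(item, n):
    # Decode a linear index over the n * (n - 1) off-diagonal ordered pairs.
    node1 = item // (n - 1)
    node2 = item - node1 * (n - 1)
    if node2 >= node1:  # skip the self pair.
        node2 += 1
    return node1, node2

n = 4
pairs = [index_to_node_pair(i, n) for i in range(n * (n - 1))]
assert len(set(pairs)) == n * (n - 1) and all(a != b for a, b in pairs)
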
+###############################################################################
+# useful functions.
 def dis_gstar(idx_g, idx_gi, alpha, Kmatrix, term3=0, withterm3=True):
     term1 = Kmatrix[idx_g, idx_g]
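
From its call sites (term3 is precomputed as the double sum over alpha and Kmatrix), dis_gstar evidently evaluates the kernel-space distance between phi(g) and the weighted mean \psi = sum_i alpha_i * phi(g_i), expanded via the kernel trick. A standalone sketch of the squared quantity (illustrative; whether the actual function also takes the square root is not visible in this hunk):

def kernel_space_distance2(K, idx_g, idx_gi, alpha):
    # ||phi(g) - sum_i alpha_i phi(g_i)||^2 expanded via the kernel trick.
    term1 = K[idx_g, idx_g]
    term2 = 2 * sum(a * K[idx_g, i] for a, i in zip(alpha, idx_gi))
    term3 = sum(a1 * a2 * K[i1, i2]
                for a1, i1 in zip(alpha, idx_gi)
                for a2, i2 in zip(alpha, idx_gi))
    return term1 - term2 + term3
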
@@ -424,10 +726,10 @@ def dis_gstar(idx_g, idx_gi, alpha, Kmatrix, term3=0, withterm3=True):
 def compute_kernel(Gn, graph_kernel, verbose):
     if graph_kernel == 'marginalizedkernel':
         Kmatrix, _ = marginalizedkernel(Gn, node_label='atom', edge_label=None,
-                                  p_quit=0.03, n_iteration=20, remove_totters=False,
+                                  p_quit=0.03, n_iteration=10, remove_totters=False,
                                   n_jobs=multiprocessing.cpu_count(), verbose=verbose)
     elif graph_kernel == 'untilhpathkernel':
-        Kmatrix, _ = untilhpathkernel(Gn, node_label='atom', edge_label='bond_type',
+        Kmatrix, _ = untilhpathkernel(Gn, node_label='atom', edge_label=None,
                                   depth=10, k_func='MinMax', compute_method='trie',
                                   n_jobs=multiprocessing.cpu_count(), verbose=verbose)
     elif graph_kernel == 'spkernel':
@@ -20,7 +20,424 @@ from pygraph.utils.graphdataset import get_dataset_attributes
 from pygraph.utils.utils import graph_isIdentical, get_node_labels, get_edge_labels
 #from pygraph.utils.utils import graph_deepcopy
+def iam_moreGraphsAsInit_tryAllPossibleBestGraphs(Gn_median, Gn_candidate,
+        c_ei=3, c_er=3, c_es=1, ite_max=50, epsilon=0.001,
+        node_label='atom', edge_label='bond_type',
+        connected=False, removeNodes=True, AllBestInit=True,
+        params_ged={'ged_cost': 'CHEM_1', 'ged_method': 'IPFP', 'saveGXL': 'benoit'}):
+    """See my name, then you know what I do.
+    """
+    from tqdm import tqdm
+#    Gn_median = Gn_median[0:10]
+#    Gn_median = [nx.convert_node_labels_to_integers(g) for g in Gn_median]
+    if removeNodes:
+        node_ir = np.inf # corresponding to the node remove and insertion.
+        label_r = 'thanksdanny' # the label for node remove. # @todo: make this label unrepeatable.
+    ds_attrs = get_dataset_attributes(Gn_median + Gn_candidate,
+                                      attr_names=['edge_labeled', 'node_attr_dim', 'edge_attr_dim'],
+                                      edge_label=edge_label)
+    def generate_graph(G, pi_p_forward, label_set):
+        G_new_list = [G.copy()] # all "best" graphs generated in this iteration.
+#        nx.draw_networkx(G)
+#        import matplotlib.pyplot as plt
+#        plt.show()
+#        print(pi_p_forward)
+
+        # update vertex labels.
+        # pre-compute h_i0 for each label.
+#        for label in get_node_labels(Gn, node_label):
+#            print(label)
+#        for nd in G.nodes(data=True):
+#            pass
+        if not ds_attrs['node_attr_dim']: # labels are symbolic
+            for ndi, (nd, _) in enumerate(G.nodes(data=True)):
+                h_i0_list = []
+                label_list = []
+                for label in label_set:
+                    h_i0 = 0
+                    for idx, g in enumerate(Gn_median):
+                        pi_i = pi_p_forward[idx][ndi]
+                        if pi_i != node_ir and g.nodes[pi_i][node_label] == label:
+                            h_i0 += 1
+                    h_i0_list.append(h_i0)
+                    label_list.append(label)
+                # case when the node is to be removed.
+                if removeNodes:
+                    h_i0_remove = 0 # @todo: maybe this can be added to the label_set above.
+                    for idx, g in enumerate(Gn_median):
+                        pi_i = pi_p_forward[idx][ndi]
+                        if pi_i == node_ir:
+                            h_i0_remove += 1
+                    h_i0_list.append(h_i0_remove)
+                    label_list.append(label_r)
+                # get the best labels.
+                idx_max = np.argwhere(h_i0_list == np.max(h_i0_list)).flatten().tolist()
+                nlabel_best = [label_list[idx] for idx in idx_max]
+                # generate "best" graphs with regard to "best" node labels.
+                G_new_list_nd = []
+                for g in G_new_list: # @todo: seems it can be simplified. The G_new_list will only contain 1 graph for now.
+                    for nl in nlabel_best:
+                        g_tmp = g.copy()
+                        if nl == label_r:
+                            g_tmp.remove_node(nd)
+                        else:
+                            g_tmp.nodes[nd][node_label] = nl
+                        G_new_list_nd.append(g_tmp)
+#                        nx.draw_networkx(g_tmp)
+#                        import matplotlib.pyplot as plt
+#                        plt.show()
+#                        print(g_tmp.nodes(data=True))
+#                        print(g_tmp.edges(data=True))
+                G_new_list = [ggg.copy() for ggg in G_new_list_nd]
+        else: # labels are non-symbolic
+            for ndi, (nd, _) in enumerate(G.nodes(data=True)):
+                Si_norm = 0
+                phi_i_bar = np.array([0.0 for _ in range(ds_attrs['node_attr_dim'])])
+                for idx, g in enumerate(Gn_median):
+                    pi_i = pi_p_forward[idx][ndi]
+                    if g.has_node(pi_i): # @todo: what if no g has node? phi_i_bar = 0?
+                        Si_norm += 1
+                        phi_i_bar += np.array([float(itm) for itm in g.nodes[pi_i]['attributes']])
+                phi_i_bar /= Si_norm
+                G_new_list[0].nodes[nd]['attributes'] = phi_i_bar
+
+#        for g in G_new_list:
+#            import matplotlib.pyplot as plt
+#            nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True)
+#            plt.show()
+#            print(g.nodes(data=True))
+#            print(g.edges(data=True))
+
+        # update edge labels and adjacency matrix.
+        if ds_attrs['edge_labeled']:
+            G_new_list_edge = []
+            for g_new in G_new_list:
+                nd_list = [n for n in g_new.nodes()]
+                g_tmp_list = [g_new.copy()]
+                for nd1i in range(nx.number_of_nodes(g_new)):
+                    nd1 = nd_list[nd1i] # @todo: not just edges, but all pairs of nodes
+                    for nd2i in range(nd1i + 1, nx.number_of_nodes(g_new)):
+                        nd2 = nd_list[nd2i]
+#            for nd1, nd2, _ in g_new.edges(data=True):
+                        h_ij0_list = []
+                        label_list = []
+                        # @todo: compute edge label set before.
+                        for label in get_edge_labels(Gn_median, edge_label):
+                            h_ij0 = 0
+                            for idx, g in enumerate(Gn_median):
+                                pi_i = pi_p_forward[idx][nd1i]
+                                pi_j = pi_p_forward[idx][nd2i]
+                                h_ij0_p = (g.has_node(pi_i) and g.has_node(pi_j) and
+                                           g.has_edge(pi_i, pi_j) and
+                                           g.edges[pi_i, pi_j][edge_label] == label)
+                                h_ij0 += h_ij0_p
+                            h_ij0_list.append(h_ij0)
+                            label_list.append(label)
+#                        # case when the edge is to be removed.
+#                        h_ij0_remove = 0
+#                        for idx, g in enumerate(Gn_median):
+#                            pi_i = pi_p_forward[idx][nd1i]
+#                            pi_j = pi_p_forward[idx][nd2i]
+#                            if g.has_node(pi_i) and g.has_node(pi_j) and not
+#                                g.has_edge(pi_i, pi_j):
+#                                h_ij0_remove += 1
+#                        h_ij0_list.append(h_ij0_remove)
+#                        label_list.append(label_r)
+                        # get the best labels.
+                        # choose all best graphs.
+                        idx_max = np.argwhere(h_ij0_list == np.max(h_ij0_list)).flatten().tolist()
+                        elabel_best = [label_list[idx] for idx in idx_max]
+                        h_ij0_max = [h_ij0_list[idx] for idx in idx_max]
+                        # generate "best" graphs with regard to "best" node labels.
+                        G_new_list_ed = []
+                        for g_tmp in g_tmp_list: # @todo: seems it can be simplified. The G_new_list will only contain 1 graph for now.
+                            for idxl, el in enumerate(elabel_best):
+                                g_tmp_copy = g_tmp.copy()
+                                # check whether a_ij is 0 or 1.
+                                sij_norm = 0
+                                for idx, g in enumerate(Gn_median):
+                                    pi_i = pi_p_forward[idx][nd1i]
+                                    pi_j = pi_p_forward[idx][nd2i]
+                                    if g.has_node(pi_i) and g.has_node(pi_j) and \
+                                            g.has_edge(pi_i, pi_j):
+                                        sij_norm += 1
+                                if h_ij0_max[idxl] > len(Gn_median) * c_er / c_es + \
+                                        sij_norm * (1 - (c_er + c_ei) / c_es):
+                                    if not g_tmp_copy.has_edge(nd1, nd2):
+                                        g_tmp_copy.add_edge(nd1, nd2)
+                                    g_tmp_copy.edges[nd1, nd2][edge_label] = elabel_best[idxl]
+                                else:
+                                    if g_tmp_copy.has_edge(nd1, nd2):
+                                        g_tmp_copy.remove_edge(nd1, nd2)
+                                G_new_list_ed.append(g_tmp_copy)
+                        g_tmp_list = [ggg.copy() for ggg in G_new_list_ed]
+                G_new_list_edge += g_tmp_list
+            G_new_list = [ggg.copy() for ggg in G_new_list_edge]
+
+#            # choose one of the best randomly.
+#            idx_max = np.argwhere(h_ij0_list == np.max(h_ij0_list)).flatten().tolist()
+#            h_ij0_max = h_ij0_list[idx_max[0]]
+#            idx_rdm = random.randint(0, len(idx_max) - 1)
+#            best_label = label_list[idx_max[idx_rdm]]
+#
+#            # check whether a_ij is 0 or 1.
+#            sij_norm = 0
+#            for idx, g in enumerate(Gn_median):
+#                pi_i = pi_p_forward[idx][nd1i]
+#                pi_j = pi_p_forward[idx][nd2i]
+#                if g.has_node(pi_i) and g.has_node(pi_j) and g.has_edge(pi_i, pi_j):
+#                    sij_norm += 1
+#            if h_ij0_max > len(Gn_median) * c_er / c_es + sij_norm * (1 - (c_er + c_ei) / c_es):
+#                if not g_new.has_edge(nd1, nd2):
+#                    g_new.add_edge(nd1, nd2)
+#                g_new.edges[nd1, nd2][edge_label] = best_label
+#            else:
+#                if g_new.has_edge(nd1, nd2):
+#                    g_new.remove_edge(nd1, nd2)
+        else: # if edges are unlabeled
+            # @todo: is this even right? G or g_tmp? check if the new one is right
+            # @todo: works only for undirected graphs.
+            for g_tmp in G_new_list:
+                nd_list = [n for n in g_tmp.nodes()]
+                for nd1i in range(nx.number_of_nodes(g_tmp)):
+                    nd1 = nd_list[nd1i]
+                    for nd2i in range(nd1i + 1, nx.number_of_nodes(g_tmp)):
+                        nd2 = nd_list[nd2i]
+                        sij_norm = 0
+                        for idx, g in enumerate(Gn_median):
+                            pi_i = pi_p_forward[idx][nd1i]
+                            pi_j = pi_p_forward[idx][nd2i]
+                            if g.has_node(pi_i) and g.has_node(pi_j) and g.has_edge(pi_i, pi_j):
+                                sij_norm += 1
+                        if sij_norm > len(Gn_median) * c_er / (c_er + c_ei):
+                            # @todo: should we consider if nd1 and nd2 in g_tmp?
+                            # or just add the edge anyway?
+                            if g_tmp.has_node(nd1) and g_tmp.has_node(nd2) \
+                                    and not g_tmp.has_edge(nd1, nd2):
+                                g_tmp.add_edge(nd1, nd2)
+#                        else: # @todo: which to use?
+                        elif sij_norm < len(Gn_median) * c_er / (c_er + c_ei):
+                            if g_tmp.has_edge(nd1, nd2):
+                                g_tmp.remove_edge(nd1, nd2)
+                        # do not change anything when equal.
+
+#        for i, g in enumerate(G_new_list):
+#            import matplotlib.pyplot as plt
+#            nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True)
+##            plt.savefig("results/gk_iam/simple_two/xx" + str(i) + ".png", format="PNG")
+#            plt.show()
+#            print(g.nodes(data=True))
+#            print(g.edges(data=True))
+
+#        # find the best graph generated in this iteration and update pi_p.
+        # @todo: should we update all graphs generated or just the best ones?
+        dis_list, pi_forward_list = median_distance(G_new_list, Gn_median,
+                                                    **params_ged)
+        # @todo: should we remove the identical and connectivity check?
+        # Don't know which is faster.
+        if ds_attrs['node_attr_dim'] == 0 and ds_attrs['edge_attr_dim'] == 0:
+            G_new_list, idx_list = remove_duplicates(G_new_list)
+            pi_forward_list = [pi_forward_list[idx] for idx in idx_list]
+            dis_list = [dis_list[idx] for idx in idx_list]
+#        if connected == True:
+#            G_new_list, idx_list = remove_disconnected(G_new_list)
+#            pi_forward_list = [pi_forward_list[idx] for idx in idx_list]
+#        idx_min_list = np.argwhere(dis_list == np.min(dis_list)).flatten().tolist()
+#        dis_min = dis_list[idx_min_tmp_list[0]]
+#        pi_forward_list = [pi_forward_list[idx] for idx in idx_min_list]
+#        G_new_list = [G_new_list[idx] for idx in idx_min_list]
+
+#        for g in G_new_list:
+#            import matplotlib.pyplot as plt
+#            nx.draw_networkx(g)
+#            plt.show()
+#            print(g.nodes(data=True))
+#            print(g.edges(data=True))
+
+        return G_new_list, pi_forward_list, dis_list
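
The vertex-label update in generate_graph is a per-node majority vote over the median graphs: h_i0 counts, for each candidate label, how many medians align node i to a node carrying that label, with node removal treated as one more "label". The vote in isolation (a hypothetical helper; None stands for pi_i == node_ir):

from collections import Counter

def majority_vertex_labels(votes, remove_label='REMOVE'):
    # votes: the label each median graph maps this node to, or None.
    counts = Counter(remove_label if v is None else v for v in votes)
    best = max(counts.values())
    return [lbl for lbl, c in counts.items() if c == best]  # all tied bests

print(majority_vertex_labels(['C', 'C', None, 'O']))  # ['C']
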
+    def best_median_graphs(Gn_candidate, pi_all_forward, dis_all):
+        idx_min_list = np.argwhere(dis_all == np.min(dis_all)).flatten().tolist()
+        dis_min = dis_all[idx_min_list[0]]
+        pi_forward_min_list = [pi_all_forward[idx] for idx in idx_min_list]
+        G_min_list = [Gn_candidate[idx] for idx in idx_min_list]
+        return G_min_list, pi_forward_min_list, dis_min
+
+    def iteration_proc(G, pi_p_forward, cur_sod):
+        G_list = [G]
+        pi_forward_list = [pi_p_forward]
+        old_sod = cur_sod * 2
+        sod_list = [cur_sod]
+        dis_list = [cur_sod]
+        # iterations.
+        itr = 0
+        # @todo: what if difference == 0?
+#        while itr < ite_max and (np.abs(old_sod - cur_sod) > epsilon or
+#                                 np.abs(old_sod - cur_sod) == 0):
+        while itr < ite_max and np.abs(old_sod - cur_sod) > epsilon:
+#        for itr in range(0, 5): # the convergence condition?
+            print('itr_iam is', itr)
+            G_new_list = []
+            pi_forward_new_list = []
+            dis_new_list = []
+            for idx, g in enumerate(G_list):
+                label_set = get_node_labels(Gn_median + [g], node_label)
+                G_tmp_list, pi_forward_tmp_list, dis_tmp_list = generate_graph(
+                        g, pi_forward_list[idx], label_set)
+                G_new_list += G_tmp_list
+                pi_forward_new_list += pi_forward_tmp_list
+                dis_new_list += dis_tmp_list
+            # @todo: need to remove duplicates here?
+            G_list = [ggg.copy() for ggg in G_new_list]
+            pi_forward_list = [pitem.copy() for pitem in pi_forward_new_list]
+            dis_list = dis_new_list[:]
+            old_sod = cur_sod
+            cur_sod = np.min(dis_list)
+            sod_list.append(cur_sod)
+            itr += 1
+
+        # @todo: do we return all graphs or the best ones?
+        # get the best ones of the generated graphs.
+        G_list, pi_forward_list, dis_min = best_median_graphs(
+                G_list, pi_forward_list, dis_list)
+        if ds_attrs['node_attr_dim'] == 0 and ds_attrs['edge_attr_dim'] == 0:
+            G_list, idx_list = remove_duplicates(G_list)
+            pi_forward_list = [pi_forward_list[idx] for idx in idx_list]
+#            dis_list = [dis_list[idx] for idx in idx_list]
+
+#        import matplotlib.pyplot as plt
+#        for g in G_list:
+#            nx.draw_networkx(g)
+#            plt.show()
+#            print(g.nodes(data=True))
+#            print(g.edges(data=True))
+
+        print('\nsods:', sod_list, '\n')
+        return G_list, pi_forward_list, dis_min
+    def remove_duplicates(Gn):
+        """Remove duplicate graphs from list.
+        """
+        Gn_new = []
+        idx_list = []
+        for idx, g in enumerate(Gn):
+            dupl = False
+            for g_new in Gn_new:
+                if graph_isIdentical(g_new, g):
+                    dupl = True
+                    break
+            if not dupl:
+                Gn_new.append(g)
+                idx_list.append(idx)
+        return Gn_new, idx_list
+
+    def remove_disconnected(Gn):
+        """Remove disconnected graphs from list.
+        """
+        Gn_new = []
+        idx_list = []
+        for idx, g in enumerate(Gn):
+            if nx.is_connected(g):
+                Gn_new.append(g)
+                idx_list.append(idx)
+        return Gn_new, idx_list
+    # phase 1: initialize.
+    # compute set-median.
+    dis_min = np.inf
+    dis_list, pi_forward_all = median_distance(Gn_candidate, Gn_median,
+                                               **params_ged)
+    # find all smallest distances.
+    if AllBestInit: # try all best init graphs.
+        idx_min_list = range(len(dis_list))
+        dis_min = dis_list
+    else:
+        idx_min_list = np.argwhere(dis_list == np.min(dis_list)).flatten().tolist()
+        dis_min = [dis_list[idx_min_list[0]]] * len(idx_min_list)
+
+    # phase 2: iteration.
+    G_list = []
+    dis_list = []
+    pi_forward_list = []
+    for idx_tmp, idx_min in enumerate(idx_min_list):
+#        print('idx_min is', idx_min)
+        G = Gn_candidate[idx_min].copy()
+        # list of edit operations.
+        pi_p_forward = pi_forward_all[idx_min]
+#        pi_p_backward = pi_all_backward[idx_min]
+        Gi_list, pi_i_forward_list, dis_i_min = iteration_proc(G, pi_p_forward, dis_min[idx_tmp])
+        G_list += Gi_list
+        dis_list += [dis_i_min] * len(Gi_list)
+        pi_forward_list += pi_i_forward_list
+
+    if ds_attrs['node_attr_dim'] == 0 and ds_attrs['edge_attr_dim'] == 0:
+        G_list, idx_list = remove_duplicates(G_list)
+        dis_list = [dis_list[idx] for idx in idx_list]
+        pi_forward_list = [pi_forward_list[idx] for idx in idx_list]
+    if connected == True:
+        G_list_con, idx_list = remove_disconnected(G_list)
+        # if there are no connected graphs at all, keep the disconnected ones.
+        if len(G_list_con) > 0: # @todo: ??????????????????????????
+            G_list = G_list_con
+            dis_list = [dis_list[idx] for idx in idx_list]
+            pi_forward_list = [pi_forward_list[idx] for idx in idx_list]
+
+#    import matplotlib.pyplot as plt
+#    for g in G_list:
+#        nx.draw_networkx(g)
+#        plt.show()
+#        print(g.nodes(data=True))
+#        print(g.edges(data=True))
+
+    # get the best median graphs
+#    dis_list, pi_forward_list = median_distance(G_list, Gn_median,
+#                                                **params_ged)
+    G_min_list, pi_forward_min_list, dis_min = best_median_graphs(
+            G_list, pi_forward_list, dis_list)
+#    for g in G_min_list:
+#        nx.draw_networkx(g)
+#        plt.show()
+#        print(g.nodes(data=True))
+#        print(g.edges(data=True))
+
+    # randomly choose one graph.
+    idx_rdm = random.randint(0, len(G_min_list) - 1)
+    G_min_list = [G_min_list[idx_rdm]]
+    return G_min_list, dis_min
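
The edge updates above follow the IAM majority rules: with labeled edges, the pair (nd1, nd2) gets its best label when h_ij0_max > len(Gn_median) * c_er / c_es + sij_norm * (1 - (c_er + c_ei) / c_es); with unlabeled edges, only the count sij_norm of medians in which the aligned pair is an edge matters. The unlabeled decision in isolation (a hypothetical helper using the patch's default costs):

def keep_edge(sij_norm, n_medians, c_ei=3, c_er=3):
    # sij_norm: number of median graphs whose aligned node pair is an edge.
    threshold = n_medians * c_er / (c_er + c_ei)
    if sij_norm > threshold:
        return True    # add the edge if missing
    elif sij_norm < threshold:
        return False   # remove the edge if present
    return None        # tie: do not change anything

print(keep_edge(4, 6))  # True: 4 > 6 * 3 / (3 + 3) = 3.0
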
+###############################################################################
 def iam(Gn, c_ei=3, c_er=3, c_es=1, node_label='atom', edge_label='bond_type',
         connected=True):
     """See my name, then you know what I do.
@@ -148,27 +565,42 @@ def iam(Gn, c_ei=3, c_er=3, c_es=1, node_label='atom', edge_label='bond_type',
     return G
-def GED(g1, g2, lib='gedlib'):
+def GED(g1, g2, lib='gedlib', cost='CHEM_1', method='IPFP', saveGXL='benoit',
+        stabilizer='min'):
     """
     Compute GED.
     """
     if lib == 'gedlib':
         # transform dataset to the 'xml' file as the GedLib required.
-        saveDataset([g1, g2], [None, None], group='xml', filename='ged_tmp/tmp')
-#        script.appel()
+        saveDataset([g1, g2], [None, None], group='xml', filename='ged_tmp/tmp',
+                    xparams={'method': saveGXL})
+#        script.appel()
         script.PyRestartEnv()
         script.PyLoadGXLGraph('ged_tmp/', 'ged_tmp/tmp.xml')
         listID = script.PyGetGraphIds()
-        script.PySetEditCost("LETTER") #("CHEM_1")
+        script.PySetEditCost(cost) #("CHEM_1")
         script.PyInitEnv()
-        script.PySetMethod("IPFP", "")
+        script.PySetMethod(method, "")
         script.PyInitMethod()
         g = listID[0]
        h = listID[1]
-        script.PyRunMethod(g, h)
-        pi_forward, pi_backward = script.PyGetAllMap(g, h)
-        upper = script.PyGetUpperBound(g, h)
-        lower = script.PyGetLowerBound(g, h)
+        if stabilizer == None:
+            script.PyRunMethod(g, h)
+            pi_forward, pi_backward = script.PyGetAllMap(g, h)
+            upper = script.PyGetUpperBound(g, h)
+            lower = script.PyGetLowerBound(g, h)
+        elif stabilizer == 'min':
+            upper = np.inf
+            for itr in range(50):
+                script.PyRunMethod(g, h)
+                upper_tmp = script.PyGetUpperBound(g, h)
+                if upper_tmp < upper:
+                    upper = upper_tmp
+                    pi_forward, pi_backward = script.PyGetAllMap(g, h)
+                    lower = script.PyGetLowerBound(g, h)
+                if upper == 0:
+                    break
         dis = upper
         # make the map label correct (label remove map as np.inf)
@@ -177,12 +609,13 @@ def GED(g1, g2, lib='gedlib'):
         nb1 = nx.number_of_nodes(g1)
         nb2 = nx.number_of_nodes(g2)
         pi_forward = [nodes2[pi] if pi < nb2 else np.inf for pi in pi_forward]
-        pi_backward = [nodes1[pi] if pi < nb1 else np.inf for pi in pi_backward]
+        pi_backward = [nodes1[pi] if pi < nb1 else np.inf for pi in pi_backward]
         return dis, pi_forward, pi_backward
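
The new 'min' stabilizer simply re-runs the randomized IPFP matching up to 50 times, keeps the smallest upper bound (and its node maps) seen so far, and stops early on an exact match. The idea in isolation (a generic sketch, independent of the gedlib bindings):

def stabilized_min(run_once, n_trials=50):
    # run_once() -> (cost, mapping) from one randomized solver run.
    best_cost, best_map = float('inf'), None
    for _ in range(n_trials):
        cost, mapping = run_once()
        if cost < best_cost:
            best_cost, best_map = cost, mapping
        if best_cost == 0:  # an exact match cannot be improved.
            break
    return best_cost, best_map
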
-def median_distance(Gn, Gn_median, measure='ged', verbose=False):
+def median_distance(Gn, Gn_median, measure='ged', verbose=False,
+                    ged_cost='CHEM_1', ged_method='IPFP', saveGXL='benoit'):
     dis_list = []
     pi_forward_list = []
     for idx, G in tqdm(enumerate(Gn), desc='computing median distances',
@@ -190,7 +623,8 @@ def median_distance(Gn, Gn_median, measure='ged', verbose=False):
         dis_sum = 0
         pi_forward_list.append([])
         for G_p in Gn_median:
-            dis_tmp, pi_tmp_forward, pi_tmp_backward = GED(G, G_p)
+            dis_tmp, pi_tmp_forward, pi_tmp_backward = GED(G, G_p,
+                    cost=ged_cost, method=ged_method, saveGXL=saveGXL)
             pi_forward_list[idx].append(pi_tmp_forward)
             dis_sum += dis_tmp
         dis_list.append(dis_sum)
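
In effect, median_distance returns for each candidate G its sum of distances (SOD) to the median set, SOD(G) = GED(G, G_1) + ... + GED(G, G_p), together with the forward node maps; with two medians and pairwise GEDs of 2 and 3, for instance, a candidate's SOD is 5, and the candidate with the smallest SOD serves as the set-median that initializes the IAM iterations in phase 1 above.
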
@@ -228,137 +662,13 @@ def test_iam_with_more_graphs_as_init(Gn, G_candidate, c_ei=3, c_er=3, c_es=1,
     # list of edit operations.
     pi_p_forward = pi_all_forward[idx_min]
     pi_p_backward = pi_all_backward[idx_min]
-    # phase 2: iteration.
-    ds_attrs = get_dataset_attributes(Gn + [G], attr_names=['edge_labeled', 'node_attr_dim'],
-                                      edge_label=edge_label)
-    label_set = get_node_labels(Gn + [G], node_label)
-    for itr in range(0, 10): # @todo: the convergence condition?
-        G_new = G.copy()
-        # update vertex labels.
-        # pre-compute h_i0 for each label.
-#        for label in get_node_labels(Gn, node_label):
-#            print(label)
-#        for nd in G.nodes(data=True):
-#            pass
-        if not ds_attrs['node_attr_dim']: # labels are symbolic
-            for nd in G.nodes():
-                h_i0_list = []
-                label_list = []
-                for label in label_set:
-                    h_i0 = 0
-                    for idx, g in enumerate(Gn):
-                        pi_i = pi_p_forward[idx][nd]
-                        if g.has_node(pi_i) and g.nodes[pi_i][node_label] == label:
-                            h_i0 += 1
-                    h_i0_list.append(h_i0)
-                    label_list.append(label)
-                # choose one of the best randomly.
-                idx_max = np.argwhere(h_i0_list == np.max(h_i0_list)).flatten().tolist()
-                idx_rdm = random.randint(0, len(idx_max) - 1)
-                G_new.nodes[nd][node_label] = label_list[idx_max[idx_rdm]]
-        else: # labels are non-symbolic
-            for nd in G.nodes():
-                Si_norm = 0
-                phi_i_bar = np.array([0.0 for _ in range(ds_attrs['node_attr_dim'])])
-                for idx, g in enumerate(Gn):
-                    pi_i = pi_p_forward[idx][nd]
-                    if g.has_node(pi_i): #@todo: what if no g has node? phi_i_bar = 0?
-                        Si_norm += 1
-                        phi_i_bar += np.array([float(itm) for itm in g.nodes[pi_i]['attributes']])
-                phi_i_bar /= Si_norm
-                G_new.nodes[nd]['attributes'] = phi_i_bar
-        # update edge labels and adjacency matrix.
-        if ds_attrs['edge_labeled']:
-            for nd1, nd2, _ in G.edges(data=True):
-                h_ij0_list = []
-                label_list = []
-                for label in get_edge_labels(Gn, edge_label):
-                    h_ij0 = 0
-                    for idx, g in enumerate(Gn):
-                        pi_i = pi_p_forward[idx][nd1]
-                        pi_j = pi_p_forward[idx][nd2]
-                        h_ij0_p = (g.has_node(pi_i) and g.has_node(pi_j) and
-                                   g.has_edge(pi_i, pi_j) and
-                                   g.edges[pi_i, pi_j][edge_label] == label)
-                        h_ij0 += h_ij0_p
-                    h_ij0_list.append(h_ij0)
-                    label_list.append(label)
-                # choose one of the best randomly.
-                idx_max = np.argwhere(h_ij0_list == np.max(h_ij0_list)).flatten().tolist()
-                h_ij0_max = h_ij0_list[idx_max[0]]
-                idx_rdm = random.randint(0, len(idx_max) - 1)
-                best_label = label_list[idx_max[idx_rdm]]
-                # check whether a_ij is 0 or 1.
-                sij_norm = 0
-                for idx, g in enumerate(Gn):
-                    pi_i = pi_p_forward[idx][nd1]
-                    pi_j = pi_p_forward[idx][nd2]
-                    if g.has_node(pi_i) and g.has_node(pi_j) and g.has_edge(pi_i, pi_j):
-                        sij_norm += 1
-                if h_ij0_max > len(Gn) * c_er / c_es + sij_norm * (1 - (c_er + c_ei) / c_es):
-                    if not G_new.has_edge(nd1, nd2):
-                        G_new.add_edge(nd1, nd2)
-                    G_new.edges[nd1, nd2][edge_label] = best_label
-                else:
-                    if G_new.has_edge(nd1, nd2):
-                        G_new.remove_edge(nd1, nd2)
-        else: # if edges are unlabeled
-            # @todo: works only for undirected graphs.
-            for nd1 in range(nx.number_of_nodes(G)):
-                for nd2 in range(nd1 + 1, nx.number_of_nodes(G)):
-                    sij_norm = 0
-                    for idx, g in enumerate(Gn):
-                        pi_i = pi_p_forward[idx][nd1]
-                        pi_j = pi_p_forward[idx][nd2]
-                        if g.has_node(pi_i) and g.has_node(pi_j) and g.has_edge(pi_i, pi_j):
-                            sij_norm += 1
-                    if sij_norm > len(Gn) * c_er / (c_er + c_ei):
-                        if not G_new.has_edge(nd1, nd2):
-                            G_new.add_edge(nd1, nd2)
-                    elif sij_norm < len(Gn) * c_er / (c_er + c_ei):
-                        if G_new.has_edge(nd1, nd2):
-                            G_new.remove_edge(nd1, nd2)
-                    # do not change anything when equal.
-        G = G_new.copy()
-        # update pi_p
-        pi_p_forward = []
-        for G_p in Gn:
-            dist_tmp, pi_tmp_forward, pi_tmp_backward = GED(G, G_p)
-            pi_p_forward.append(pi_tmp_forward)
-    return G
-
-
-def test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations(
-        Gn_median, Gn_candidate, c_ei=3, c_er=3, c_es=1, node_label='atom',
-        edge_label='bond_type', connected=False):
-    """See my name, then you know what I do.
-    """
-    from tqdm import tqdm
-#    Gn_median = Gn_median[0:10]
-#    Gn_median = [nx.convert_node_labels_to_integers(g) for g in Gn_median]
-    node_ir = np.inf # corresponding to the node remove and insertion.
-    label_r = 'thanksdanny' # the label for node remove. # @todo: make this label unrepeatable.
-    ds_attrs = get_dataset_attributes(Gn_median + Gn_candidate,
-                                      attr_names=['edge_labeled', 'node_attr_dim', 'edge_attr_dim'],
+    # phase 2: iteration.
+    ds_attrs = get_dataset_attributes(Gn + [G], attr_names=['edge_labeled', 'node_attr_dim'],
                                       edge_label=edge_label)
-    ite_max = 50
-    epsilon = 0.001
-    def generate_graph(G, pi_p_forward, label_set):
-        G_new_list = [G.copy()] # all "best" graphs generated in this iteration.
-#        nx.draw_networkx(G)
-#        import matplotlib.pyplot as plt
-#        plt.show()
-#        print(pi_p_forward)
+    label_set = get_node_labels(Gn + [G], node_label)
+    for itr in range(0, 10): # @todo: the convergence condition?
+        G_new = G.copy()
         # update vertex labels.
         # pre-compute h_i0 for each label.
 #        for label in get_node_labels(Gn, node_label):
| @@ -366,65 +676,41 @@ def test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations( | |||||
| # for nd in G.nodes(data=True): | # for nd in G.nodes(data=True): | ||||
| # pass | # pass | ||||
| if not ds_attrs['node_attr_dim']: # labels are symbolic | if not ds_attrs['node_attr_dim']: # labels are symbolic | ||||
| for ndi, (nd, _) in enumerate(G.nodes(data=True)): | |||||
| for nd in G.nodes(): | |||||
| h_i0_list = [] | h_i0_list = [] | ||||
| label_list = [] | label_list = [] | ||||
| for label in label_set: | for label in label_set: | ||||
| h_i0 = 0 | h_i0 = 0 | ||||
| for idx, g in enumerate(Gn_median): | |||||
| pi_i = pi_p_forward[idx][ndi] | |||||
| if pi_i != node_ir and g.nodes[pi_i][node_label] == label: | |||||
| for idx, g in enumerate(Gn): | |||||
| pi_i = pi_p_forward[idx][nd] | |||||
| if g.has_node(pi_i) and g.nodes[pi_i][node_label] == label: | |||||
| h_i0 += 1 | h_i0 += 1 | ||||
| h_i0_list.append(h_i0) | h_i0_list.append(h_i0) | ||||
| label_list.append(label) | label_list.append(label) | ||||
| # case when the node is to be removed. | |||||
| h_i0_remove = 0 | |||||
| for idx, g in enumerate(Gn_median): | |||||
| pi_i = pi_p_forward[idx][ndi] | |||||
| if pi_i == node_ir: | |||||
| h_i0_remove += 1 | |||||
| h_i0_list.append(h_i0_remove) | |||||
| label_list.append(label_r) | |||||
| # get the best labels. | |||||
| # choose one of the best randomly. | |||||
| idx_max = np.argwhere(h_i0_list == np.max(h_i0_list)).flatten().tolist() | idx_max = np.argwhere(h_i0_list == np.max(h_i0_list)).flatten().tolist() | ||||
| nlabel_best = [label_list[idx] for idx in idx_max] | |||||
| # generate "best" graphs with regard to "best" node labels. | |||||
| G_new_list_nd = [] | |||||
| for g in G_new_list: # @todo: seems it can be simplified. The G_new_list will only contain 1 graph for now. | |||||
| for nl in nlabel_best: | |||||
| g_tmp = g.copy() | |||||
| if nl == label_r: | |||||
| g_tmp.remove_node(nd) | |||||
| else: | |||||
| g_tmp.nodes[nd][node_label] = nl | |||||
| G_new_list_nd.append(g_tmp) | |||||
| # nx.draw_networkx(g_tmp) | |||||
| # import matplotlib.pyplot as plt | |||||
| # plt.show() | |||||
| # print(g_tmp.nodes(data=True)) | |||||
| # print(g_tmp.edges(data=True)) | |||||
| G_new_list = G_new_list_nd[:] | |||||
| idx_rdm = random.randint(0, len(idx_max) - 1) | |||||
| G_new.nodes[nd][node_label] = label_list[idx_max[idx_rdm]] | |||||
| else: # labels are non-symbolic | else: # labels are non-symbolic | ||||
| for ndi, (nd, _) in enumerate(G.nodes(data=True)): | |||||
| for nd in G.nodes(): | |||||
| Si_norm = 0 | Si_norm = 0 | ||||
| phi_i_bar = np.array([0.0 for _ in range(ds_attrs['node_attr_dim'])]) | phi_i_bar = np.array([0.0 for _ in range(ds_attrs['node_attr_dim'])]) | ||||
| for idx, g in enumerate(Gn_median): | |||||
| pi_i = pi_p_forward[idx][ndi] | |||||
| for idx, g in enumerate(Gn): | |||||
| pi_i = pi_p_forward[idx][nd] | |||||
| if g.has_node(pi_i): # @todo: what if no graph has this node? phi_i_bar = 0? | if g.has_node(pi_i): # @todo: what if no graph has this node? phi_i_bar = 0? | ||||
| Si_norm += 1 | Si_norm += 1 | ||||
| phi_i_bar += np.array([float(itm) for itm in g.nodes[pi_i]['attributes']]) | phi_i_bar += np.array([float(itm) for itm in g.nodes[pi_i]['attributes']]) | ||||
| phi_i_bar /= Si_norm | phi_i_bar /= Si_norm | ||||
| G_new_list[0].nodes[nd]['attributes'] = phi_i_bar | |||||
| G_new.nodes[nd]['attributes'] = phi_i_bar | |||||
| # update edge labels and adjacency matrix. | # update edge labels and adjacency matrix. | ||||
| if ds_attrs['edge_labeled']: | if ds_attrs['edge_labeled']: | ||||
| for nd1, nd2, _ in G.edges(data=True): | for nd1, nd2, _ in G.edges(data=True): | ||||
| h_ij0_list = [] | h_ij0_list = [] | ||||
| label_list = [] | label_list = [] | ||||
| for label in get_edge_labels(Gn_median, edge_label): | |||||
| for label in get_edge_labels(Gn, edge_label): | |||||
| h_ij0 = 0 | h_ij0 = 0 | ||||
| for idx, g in enumerate(Gn_median): | |||||
| for idx, g in enumerate(Gn): | |||||
| pi_i = pi_p_forward[idx][nd1] | pi_i = pi_p_forward[idx][nd1] | ||||
| pi_j = pi_p_forward[idx][nd2] | pi_j = pi_p_forward[idx][nd2] | ||||
| h_ij0_p = (g.has_node(pi_i) and g.has_node(pi_j) and | h_ij0_p = (g.has_node(pi_i) and g.has_node(pi_j) and | ||||
| @@ -441,12 +727,12 @@ def test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations( | |||||
| # check whether a_ij is 0 or 1. | # check whether a_ij is 0 or 1. | ||||
| sij_norm = 0 | sij_norm = 0 | ||||
| for idx, g in enumerate(Gn_median): | |||||
| for idx, g in enumerate(Gn): | |||||
| pi_i = pi_p_forward[idx][nd1] | pi_i = pi_p_forward[idx][nd1] | ||||
| pi_j = pi_p_forward[idx][nd2] | pi_j = pi_p_forward[idx][nd2] | ||||
| if g.has_node(pi_i) and g.has_node(pi_j) and g.has_edge(pi_i, pi_j): | if g.has_node(pi_i) and g.has_node(pi_j) and g.has_edge(pi_i, pi_j): | ||||
| sij_norm += 1 | sij_norm += 1 | ||||
| if h_ij0_max > len(Gn_median) * c_er / c_es + sij_norm * (1 - (c_er + c_ei) / c_es): | |||||
| if h_ij0_max > len(Gn) * c_er / c_es + sij_norm * (1 - (c_er + c_ei) / c_es): | |||||
| if not G_new.has_edge(nd1, nd2): | if not G_new.has_edge(nd1, nd2): | ||||
| G_new.add_edge(nd1, nd2) | G_new.add_edge(nd1, nd2) | ||||
| G_new.edges[nd1, nd2][edge_label] = best_label | G_new.edges[nd1, nd2][edge_label] = best_label | ||||
| @@ -455,197 +741,36 @@ def test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations( | |||||
| G_new.remove_edge(nd1, nd2) | G_new.remove_edge(nd1, nd2) | ||||
| else: # if edges are unlabeled | else: # if edges are unlabeled | ||||
| # @todo: works only for undirected graphs. | # @todo: works only for undirected graphs. | ||||
| nd_list = [n for n in G.nodes()] | |||||
| for g_tmp in G_new_list: | |||||
| for nd1i in range(nx.number_of_nodes(G)): | |||||
| nd1 = nd_list[nd1i] | |||||
| for nd2i in range(nd1i + 1, nx.number_of_nodes(G)): | |||||
| nd2 = nd_list[nd2i] | |||||
| sij_norm = 0 | |||||
| for idx, g in enumerate(Gn_median): | |||||
| pi_i = pi_p_forward[idx][nd1i] | |||||
| pi_j = pi_p_forward[idx][nd2i] | |||||
| if g.has_node(pi_i) and g.has_node(pi_j) and g.has_edge(pi_i, pi_j): | |||||
| sij_norm += 1 | |||||
| if sij_norm > len(Gn_median) * c_er / (c_er + c_ei): | |||||
| # @todo: should we check whether nd1 and nd2 are in g_tmp, | |||||
| # or just add the edge anyway? | |||||
| if g_tmp.has_node(nd1) and g_tmp.has_node(nd2) \ | |||||
| and not g_tmp.has_edge(nd1, nd2): | |||||
| g_tmp.add_edge(nd1, nd2) | |||||
| elif sij_norm < len(Gn_median) * c_er / (c_er + c_ei): | |||||
| if g_tmp.has_edge(nd1, nd2): | |||||
| g_tmp.remove_edge(nd1, nd2) | |||||
| # do not change anything when equal. | |||||
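| # A worked example of the rule above (assuming c_ei = c_er = 1 and 10 median | |||||
| # graphs): the threshold len(Gn_median) * c_er / (c_er + c_ei) = 10 / 2 = 5, | |||||
| # so an edge is added when more than 5 median graphs connect the mapped node | |||||
| # pair, removed when fewer than 5 do, and left unchanged at exactly 5. | |||||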
| # # find the best graph generated in this iteration and update pi_p. | |||||
| # @todo: should we update all graphs generated or just the best ones? | |||||
| dis_list, pi_forward_list = median_distance(G_new_list, Gn_median) | |||||
| # @todo: should we remove the duplicate and connectivity checks? | |||||
| # It is not clear which option is faster. | |||||
| if ds_attrs['node_attr_dim'] == 0 and ds_attrs['edge_attr_dim'] == 0: | |||||
| G_new_list, idx_list = remove_duplicates(G_new_list) | |||||
| pi_forward_list = [pi_forward_list[idx] for idx in idx_list] | |||||
| dis_list = [dis_list[idx] for idx in idx_list] | |||||
| # if connected == True: | |||||
| # G_new_list, idx_list = remove_disconnected(G_new_list) | |||||
| # pi_forward_list = [pi_forward_list[idx] for idx in idx_list] | |||||
| # idx_min_list = np.argwhere(dis_list == np.min(dis_list)).flatten().tolist() | |||||
| # dis_min = dis_list[idx_min_tmp_list[0]] | |||||
| # pi_forward_list = [pi_forward_list[idx] for idx in idx_min_list] | |||||
| # G_new_list = [G_new_list[idx] for idx in idx_min_list] | |||||
| # for g in G_new_list: | |||||
| # import matplotlib.pyplot as plt | |||||
| # nx.draw_networkx(g) | |||||
| # plt.show() | |||||
| # print(g.nodes(data=True)) | |||||
| # print(g.edges(data=True)) | |||||
| return G_new_list, pi_forward_list, dis_list | |||||
| def best_median_graphs(Gn_candidate, pi_all_forward, dis_all): | |||||
| idx_min_list = np.argwhere(dis_all == np.min(dis_all)).flatten().tolist() | |||||
| dis_min = dis_all[idx_min_list[0]] | |||||
| pi_forward_min_list = [pi_all_forward[idx] for idx in idx_min_list] | |||||
| G_min_list = [Gn_candidate[idx] for idx in idx_min_list] | |||||
| return G_min_list, pi_forward_min_list, dis_min | |||||
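| # Note: np.argwhere keeps every index attaining the minimum distance, so all | |||||
| # tied best graphs (with their forward mappings) are returned, not just one. | |||||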
| def iteration_proc(G, pi_p_forward, cur_sod): | |||||
| G_list = [G] | |||||
| pi_forward_list = [pi_p_forward] | |||||
| old_sod = cur_sod * 2 | |||||
| sod_list = [cur_sod] | |||||
| # iterations. | |||||
| itr = 0 | |||||
| while itr < ite_max and np.abs(old_sod - cur_sod) > epsilon: | |||||
| # for itr in range(0, 5): # the convergence condition? | |||||
| print('itr is', itr) | |||||
| G_new_list = [] | |||||
| pi_forward_new_list = [] | |||||
| dis_new_list = [] | |||||
| for idx, G in enumerate(G_list): | |||||
| label_set = get_node_labels(Gn_median + [G], node_label) | |||||
| G_tmp_list, pi_forward_tmp_list, dis_tmp_list = generate_graph( | |||||
| G, pi_forward_list[idx], label_set) | |||||
| G_new_list += G_tmp_list | |||||
| pi_forward_new_list += pi_forward_tmp_list | |||||
| dis_new_list += dis_tmp_list | |||||
| G_list = G_new_list[:] | |||||
| pi_forward_list = pi_forward_new_list[:] | |||||
| dis_list = dis_new_list[:] | |||||
| old_sod = cur_sod | |||||
| cur_sod = np.min(dis_list) | |||||
| sod_list.append(cur_sod) | |||||
| itr += 1 | |||||
| # @todo: do we return all graphs or the best ones? | |||||
| # get the best ones of the generated graphs. | |||||
| G_list, pi_forward_list, dis_min = best_median_graphs( | |||||
| G_list, pi_forward_list, dis_list) | |||||
| for nd1 in range(nx.number_of_nodes(G)): | |||||
| for nd2 in range(nd1 + 1, nx.number_of_nodes(G)): | |||||
| sij_norm = 0 | |||||
| for idx, g in enumerate(Gn): | |||||
| pi_i = pi_p_forward[idx][nd1] | |||||
| pi_j = pi_p_forward[idx][nd2] | |||||
| if g.has_node(pi_i) and g.has_node(pi_j) and g.has_edge(pi_i, pi_j): | |||||
| sij_norm += 1 | |||||
| if sij_norm > len(Gn) * c_er / (c_er + c_ei): | |||||
| if not G_new.has_edge(nd1, nd2): | |||||
| G_new.add_edge(nd1, nd2) | |||||
| elif sij_norm < len(Gn) * c_er / (c_er + c_ei): | |||||
| if G_new.has_edge(nd1, nd2): | |||||
| G_new.remove_edge(nd1, nd2) | |||||
| # do not change anything when equal. | |||||
| G = G_new.copy() | |||||
| if ds_attrs['node_attr_dim'] == 0 and ds_attrs['edge_attr_dim'] == 0: | |||||
| G_list, idx_list = remove_duplicates(G_list) | |||||
| pi_forward_list = [pi_forward_list[idx] for idx in idx_list] | |||||
| # dis_list = [dis_list[idx] for idx in idx_list] | |||||
| # import matplotlib.pyplot as plt | |||||
| # for g in G_list: | |||||
| # nx.draw_networkx(g) | |||||
| # plt.show() | |||||
| # print(g.nodes(data=True)) | |||||
| # print(g.edges(data=True)) | |||||
| print('\nsods:', sod_list, '\n') | |||||
| return G_list, pi_forward_list, dis_min | |||||
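| # iteration_proc stops after ite_max rounds or once the best SOD changes by | |||||
| # at most epsilon between consecutive rounds, then keeps only the graphs | |||||
| # attaining the smallest SOD. | |||||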
| def remove_duplicates(Gn): | |||||
| """Remove duplicate graphs from list. | |||||
| """ | |||||
| Gn_new = [] | |||||
| idx_list = [] | |||||
| for idx, g in enumerate(Gn): | |||||
| dupl = False | |||||
| for g_new in Gn_new: | |||||
| if graph_isIdentical(g_new, g): | |||||
| dupl = True | |||||
| break | |||||
| if not dupl: | |||||
| Gn_new.append(g) | |||||
| idx_list.append(idx) | |||||
| return Gn_new, idx_list | |||||
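| # A hedged illustration of remove_duplicates (hypothetical graphs): given | |||||
| # [g_a, g_b, g_c] where graph_isIdentical(g_a, g_c) is True, it returns | |||||
| # ([g_a, g_b], [0, 1]), keeping the first occurrence of each identical graph. | |||||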
| # update pi_p | |||||
| pi_p_forward = [] | |||||
| for G_p in Gn: | |||||
| dist_tmp, pi_tmp_forward, pi_tmp_backward = GED(G, G_p) | |||||
| pi_p_forward.append(pi_tmp_forward) | |||||
| def remove_disconnected(Gn): | |||||
| """Remove disconnected graphs from list. | |||||
| """ | |||||
| Gn_new = [] | |||||
| idx_list = [] | |||||
| for idx, g in enumerate(Gn): | |||||
| if nx.is_connected(g): | |||||
| Gn_new.append(g) | |||||
| idx_list.append(idx) | |||||
| return Gn_new, idx_list | |||||
| return G | |||||
| ############################################################################### | |||||
| # phase 1: initialize. | |||||
| # compute set-median. | |||||
| dis_min = np.inf | |||||
| dis_list, pi_forward_all = median_distance(Gn_candidate, Gn_median) | |||||
| # find all smallest distances. | |||||
| idx_min_list = np.argwhere(dis_list == np.min(dis_list)).flatten().tolist() | |||||
| dis_min = dis_list[idx_min_list[0]] | |||||
| # phase 2: iteration. | |||||
| G_list = [] | |||||
| dis_list = [] | |||||
| pi_forward_list = [] | |||||
| for idx_min in idx_min_list: | |||||
| # print('idx_min is', idx_min) | |||||
| G = Gn_candidate[idx_min].copy() | |||||
| # list of edit operations. | |||||
| pi_p_forward = pi_forward_all[idx_min] | |||||
| # pi_p_backward = pi_all_backward[idx_min] | |||||
| Gi_list, pi_i_forward_list, dis_i_min = iteration_proc(G, pi_p_forward, dis_min) | |||||
| G_list += Gi_list | |||||
| dis_list.append(dis_i_min) | |||||
| pi_forward_list += pi_i_forward_list | |||||
| if ds_attrs['node_attr_dim'] == 0 and ds_attrs['edge_attr_dim'] == 0: | |||||
| G_list, idx_list = remove_duplicates(G_list) | |||||
| dis_list = [dis_list[idx] for idx in idx_list] | |||||
| pi_forward_list = [pi_forward_list[idx] for idx in idx_list] | |||||
| if connected == True: | |||||
| G_list_con, idx_list = remove_disconnected(G_list) | |||||
| # if there are no connected graphs at all, keep the disconnected ones. | |||||
| if len(G_list_con) > 0: # @todo: ?????????????????????????? | |||||
| G_list = G_list_con | |||||
| dis_list = [dis_list[idx] for idx in idx_list] | |||||
| pi_forward_list = [pi_forward_list[idx] for idx in idx_list] | |||||
| # import matplotlib.pyplot as plt | |||||
| # for g in G_list: | |||||
| # nx.draw_networkx(g) | |||||
| # plt.show() | |||||
| # print(g.nodes(data=True)) | |||||
| # print(g.edges(data=True)) | |||||
| # get the best median graphs | |||||
| # dis_list, pi_forward_list = median_distance(G_list, Gn_median) | |||||
| G_min_list, pi_forward_min_list, dis_min = best_median_graphs( | |||||
| G_list, pi_forward_list, dis_list) | |||||
| # for g in G_min_list: | |||||
| # nx.draw_networkx(g) | |||||
| # plt.show() | |||||
| # print(g.nodes(data=True)) | |||||
| # print(g.edges(data=True)) | |||||
| return G_min_list, dis_min | |||||
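| # A minimal usage sketch (kept commented out; the dataset path, the costs and | |||||
| # the availability of loadDataset here are assumptions, not part of this file): | |||||
| #if __name__ == '__main__': | |||||
| # from pygraph.utils.graphfiles import loadDataset | |||||
| # ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt', | |||||
| # 'extra_params': {}} # node/edge symb | |||||
| # Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params']) | |||||
| # G_min_list, dis_min = test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations( | |||||
| # Gn[0:10], Gn[0:10], c_ei=1.7, c_er=1.7, c_es=1.7) | |||||
| # print(dis_min) | |||||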
| if __name__ == '__main__': | if __name__ == '__main__': | ||||
| @@ -5,10 +5,10 @@ import numpy as np | |||||
| import networkx as nx | import networkx as nx | ||||
| import time | import time | ||||
| #import librariesImport | |||||
| #import script | |||||
| #sys.path.insert(0, "/home/bgauzere/dev/optim-graphes/") | |||||
| #import pygraph | |||||
| import librariesImport | |||||
| import script | |||||
| sys.path.insert(0, "/home/bgauzere/dev/optim-graphes/") | |||||
| import pygraph | |||||
| from pygraph.utils.graphfiles import loadDataset | from pygraph.utils.graphfiles import loadDataset | ||||
| def replace_graph_in_env(script, graph, old_id, label='median'): | def replace_graph_in_env(script, graph, old_id, label='median'): | ||||
| @@ -191,28 +191,28 @@ def compute_median_set(script,listID): | |||||
| return median_set_index, sod | return median_set_index, sod | ||||
| #if __name__ == "__main__": | |||||
| # # Load the dataset | |||||
| # script.PyLoadGXLGraph('/home/bgauzere/dev/gedlib/data/datasets/Letter/HIGH/', '/home/bgauzere/dev/gedlib/data/collections/Letter_Z.xml') | |||||
| # script.PySetEditCost("LETTER") | |||||
| # script.PyInitEnv() | |||||
| # script.PySetMethod("IPFP", "") | |||||
| # script.PyInitMethod() | |||||
| # | |||||
| # dataset,my_y = pygraph.utils.graphfiles.loadDataset("/home/bgauzere/dev/gedlib/data/datasets/Letter/HIGH/Letter_Z.cxl") | |||||
| # | |||||
| # listID = script.PyGetAllGraphIds() | |||||
| # median, sod = compute_median(script,listID,dataset,verbose=True) | |||||
| # | |||||
| # print(sod) | |||||
| # draw_Letter_graph(median) | |||||
| if __name__ == '__main__': | |||||
| # test draw_Letter_graph | |||||
| ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt', | |||||
| 'extra_params': {}} # node nsymb | |||||
| Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params']) | |||||
| print(y_all) | |||||
| for g in Gn: | |||||
| draw_Letter_graph(g) | |||||
| if __name__ == "__main__": | |||||
| # Load the dataset | |||||
| script.PyLoadGXLGraph('/home/bgauzere/dev/gedlib/data/datasets/Letter/HIGH/', '/home/bgauzere/dev/gedlib/data/collections/Letter_Z.xml') | |||||
| script.PySetEditCost("LETTER") | |||||
| script.PyInitEnv() | |||||
| script.PySetMethod("IPFP", "") | |||||
| script.PyInitMethod() | |||||
| dataset,my_y = pygraph.utils.graphfiles.loadDataset("/home/bgauzere/dev/gedlib/data/datasets/Letter/HIGH/Letter_Z.cxl") | |||||
| listID = script.PyGetAllGraphIds() | |||||
| median, sod = compute_median(script,listID,dataset,verbose=True) | |||||
| print(sod) | |||||
| draw_Letter_graph(median) | |||||
| #if __name__ == '__main__': | |||||
| # # test draw_Letter_graph | |||||
| # ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt', | |||||
| # 'extra_params': {}} # node nsymb | |||||
| # Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params']) | |||||
| # print(y_all) | |||||
| # for g in Gn: | |||||
| # draw_Letter_graph(g) | |||||
| @@ -25,14 +25,16 @@ import functools | |||||
| from pygraph.utils.kernels import deltakernel, gaussiankernel, kernelproduct | from pygraph.utils.kernels import deltakernel, gaussiankernel, kernelproduct | ||||
| from pygraph.kernels.structuralspKernel import structuralspkernel | from pygraph.kernels.structuralspKernel import structuralspkernel | ||||
| from gk_iam import dis_gstar | |||||
| def compute_kernel(Gn, graph_kernel, verbose): | def compute_kernel(Gn, graph_kernel, verbose): | ||||
| if graph_kernel == 'marginalizedkernel': | if graph_kernel == 'marginalizedkernel': | ||||
| Kmatrix, _ = marginalizedkernel(Gn, node_label='atom', edge_label=None, | Kmatrix, _ = marginalizedkernel(Gn, node_label='atom', edge_label=None, | ||||
| p_quit=0.03, n_iteration=20, remove_totters=False, | |||||
| p_quit=0.03, n_iteration=10, remove_totters=False, | |||||
| n_jobs=multiprocessing.cpu_count(), verbose=verbose) | n_jobs=multiprocessing.cpu_count(), verbose=verbose) | ||||
| elif graph_kernel == 'untilhpathkernel': | elif graph_kernel == 'untilhpathkernel': | ||||
| Kmatrix, _ = untilhpathkernel(Gn, node_label='atom', edge_label='bond_type', | |||||
| Kmatrix, _ = untilhpathkernel(Gn, node_label='atom', edge_label=None, | |||||
| depth=10, k_func='MinMax', compute_method='trie', | depth=10, k_func='MinMax', compute_method='trie', | ||||
| n_jobs=multiprocessing.cpu_count(), verbose=verbose) | n_jobs=multiprocessing.cpu_count(), verbose=verbose) | ||||
| elif graph_kernel == 'spkernel': | elif graph_kernel == 'spkernel': | ||||
| @@ -47,34 +49,167 @@ def compute_kernel(Gn, graph_kernel, verbose): | |||||
| n_jobs=multiprocessing.cpu_count(), verbose=verbose) | n_jobs=multiprocessing.cpu_count(), verbose=verbose) | ||||
| # normalization | # normalization | ||||
| # Kmatrix_diag = Kmatrix.diagonal().copy() | |||||
| # for i in range(len(Kmatrix)): | |||||
| # for j in range(i, len(Kmatrix)): | |||||
| # Kmatrix[i][j] /= np.sqrt(Kmatrix_diag[i] * Kmatrix_diag[j]) | |||||
| # Kmatrix[j][i] = Kmatrix[i][j] | |||||
| Kmatrix_diag = Kmatrix.diagonal().copy() | |||||
| for i in range(len(Kmatrix)): | |||||
| for j in range(i, len(Kmatrix)): | |||||
| Kmatrix[i][j] /= np.sqrt(Kmatrix_diag[i] * Kmatrix_diag[j]) | |||||
| Kmatrix[j][i] = Kmatrix[i][j] | |||||
| return Kmatrix | return Kmatrix | ||||
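| # The loop above produces the cosine-normalized Gram matrix | |||||
| # K'[i, j] = K[i, j] / sqrt(K[i, i] * K[j, j]); a vectorized sketch of the | |||||
| # same normalization (not used elsewhere, shown for clarity): | |||||
| #def normalize_gram(K): | |||||
| # d = np.sqrt(np.diag(K)) | |||||
| # return K / np.outer(d, d) | |||||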
| def random_preimage(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max, l, gkernel): | |||||
| Gn_init = [nx.convert_node_labels_to_integers(g) for g in Gn_init] | |||||
| # compute k nearest neighbors of \psi in DN. | |||||
| dis_list = [] # distance between g_star and each graph. | |||||
| term3 = 0 | |||||
| for i1, a1 in enumerate(alpha): | |||||
| for i2, a2 in enumerate(alpha): | |||||
| term3 += a1 * a2 * Kmatrix[idx_gi[i1], idx_gi[i2]] | |||||
| for ig, g in tqdm(enumerate(Gn_init), desc='computing distances', file=sys.stdout): | |||||
| dtemp = dis_gstar(ig, idx_gi, alpha, Kmatrix, term3=term3) | |||||
| dis_list.append(dtemp) | |||||
| # print(np.max(dis_list)) | |||||
| # print(np.min(dis_list)) | |||||
| # print(np.min([item for item in dis_list if item != 0])) | |||||
| # print(np.mean(dis_list)) | |||||
| # sort | |||||
| sort_idx = np.argsort(dis_list) | |||||
| dis_gs = [dis_list[idis] for idis in sort_idx[0:k]] # the k shortest distances | |||||
| nb_best = len(np.argwhere(dis_gs == dis_gs[0]).flatten().tolist()) | |||||
| g0hat_list = [Gn_init[idx] for idx in sort_idx[0:nb_best]] # the nearest neighbors of \psi in DN | |||||
| if dis_gs[0] == 0: # the exact pre-image. | |||||
| print('The exact pre-image is found from the input dataset.') | |||||
| return 0, g0hat_list[0], 0 | |||||
| dhat = dis_gs[0] # the nearest distance | |||||
| # ghat_list = [g.copy() for g in g0hat_list] | |||||
| # for g in ghat_list: | |||||
| # draw_Letter_graph(g) | |||||
| # nx.draw_networkx(g) | |||||
| # plt.show() | |||||
| # print(g.nodes(data=True)) | |||||
| # print(g.edges(data=True)) | |||||
| Gk = [Gn_init[ig].copy() for ig in sort_idx[0:k]] # the k nearest neighbors | |||||
| # for gi in Gk: | |||||
| ## nx.draw_networkx(gi) | |||||
| ## plt.show() | |||||
| # draw_Letter_graph(g) | |||||
| # print(gi.nodes(data=True)) | |||||
| # print(gi.edges(data=True)) | |||||
| Gs_nearest = [g.copy() for g in Gk] | |||||
| gihat_list = [] | |||||
| dihat_list = [] | |||||
| # i = 1 | |||||
| r = 0 | |||||
| # sod_list = [dhat] | |||||
| # found = False | |||||
| nb_updated = 0 | |||||
| g_best = [] | |||||
| while r < r_max: | |||||
| print('\nr =', r) | |||||
| print('nb of updates so far =', nb_updated, '\n') | |||||
| found = False | |||||
| dis_bests = dis_gs + dihat_list | |||||
| # @todo: what if the log is negative? how to choose alpha (scalar)? | |||||
| fdgs_list = np.array(dis_bests) | |||||
| if np.min(fdgs_list) < 1: | |||||
| fdgs_list /= np.min(dis_bests) | |||||
| fdgs_list = [int(item) for item in np.ceil(np.log(fdgs_list))] | |||||
| if np.min(fdgs_list) < 1: | |||||
| fdgs_list = np.array(fdgs_list) + 1 | |||||
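| # Worked example of the scaling above: dis_bests = [0.5, 2.0] scales to | |||||
| # [1.0, 4.0], ceil(log(.)) gives [0, 2], and the final shift yields [1, 3], | |||||
| # so every candidate graph receives at least one random edit operation. | |||||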
| for ig, gs in enumerate(Gs_nearest + gihat_list): | |||||
| # nx.draw_networkx(gs) | |||||
| # plt.show() | |||||
| for trial in range(0, l): | |||||
| # for trial in tqdm(range(0, l), desc='l loops', file=sys.stdout): | |||||
| # add and delete edges. | |||||
| gtemp = gs.copy() | |||||
| np.random.seed() | |||||
| # which edges to change. | |||||
| # @todo: should we use just half of the adjacency matrix for undirected graphs? | |||||
| nb_vpairs = nx.number_of_nodes(gs) * (nx.number_of_nodes(gs) - 1) | |||||
| # @todo: what if fdgs is bigger than nb_vpairs? | |||||
| idx_change = random.sample(range(nb_vpairs), fdgs_list[ig] if | |||||
| fdgs_list[ig] < nb_vpairs else nb_vpairs) | |||||
| # idx_change = np.random.randint(0, nx.number_of_nodes(gs) * | |||||
| # (nx.number_of_nodes(gs) - 1), fdgs) | |||||
| for item in idx_change: | |||||
| node1 = int(item / (nx.number_of_nodes(gs) - 1)) | |||||
| node2 = (item - node1 * (nx.number_of_nodes(gs) - 1)) | |||||
| if node2 >= node1: # skip the self pair. | |||||
| node2 += 1 | |||||
| # @todo: is the randomness correct? | |||||
| if not gtemp.has_edge(node1, node2): | |||||
| gtemp.add_edge(node1, node2) | |||||
| # nx.draw_networkx(gs) | |||||
| # plt.show() | |||||
| # nx.draw_networkx(gtemp) | |||||
| # plt.show() | |||||
| else: | |||||
| gtemp.remove_edge(node1, node2) | |||||
| # nx.draw_networkx(gs) | |||||
| # plt.show() | |||||
| # nx.draw_networkx(gtemp) | |||||
| # plt.show() | |||||
| # nx.draw_networkx(gtemp) | |||||
| # plt.show() | |||||
| # compute distance between \psi and the new generated graph. | |||||
| # knew = marginalizedkernel([gtemp, g1, g2], node_label='atom', edge_label=None, | |||||
| # p_quit=lmbda, n_iteration=20, remove_totters=False, | |||||
| # n_jobs=multiprocessing.cpu_count(), verbose=False) | |||||
| knew = compute_kernel([gtemp] + Gn_median, gkernel, verbose=False) | |||||
| dnew = dis_gstar(0, [1, 2], alpha, knew, withterm3=False) | |||||
| if dnew <= dhat: # @todo: accept only strictly smaller distances, or equal ones too? | |||||
| if dnew < dhat: | |||||
| print('\nI am smaller!') | |||||
| print('ig =', str(ig), ', trial =', str(trial)) | |||||
| print(dhat, '->', dnew) | |||||
| nb_updated += 1 | |||||
| elif dnew == dhat: | |||||
| print('I am equal!') | |||||
| # nx.draw_networkx(gtemp) | |||||
| # plt.show() | |||||
| # print(gtemp.nodes(data=True)) | |||||
| # print(gtemp.edges(data=True)) | |||||
| dhat = dnew | |||||
| gnew = gtemp.copy() | |||||
| found = True # found better graph. | |||||
| if found: | |||||
| r = 0 | |||||
| gihat_list = [gnew] | |||||
| dihat_list = [dhat] | |||||
| else: | |||||
| r += 1 | |||||
| # dis_best.append(dhat) | |||||
| g_best = (g0hat_list[0] if len(gihat_list) == 0 else gihat_list[0]) | |||||
| return dhat, g_best, nb_updated | |||||
| # return 0, 0, 0 | |||||
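| # The vertex-pair encoding used above maps an index in [0, n * (n - 1)) to an | |||||
| # ordered pair of distinct nodes; a standalone sketch of the same decoding: | |||||
| #def decode_vpair(item, n): | |||||
| # node1 = item // (n - 1) | |||||
| # node2 = item - node1 * (n - 1) | |||||
| # if node2 >= node1: # skip the self pair. | |||||
| # node2 += 1 | |||||
| # return node1, node2 | |||||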
| if __name__ == '__main__': | if __name__ == '__main__': | ||||
| # ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt', | |||||
| # 'extra_params': {}} # node/edge symb | |||||
| # ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt', | |||||
| # 'extra_params': {}} # node nsymb | |||||
| # ds = {'name': 'Acyclic', 'dataset': '../datasets/monoterpenoides/trainset_9.ds', | |||||
| # 'extra_params': {}} | |||||
| ds = {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds', | |||||
| 'extra_params': {}} # node symb | |||||
| # ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt', | |||||
| # 'extra_params': {}} # node/edge symb | |||||
| ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt', | |||||
| 'extra_params': {}} # node nsymb | |||||
| # ds = {'name': 'Acyclic', 'dataset': '../datasets/monoterpenoides/trainset_9.ds', | |||||
| # 'extra_params': {}} | |||||
| # ds = {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds', | |||||
| # 'extra_params': {}} # node symb | |||||
| DN, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params']) | DN, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params']) | ||||
| #DN = DN[0:10] | #DN = DN[0:10] | ||||
| lmbda = 0.03 # termination probability | lmbda = 0.03 # termination probability | ||||
| r_max = 10 # recursions | |||||
| r_max = 3 # 10 # iteration limit. | |||||
| l = 500 | l = 500 | ||||
| alpha_range = np.linspace(0.5, 0.5, 1) | alpha_range = np.linspace(0.5, 0.5, 1) | ||||
| #alpha_range = np.linspace(0.1, 0.9, 9) | #alpha_range = np.linspace(0.1, 0.9, 9) | ||||
| k = 5 # k nearest neighbors | |||||
| k = 10 # 5 # k nearest neighbors | |||||
| # randomly select two molecules | # randomly select two molecules | ||||
| #np.random.seed(1) | #np.random.seed(1) | ||||
| @@ -245,6 +245,9 @@ def test_remove_bests(Gn, gkernel): | |||||
| print(g.edges(data=True)) | print(g.edges(data=True)) | ||||
| ############################################################################### | |||||
| # Tests on dataset Letter-H. | |||||
| def test_gkiam_letter_h(): | def test_gkiam_letter_h(): | ||||
| from gk_iam import gk_iam_nearest_multi, compute_kernel | from gk_iam import gk_iam_nearest_multi, compute_kernel | ||||
| from iam import median_distance | from iam import median_distance | ||||
| @@ -263,8 +266,10 @@ def test_gkiam_letter_h(): | |||||
| # classify graphs according to letters. | # classify graphs according to letters. | ||||
| idx_dict = get_same_item_indices(y_all) | idx_dict = get_same_item_indices(y_all) | ||||
| time_list = [] | time_list = [] | ||||
| sod_list = [] | |||||
| sod_min_list = [] | |||||
| sod_ks_min_list = [] | |||||
| sod_gs_list = [] | |||||
| sod_gs_min_list = [] | |||||
| nb_updated_list = [] | |||||
| for letter in idx_dict: | for letter in idx_dict: | ||||
| print('\n-------------------------------------------------------\n') | print('\n-------------------------------------------------------\n') | ||||
| Gn_let = [Gn[i].copy() for i in idx_dict[letter]] | Gn_let = [Gn[i].copy() for i in idx_dict[letter]] | ||||
| @@ -280,10 +285,10 @@ def test_gkiam_letter_h(): | |||||
| # for each alpha | # for each alpha | ||||
| for alpha in alpha_range: | for alpha in alpha_range: | ||||
| print('alpha =', alpha) | print('alpha =', alpha) | ||||
| dhat, ghat_list = gk_iam_nearest_multi(Gn_let, Gn_let, [alpha] * len(Gn_let), | |||||
| range(len(Gn_let), len(Gn_mix)), km, | |||||
| k, r_max, gkernel, c_ei=1.7, | |||||
| c_er=1.7, c_es=1.7) | |||||
| dhat, ghat_list, sod_ks, nb_updated = gk_iam_nearest_multi(Gn_let, | |||||
| Gn_let, [alpha] * len(Gn_let), range(len(Gn_let), len(Gn_mix)), | |||||
| km, k, r_max, gkernel, c_ei=1.7, c_er=1.7, c_es=1.7, | |||||
| ged_cost='LETTER', ged_method='IPFP', saveGXL='gedlib-letter') | |||||
| dis_best.append(dhat) | dis_best.append(dhat) | ||||
| g_best.append(ghat_list) | g_best.append(ghat_list) | ||||
| time_list.append(time.time() - time0) | time_list.append(time.time() - time0) | ||||
| @@ -300,13 +305,18 @@ def test_gkiam_letter_h(): | |||||
| print(g.edges(data=True)) | print(g.edges(data=True)) | ||||
| # compute the corresponding sod in graph space. (alpha range not considered.) | # compute the corresponding sod in graph space. (alpha range not considered.) | ||||
| sod_tmp, _ = median_distance(g_best[0], Gn_let) | |||||
| sod_list.append(sod_tmp) | |||||
| sod_min_list.append(np.min(sod_tmp)) | |||||
| sod_tmp, _ = median_distance(g_best[0], Gn_let, ged_cost='LETTER', | |||||
| ged_method='IPFP', saveGXL='gedlib-letter') | |||||
| sod_gs_list.append(sod_tmp) | |||||
| sod_gs_min_list.append(np.min(sod_tmp)) | |||||
| sod_ks_min_list.append(sod_ks) | |||||
| nb_updated_list.append(nb_updated) | |||||
| print('\nsods in graph space: ', sod_list) | |||||
| print('\nsmallest sod in graph space for each letter: ', sod_min_list) | |||||
| print('\nsods in graph space: ', sod_gs_list) | |||||
| print('\nsmallest sod in graph space for each letter: ', sod_gs_min_list) | |||||
| print('\nsmallest sod in kernel space for each letter: ', sod_ks_min_list) | |||||
| print('\nnumber of updates for each letter: ', nb_updated_list) | |||||
| print('\ntimes:', time_list) | print('\ntimes:', time_list) | ||||
| @@ -356,7 +366,8 @@ def test_iam_letter_h(): | |||||
| for alpha in alpha_range: | for alpha in alpha_range: | ||||
| print('alpha =', alpha) | print('alpha =', alpha) | ||||
| ghat_list, dhat = test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations( | ghat_list, dhat = test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations( | ||||
| Gn_let, Gn_let, c_ei=1.7, c_er=1.7, c_es=1.7) | |||||
| Gn_let, Gn_let, c_ei=1.7, c_er=1.7, c_es=1.7, | |||||
| ged_cost='LETTER', ged_method='IPFP', saveGXL='gedlib-letter') | |||||
| dis_best.append(dhat) | dis_best.append(dhat) | ||||
| g_best.append(ghat_list) | g_best.append(ghat_list) | ||||
| time_list.append(time.time() - time0) | time_list.append(time.time() - time0) | ||||
| @@ -388,18 +399,283 @@ def test_iam_letter_h(): | |||||
| print('\nsods in kernel space: ', sod_list) | print('\nsods in kernel space: ', sod_list) | ||||
| print('\nsmallest sod in kernel space for each letter: ', sod_min_list) | print('\nsmallest sod in kernel space for each letter: ', sod_min_list) | ||||
| print('\ntimes:', time_list) | print('\ntimes:', time_list) | ||||
| def test_random_preimage_letter_h(): | |||||
| from preimage import random_preimage, compute_kernel | |||||
| from iam import median_distance | |||||
| ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt', | |||||
| 'extra_params': {}} # node nsymb | |||||
| # ds = {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt', | |||||
| # 'extra_params': {}} # node nsymb | |||||
| # ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt', | |||||
| # 'extra_params': {}} # node/edge symb | |||||
| # ds = {'name': 'Acyclic', 'dataset': '../datasets/monoterpenoides/trainset_9.ds', | |||||
| # 'extra_params': {}} | |||||
| # ds = {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds', | |||||
| # 'extra_params': {}} # node symb | |||||
| Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params']) | |||||
| gkernel = 'structuralspkernel' | |||||
| # lmbda = 0.03 # termination probalility | |||||
| r_max = 3 # 10 # recursions | |||||
| l = 500 | |||||
| # alpha_range = np.linspace(0.5, 0.5, 1) | |||||
| #alpha_range = np.linspace(0.1, 0.9, 9) | |||||
| k = 10 # 5 # k nearest neighbors | |||||
| # classify graphs according to letters. | |||||
| idx_dict = get_same_item_indices(y_all) | |||||
| time_list = [] | |||||
| sod_list = [] | |||||
| sod_min_list = [] | |||||
| for letter in idx_dict: | |||||
| print('\n-------------------------------------------------------\n') | |||||
| Gn_let = [Gn[i].copy() for i in idx_dict[letter]] | |||||
| Gn_mix = Gn_let + [g.copy() for g in Gn_let] | |||||
| alpha_range = np.linspace(1 / len(Gn_let), 1 / len(Gn_let), 1) | |||||
| # compute | |||||
| time0 = time.time() | |||||
| km = compute_kernel(Gn_mix, gkernel, True) | |||||
| g_best = [] | |||||
| dis_best = [] | |||||
| # for each alpha | |||||
| for alpha in alpha_range: | |||||
| print('alpha =', alpha) | |||||
| dhat, ghat, nb_updated = random_preimage(Gn_let, Gn_let, [alpha] * len(Gn_let), | |||||
| range(len(Gn_let), len(Gn_mix)), km, | |||||
| k, r_max, l, gkernel) | |||||
| dis_best.append(dhat) | |||||
| g_best.append([ghat]) | |||||
| time_list.append(time.time() - time0) | |||||
| # show best graphs and save them to file. | |||||
| for idx, item in enumerate(alpha_range): | |||||
| print('when alpha is', item, 'the shortest distance is', dis_best[idx]) | |||||
| print('the corresponding pre-images are') | |||||
| for g in g_best[idx]: | |||||
| draw_Letter_graph(g, savepath='results/gk_iam/') | |||||
| # nx.draw_networkx(g) | |||||
| # plt.show() | |||||
| print(g.nodes(data=True)) | |||||
| print(g.edges(data=True)) | |||||
| # compute the corresponding sod in graph space. (alpha range not considered.) | |||||
| sod_tmp, _ = median_distance(g_best[0], Gn_let) | |||||
| sod_list.append(sod_tmp) | |||||
| sod_min_list.append(np.min(sod_tmp)) | |||||
| print('\nsods in graph space: ', sod_list) | |||||
| print('\nsmallest sod in graph space for each letter: ', sod_min_list) | |||||
| print('\ntimes:', time_list) | |||||
| def test_gkiam_mutag(): | |||||
| from gk_iam import gk_iam_nearest_multi, compute_kernel | |||||
| from iam import median_distance | |||||
| ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt', | |||||
| 'extra_params': {}} # node nsymb | |||||
| # ds = {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt', | |||||
| # 'extra_params': {}} # node nsymb | |||||
| Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params']) | |||||
| gkernel = 'structuralspkernel' | |||||
| lmbda = 0.03 # termination probability | |||||
| r_max = 3 # recursions | |||||
| # alpha_range = np.linspace(0.5, 0.5, 1) | |||||
| k = 20 # k nearest neighbors | |||||
| # classify graphs according to letters. | |||||
| idx_dict = get_same_item_indices(y_all) | |||||
| time_list = [] | |||||
| sod_ks_min_list = [] | |||||
| sod_gs_list = [] | |||||
| sod_gs_min_list = [] | |||||
| nb_updated_list = [] | |||||
| for letter in idx_dict: | |||||
| print('\n-------------------------------------------------------\n') | |||||
| Gn_let = [Gn[i].copy() for i in idx_dict[letter]] | |||||
| Gn_mix = Gn_let + [g.copy() for g in Gn_let] | |||||
| alpha_range = np.linspace(1 / len(Gn_let), 1 / len(Gn_let), 1) | |||||
| # compute | |||||
| time0 = time.time() | |||||
| km = compute_kernel(Gn_mix, gkernel, True) | |||||
| g_best = [] | |||||
| dis_best = [] | |||||
| # for each alpha | |||||
| for alpha in alpha_range: | |||||
| print('alpha =', alpha) | |||||
| dhat, ghat_list, sod_ks, nb_updated = gk_iam_nearest_multi(Gn_let, Gn_let, [alpha] * len(Gn_let), | |||||
| range(len(Gn_let), len(Gn_mix)), km, | |||||
| k, r_max, gkernel, c_ei=1.7, | |||||
| c_er=1.7, c_es=1.7) | |||||
| dis_best.append(dhat) | |||||
| g_best.append(ghat_list) | |||||
| time_list.append(time.time() - time0) | |||||
| # show best graphs and save them to file. | |||||
| for idx, item in enumerate(alpha_range): | |||||
| print('when alpha is', item, 'the shortest distance is', dis_best[idx]) | |||||
| print('the corresponding pre-images are') | |||||
| for g in g_best[idx]: | |||||
| draw_Letter_graph(g, savepath='results/gk_iam/') | |||||
| # nx.draw_networkx(g) | |||||
| # plt.show() | |||||
| print(g.nodes(data=True)) | |||||
| print(g.edges(data=True)) | |||||
| # compute the corresponding sod in graph space. (alpha range not considered.) | |||||
| sod_tmp, _ = median_distance(g_best[0], Gn_let) | |||||
| sod_gs_list.append(sod_tmp) | |||||
| sod_gs_min_list.append(np.min(sod_tmp)) | |||||
| sod_ks_min_list.append(sod_ks) | |||||
| nb_updated_list.append(nb_updated) | |||||
| print('\nsods in graph space: ', sod_gs_list) | |||||
| print('\nsmallest sod in graph space for each letter: ', sod_gs_min_list) | |||||
| print('\nsmallest sod in kernel space for each letter: ', sod_ks_min_list) | |||||
| print('\nnumber of updates for each letter: ', nb_updated_list) | |||||
| print('\ntimes:', time_list) | |||||
| ############################################################################### | |||||
| # Re-test. | |||||
| def retest_the_simple_two(): | |||||
| from gk_iam import gk_iam_nearest_multi, compute_kernel | |||||
| from iam import median_distance | |||||
| from test_random_mutag import remove_edges | |||||
| # The two simple graphs. | |||||
| # g1 = nx.Graph(name='haha') | |||||
| # g1.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'O'}), (2, {'atom': 'C'})]) | |||||
| # g1.add_edges_from([(0, 1, {'bond_type': '1'}), (1, 2, {'bond_type': '1'})]) | |||||
| # g2 = nx.Graph(name='hahaha') | |||||
| # g2.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'O'}), (2, {'atom': 'C'}), | |||||
| # (3, {'atom': 'O'}), (4, {'atom': 'C'})]) | |||||
| # g2.add_edges_from([(0, 1, {'bond_type': '1'}), (1, 2, {'bond_type': '1'}), | |||||
| # (2, 3, {'bond_type': '1'}), (3, 4, {'bond_type': '1'})]) | |||||
| g1 = nx.Graph(name='haha') | |||||
| g1.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'C'}), (2, {'atom': 'C'}), | |||||
| (3, {'atom': 'S'}), (4, {'atom': 'S'})]) | |||||
| g1.add_edges_from([(0, 1, {'bond_type': '1'}), (1, 2, {'bond_type': '1'}), | |||||
| (2, 3, {'bond_type': '1'}), (2, 4, {'bond_type': '1'})]) | |||||
| g2 = nx.Graph(name='hahaha') | |||||
| g2.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'C'}), (2, {'atom': 'C'}), | |||||
| (3, {'atom': 'O'}), (4, {'atom': 'O'})]) | |||||
| g2.add_edges_from([(0, 1, {'bond_type': '1'}), (1, 2, {'bond_type': '1'}), | |||||
| (2, 3, {'bond_type': '1'}), (2, 4, {'bond_type': '1'})]) | |||||
| # # randomly select two molecules | |||||
| # np.random.seed(1) | |||||
| # idx_gi = [0, 6] # np.random.randint(0, len(Gn), 2) | |||||
| # g1 = Gn[idx_gi[0]] | |||||
| # g2 = Gn[idx_gi[1]] | |||||
| # Gn_mix = [g.copy() for g in Gn] | |||||
| # Gn_mix.append(g1.copy()) | |||||
| # Gn_mix.append(g2.copy()) | |||||
| Gn = [g1.copy(), g2.copy()] | |||||
| remove_edges(Gn) | |||||
| gkernel = 'marginalizedkernel' | |||||
| lmbda = 0.03 # termination probability | |||||
| r_max = 10 # recursions | |||||
| # l = 500 | |||||
| alpha_range = np.linspace(0.5, 0.5, 1) | |||||
| k = 2 # k nearest neighbors | |||||
| epsilon = 1e-6 | |||||
| ged_cost='CHEM_1' | |||||
| ged_method='IPFP' | |||||
| saveGXL='gedlib' | |||||
| c_ei=1 | |||||
| c_er=1 | |||||
| c_es=1 | |||||
| Gn_mix = Gn + [g1.copy(), g2.copy()] | |||||
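| # Gn_mix lists the candidates first and the two median graphs last, so the | |||||
| # medians occupy indices len(Gn) and len(Gn) + 1 in the Gram matrix below. | |||||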
| # compute | |||||
| time0 = time.time() | |||||
| km = compute_kernel(Gn_mix, gkernel, True) | |||||
| time_km = time.time() - time0 | |||||
| time_list = [] | |||||
| sod_ks_min_list = [] | |||||
| sod_gs_list = [] | |||||
| sod_gs_min_list = [] | |||||
| nb_updated_list = [] | |||||
| g_best = [] | |||||
| # for each alpha | |||||
| for alpha in alpha_range: | |||||
| print('\n-------------------------------------------------------\n') | |||||
| print('alpha =', alpha) | |||||
| time0 = time.time() | |||||
| dhat, ghat_list, sod_ks, nb_updated = gk_iam_nearest_multi(Gn, [g1, g2], | |||||
| [alpha, 1 - alpha], range(len(Gn), len(Gn) + 2), km, k, r_max, | |||||
| gkernel, c_ei=c_ei, c_er=c_er, c_es=c_es, epsilon=epsilon, | |||||
| ged_cost=ged_cost, ged_method=ged_method, saveGXL=saveGXL) | |||||
| time_total = time.time() - time0 + time_km | |||||
| print('time: ', time_total) | |||||
| time_list.append(time_total) | |||||
| sod_ks_min_list.append(dhat) | |||||
| g_best.append(ghat_list) | |||||
| nb_updated_list.append(nb_updated) | |||||
| # show best graphs and save them to file. | |||||
| for idx, item in enumerate(alpha_range): | |||||
| print('when alpha is', item, 'the shortest distance is', sod_ks_min_list[idx]) | |||||
| print('one of the possible corresponding pre-images is') | |||||
| nx.draw(g_best[idx][0], labels=nx.get_node_attributes(g_best[idx][0], 'atom'), | |||||
| with_labels=True) | |||||
| plt.savefig('results/gk_iam/mutag_alpha' + str(item) + '.png', format="PNG") | |||||
| plt.show() | |||||
| print(g_best[idx][0].nodes(data=True)) | |||||
| print(g_best[idx][0].edges(data=True)) | |||||
| # for g in g_best[idx]: | |||||
| # draw_Letter_graph(g, savepath='results/gk_iam/') | |||||
| ## nx.draw_networkx(g) | |||||
| ## plt.show() | |||||
| # print(g.nodes(data=True)) | |||||
| # print(g.edges(data=True)) | |||||
| # compute the corresponding sod in graph space. | |||||
| for idx, item in enumerate(alpha_range): | |||||
| sod_tmp, _ = median_distance(g_best[0], [g1, g2], ged_cost=ged_cost, | |||||
| ged_method=ged_method, saveGXL=saveGXL) | |||||
| sod_gs_list.append(sod_tmp) | |||||
| sod_gs_min_list.append(np.min(sod_tmp)) | |||||
| print('\nsods in graph space: ', sod_gs_list) | |||||
| print('\nsmallest sod in graph space for each alpha: ', sod_gs_min_list) | |||||
| print('\nsmallest sod in kernel space for each alpha: ', sod_ks_min_list) | |||||
| print('\nnumber of updates for each alpha: ', nb_updated_list) | |||||
| print('\ntimes:', time_list) | |||||
| if __name__ == '__main__': | if __name__ == '__main__': | ||||
| # ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt', | # ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt', | ||||
| # 'extra_params': {}} # node/edge symb | # 'extra_params': {}} # node/edge symb | ||||
| ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt', | |||||
| 'extra_params': {}} # node nsymb | |||||
| # ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt', | |||||
| # 'extra_params': {}} # node nsymb | |||||
| # ds = {'name': 'Acyclic', 'dataset': '../datasets/monoterpenoides/trainset_9.ds', | # ds = {'name': 'Acyclic', 'dataset': '../datasets/monoterpenoides/trainset_9.ds', | ||||
| # 'extra_params': {}} | # 'extra_params': {}} | ||||
| # ds = {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds', | # ds = {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds', | ||||
| # 'extra_params': {}} # node symb | # 'extra_params': {}} # node symb | ||||
| Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params']) | |||||
| # Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params']) | |||||
| # Gn = Gn[0:20] | # Gn = Gn[0:20] | ||||
| # import networkx.algorithms.isomorphism as iso | # import networkx.algorithms.isomorphism as iso | ||||
| @@ -419,5 +695,10 @@ if __name__ == '__main__': | |||||
| # test_the_simple_two(Gn, 'untilhpathkernel') | # test_the_simple_two(Gn, 'untilhpathkernel') | ||||
| # test_remove_bests(Gn, 'untilhpathkernel') | # test_remove_bests(Gn, 'untilhpathkernel') | ||||
| test_gkiam_letter_h() | |||||
| # test_iam_letter_h() | |||||
| # test_gkiam_letter_h() | |||||
| # test_iam_letter_h() | |||||
| # test_random_preimage_letter_h() | |||||
| ############################################################################### | |||||
| # retests. | |||||
| retest_the_simple_two() | |||||
| @@ -18,17 +18,17 @@ def test() : | |||||
| script.PyRestartEnv() | script.PyRestartEnv() | ||||
| # print("Here is the Python function !") | |||||
| # | |||||
| # print("List of Edit Cost Options : ") | |||||
| # for i in script.listOfEditCostOptions : | |||||
| # print (i) | |||||
| # print("") | |||||
| # | |||||
| # print("List of Method Options : ") | |||||
| # for j in script.listOfMethodOptions : | |||||
| # print (j) | |||||
| # print("") | |||||
| print("Here is the Python function !") | |||||
| print("List of Edit Cost Options : ") | |||||
| for i in script.listOfEditCostOptions : | |||||
| print (i) | |||||
| print("") | |||||
| print("List of Method Options : ") | |||||
| for j in script.listOfMethodOptions : | |||||
| print (j) | |||||
| print("") | |||||
| script.PyLoadGXLGraph('include/gedlib-master/data/datasets/Mutagenicity/data/', 'collections/MUTA_10.xml') | script.PyLoadGXLGraph('include/gedlib-master/data/datasets/Mutagenicity/data/', 'collections/MUTA_10.xml') | ||||
| listID = script.PyGetGraphIds() | listID = script.PyGetGraphIds() | ||||
| @@ -0,0 +1,599 @@ | |||||
| #!/usr/bin/env python3 | |||||
| # -*- coding: utf-8 -*- | |||||
| """ | |||||
| Created on Thu Sep 5 15:59:00 2019 | |||||
| @author: ljia | |||||
| """ | |||||
| import numpy as np | |||||
| import networkx as nx | |||||
| import matplotlib.pyplot as plt | |||||
| import time | |||||
| from tqdm import tqdm | |||||
| import os | |||||
| import sys | |||||
| sys.path.insert(0, "../") | |||||
| from pygraph.utils.graphfiles import loadDataset | |||||
| ############################################################################### | |||||
| # test on combinations of two randomly chosen graphs (the same setting as in | |||||
| # the random pre-image paper). | |||||
| def test_preimage_mix_2combination_all_pairs(): | |||||
| from gk_iam import preimage_iam_random_mix, compute_kernel | |||||
| from iam import median_distance | |||||
| from test_random_mutag import remove_edges | |||||
| ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt', | |||||
| 'extra_params': {}} # node/edge symb | |||||
| Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params']) | |||||
| # Gn = Gn[0:50] | |||||
| remove_edges(Gn) | |||||
| gkernel = 'marginalizedkernel' | |||||
| lmbda = 0.03 # termination probability | |||||
| r_max = 10 # iteration limit for pre-image. | |||||
| l_max = 500 # update limit for random generation | |||||
| alpha_range = np.linspace(0.7, 1, 4) | |||||
| k = 5 # k nearest neighbors | |||||
| epsilon = 1e-6 | |||||
| # parameters for GED function | |||||
| ged_cost='CHEM_1' | |||||
| ged_method='IPFP' | |||||
| saveGXL='gedlib' | |||||
| # parameters for IAM function | |||||
| c_ei=1 | |||||
| c_er=1 | |||||
| c_es=1 | |||||
| ite_max_iam = 50 | |||||
| epsilon_iam = 0.001 | |||||
| removeNodes = True | |||||
| connected_iam = False | |||||
| nb_update_mat_iam = np.full((len(Gn), len(Gn)), np.inf) | |||||
| nb_update_mat_random = np.full((len(Gn), len(Gn)), np.inf) | |||||
| # test on each pair of graphs. | |||||
| # for idx1 in range(len(Gn) - 1, -1, -1): | |||||
| # for idx2 in range(idx1, -1, -1): | |||||
| for idx1 in range(187, 188): | |||||
| for idx2 in range(167, 168): | |||||
| g1 = Gn[idx1].copy() | |||||
| g2 = Gn[idx2].copy() | |||||
| # Gn[10] = [] | |||||
| nx.draw(g1, labels=nx.get_node_attributes(g1, 'atom'), with_labels=True) | |||||
| plt.savefig("results/preimage_mix/mutag187.png", format="PNG") | |||||
| plt.show() | |||||
| plt.clf() | |||||
| nx.draw(g2, labels=nx.get_node_attributes(g2, 'atom'), with_labels=True) | |||||
| plt.savefig("results/preimage_mix/mutag167.png", format="PNG") | |||||
| plt.show() | |||||
| plt.clf() | |||||
| ################################################################### | |||||
| # Gn_mix = [g.copy() for g in Gn] | |||||
| # Gn_mix.append(g1.copy()) | |||||
| # Gn_mix.append(g2.copy()) | |||||
| # | |||||
| # # compute | |||||
| # time0 = time.time() | |||||
| # km = compute_kernel(Gn_mix, gkernel, True) | |||||
| # time_km = time.time() - time0 | |||||
| # | |||||
| # # write Gram matrix to file and read it. | |||||
| # np.savez('results/gram_matrix_uhpath_itr7_pq0.8.gm', gm=km, gmtime=time_km) | |||||
| ################################################################### | |||||
| gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03.gm.npz') | |||||
| km = gmfile['gm'] | |||||
| time_km = gmfile['gmtime'] | |||||
| # modify mixed gram matrix. | |||||
| for i in range(len(Gn)): | |||||
| km[i, len(Gn)] = km[i, idx1] | |||||
| km[i, len(Gn) + 1] = km[i, idx2] | |||||
| km[len(Gn), i] = km[i, idx1] | |||||
| km[len(Gn) + 1, i] = km[i, idx2] | |||||
| km[len(Gn), len(Gn)] = km[idx1, idx1] | |||||
| km[len(Gn), len(Gn) + 1] = km[idx1, idx2] | |||||
| km[len(Gn) + 1, len(Gn)] = km[idx2, idx1] | |||||
| km[len(Gn) + 1, len(Gn) + 1] = km[idx2, idx2] | |||||
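| # The loop above avoids recomputing the kernel: rows and columns len(Gn) and | |||||
| # len(Gn) + 1 of the precomputed Gram matrix are filled with copies of the | |||||
| # entries for idx1 and idx2, as if g1 and g2 had been appended to Gn. | |||||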
| ################################################################### | |||||
| # # use only the two graphs in median set as candidates. | |||||
| # Gn = [g1.copy(), g2.copy()] | |||||
| # Gn_mix = Gn + [g1.copy(), g2.copy()] | |||||
| # # compute | |||||
| # time0 = time.time() | |||||
| # km = compute_kernel(Gn_mix, gkernel, True) | |||||
| # time_km = time.time() - time0 | |||||
| time_list = [] | |||||
| dis_ks_min_list = [] | |||||
| sod_gs_list = [] | |||||
| sod_gs_min_list = [] | |||||
| nb_updated_list_iam = [] | |||||
| nb_updated_list_random = [] | |||||
| g_best = [] | |||||
| # for each alpha | |||||
| for alpha in alpha_range: | |||||
| print('\n-------------------------------------------------------\n') | |||||
| print('alpha =', alpha) | |||||
| time0 = time.time() | |||||
| dhat, ghat_list, sod_ks, nb_updated_iam, nb_updated_random = \ | |||||
| preimage_iam_random_mix(Gn, [g1, g2], | |||||
| [alpha, 1 - alpha], range(len(Gn), len(Gn) + 2), km, k, r_max, | |||||
| l_max, gkernel, epsilon=epsilon, | |||||
| params_iam={'c_ei': c_ei, 'c_er': c_er, 'c_es': c_es, | |||||
| 'ite_max': ite_max_iam, 'epsilon': epsilon_iam, | |||||
| 'removeNodes': removeNodes, 'connected': connected_iam}, | |||||
| params_ged={'ged_cost': ged_cost, 'ged_method': ged_method, | |||||
| 'saveGXL': saveGXL}) | |||||
| time_total = time.time() - time0 + time_km | |||||
| print('time: ', time_total) | |||||
| time_list.append(time_total) | |||||
| dis_ks_min_list.append(dhat) | |||||
| g_best.append(ghat_list) | |||||
| nb_updated_list_iam.append(nb_updated_iam) | |||||
| nb_updated_list_random.append(nb_updated_random) | |||||
| # show best graphs and save them to file. | |||||
| for idx, item in enumerate(alpha_range): | |||||
| print('when alpha is', item, 'the shortest distance is', dis_ks_min_list[idx]) | |||||
| print('one of the possible corresponding pre-images is') | |||||
| nx.draw(g_best[idx][0], labels=nx.get_node_attributes(g_best[idx][0], 'atom'), | |||||
| with_labels=True) | |||||
| plt.savefig('results/preimage_mix/mutag' + str(idx1) + '_' + str(idx2) | |||||
| + '_alpha' + str(item) + '.png', format="PNG") | |||||
| # plt.show() | |||||
| plt.clf() | |||||
| # print(g_best[idx][0].nodes(data=True)) | |||||
| # print(g_best[idx][0].edges(data=True)) | |||||
| # for g in g_best[idx]: | |||||
| # draw_Letter_graph(g, savepath='results/gk_iam/') | |||||
| ## nx.draw_networkx(g) | |||||
| ## plt.show() | |||||
| # print(g.nodes(data=True)) | |||||
| # print(g.edges(data=True)) | |||||
| # compute the corresponding sod in graph space. | |||||
| for idx, item in enumerate(alpha_range): | |||||
| sod_tmp, _ = median_distance(g_best[0], [g1, g2], ged_cost=ged_cost, | |||||
| ged_method=ged_method, saveGXL=saveGXL) | |||||
| sod_gs_list.append(sod_tmp) | |||||
| sod_gs_min_list.append(np.min(sod_tmp)) | |||||
| print('\nsods in graph space: ', sod_gs_list) | |||||
| print('\nsmallest sod in graph space for each alpha: ', sod_gs_min_list) | |||||
| print('\nsmallest distance in kernel space for each alpha: ', dis_ks_min_list) | |||||
| print('\nnumber of updates for each alpha by IAM: ', nb_updated_list_iam) | |||||
| print('\nnumber of updates for each alpha by random generation: ', | |||||
| nb_updated_list_random) | |||||
| print('\ntimes:', time_list) | |||||
| nb_update_mat_iam[idx1, idx2] = nb_updated_list_iam[0] | |||||
| nb_update_mat_random[idx1, idx2] = nb_updated_list_random[0] | |||||
| str_fw = 'graphs %d and %d: %d times by IAM, %d times by random generation.\n' \ | |||||
| % (idx1, idx2, nb_updated_list_iam[0], nb_updated_list_random[0]) | |||||
| with open('results/preimage_mix/nb_updates.txt', 'r+') as file: | |||||
| content = file.read() | |||||
| file.seek(0, 0) | |||||
| file.write(str_fw + content) | |||||
| def test_gkiam_2combination_all_pairs(): | |||||
| from gk_iam import gk_iam_nearest_multi, compute_kernel | |||||
| from iam import median_distance | |||||
| from test_random_mutag import remove_edges | |||||
| ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt', | |||||
| 'extra_params': {}} # node/edge symb | |||||
| Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params']) | |||||
| # Gn = Gn[0:50] | |||||
| remove_edges(Gn) | |||||
| gkernel = 'marginalizedkernel' | |||||
| lmbda = 0.03 # termination probability | |||||
| r_max = 10 # iteration limit for pre-image. | |||||
| alpha_range = np.linspace(1, 1, 1) | |||||
| k = 5 # k nearest neighbors | |||||
| epsilon = 1e-6 | |||||
| # parameters for GED function | |||||
| ged_cost='CHEM_1' | |||||
| ged_method='IPFP' | |||||
| saveGXL='gedlib' | |||||
| # parameters for IAM function | |||||
| c_ei=1 | |||||
| c_er=1 | |||||
| c_es=1 | |||||
| ite_max_iam = 50 | |||||
| epsilon_iam = 0.001 | |||||
| removeNodes = True | |||||
| connected_iam = False | |||||
| nb_update_mat = np.full((len(Gn), len(Gn)), np.inf) | |||||
| # test on each pair of graphs. | |||||
| # for idx1 in range(len(Gn) - 1, -1, -1): | |||||
| # for idx2 in range(idx1, -1, -1): | |||||
| for idx1 in range(187, 188): | |||||
| for idx2 in range(167, 168): | |||||
| g1 = Gn[idx1].copy() | |||||
| g2 = Gn[idx2].copy() | |||||
| # Gn[10] = [] | |||||
| nx.draw(g1, labels=nx.get_node_attributes(g1, 'atom'), with_labels=True) | |||||
| plt.savefig("results/gk_iam/all_pairs/mutag187.png", format="PNG") | |||||
| plt.show() | |||||
| plt.clf() | |||||
| nx.draw(g2, labels=nx.get_node_attributes(g2, 'atom'), with_labels=True) | |||||
| plt.savefig("results/gk_iam/all_pairs/mutag167.png", format="PNG") | |||||
| plt.show() | |||||
| plt.clf() | |||||
| ################################################################### | |||||
| # Gn_mix = [g.copy() for g in Gn] | |||||
| # Gn_mix.append(g1.copy()) | |||||
| # Gn_mix.append(g2.copy()) | |||||
| # | |||||
| # # compute | |||||
| # time0 = time.time() | |||||
| # km = compute_kernel(Gn_mix, gkernel, True) | |||||
| # time_km = time.time() - time0 | |||||
| # | |||||
| # # write Gram matrix to file and read it. | |||||
| # np.savez('results/gram_matrix_uhpath_itr7_pq0.8.gm', gm=km, gmtime=time_km) | |||||
| ################################################################### | |||||
| gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03.gm.npz') | |||||
| km = gmfile['gm'] | |||||
| time_km = gmfile['gmtime'] | |||||
| # modify mixed gram matrix. | |||||
| for i in range(len(Gn)): | |||||
| km[i, len(Gn)] = km[i, idx1] | |||||
| km[i, len(Gn) + 1] = km[i, idx2] | |||||
| km[len(Gn), i] = km[i, idx1] | |||||
| km[len(Gn) + 1, i] = km[i, idx2] | |||||
| km[len(Gn), len(Gn)] = km[idx1, idx1] | |||||
| km[len(Gn), len(Gn) + 1] = km[idx1, idx2] | |||||
| km[len(Gn) + 1, len(Gn)] = km[idx2, idx1] | |||||
| km[len(Gn) + 1, len(Gn) + 1] = km[idx2, idx2] | |||||
| ################################################################### | |||||
| # # use only the two graphs in median set as candidates. | |||||
| # Gn = [g1.copy(), g2.copy()] | |||||
| # Gn_mix = Gn + [g1.copy(), g2.copy()] | |||||
| # # compute | |||||
| # time0 = time.time() | |||||
| # km = compute_kernel(Gn_mix, gkernel, True) | |||||
| # time_km = time.time() - time0 | |||||
| time_list = [] | |||||
| dis_ks_min_list = [] | |||||
| sod_gs_list = [] | |||||
| sod_gs_min_list = [] | |||||
| nb_updated_list = [] | |||||
| g_best = [] | |||||
| # for each alpha | |||||
| for alpha in alpha_range: | |||||
| print('\n-------------------------------------------------------\n') | |||||
| print('alpha =', alpha) | |||||
| time0 = time.time() | |||||
| dhat, ghat_list, sod_ks, nb_updated = gk_iam_nearest_multi(Gn, [g1, g2], | |||||
| [alpha, 1 - alpha], range(len(Gn), len(Gn) + 2), km, k, r_max, | |||||
| gkernel, epsilon=epsilon, | |||||
| params_iam={'c_ei': c_ei, 'c_er': c_er, 'c_es': c_es, | |||||
| 'ite_max': ite_max_iam, 'epsilon': epsilon_iam, | |||||
| 'removeNodes': removeNodes, 'connected': connected_iam}, | |||||
| params_ged={'ged_cost': ged_cost, 'ged_method': ged_method, | |||||
| 'saveGXL': saveGXL}) | |||||
| time_total = time.time() - time0 + time_km | |||||
| print('time: ', time_total) | |||||
| time_list.append(time_total) | |||||
| dis_ks_min_list.append(dhat) | |||||
| g_best.append(ghat_list) | |||||
| nb_updated_list.append(nb_updated) | |||||
| # show best graphs and save them to file. | |||||
| for idx, item in enumerate(alpha_range): | |||||
| print('when alpha is', item, 'the shortest distance is', dis_ks_min_list[idx]) | |||||
| print('one of the possible corresponding pre-images is') | |||||
| nx.draw(g_best[idx][0], labels=nx.get_node_attributes(g_best[idx][0], 'atom'), | |||||
| with_labels=True) | |||||
| plt.savefig('results/gk_iam/mutag' + str(idx1) + '_' + str(idx2) | |||||
| + '_alpha' + str(item) + '.png', format="PNG") | |||||
| # plt.show() | |||||
| plt.clf() | |||||
| # print(g_best[idx][0].nodes(data=True)) | |||||
| # print(g_best[idx][0].edges(data=True)) | |||||
| # for g in g_best[idx]: | |||||
| # draw_Letter_graph(g, savepath='results/gk_iam/') | |||||
| ## nx.draw_networkx(g) | |||||
| ## plt.show() | |||||
| # print(g.nodes(data=True)) | |||||
| # print(g.edges(data=True)) | |||||
| # compute the corresponding sod in graph space. | |||||
| for idx, item in enumerate(alpha_range): | |||||
| sod_tmp, _ = median_distance(g_best[0], [g1, g2], ged_cost=ged_cost, | |||||
| ged_method=ged_method, saveGXL=saveGXL) | |||||
| sod_gs_list.append(sod_tmp) | |||||
| sod_gs_min_list.append(np.min(sod_tmp)) | |||||
| print('\nsods in graph space: ', sod_gs_list) | |||||
| print('\nsmallest sod in graph space for each alpha: ', sod_gs_min_list) | |||||
| print('\nsmallest distance in kernel space for each alpha: ', dis_ks_min_list) | |||||
| print('\nnumber of updates for each alpha: ', nb_updated_list) | |||||
| print('\ntimes:', time_list) | |||||
| nb_update_mat[idx1, idx2] = nb_updated_list[0] | |||||
| str_fw = 'graphs %d and %d: %d.\n' % (idx1, idx2, nb_updated_list[0]) | |||||
| with open('results/gk_iam/all_pairs/nb_updates.txt', 'r+') as file: | |||||
| content = file.read() | |||||
| file.seek(0, 0) | |||||
| file.write(str_fw + content) | |||||
| def test_gkiam_2combination(): | |||||
| from gk_iam import gk_iam_nearest_multi, compute_kernel | |||||
| from iam import median_distance | |||||
| ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt', | |||||
| 'extra_params': {}} # node/edge symb | |||||
| Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params']) | |||||
| # Gn = Gn[0:50] | |||||
| remove_edges(Gn) | |||||
| gkernel = 'marginalizedkernel' | |||||
| lmbda = 0.03 # termination probability | |||||
| r_max = 10 # iteration limit for pre-image. | |||||
| alpha_range = np.linspace(0.5, 0.5, 1) | |||||
| k = 20 # k nearest neighbors | |||||
| epsilon = 1e-6 | |||||
| ged_cost='CHEM_1' | |||||
| ged_method='IPFP' | |||||
| saveGXL='gedlib' | |||||
| c_ei=1 | |||||
| c_er=1 | |||||
| c_es=1 | |||||
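| # presumably the IAM edit-cost constants: insertion (c_ei), removal (c_er) | |||||
| # and substitution (c_es). | |||||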
| # randomly select two molecules | |||||
| np.random.seed(1) | |||||
| idx_gi = [10, 11] # np.random.randint(0, len(Gn), 2) | |||||
| g1 = Gn[idx_gi[0]].copy() | |||||
| g2 = Gn[idx_gi[1]].copy() | |||||
| # Gn[10] = [] | |||||
| # Gn[10] = [] | |||||
| # nx.draw(g1, labels=nx.get_node_attributes(g1, 'atom'), with_labels=True) | |||||
| # plt.savefig("results/random_preimage/mutag10.png", format="PNG") | |||||
| # plt.show() | |||||
| # nx.draw(g2, labels=nx.get_node_attributes(g2, 'atom'), with_labels=True) | |||||
| # plt.savefig("results/random_preimage/mutag11.png", format="PNG") | |||||
| # plt.show() | |||||
| Gn_mix = [g.copy() for g in Gn] | |||||
| Gn_mix.append(g1.copy()) | |||||
| Gn_mix.append(g2.copy()) | |||||
| # compute | |||||
| # time0 = time.time() | |||||
| # km = compute_kernel(Gn_mix, gkernel, True) | |||||
| # time_km = time.time() - time0 | |||||
| # write Gram matrix to file and read it. | |||||
| # np.savez('results/gram_matrix.gm', gm=km, gmtime=time_km) | |||||
| gmfile = np.load('results/gram_matrix.gm.npz') | |||||
| km = gmfile['gm'] | |||||
| time_km = gmfile['gmtime'] | |||||
| time_list = [] | |||||
| dis_ks_min_list = [] | |||||
| sod_gs_list = [] | |||||
| sod_gs_min_list = [] | |||||
| nb_updated_list = [] | |||||
| g_best = [] | |||||
| # for each alpha | |||||
| for alpha in alpha_range: | |||||
| print('\n-------------------------------------------------------\n') | |||||
| print('alpha =', alpha) | |||||
| time0 = time.time() | |||||
| dhat, ghat_list, sod_ks, nb_updated = gk_iam_nearest_multi(Gn, [g1, g2], | |||||
| [alpha, 1 - alpha], range(len(Gn), len(Gn) + 2), km, k, r_max, | |||||
| gkernel, c_ei=c_ei, c_er=c_er, c_es=c_es, epsilon=epsilon, | |||||
| ged_cost=ged_cost, ged_method=ged_method, saveGXL=saveGXL) | |||||
| time_total = time.time() - time0 + time_km | |||||
| print('time: ', time_total) | |||||
| time_list.append(time_total) | |||||
| dis_ks_min_list.append(dhat) | |||||
| g_best.append(ghat_list) | |||||
| nb_updated_list.append(nb_updated) | |||||
| # show best graphs and save them to file. | |||||
| for idx, item in enumerate(alpha_range): | |||||
| print('when alpha is', item, 'the shortest distance is', dis_ks_min_list[idx]) | |||||
| print('one of the possible corresponding pre-images is') | |||||
| nx.draw(g_best[idx][0], labels=nx.get_node_attributes(g_best[idx][0], 'atom'), | |||||
| with_labels=True) | |||||
| plt.savefig('results/gk_iam/mutag_alpha' + str(item) + '.png', format="PNG") | |||||
| plt.show() | |||||
| print(g_best[idx][0].nodes(data=True)) | |||||
| print(g_best[idx][0].edges(data=True)) | |||||
| # for g in g_best[idx]: | |||||
| # draw_Letter_graph(g, savepath='results/gk_iam/') | |||||
| ## nx.draw_networkx(g) | |||||
| ## plt.show() | |||||
| # print(g.nodes(data=True)) | |||||
| # print(g.edges(data=True)) | |||||
| # compute the corresponding sod in graph space. | |||||
| for idx, item in enumerate(alpha_range): | |||||
| sod_tmp, _ = median_distance(g_best[idx], [g1, g2], ged_cost=ged_cost, | |||||
| ged_method=ged_method, saveGXL=saveGXL) | |||||
| sod_gs_list.append(sod_tmp) | |||||
| sod_gs_min_list.append(np.min(sod_tmp)) | |||||
| print('\nsods in graph space: ', sod_gs_list) | |||||
| print('\nsmallest sod in graph space for each alpha: ', sod_gs_min_list) | |||||
| print('\nsmallest distance in kernel space for each alpha: ', dis_ks_min_list) | |||||
| print('\nnumber of updates for each alpha: ', nb_updated_list) | |||||
| print('\ntimes:', time_list) | |||||
| def test_random_preimage_2combination(): | |||||
| # from gk_iam import compute_kernel | |||||
| from preimage import random_preimage | |||||
| ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt', | |||||
| 'extra_params': {}} # node/edge symb | |||||
| Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params']) | |||||
| # Gn = Gn[0:12] | |||||
| remove_edges(Gn) | |||||
| gkernel = 'marginalizedkernel' | |||||
| # dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, gkernel=gkernel) | |||||
| # print(dis_max, dis_min, dis_mean) | |||||
| lmbda = 0.03 # termination probability | |||||
| r_max = 10 # iteration limit for pre-image. | |||||
| l = 500 | |||||
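| # l: presumably the number of random candidate graphs drawn per iteration | |||||
| # by random_preimage. | |||||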
| alpha_range = np.linspace(0, 1, 11) | |||||
| k = 5 # k nearest neighbors | |||||
| # randomly select two molecules | |||||
| np.random.seed(1) | |||||
| idx_gi = [187, 167] # np.random.randint(0, len(Gn), 2) | |||||
| g1 = Gn[idx_gi[0]].copy() | |||||
| g2 = Gn[idx_gi[1]].copy() | |||||
| # nx.draw(g1, labels=nx.get_node_attributes(g1, 'atom'), with_labels=True) | |||||
| # plt.savefig("results/random_preimage/mutag10.png", format="PNG") | |||||
| # plt.show() | |||||
| # nx.draw(g2, labels=nx.get_node_attributes(g2, 'atom'), with_labels=True) | |||||
| # plt.savefig("results/random_preimage/mutag11.png", format="PNG") | |||||
| # plt.show() | |||||
| ###################################################################### | |||||
| # Gn_mix = [g.copy() for g in Gn] | |||||
| # Gn_mix.append(g1.copy()) | |||||
| # Gn_mix.append(g2.copy()) | |||||
| # | |||||
| ## g_tmp = iam([g1, g2]) | |||||
| ## nx.draw_networkx(g_tmp) | |||||
| ## plt.show() | |||||
| # | |||||
| # # compute | |||||
| # time0 = time.time() | |||||
| # km = compute_kernel(Gn_mix, gkernel, True) | |||||
| # time_km = time.time() - time0 | |||||
| ################################################################### | |||||
| idx1 = idx_gi[0] | |||||
| idx2 = idx_gi[1] | |||||
| gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03.gm.npz') | |||||
| km = gmfile['gm'] | |||||
| time_km = gmfile['gmtime'] | |||||
| # modify mixed gram matrix. | |||||
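| # (same padding trick as above: reuse the precomputed entries of Gn[idx1] and Gn[idx2].) | |||||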
| for i in range(len(Gn)): | |||||
| km[i, len(Gn)] = km[i, idx1] | |||||
| km[i, len(Gn) + 1] = km[i, idx2] | |||||
| km[len(Gn), i] = km[i, idx1] | |||||
| km[len(Gn) + 1, i] = km[i, idx2] | |||||
| km[len(Gn), len(Gn)] = km[idx1, idx1] | |||||
| km[len(Gn), len(Gn) + 1] = km[idx1, idx2] | |||||
| km[len(Gn) + 1, len(Gn)] = km[idx2, idx1] | |||||
| km[len(Gn) + 1, len(Gn) + 1] = km[idx2, idx2] | |||||
| ################################################################### | |||||
| time_list = [] | |||||
| nb_updated_list = [] | |||||
| g_best = [] | |||||
| dis_ks_min_list = [] | |||||
| # for each alpha | |||||
| for alpha in alpha_range: | |||||
| print('\n-------------------------------------------------------\n') | |||||
| print('alpha =', alpha) | |||||
| time0 = time.time() | |||||
| dhat, ghat, nb_updated = random_preimage(Gn, [g1, g2], [alpha, 1 - alpha], | |||||
| range(len(Gn), len(Gn) + 2), km, | |||||
| k, r_max, l, gkernel) | |||||
| time_total = time.time() - time0 + time_km | |||||
| print('time: ', time_total) | |||||
| time_list.append(time_total) | |||||
| dis_ks_min_list.append(dhat) | |||||
| g_best.append(ghat) | |||||
| nb_updated_list.append(nb_updated) | |||||
| # show best graphs and save them to file. | |||||
| for idx, item in enumerate(alpha_range): | |||||
| print('when alpha is', item, 'the shortest distance is', dis_ks_min_list[idx]) | |||||
| print('one of the possible corresponding pre-images is') | |||||
| nx.draw(g_best[idx], labels=nx.get_node_attributes(g_best[idx], 'atom'), | |||||
| with_labels=True) | |||||
| plt.savefig('results/random_preimage/mutag_alpha' + str(item) + '.png', format="PNG") | |||||
| plt.show() | |||||
| plt.clf() | |||||
| print(g_best[idx].nodes(data=True)) | |||||
| print(g_best[idx].edges(data=True)) | |||||
| # # compute the corresponding sod in graph space. (alpha range not considered.) | |||||
| # sod_tmp, _ = median_distance(g_best[0], Gn_let) | |||||
| # sod_gs_list.append(sod_tmp) | |||||
| # sod_gs_min_list.append(np.min(sod_tmp)) | |||||
| # sod_ks_min_list.append(sod_ks) | |||||
| # nb_updated_list.append(nb_updated) | |||||
| # print('\nsmallest sod in graph space for each alpha: ', sod_gs_min_list) | |||||
| print('\nsmallest distance in kernel space for each alpha: ', dis_ks_min_list) | |||||
| print('\nnumber of updates for each alpha: ', nb_updated_list) | |||||
| print('\ntimes:', time_list) | |||||
| ############################################################################### | |||||
| # help functions | |||||
| def remove_edges(Gn): | |||||
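| """Clear all edge attributes, so the graphs are treated as edge-unlabeled.""" | |||||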
| for G in Gn: | |||||
| for _, _, attrs in G.edges(data=True): | |||||
| attrs.clear() | |||||
| def kernel_distance_matrix(Gn, Kmatrix=None, gkernel=None): | |||||
| from gk_iam import compute_kernel | |||||
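| # kernel-induced distance: d(g_i, g_j) = sqrt(k(g_i, g_i) + k(g_j, g_j) - 2 * k(g_i, g_j)). | |||||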
| dis_mat = np.empty((len(Gn), len(Gn))) | |||||
| if Kmatrix is None: | |||||
| Kmatrix = compute_kernel(Gn, gkernel, True) | |||||
| for i in range(len(Gn)): | |||||
| for j in range(i, len(Gn)): | |||||
| dis = Kmatrix[i, i] + Kmatrix[j, j] - 2 * Kmatrix[i, j] | |||||
| if dis < 0: | |||||
| if dis > -1e-10: | |||||
| dis = 0 | |||||
| else: | |||||
| raise ValueError('The distance is negative.') | |||||
| dis_mat[i, j] = np.sqrt(dis) | |||||
| dis_mat[j, i] = dis_mat[i, j] | |||||
| dis_max = np.max(dis_mat) | |||||
| dis_min = np.min(dis_mat[dis_mat != 0]) | |||||
| dis_mean = np.mean(dis_mat) | |||||
| return dis_mat, dis_max, dis_min, dis_mean | |||||
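| # example call (mirrors the commented usage above): | |||||
| # dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, gkernel='marginalizedkernel') | |||||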
| ############################################################################### | |||||
| if __name__ == '__main__': | |||||
| ############################################################################### | |||||
| # test on the combination of two randomly chosen graphs (the same setting | |||||
| # as in the random pre-image paper). | |||||
| # test_random_preimage_2combination() | |||||
| # test_gkiam_2combination() | |||||
| # test_gkiam_2combination_all_pairs() | |||||
| test_preimage_mix_2combination_all_pairs() | |||||
| @@ -51,6 +51,7 @@ def untilhpathkernel(*args, | |||||
| applied for the graph kernel. The following choices are available: | applied for the graph kernel. The following choices are available: | ||||
| 'MinMax': use the MiniMax kernel and counting feature map. | 'MinMax': use the MiniMax kernel and counting feature map. | ||||
| 'tanimoto': use the Tanimoto kernel and binary feature map. | 'tanimoto': use the Tanimoto kernel and binary feature map. | ||||
| None: no sub-kernel is used; the kernel is computed directly. | |||||
| compute_method : string | compute_method : string | ||||
| Computation method to store paths and compute the graph kernel. The | Computation method to store paths and compute the graph kernel. The | ||||
| following choices are available: | following choices are available: | ||||
| @@ -72,14 +73,16 @@ def untilhpathkernel(*args, | |||||
| Kmatrix = np.zeros((len(Gn), len(Gn))) | Kmatrix = np.zeros((len(Gn), len(Gn))) | ||||
| ds_attrs = get_dataset_attributes( | ds_attrs = get_dataset_attributes( | ||||
| Gn, | Gn, | ||||
| attr_names=['node_labeled', 'edge_labeled', 'is_directed'], | |||||
| attr_names=['node_labeled', 'node_attr_dim', 'edge_labeled', | |||||
| 'edge_attr_dim', 'is_directed'], | |||||
| node_label=node_label, edge_label=edge_label) | node_label=node_label, edge_label=edge_label) | ||||
| if not ds_attrs['node_labeled']: | |||||
| for G in Gn: | |||||
| nx.set_node_attributes(G, '0', 'atom') | |||||
| if not ds_attrs['edge_labeled']: | |||||
| for G in Gn: | |||||
| nx.set_edge_attributes(G, '0', 'bond_type') | |||||
| if k_func is not None: | |||||
| if not ds_attrs['node_labeled']: | |||||
| for G in Gn: | |||||
| nx.set_node_attributes(G, '0', 'atom') | |||||
| if not ds_attrs['edge_labeled']: | |||||
| for G in Gn: | |||||
| nx.set_edge_attributes(G, '0', 'bond_type') | |||||
| start_time = time.time() | start_time = time.time() | ||||
| @@ -93,12 +96,15 @@ def untilhpathkernel(*args, | |||||
| else: | else: | ||||
| chunksize = 100 | chunksize = 100 | ||||
| all_paths = [[] for _ in range(len(Gn))] | all_paths = [[] for _ in range(len(Gn))] | ||||
| if compute_method == 'trie': | |||||
| if compute_method == 'trie' and k_func is not None: | |||||
| getps_partial = partial(wrapper_find_all_path_as_trie, depth, | getps_partial = partial(wrapper_find_all_path_as_trie, depth, | ||||
| ds_attrs, node_label, edge_label) | ds_attrs, node_label, edge_label) | ||||
| else: | |||||
| elif compute_method != 'trie' and k_func is not None: | |||||
| getps_partial = partial(wrapper_find_all_paths_until_length, depth, | getps_partial = partial(wrapper_find_all_paths_until_length, depth, | ||||
| ds_attrs, node_label, edge_label) | |||||
| ds_attrs, node_label, edge_label, True) | |||||
| else: | |||||
| getps_partial = partial(wrapper_find_all_paths_until_length, depth, | |||||
| ds_attrs, node_label, edge_label, False) | |||||
| if verbose: | if verbose: | ||||
| iterator = tqdm(pool.imap_unordered(getps_partial, itr, chunksize), | iterator = tqdm(pool.imap_unordered(getps_partial, itr, chunksize), | ||||
| desc='getting paths', file=sys.stdout) | desc='getting paths', file=sys.stdout) | ||||
| @@ -110,10 +116,12 @@ def untilhpathkernel(*args, | |||||
| pool.join() | pool.join() | ||||
| # for g in Gn: | # for g in Gn: | ||||
| # if compute_method == 'trie': | |||||
| # if compute_method == 'trie' and k_func != None: | |||||
| # find_all_path_as_trie(g, depth, ds_attrs, node_label, edge_label) | # find_all_path_as_trie(g, depth, ds_attrs, node_label, edge_label) | ||||
| # else: | |||||
| # elif compute_method != 'trie' and k_func != None: | |||||
| # find_all_paths_until_length(g, depth, ds_attrs, node_label, edge_label) | # find_all_paths_until_length(g, depth, ds_attrs, node_label, edge_label) | ||||
| # else: | |||||
| # find_all_paths_until_length(g, depth, ds_attrs, node_label, edge_label, False) | |||||
| ## size = sys.getsizeof(all_paths) | ## size = sys.getsizeof(all_paths) | ||||
| ## for item in all_paths: | ## for item in all_paths: | ||||
| @@ -130,20 +138,27 @@ def untilhpathkernel(*args, | |||||
| ## all_paths[i] = ps | ## all_paths[i] = ps | ||||
| ## print(time.time() - ttt) | ## print(time.time() - ttt) | ||||
| if compute_method == 'trie': | |||||
| if compute_method == 'trie' and k_func is not None: | |||||
| def init_worker(trie_toshare): | def init_worker(trie_toshare): | ||||
| global G_trie | global G_trie | ||||
| G_trie = trie_toshare | G_trie = trie_toshare | ||||
| do_partial = partial(wrapper_uhpath_do_trie, k_func) | do_partial = partial(wrapper_uhpath_do_trie, k_func) | ||||
| parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, | parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, | ||||
| glbv=(all_paths,), n_jobs=n_jobs, verbose=verbose) | glbv=(all_paths,), n_jobs=n_jobs, verbose=verbose) | ||||
| else: | |||||
| elif compute_method != 'trie' and k_func is not None: | |||||
| def init_worker(plist_toshare): | def init_worker(plist_toshare): | ||||
| global G_plist | global G_plist | ||||
| G_plist = plist_toshare | G_plist = plist_toshare | ||||
| do_partial = partial(wrapper_uhpath_do_naive, k_func) | do_partial = partial(wrapper_uhpath_do_naive, k_func) | ||||
| parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, | parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, | ||||
| glbv=(all_paths,), n_jobs=n_jobs, verbose=verbose) | glbv=(all_paths,), n_jobs=n_jobs, verbose=verbose) | ||||
| else: | |||||
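| # k_func is None: keep the raw node paths and compare them directly, | |||||
| # without a fingerprint sub-kernel. | |||||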
| def init_worker(plist_toshare): | |||||
| global G_plist | |||||
| G_plist = plist_toshare | |||||
| do_partial = partial(wrapper_uhpath_do_kernelless, k_func) | |||||
| parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, | |||||
| glbv=(all_paths,), n_jobs=n_jobs, verbose=verbose) | |||||
| # # ---- direct running, normally use single CPU core. ---- | # # ---- direct running, normally use single CPU core. ---- | ||||
| @@ -353,12 +368,62 @@ def wrapper_uhpath_do_naive(k_func, itr): | |||||
| return i, j, _untilhpathkernel_do_naive(G_plist[i], G_plist[j], k_func) | return i, j, _untilhpathkernel_do_naive(G_plist[i], G_plist[j], k_func) | ||||
| def _untilhpathkernel_do_kernelless(paths1, paths2, k_func): | |||||
| """Calculate path graph kernels up to depth d between 2 graphs naively. | |||||
| Parameters | |||||
| ---------- | |||||
| paths_list : list of list | |||||
| List of list of paths in all graphs, where for unlabeled graphs, each | |||||
| path is represented by a list of nodes; while for labeled graphs, each | |||||
| path is represented by a string consists of labels of nodes and/or | |||||
| edges on that path. | |||||
| k_func : function | |||||
| A kernel function applied using different notions of fingerprint | |||||
| similarity. | |||||
| Return | |||||
| ------ | |||||
| kernel : float | |||||
| Path kernel up to h between 2 graphs. | |||||
| """ | |||||
| # raw paths are lists of nodes and thus unhashable; convert them to | |||||
| # tuples so they can be put in sets and Counters. | |||||
| paths1 = [tuple(p) for p in paths1] | |||||
| paths2 = [tuple(p) for p in paths2] | |||||
| all_paths = list(set(paths1 + paths2)) | |||||
| if k_func == 'tanimoto': | |||||
| length_union = len(all_paths) | |||||
| kernel = (len(set(paths1)) + len(set(paths2)) - | |||||
| length_union) / length_union | |||||
| # vector1 = [(1 if path in paths1 else 0) for path in all_paths] | |||||
| # vector2 = [(1 if path in paths2 else 0) for path in all_paths] | |||||
| # kernel_uv = np.dot(vector1, vector2) | |||||
| # kernel = kernel_uv / (len(set(paths1)) + len(set(paths2)) - kernel_uv) | |||||
| else: # MinMax kernel (also used when k_func is None) | |||||
| path_count1 = Counter(paths1) | |||||
| path_count2 = Counter(paths2) | |||||
| # a Counter returns 0 for missing keys, so no membership test is needed. | |||||
| vector1 = [path_count1[key] for key in all_paths] | |||||
| vector2 = [path_count2[key] for key in all_paths] | |||||
| kernel = np.sum(np.minimum(vector1, vector2)) / \ | |||||
| np.sum(np.maximum(vector1, vector2)) | |||||
| return kernel | |||||
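| # A minimal worked illustration (toy values, not from the library): with | |||||
| # paths1 = ['CC', 'CO', 'CC'] and paths2 = ['CC', 'CN'], the Tanimoto kernel | |||||
| # compares binary fingerprints, (2 + 2 - 3) / 3 = 1/3, while the MinMax | |||||
| # kernel compares counts, sum(min) / sum(max) = (1+0+0) / (2+1+1) = 0.25. | |||||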
| def wrapper_uhpath_do_kernelless(k_func, itr): | |||||
| i = itr[0] | |||||
| j = itr[1] | |||||
| return i, j, _untilhpathkernel_do_kernelless(G_plist[i], G_plist[j], k_func) | |||||
| # @todo: (can be removed maybe) this method finds paths repetitively; it could be faster. | # @todo: (can be removed maybe) this method finds paths repetitively; it could be faster. | ||||
| def find_all_paths_until_length(G, | def find_all_paths_until_length(G, | ||||
| length, | length, | ||||
| ds_attrs, | ds_attrs, | ||||
| node_label='atom', | node_label='atom', | ||||
| edge_label='bond_type'): | |||||
| edge_label='bond_type', | |||||
| tolabelseqs=True): | |||||
| """Find all paths no longer than a certain maximum length in a graph. A | """Find all paths no longer than a certain maximum length in a graph. A | ||||
| recursive depth first search is applied. | recursive depth first search is applied. | ||||
| @@ -398,7 +463,7 @@ def find_all_paths_until_length(G, | |||||
| # path_l = path_l_new[:] | # path_l = path_l_new[:] | ||||
| path_l = [[n] for n in G.nodes] # paths of length l | path_l = [[n] for n in G.nodes] # paths of length l | ||||
| all_paths = path_l[:] | |||||
| all_paths = [p.copy() for p in path_l] | |||||
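| # the inner lists are copied as well (a shallow [:] would alias them), | |||||
| # presumably so paths returned raw when tolabelseqs=False cannot be mutated later. | |||||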
| for l in range(1, length + 1): | for l in range(1, length + 1): | ||||
| path_lplus1 = [] | path_lplus1 = [] | ||||
| for path in path_l: | for path in path_l: | ||||
| @@ -409,7 +474,7 @@ def find_all_paths_until_length(G, | |||||
| path_lplus1.append(tmp) | path_lplus1.append(tmp) | ||||
| all_paths += path_lplus1 | all_paths += path_lplus1 | ||||
| path_l = path_lplus1[:] | |||||
| path_l = [p.copy() for p in path_lplus1] | |||||
| # for i in range(0, length + 1): | # for i in range(0, length + 1): | ||||
| # new_paths = find_all_paths(G, i) | # new_paths = find_all_paths(G, i) | ||||
| @@ -419,15 +484,18 @@ def find_all_paths_until_length(G, | |||||
| # consider labels | # consider labels | ||||
| # print(paths2labelseqs(all_paths, G, ds_attrs, node_label, edge_label)) | # print(paths2labelseqs(all_paths, G, ds_attrs, node_label, edge_label)) | ||||
| return paths2labelseqs(all_paths, G, ds_attrs, node_label, edge_label) | |||||
| return (paths2labelseqs(all_paths, G, ds_attrs, node_label, edge_label) | |||||
| if tolabelseqs else all_paths) | |||||
| def wrapper_find_all_paths_until_length(length, ds_attrs, node_label, | def wrapper_find_all_paths_until_length(length, ds_attrs, node_label, | ||||
| edge_label, itr_item): | |||||
| edge_label, tolabelseqs, itr_item): | |||||
| g = itr_item[0] | g = itr_item[0] | ||||
| i = itr_item[1] | i = itr_item[1] | ||||
| return i, find_all_paths_until_length(g, length, ds_attrs, | return i, find_all_paths_until_length(g, length, ds_attrs, | ||||
| node_label=node_label, edge_label=edge_label) | |||||
| node_label=node_label, edge_label=edge_label, | |||||
| tolabelseqs=tolabelseqs) | |||||
| def find_all_path_as_trie(G, | def find_all_path_as_trie(G, | ||||
| @@ -84,7 +84,7 @@ def loadGXL(filename): | |||||
| return g | return g | ||||
| def saveGXL(graph, filename, method='gedlib-letter'): | |||||
| def saveGXL(graph, filename, method='benoit'): | |||||
| if method == 'benoit': | if method == 'benoit': | ||||
| import xml.etree.ElementTree as ET | import xml.etree.ElementTree as ET | ||||
| root_node = ET.Element('gxl') | root_node = ET.Element('gxl') | ||||
| @@ -131,13 +131,13 @@ def saveGXL(graph, filename, method='gedlib-letter'): | |||||
| gxl_file.write("<gxl>\n") | gxl_file.write("<gxl>\n") | ||||
| gxl_file.write("<graph id=\"" + str(graph.graph['name']) + "\" edgeids=\"true\" edgemode=\"undirected\">\n") | gxl_file.write("<graph id=\"" + str(graph.graph['name']) + "\" edgeids=\"true\" edgemode=\"undirected\">\n") | ||||
| for v, attrs in graph.nodes(data=True): | for v, attrs in graph.nodes(data=True): | ||||
| gxl_file.write("<node id=\"_" + str(v) + "\">\n") | |||||
| gxl_file.write("<attr name=\"" + "chem" + "\"><int>" + str(attrs['atom']) + "</int></attr>\n") | |||||
| gxl_file.write("<node id=\"_" + str(v) + "\">") | |||||
| gxl_file.write("<attr name=\"" + "chem" + "\"><int>" + str(attrs['atom']) + "</int></attr>") | |||||
| gxl_file.write("</node>\n") | gxl_file.write("</node>\n") | ||||
| for v1, v2, attrs in graph.edges(data=True): | for v1, v2, attrs in graph.edges(data=True): | ||||
| gxl_file.write("<edge from=\"_" + str(v1) + "\" to=\"_" + str(v2) + "\">\n") | |||||
| # gxl_file.write("<attr name=\"valence\"><int>" + str(attrs['bond_type']) + "</int></attr>\n") | |||||
| gxl_file.write("<attr name=\"valence\"><int>" + "1" + "</int></attr>\n") | |||||
| gxl_file.write("<edge from=\"_" + str(v1) + "\" to=\"_" + str(v2) + "\">") | |||||
| # gxl_file.write("<attr name=\"valence\"><int>" + str(attrs['bond_type']) + "</int></attr>") | |||||
| gxl_file.write("<attr name=\"valence\"><int>" + "1" + "</int></attr>") | |||||
| gxl_file.write("</edge>\n") | gxl_file.write("</edge>\n") | ||||
| gxl_file.write("</graph>\n") | gxl_file.write("</graph>\n") | ||||
| gxl_file.write("</gxl>\n") | gxl_file.write("</gxl>\n") | ||||
| @@ -485,7 +485,7 @@ def loadDataset(filename, filename_y=None, extra_params=None): | |||||
| return data, y | return data, y | ||||
| def saveDataset(Gn, y, gformat='gxl', group=None, filename='gfile'): | |||||
| def saveDataset(Gn, y, gformat='gxl', group=None, filename='gfile', xparams=None): | |||||
| """Save list of graphs. | """Save list of graphs. | ||||
| """ | """ | ||||
| import os | import os | ||||
| @@ -502,7 +502,7 @@ def saveDataset(Gn, y, gformat='gxl', group=None, filename='gfile'): | |||||
| fgroup.write("\n<GraphCollection>") | fgroup.write("\n<GraphCollection>") | ||||
| for idx, g in enumerate(Gn): | for idx, g in enumerate(Gn): | ||||
| fname_tmp = "graph" + str(idx) + ".gxl" | fname_tmp = "graph" + str(idx) + ".gxl" | ||||
| saveGXL(g, dirname_ds + fname_tmp) | |||||
| saveGXL(g, dirname_ds + fname_tmp, method=(xparams['method'] if xparams is not None else 'benoit')) | |||||
| fgroup.write("\n\t<graph file=\"" + fname_tmp + "\" class=\"" + str(y[idx]) + "\"/>") | fgroup.write("\n\t<graph file=\"" + fname_tmp + "\" class=\"" + str(y[idx]) + "\"/>") | ||||
| fgroup.write("\n</GraphCollection>") | fgroup.write("\n</GraphCollection>") | ||||
| fgroup.close() | fgroup.close() | ||||
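| # hypothetical usage sketch (argument values assumed, not taken from this diff): | |||||
| # saveDataset(Gn, y_all, gformat='gxl', group='xml', filename='results/gfile', | |||||
| # xparams={'method': 'gedlib'}) | |||||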