| @@ -10,9 +10,11 @@ import numpy as np | |||||
| import time | import time | ||||
| import random | import random | ||||
| import sys | import sys | ||||
| import tqdm | |||||
| from tqdm import tqdm | |||||
| import multiprocessing | import multiprocessing | ||||
| import networkx as nx | import networkx as nx | ||||
| from multiprocessing import Pool | |||||
| from functools import partial | |||||
| from gklearn.preimage import PreimageGenerator | from gklearn.preimage import PreimageGenerator | ||||
| from gklearn.preimage.utils import compute_k_dis | from gklearn.preimage.utils import compute_k_dis | ||||
| from gklearn.utils import Timer | from gklearn.utils import Timer | ||||
| @@ -144,12 +146,14 @@ class RandomPreimageGenerator(PreimageGenerator): | |||||
| dihat_list = [] | dihat_list = [] | ||||
| r = 0 | r = 0 | ||||
| dis_of_each_itr = [dhat] | dis_of_each_itr = [dhat] | ||||
| if self.__parallel: | |||||
| self._kernel_options['parallel'] = None | |||||
| while r < self.__r_max: | while r < self.__r_max: | ||||
| print('\n- r =', r) | print('\n- r =', r) | ||||
| found = False | found = False | ||||
| dis_bests = dis_gs + dihat_list | dis_bests = dis_gs + dihat_list | ||||
| # compute numbers of nodes to be inserted/deleted. | |||||
| # compute numbers of edges to be inserted/deleted. | |||||
| # @todo what if the log is negetive? how to choose alpha (scalar)? | # @todo what if the log is negetive? how to choose alpha (scalar)? | ||||
| fdgs_list = np.array(dis_bests) | fdgs_list = np.array(dis_bests) | ||||
| if np.min(fdgs_list) < 1: | if np.min(fdgs_list) < 1: | ||||
| @@ -161,54 +165,7 @@ class RandomPreimageGenerator(PreimageGenerator): | |||||
| for ig, gs in enumerate(Gs_nearest + gihat_list): | for ig, gs in enumerate(Gs_nearest + gihat_list): | ||||
| if self._verbose >= 2: | if self._verbose >= 2: | ||||
| print('-- computing', ig + 1, 'graphs out of', len(Gs_nearest) + len(gihat_list)) | print('-- computing', ig + 1, 'graphs out of', len(Gs_nearest) + len(gihat_list)) | ||||
| for trail in range(0, self.__l): | |||||
| if self._verbose >= 2: | |||||
| print('---', trail + 1, 'trail out of', self.__l) | |||||
| # add and delete edges. | |||||
| gtemp = gs.copy() | |||||
| np.random.seed() # @todo: may not work for possible parallel. | |||||
| # which edges to change. | |||||
| # @todo: should we use just half of the adjacency matrix for undirected graphs? | |||||
| nb_vpairs = nx.number_of_nodes(gs) * (nx.number_of_nodes(gs) - 1) | |||||
| # @todo: what if fdgs is bigger than nb_vpairs? | |||||
| idx_change = random.sample(range(nb_vpairs), fdgs_list[ig] if | |||||
| fdgs_list[ig] < nb_vpairs else nb_vpairs) | |||||
| for item in idx_change: | |||||
| node1 = int(item / (nx.number_of_nodes(gs) - 1)) | |||||
| node2 = (item - node1 * (nx.number_of_nodes(gs) - 1)) | |||||
| if node2 >= node1: # skip the self pair. | |||||
| node2 += 1 | |||||
| # @todo: is the randomness correct? | |||||
| if not gtemp.has_edge(node1, node2): | |||||
| gtemp.add_edge(node1, node2) | |||||
| else: | |||||
| gtemp.remove_edge(node1, node2) | |||||
| # compute new distances. | |||||
| kernels_to_gtmp, _ = self._graph_kernel.compute(gtemp, D_N, **self._kernel_options) | |||||
| kernel_gtmp, _ = self._graph_kernel.compute(gtemp, gtemp, **self._kernel_options) | |||||
| kernels_to_gtmp = [kernels_to_gtmp[i] / np.sqrt(self.__gram_matrix_unnorm[i, i] * kernel_gtmp) for i in range(len(kernels_to_gtmp))] # normalize | |||||
| # @todo: not correct kernel value | |||||
| gram_with_gtmp = np.concatenate((np.array([kernels_to_gtmp]), np.copy(self._graph_kernel.gram_matrix)), axis=0) | |||||
| gram_with_gtmp = np.concatenate((np.array([[1] + kernels_to_gtmp]).T, gram_with_gtmp), axis=1) | |||||
| dnew = compute_k_dis(0, range(1, 1 + len(D_N)), self.__alphas, gram_with_gtmp, term3=term3, withterm3=True) | |||||
| # get the better graph preimage. | |||||
| if dnew <= dhat: # @todo: the new distance is smaller or also equal? | |||||
| if dnew < dhat: | |||||
| if self._verbose >= 2: | |||||
| print('trail =', str(trail)) | |||||
| print('\nI am smaller!') | |||||
| print('index (as in D_k U {gihat} =', str(ig)) | |||||
| print('distance:', dhat, '->', dnew) | |||||
| self.__num_updates += 1 | |||||
| elif dnew == dhat: | |||||
| if self._verbose >= 2: | |||||
| print('I am equal!') | |||||
| dhat = dnew | |||||
| gnew = gtemp.copy() | |||||
| found = True # found better graph. | |||||
| gnew, dhat, found = self.__generate_l_graphs(gs, fdgs_list[ig], dhat, ig, found, term3) | |||||
| if found: | if found: | ||||
| r = 0 | r = 0 | ||||
| @@ -220,10 +177,9 @@ class RandomPreimageGenerator(PreimageGenerator): | |||||
| dis_of_each_itr.append(dhat) | dis_of_each_itr.append(dhat) | ||||
| self.__itrs += 1 | self.__itrs += 1 | ||||
| if self._verbose >= 2: | if self._verbose >= 2: | ||||
| print('Total number of iterations is', self.__itrs) | |||||
| print('Total number of iterations is', self.__itrs, '.') | |||||
| print('The preimage is updated', self.__num_updates, 'times.') | print('The preimage is updated', self.__num_updates, 'times.') | ||||
| print('The shortest distances for previous iterations are', dis_of_each_itr) | |||||
| print('The shortest distances for previous iterations are', dis_of_each_itr, '.') | |||||
| # get results and print. | # get results and print. | ||||
| @@ -245,8 +201,149 @@ class RandomPreimageGenerator(PreimageGenerator): | |||||
| print('Time to generate pre-images:', self.__runtime_generate_preimage) | print('Time to generate pre-images:', self.__runtime_generate_preimage) | ||||
| print('Total time:', self.__runtime_total) | print('Total time:', self.__runtime_total) | ||||
| print('=============================================================================') | print('=============================================================================') | ||||
| print() | |||||
| print() | |||||
def __generate_l_graphs(self, g_init, fdgs, dhat, ig, found, term3):
    """Generate self.__l randomly edited copies of ``g_init`` and keep the best.

    Dispatches to the parallel or the serial implementation depending on
    ``self.__parallel``; both share the same contract.

    Parameters
    ----------
    g_init : networkx.Graph
        Graph to perturb.
    fdgs : int
        Number of vertex pairs whose edge state is flipped per trial.
    dhat : float
        Current best (smallest) distance found so far.
    ig : int
        Index of ``g_init`` in D_k U {gihat}; used for logging only.
    found : bool
        Whether a better graph was already found in this iteration.
    term3 : float
        Precomputed third term of the distance formula.

    Returns
    -------
    tuple
        ``(gnew, dhat, found)`` — the best edited graph (or None), the
        possibly-updated best distance, and the updated found flag.
    """
    generate = (self.__generate_l_graphs_parallel if self.__parallel
                else self.__generate_l_graphs_series)
    return generate(g_init, fdgs, dhat, ig, found, term3)
def __generate_l_graphs_series(self, g_init, fdgs, dhat, ig, found, term3):
    """Serially run self.__l edge-edit trials on ``g_init`` and keep the best.

    Each trial flips the existence of ``fdgs`` randomly chosen (ordered)
    vertex pairs in a copy of ``g_init``, then evaluates the kernel
    distance of the edited graph to the target.

    Parameters
    ----------
    g_init : networkx.Graph
        Graph to perturb.
    fdgs : int
        Number of vertex pairs whose edge state is flipped per trial
        (capped at the number of ordered vertex pairs).
    dhat : float
        Current best (smallest) distance; updated when a trial ties or beats it.
    ig : int
        Index of ``g_init`` in D_k U {gihat}; used for logging only.
    found : bool
        Whether a better graph was already found in this iteration.
    term3 : float
        Precomputed third term of the distance formula.

    Returns
    -------
    tuple
        ``(gnew, dhat, found)`` — best edited graph (None if no trial
        reached ``dhat``), the possibly-updated best distance, and the
        updated found flag.
    """
    gnew = None
    # Loop invariants hoisted out of the trial loop: node count, number of
    # ordered vertex pairs, and the (capped) number of flips per trial.
    nb_nodes = nx.number_of_nodes(g_init)
    # @todo: should we use just half of the adjacency matrix for undirected graphs?
    nb_vpairs = nb_nodes * (nb_nodes - 1)
    # @todo: what if fdgs is bigger than nb_vpairs?
    nb_flips = min(fdgs, nb_vpairs)
    for trail in range(0, self.__l):
        if self._verbose >= 2:
            print('---', trail + 1, 'trail out of', self.__l)
        # add and delete edges.
        gtemp = g_init.copy()
        np.random.seed()  # @todo: may not work for possible parallel.
        # which edges to change.
        idx_change = random.sample(range(nb_vpairs), nb_flips)
        for item in idx_change:
            # Decode the flat ordered-pair index into (node1, node2).
            node1 = int(item / (nb_nodes - 1))
            node2 = (item - node1 * (nb_nodes - 1))
            if node2 >= node1:  # skip the self pair.
                node2 += 1
            # @todo: is the randomness correct?
            if not gtemp.has_edge(node1, node2):
                gtemp.add_edge(node1, node2)
            else:
                gtemp.remove_edge(node1, node2)
        # compute new distances.
        kernels_to_gtmp, _ = self._graph_kernel.compute(gtemp, self._dataset.graphs, **self._kernel_options)
        kernel_gtmp, _ = self._graph_kernel.compute(gtemp, gtemp, **self._kernel_options)
        # Normalize against the unnormalized Gram diagonal.
        kernels_to_gtmp = [kernels_to_gtmp[i] / np.sqrt(self.__gram_matrix_unnorm[i, i] * kernel_gtmp) for i in range(len(kernels_to_gtmp))]
        # @todo: not correct kernel value
        gram_with_gtmp = np.concatenate((np.array([kernels_to_gtmp]), np.copy(self._graph_kernel.gram_matrix)), axis=0)
        gram_with_gtmp = np.concatenate((np.array([[1] + kernels_to_gtmp]).T, gram_with_gtmp), axis=1)
        dnew = compute_k_dis(0, range(1, 1 + len(self._dataset.graphs)), self.__alphas, gram_with_gtmp, term3=term3, withterm3=True)
        # get the better graph preimage.
        if dnew <= dhat:  # @todo: the new distance is smaller or also equal?
            if dnew < dhat:
                if self._verbose >= 2:
                    print('trail =', str(trail))
                    print('\nI am smaller!')
                    print('index (as in D_k U {gihat} =', str(ig))
                    print('distance:', dhat, '->', dnew)
                self.__num_updates += 1
            elif dnew == dhat:
                if self._verbose >= 2:
                    print('I am equal!')
            dhat = dnew
            gnew = gtemp.copy()
            found = True  # found better graph.
    return gnew, dhat, found
def __generate_l_graphs_parallel(self, g_init, fdgs, dhat, ig, found, term3):
    """Run self.__l edge-edit trials on ``g_init`` in parallel, keep the best.

    Fans the trials out to a multiprocessing pool via
    ``_generate_graph_parallel`` and then selects the trial with the
    smallest distance.

    Parameters
    ----------
    g_init : networkx.Graph
        Graph to perturb.
    fdgs : int
        Number of vertex pairs whose edge state is flipped per trial.
    dhat : float
        Current best (smallest) distance; updated when a trial ties or beats it.
    ig : int
        Index of ``g_init`` in D_k U {gihat}; used for logging only.
    found : bool
        Whether a better graph was already found in this iteration.
    term3 : float
        Precomputed third term of the distance formula.

    Returns
    -------
    tuple
        ``(gnew, dhat, found)`` — best edited graph (None if no trial
        reached ``dhat``), the possibly-updated best distance, and the
        updated found flag.
    """
    gnew = None
    len_itr = self.__l
    gnew_list = [None] * len_itr
    dnew_list = [None] * len_itr
    itr = range(0, len_itr)
    n_jobs = multiprocessing.cpu_count()
    if len_itr < 100 * n_jobs:
        chunksize = int(len_itr / n_jobs) + 1
    else:
        chunksize = 100
    do_fun = partial(self._generate_graph_parallel, g_init, fdgs, term3)
    pool = Pool(processes=n_jobs)
    try:
        if self._verbose >= 2:
            iterator = tqdm(pool.imap_unordered(do_fun, itr, chunksize),
                            desc='Generating l graphs', file=sys.stdout)
        else:
            iterator = pool.imap_unordered(do_fun, itr, chunksize)
        # Collect results under names distinct from ``gnew`` so the None
        # sentinel is not clobbered when no trial improves on dhat (the
        # original loop variable shadowed ``gnew`` and leaked an arbitrary
        # worker's graph back to the caller with found == False).
        for idx, g_trial, d_trial in iterator:
            gnew_list[idx] = g_trial
            dnew_list[idx] = d_trial
    finally:
        # Ensure workers are reaped even if iteration raises.
        pool.close()
        pool.join()
    # check if get the better graph preimage.
    idx_min = np.argmin(dnew_list)
    dnew = dnew_list[idx_min]
    if dnew <= dhat:  # @todo: the new distance is smaller or also equal?
        if dnew < dhat:
            if self._verbose >= 2:
                print('\nI am smaller!')
                print('index (as in D_k U {gihat} =', str(ig))
                print('distance:', dhat, '->', dnew)
            self.__num_updates += 1
        elif dnew == dhat:
            if self._verbose >= 2:
                print('I am equal!')
        dhat = dnew
        gnew = gnew_list[idx_min]
        found = True  # found better graph.
    return gnew, dhat, found
def _generate_graph_parallel(self, g_init, fdgs, term3, itr):
    """Pool worker: run one edge-edit trial on ``g_init``.

    Flips the existence of ``fdgs`` randomly chosen (ordered) vertex
    pairs in a copy of ``g_init`` and evaluates its kernel distance.

    Parameters
    ----------
    g_init : networkx.Graph
        Graph to perturb.
    fdgs : int
        Number of vertex pairs to flip (capped at the number of ordered
        vertex pairs).
    term3 : float
        Precomputed third term of the distance formula.
    itr : int
        Trial index, passed through so the caller can place the result.

    Returns
    -------
    tuple
        ``(trail, gtemp, dnew)`` — the trial index, the edited graph, and
        its distance.
    """
    trail = itr
    # add and delete edges.
    gtemp = g_init.copy()
    # Reseed from OS entropy so each worker process draws a different stream.
    np.random.seed()  # @todo: may not work for possible parallel.
    # which edges to change — invariants computed once per trial.
    nb_nodes = nx.number_of_nodes(g_init)
    # @todo: should we use just half of the adjacency matrix for undirected graphs?
    nb_vpairs = nb_nodes * (nb_nodes - 1)
    # @todo: what if fdgs is bigger than nb_vpairs?
    idx_change = random.sample(range(nb_vpairs), min(fdgs, nb_vpairs))
    for item in idx_change:
        # Decode the flat ordered-pair index into (node1, node2).
        node1 = int(item / (nb_nodes - 1))
        node2 = (item - node1 * (nb_nodes - 1))
        if node2 >= node1:  # skip the self pair.
            node2 += 1
        # @todo: is the randomness correct?
        if not gtemp.has_edge(node1, node2):
            gtemp.add_edge(node1, node2)
        else:
            gtemp.remove_edge(node1, node2)
    # compute new distances.
    kernels_to_gtmp, _ = self._graph_kernel.compute(gtemp, self._dataset.graphs, **self._kernel_options)
    kernel_gtmp, _ = self._graph_kernel.compute(gtemp, gtemp, **self._kernel_options)
    # Normalize against the unnormalized Gram diagonal.
    kernels_to_gtmp = [kernels_to_gtmp[i] / np.sqrt(self.__gram_matrix_unnorm[i, i] * kernel_gtmp) for i in range(len(kernels_to_gtmp))]
    # @todo: not correct kernel value
    gram_with_gtmp = np.concatenate((np.array([kernels_to_gtmp]), np.copy(self._graph_kernel.gram_matrix)), axis=0)
    gram_with_gtmp = np.concatenate((np.array([[1] + kernels_to_gtmp]).T, gram_with_gtmp), axis=1)
    dnew = compute_k_dis(0, range(1, 1 + len(self._dataset.graphs)), self.__alphas, gram_with_gtmp, term3=term3, withterm3=True)
    return trail, gtemp, dnew
| def get_results(self): | def get_results(self): | ||||
| results = {} | results = {} | ||||