| @@ -10,9 +10,11 @@ import numpy as np | |||
| import time | |||
| import random | |||
| import sys | |||
| import tqdm | |||
| from tqdm import tqdm | |||
| import multiprocessing | |||
| import networkx as nx | |||
| from multiprocessing import Pool | |||
| from functools import partial | |||
| from gklearn.preimage import PreimageGenerator | |||
| from gklearn.preimage.utils import compute_k_dis | |||
| from gklearn.utils import Timer | |||
| @@ -144,12 +146,14 @@ class RandomPreimageGenerator(PreimageGenerator): | |||
| dihat_list = [] | |||
| r = 0 | |||
| dis_of_each_itr = [dhat] | |||
| if self.__parallel: | |||
| self._kernel_options['parallel'] = None | |||
| while r < self.__r_max: | |||
| print('\n- r =', r) | |||
| found = False | |||
| dis_bests = dis_gs + dihat_list | |||
| # compute numbers of nodes to be inserted/deleted. | |||
| # compute numbers of edges to be inserted/deleted. | |||
| # @todo what if the log is negetive? how to choose alpha (scalar)? | |||
| fdgs_list = np.array(dis_bests) | |||
| if np.min(fdgs_list) < 1: | |||
| @@ -161,54 +165,7 @@ class RandomPreimageGenerator(PreimageGenerator): | |||
| for ig, gs in enumerate(Gs_nearest + gihat_list): | |||
| if self._verbose >= 2: | |||
| print('-- computing', ig + 1, 'graphs out of', len(Gs_nearest) + len(gihat_list)) | |||
| for trail in range(0, self.__l): | |||
| if self._verbose >= 2: | |||
| print('---', trail + 1, 'trail out of', self.__l) | |||
| # add and delete edges. | |||
| gtemp = gs.copy() | |||
| np.random.seed() # @todo: may not work for possible parallel. | |||
| # which edges to change. | |||
| # @todo: should we use just half of the adjacency matrix for undirected graphs? | |||
| nb_vpairs = nx.number_of_nodes(gs) * (nx.number_of_nodes(gs) - 1) | |||
| # @todo: what if fdgs is bigger than nb_vpairs? | |||
| idx_change = random.sample(range(nb_vpairs), fdgs_list[ig] if | |||
| fdgs_list[ig] < nb_vpairs else nb_vpairs) | |||
| for item in idx_change: | |||
| node1 = int(item / (nx.number_of_nodes(gs) - 1)) | |||
| node2 = (item - node1 * (nx.number_of_nodes(gs) - 1)) | |||
| if node2 >= node1: # skip the self pair. | |||
| node2 += 1 | |||
| # @todo: is the randomness correct? | |||
| if not gtemp.has_edge(node1, node2): | |||
| gtemp.add_edge(node1, node2) | |||
| else: | |||
| gtemp.remove_edge(node1, node2) | |||
| # compute new distances. | |||
| kernels_to_gtmp, _ = self._graph_kernel.compute(gtemp, D_N, **self._kernel_options) | |||
| kernel_gtmp, _ = self._graph_kernel.compute(gtemp, gtemp, **self._kernel_options) | |||
| kernels_to_gtmp = [kernels_to_gtmp[i] / np.sqrt(self.__gram_matrix_unnorm[i, i] * kernel_gtmp) for i in range(len(kernels_to_gtmp))] # normalize | |||
| # @todo: not correct kernel value | |||
| gram_with_gtmp = np.concatenate((np.array([kernels_to_gtmp]), np.copy(self._graph_kernel.gram_matrix)), axis=0) | |||
| gram_with_gtmp = np.concatenate((np.array([[1] + kernels_to_gtmp]).T, gram_with_gtmp), axis=1) | |||
| dnew = compute_k_dis(0, range(1, 1 + len(D_N)), self.__alphas, gram_with_gtmp, term3=term3, withterm3=True) | |||
| # get the better graph preimage. | |||
| if dnew <= dhat: # @todo: the new distance is smaller or also equal? | |||
| if dnew < dhat: | |||
| if self._verbose >= 2: | |||
| print('trail =', str(trail)) | |||
| print('\nI am smaller!') | |||
| print('index (as in D_k U {gihat} =', str(ig)) | |||
| print('distance:', dhat, '->', dnew) | |||
| self.__num_updates += 1 | |||
| elif dnew == dhat: | |||
| if self._verbose >= 2: | |||
| print('I am equal!') | |||
| dhat = dnew | |||
| gnew = gtemp.copy() | |||
| found = True # found better graph. | |||
| gnew, dhat, found = self.__generate_l_graphs(gs, fdgs_list[ig], dhat, ig, found, term3) | |||
| if found: | |||
| r = 0 | |||
| @@ -220,10 +177,9 @@ class RandomPreimageGenerator(PreimageGenerator): | |||
| dis_of_each_itr.append(dhat) | |||
| self.__itrs += 1 | |||
| if self._verbose >= 2: | |||
| print('Total number of iterations is', self.__itrs) | |||
| print('Total number of iterations is', self.__itrs, '.') | |||
| print('The preimage is updated', self.__num_updates, 'times.') | |||
| print('The shortest distances for previous iterations are', dis_of_each_itr) | |||
| print('The shortest distances for previous iterations are', dis_of_each_itr, '.') | |||
| # get results and print. | |||
| @@ -245,8 +201,149 @@ class RandomPreimageGenerator(PreimageGenerator): | |||
| print('Time to generate pre-images:', self.__runtime_generate_preimage) | |||
| print('Total time:', self.__runtime_total) | |||
| print('=============================================================================') | |||
| print() | |||
| print() | |||
def __generate_l_graphs(self, g_init, fdgs, dhat, ig, found, term3):
	"""Generate ``self.__l`` randomly perturbed candidates from ``g_init``
	and keep the best one found so far.

	Dispatches to the parallel or the serial implementation according to
	``self.__parallel`` and returns that implementation's result.

	Parameters
	----------
	g_init : networkx graph
		The graph to perturb.
	fdgs : int
		Number of edge toggles to apply per trial.
	dhat : float
		Current best distance.
	ig : int
		Index of ``g_init`` in the candidate set (used for logging only).
	found : bool
		Whether a better preimage has already been found this iteration.
	term3 : float
		Precomputed third term of the distance expression.

	Returns
	-------
	tuple
		``(gnew, dhat, found)`` — the best new graph (or ``None``), the
		possibly improved distance, and the updated ``found`` flag.
	"""
	if self.__parallel:
		worker = self.__generate_l_graphs_parallel
	else:
		worker = self.__generate_l_graphs_series
	return worker(g_init, fdgs, dhat, ig, found, term3)
def __generate_l_graphs_series(self, g_init, fdgs, dhat, ig, found, term3):
	"""Serial version: run ``self.__l`` perturbation trials on ``g_init``.

	Each trial delegates to ``self._generate_graph_parallel`` (the single
	trial worker, also used by the parallel path) instead of duplicating
	its ~30 lines of perturbation/distance code here, so both paths stay
	consistent by construction.

	Parameters
	----------
	g_init : networkx graph
		The graph to perturb.
	fdgs : int
		Number of edge toggles to apply per trial.
	dhat : float
		Current best distance.
	ig : int
		Index of ``g_init`` in the candidate set (logging only).
	found : bool
		Whether a better preimage was already found this iteration.
	term3 : float
		Precomputed third term of the distance expression.

	Returns
	-------
	tuple
		``(gnew, dhat, found)`` — best new graph (``None`` if none beat
		``dhat``), the possibly improved distance, and the updated flag.
	"""
	gnew = None
	for trail in range(0, self.__l):
		if self._verbose >= 2:
			print('---', trail + 1, 'trail out of', self.__l)
		# Perturb a copy of g_init and measure its distance (one trial).
		_, gtemp, dnew = self._generate_graph_parallel(g_init, fdgs, term3, trail)
		# Keep the graph if it is at least as good as the current best.
		if dnew <= dhat: # @todo: the new distance is smaller or also equal?
			if dnew < dhat:
				if self._verbose >= 2:
					print('trail =', str(trail))
					print('\nI am smaller!')
					print('index (as in D_k U {gihat} =', str(ig))
					print('distance:', dhat, '->', dnew)
				self.__num_updates += 1
			elif dnew == dhat:
				if self._verbose >= 2:
					print('I am equal!')
			dhat = dnew
			gnew = gtemp.copy()
			found = True # found better graph.
	return gnew, dhat, found
def __generate_l_graphs_parallel(self, g_init, fdgs, dhat, ig, found, term3):
	"""Parallel version: run ``self.__l`` perturbation trials on ``g_init``
	across a process pool and keep the best result.

	Fixes over the previous revision: the pool is managed by a ``with``
	block so it is torn down even when a worker raises (it used to leak),
	the loop no longer shadows the ``gnew`` result variable, and an empty
	trial count no longer crashes ``np.argmin``.

	Parameters
	----------
	g_init : networkx graph
		The graph to perturb.
	fdgs : int
		Number of edge toggles to apply per trial.
	dhat : float
		Current best distance.
	ig : int
		Index of ``g_init`` in the candidate set (logging only).
	found : bool
		Whether a better preimage was already found this iteration.
	term3 : float
		Precomputed third term of the distance expression.

	Returns
	-------
	tuple
		``(gnew, dhat, found)`` — best new graph (``None`` if none beat
		``dhat``), the possibly improved distance, and the updated flag.
	"""
	gnew = None
	len_itr = self.__l
	if len_itr == 0: # no trials requested; nothing to compare.
		return gnew, dhat, found
	gnew_list = [None] * len_itr
	dnew_list = [None] * len_itr
	n_jobs = multiprocessing.cpu_count()
	# Heuristic chunk size: spread trials evenly over the workers, capped
	# at 100 tasks per chunk to keep load balancing reasonable.
	if len_itr < 100 * n_jobs:
		chunksize = int(len_itr / n_jobs) + 1
	else:
		chunksize = 100
	do_fun = partial(self._generate_graph_parallel, g_init, fdgs, term3)
	# Context manager guarantees cleanup of the pool on any exit path.
	with Pool(processes=n_jobs) as pool:
		iterator = pool.imap_unordered(do_fun, range(0, len_itr), chunksize)
		if self._verbose >= 2:
			iterator = tqdm(iterator, desc='Generating l graphs', file=sys.stdout)
		# Results arrive out of order; slot them back by trial index.
		for idx, gtemp, dtemp in iterator:
			gnew_list[idx] = gtemp
			dnew_list[idx] = dtemp
	# Check whether the best trial improves on the current preimage.
	idx_min = np.argmin(dnew_list)
	dnew = dnew_list[idx_min]
	if dnew <= dhat: # @todo: the new distance is smaller or also equal?
		if dnew < dhat:
			if self._verbose >= 2:
				print('\nI am smaller!')
				print('index (as in D_k U {gihat} =', str(ig))
				print('distance:', dhat, '->', dnew)
			self.__num_updates += 1
		elif dnew == dhat:
			if self._verbose >= 2:
				print('I am equal!')
		dhat = dnew
		gnew = gnew_list[idx_min]
		found = True # found better graph.
	return gnew, dhat, found
def _generate_graph_parallel(self, g_init, fdgs, term3, itr):
	"""Run one random-perturbation trial (worker for both the serial and
	the parallel candidate generators).

	Toggles up to ``fdgs`` randomly chosen ordered node pairs of a copy
	of ``g_init`` — adding the edge when absent, removing it when present
	— then computes the normalized-kernel distance of the perturbed graph
	to the weighted mean of the dataset graphs.

	Parameters
	----------
	g_init : networkx graph
		The graph to perturb (not modified).
	fdgs : int
		Number of edge toggles to apply.
	term3 : float
		Precomputed third term of the distance expression.
	itr : int
		Trial index; returned unchanged so the caller can slot the
		out-of-order result back into place.

	Returns
	-------
	tuple
		``(itr, gtemp, dnew)`` — the trial index, the perturbed graph,
		and its distance to the target mean.
	"""
	trail = itr
	# add and delete edges.
	gtemp = g_init.copy()
	# Reseed the RNG actually used below (the `random` module, not NumPy
	# — the old `np.random.seed()` left forked workers with identical
	# `random` states, so workers could draw duplicate perturbations).
	random.seed()
	nb_nodes = g_init.number_of_nodes()
	# Number of ordered node pairs, self-pairs excluded (both (u, v) and
	# (v, u) are counted).
	# @todo: should we use just half of the adjacency matrix for undirected graphs?
	nb_vpairs = nb_nodes * (nb_nodes - 1)
	# Cap the number of toggled pairs at the number of available pairs.
	idx_change = random.sample(range(nb_vpairs), min(fdgs, nb_vpairs))
	for item in idx_change:
		# Decode the flat pair index into a (node1, node2) pair.
		node1 = item // (nb_nodes - 1)
		node2 = item - node1 * (nb_nodes - 1)
		if node2 >= node1: # skip the self pair.
			node2 += 1
		# Toggle the edge: add it when absent, remove it when present.
		# @todo: is the randomness correct?
		if gtemp.has_edge(node1, node2):
			gtemp.remove_edge(node1, node2)
		else:
			gtemp.add_edge(node1, node2)
	# compute new distances: kernels between the perturbed graph and each
	# dataset graph, plus its self-kernel for normalization.
	kernels_to_gtmp, _ = self._graph_kernel.compute(gtemp, self._dataset.graphs, **self._kernel_options)
	kernel_gtmp, _ = self._graph_kernel.compute(gtemp, gtemp, **self._kernel_options)
	kernels_to_gtmp = [kernels_to_gtmp[i] / np.sqrt(self.__gram_matrix_unnorm[i, i] * kernel_gtmp) for i in range(len(kernels_to_gtmp))] # normalize
	# @todo: not correct kernel value
	# Border the Gram matrix with the new graph's kernel row/column so
	# compute_k_dis can treat it as index 0.
	gram_with_gtmp = np.concatenate((np.array([kernels_to_gtmp]), np.copy(self._graph_kernel.gram_matrix)), axis=0)
	gram_with_gtmp = np.concatenate((np.array([[1] + kernels_to_gtmp]).T, gram_with_gtmp), axis=1)
	dnew = compute_k_dis(0, range(1, 1 + len(self._dataset.graphs)), self.__alphas, gram_with_gtmp, term3=term3, withterm3=True)
	return trail, gtemp, dnew
| def get_results(self): | |||
| results = {} | |||