@@ -5,15 +5,15 @@ Created on Tue Aug 18 11:21:31 2020

 @author: ljia

 @references:

     [1] Thomas Gärtner, Peter Flach, and Stefan Wrobel. On graph kernels:
     Hardness results and efficient alternatives. Learning Theory and Kernel
     Machines, pages 129–143, 2003.
 """
 import sys
-from tqdm import tqdm
+from gklearn.utils import get_iters
 import numpy as np
 import networkx as nx
 from gklearn.utils import SpecialLabel
@@ -23,7 +23,7 @@ from gklearn.kernels import GraphKernel

 class CommonWalk(GraphKernel):

     def __init__(self, **kwargs):
         GraphKernel.__init__(self)
         self._node_labels = kwargs.get('node_labels', [])
@@ -39,17 +39,16 @@ class CommonWalk(GraphKernel):
         self._add_dummy_labels(self._graphs)
         if not self._ds_infos['directed']:  # convert
             self._graphs = [G.to_directed() for G in self._graphs]

         # compute Gram matrix.
         gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))

         from itertools import combinations_with_replacement
         itr = combinations_with_replacement(range(0, len(self._graphs)), 2)
-        if self._verbose >= 2:
-            iterator = tqdm(itr, desc='Computing kernels', file=sys.stdout)
-        else:
-            iterator = itr
+        len_itr = int(len(self._graphs) * (len(self._graphs) + 1) / 2)
+        iterator = get_iters(itr, desc='Computing kernels', file=sys.stdout,
+            length=len_itr, verbose=(self._verbose >= 2))

         # direct product graph method - exponential
         if self._compute_method == 'exp':
             for i, j in iterator:
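The change running through all four files is the same: the hand-written `if self._verbose >= 2: ... tqdm(...) ... else ...` branching is collapsed into one call to gklearn's own `get_iters` helper. A minimal sketch of the behavior the new call sites rely on, assuming `get_iters` is a thin wrapper over `tqdm` (the real implementation in `gklearn.utils` may differ):

from tqdm import tqdm

def get_iters(itr, desc=None, file=None, length=None, verbose=True, **kwargs):
    # Wrap with a progress bar only when verbose output is requested;
    # 'length' supplies tqdm's 'total' for generators such as
    # combinations_with_replacement, which have no len().
    if verbose:
        return tqdm(itr, desc=desc, file=file, total=length, **kwargs)
    return itr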
@@ -62,50 +61,51 @@ class CommonWalk(GraphKernel):
                 kernel = self._kernel_do_geo(self._graphs[i], self._graphs[j], self._weight)
                 gram_matrix[i][j] = kernel
                 gram_matrix[j][i] = kernel

         return gram_matrix

     def _compute_gm_imap_unordered(self):
         self._check_graphs(self._graphs)
         self._add_dummy_labels(self._graphs)
         if not self._ds_infos['directed']:  # convert
             self._graphs = [G.to_directed() for G in self._graphs]

         # compute Gram matrix.
         gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))

         # def init_worker(gn_toshare):
         #     global G_gn
         #     G_gn = gn_toshare

         # direct product graph method - exponential
         if self._compute_method == 'exp':
             do_fun = self._wrapper_kernel_do_exp
         # direct product graph method - geometric
         elif self._compute_method == 'geo':
             do_fun = self._wrapper_kernel_do_geo

         parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=_init_worker_gm,
             glbv=(self._graphs,), n_jobs=self._n_jobs, verbose=self._verbose)

         return gram_matrix

     def _compute_kernel_list_series(self, g1, g_list):
         self._check_graphs(g_list + [g1])
         self._add_dummy_labels(g_list + [g1])
         if not self._ds_infos['directed']:  # convert
             g1 = g1.to_directed()
             g_list = [G.to_directed() for G in g_list]

         # compute kernel list.
         kernel_list = [None] * len(g_list)

         if self._verbose >= 2:
-            iterator = tqdm(range(len(g_list)), desc='Computing kernels', file=sys.stdout)
+            iterator = get_iters(range(len(g_list)), desc='Computing kernels',
+                file=sys.stdout, length=len(g_list), verbose=(self._verbose >= 2))
         else:
             iterator = range(len(g_list))

         # direct product graph method - exponential
         if self._compute_method == 'exp':
             for i in iterator:
@@ -116,17 +116,17 @@ class CommonWalk(GraphKernel):
             for i in iterator:
                 kernel = self._kernel_do_geo(g1, g_list[i], self._weight)
                 kernel_list[i] = kernel

         return kernel_list

     def _compute_kernel_list_imap_unordered(self, g1, g_list):
         self._check_graphs(g_list + [g1])
         self._add_dummy_labels(g_list + [g1])
         if not self._ds_infos['directed']:  # convert
             g1 = g1.to_directed()
             g_list = [G.to_directed() for G in g_list]

         # compute kernel list.
         kernel_list = [None] * len(g_list)
@@ -134,61 +134,61 @@ class CommonWalk(GraphKernel):
         #     global G_g1, G_g_list
         #     G_g1 = g1_toshare
         #     G_g_list = g_list_toshare

         # direct product graph method - exponential
         if self._compute_method == 'exp':
             do_fun = self._wrapper_kernel_list_do_exp
         # direct product graph method - geometric
         elif self._compute_method == 'geo':
             do_fun = self._wrapper_kernel_list_do_geo

         def func_assign(result, var_to_assign):
             var_to_assign[result[0]] = result[1]
         itr = range(len(g_list))
         len_itr = len(g_list)
         parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr,
             init_worker=_init_worker_list, glbv=(g1, g_list), method='imap_unordered',
             n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose)

         return kernel_list

     def _wrapper_kernel_list_do_exp(self, itr):
         return itr, self._kernel_do_exp(G_g1, G_g_list[itr], self._weight)

     def _wrapper_kernel_list_do_geo(self, itr):
         return itr, self._kernel_do_geo(G_g1, G_g_list[itr], self._weight)

     def _compute_single_kernel_series(self, g1, g2):
         self._check_graphs([g1] + [g2])
         self._add_dummy_labels([g1] + [g2])
         if not self._ds_infos['directed']:  # convert
             g1 = g1.to_directed()
             g2 = g2.to_directed()

         # direct product graph method - exponential
         if self._compute_method == 'exp':
             kernel = self._kernel_do_exp(g1, g2, self._weight)
         # direct product graph method - geometric
         elif self._compute_method == 'geo':
             kernel = self._kernel_do_geo(g1, g2, self._weight)

         return kernel

     def _kernel_do_exp(self, g1, g2, beta):
         """Compute common walk graph kernel between 2 graphs using exponential
         series.

         Parameters
         ----------
         g1, g2 : NetworkX graphs
             Graphs between which the kernels are computed.
         beta : integer
             Weight.

         Return
         ------
         kernel : float
@@ -200,9 +200,9 @@ class CommonWalk(GraphKernel):
         if nx.number_of_nodes(gp) < 2:
             return 0

         A = nx.adjacency_matrix(gp).todense()
         ew, ev = np.linalg.eig(A)

         # # remove imaginary part if possible.
         # # @todo: don't know if it is necessary.
         # for i in range(len(ew)):
         #     if np.abs(ew[i].imag) < 1e-9:
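For context on the eigendecomposition above: `_kernel_do_exp` builds the direct product graph of g1 and g2, takes its adjacency matrix A, and evaluates the matrix exponential through exp(βA) = V·diag(e^{βλ_i})·V⁻¹ before summing all entries. A standalone sketch of that step (hypothetical helper, not part of the diff):

import numpy as np

def exp_series_kernel(A, beta):
    # exp(beta * A) via eigendecomposition: A = V diag(w) V^{-1}
    # implies exp(beta * A) = V diag(exp(beta * w)) V^{-1}.
    w, V = np.linalg.eig(A)
    exp_D = V @ np.diag(np.exp(beta * w)) @ np.linalg.inv(V)
    # Summing all entries counts common walks between all start/end
    # vertex pairs, weighted by the exp-series coefficients beta^n / n!.
    return exp_D.sum()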
@@ -220,27 +220,27 @@ class CommonWalk(GraphKernel):
         kernel = exp_D.sum()
         if (kernel.real == 0 and np.abs(kernel.imag) < 1e-9) or np.abs(kernel.imag / kernel.real) < 1e-9:
             kernel = kernel.real

         return kernel

     def _wrapper_kernel_do_exp(self, itr):
         i = itr[0]
         j = itr[1]
         return i, j, self._kernel_do_exp(G_gn[i], G_gn[j], self._weight)

     def _kernel_do_geo(self, g1, g2, gamma):
         """Compute common walk graph kernel between 2 graphs using geometric
         series.

         Parameters
         ----------
         g1, g2 : NetworkX graphs
             Graphs between which the kernels are computed.
         gamma : integer
             Weight.

         Return
         ------
         kernel : float
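The geometric variant replaces the matrix exponential with the resolvent of the same adjacency matrix: Σ_n γⁿAⁿ = (I − γA)⁻¹, again summed over all entries. A sketch under the same assumptions (hypothetical helper; the series only converges for γ smaller than the reciprocal of A's spectral radius):

import numpy as np

def geo_series_kernel(A, gamma):
    # Geometric walk series: sum_n gamma^n * A^n = (I - gamma * A)^{-1}.
    n = A.shape[0]
    M = np.linalg.inv(np.eye(n) - gamma * A)
    return M.sum()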
@@ -258,19 +258,19 @@ class CommonWalk(GraphKernel):
         # except np.linalg.LinAlgError:
         #     return np.nan

     def _wrapper_kernel_do_geo(self, itr):
         i = itr[0]
         j = itr[1]
         return i, j, self._kernel_do_geo(G_gn[i], G_gn[j], self._weight)

     def _check_graphs(self, Gn):
         for g in Gn:
             if nx.number_of_nodes(g) == 1:
                 raise Exception('Graphs must contain more than 1 node to construct adjacency matrices.')

     def _add_dummy_labels(self, Gn):
         if len(self._node_labels) == 0 or (len(self._node_labels) == 1 and self._node_labels[0] == SpecialLabel.DUMMY):
             for i in range(len(Gn)):
@@ -280,13 +280,13 @@ class CommonWalk(GraphKernel):
         for i in range(len(Gn)):
             nx.set_edge_attributes(Gn[i], '0', SpecialLabel.DUMMY)
         self._edge_labels = [SpecialLabel.DUMMY]


 def _init_worker_gm(gn_toshare):
     global G_gn
     G_gn = gn_toshare


 def _init_worker_list(g1_toshare, g_list_toshare):
     global G_g1, G_g_list
     G_g1 = g1_toshare
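The CommonWalk hunks end here. A plausible invocation of the class, assuming the constructor kwargs visible in the hunks above and a `compute` entry point inherited from `GraphKernel` (the argument names of `compute` are an assumption, not confirmed by this diff):

import networkx as nx
from gklearn.kernels import CommonWalk

# Two toy graphs; CommonWalk rejects single-node graphs.
g1 = nx.path_graph(3)
g2 = nx.cycle_graph(4)

kernel = CommonWalk(node_labels=[], edge_labels=[],
                    ds_infos={'directed': False},
                    weight=0.01, compute_method='geo')
gram_matrix, run_time = kernel.compute([g1, g2], parallel=None,
                                       n_jobs=1, verbose=2)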
@@ -5,13 +5,13 @@ Created on Thu Aug 20 16:09:51 2020

 @author: ljia

 @references:

     [1] S Vichy N Vishwanathan, Nicol N Schraudolph, Risi Kondor, and Karsten M Borgwardt. Graph kernels. Journal of Machine Learning Research, 11(Apr):1201–1242, 2010.
 """
 import sys
-from tqdm import tqdm
+from gklearn.utils import get_iters
 import numpy as np
 import networkx as nx
 from scipy.sparse import identity
@@ -22,8 +22,8 @@ from gklearn.utils.utils import compute_vertex_kernels

 class ConjugateGradient(RandomWalkMeta):

     def __init__(self, **kwargs):
         super().__init__(**kwargs)
         self._node_kernels = kwargs.get('node_kernels', None)
@@ -32,33 +32,28 @@ class ConjugateGradient(RandomWalkMeta):
         self._edge_labels = kwargs.get('edge_labels', [])
         self._node_attrs = kwargs.get('node_attrs', [])
         self._edge_attrs = kwargs.get('edge_attrs', [])

     def _compute_gm_series(self):
         self._check_edge_weight(self._graphs, self._verbose)
         self._check_graphs(self._graphs)

         lmda = self._weight

         # Compute Gram matrix.
         gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))

         # Reindex nodes using consecutive integers for the convenience of kernel computation.
-        if self._verbose >= 2:
-            iterator = tqdm(self._graphs, desc='Reindex vertices', file=sys.stdout)
-        else:
-            iterator = self._graphs
+        iterator = get_iters(self._graphs, desc='Reindex vertices', file=sys.stdout, verbose=(self._verbose >= 2))
         self._graphs = [nx.convert_node_labels_to_integers(g, first_label=0, label_attribute='label_orignal') for g in iterator]

         if self._p is None and self._q is None:  # p and q are uniform distributions as default.
             from itertools import combinations_with_replacement
             itr = combinations_with_replacement(range(0, len(self._graphs)), 2)
-            if self._verbose >= 2:
-                iterator = tqdm(itr, desc='Computing kernels', file=sys.stdout)
-            else:
-                iterator = itr
+            len_itr = int(len(self._graphs) * (len(self._graphs) + 1) / 2)
+            iterator = get_iters(itr, desc='Computing kernels', file=sys.stdout, length=len_itr, verbose=(self._verbose >= 2))

             for i, j in iterator:
                 kernel = self._kernel_do(self._graphs[i], self._graphs[j], lmda)
                 gram_matrix[i][j] = kernel
@@ -66,92 +61,79 @@ class ConjugateGradient(RandomWalkMeta):
         else:  # @todo
             pass

         return gram_matrix

     def _compute_gm_imap_unordered(self):
         self._check_edge_weight(self._graphs, self._verbose)
         self._check_graphs(self._graphs)

         # Compute Gram matrix.
         gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))

         # @todo: parallel this.
         # Reindex nodes using consecutive integers for the convenience of kernel computation.
-        if self._verbose >= 2:
-            iterator = tqdm(self._graphs, desc='Reindex vertices', file=sys.stdout)
-        else:
-            iterator = self._graphs
+        iterator = get_iters(self._graphs, desc='Reindex vertices', file=sys.stdout, verbose=(self._verbose >= 2))
         self._graphs = [nx.convert_node_labels_to_integers(g, first_label=0, label_attribute='label_orignal') for g in iterator]

         if self._p is None and self._q is None:  # p and q are uniform distributions as default.
             def init_worker(gn_toshare):
                 global G_gn
                 G_gn = gn_toshare

             do_fun = self._wrapper_kernel_do
             parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker,
                 glbv=(self._graphs,), n_jobs=self._n_jobs, verbose=self._verbose)
         else:  # @todo
             pass

         return gram_matrix

     def _compute_kernel_list_series(self, g1, g_list):
         self._check_edge_weight(g_list + [g1], self._verbose)
         self._check_graphs(g_list + [g1])

         lmda = self._weight

         # compute kernel list.
         kernel_list = [None] * len(g_list)

         # Reindex nodes using consecutive integers for the convenience of kernel computation.
         g1 = nx.convert_node_labels_to_integers(g1, first_label=0, label_attribute='label_orignal')
-        if self._verbose >= 2:
-            iterator = tqdm(g_list, desc='Reindex vertices', file=sys.stdout)
-        else:
-            iterator = g_list
+        iterator = get_iters(g_list, desc='Reindex vertices', file=sys.stdout, verbose=(self._verbose >= 2))
         g_list = [nx.convert_node_labels_to_integers(g, first_label=0, label_attribute='label_orignal') for g in iterator]

         if self._p is None and self._q is None:  # p and q are uniform distributions as default.
-            if self._verbose >= 2:
-                iterator = tqdm(range(len(g_list)), desc='Computing kernels', file=sys.stdout)
-            else:
-                iterator = range(len(g_list))
+            iterator = get_iters(range(len(g_list)), desc='Computing kernels', file=sys.stdout, length=len(g_list), verbose=(self._verbose >= 2))

             for i in iterator:
                 kernel = self._kernel_do(g1, g_list[i], lmda)
                 kernel_list[i] = kernel
         else:  # @todo
             pass

         return kernel_list

     def _compute_kernel_list_imap_unordered(self, g1, g_list):
         self._check_edge_weight(g_list + [g1], self._verbose)
         self._check_graphs(g_list + [g1])

         # compute kernel list.
         kernel_list = [None] * len(g_list)

         # Reindex nodes using consecutive integers for the convenience of kernel computation.
         g1 = nx.convert_node_labels_to_integers(g1, first_label=0, label_attribute='label_orignal')
         # @todo: parallel this.
-        if self._verbose >= 2:
-            iterator = tqdm(g_list, desc='Reindex vertices', file=sys.stdout)
-        else:
-            iterator = g_list
+        iterator = get_iters(g_list, desc='Reindex vertices', file=sys.stdout, verbose=(self._verbose >= 2))
         g_list = [nx.convert_node_labels_to_integers(g, first_label=0, label_attribute='label_orignal') for g in iterator]

         if self._p is None and self._q is None:  # p and q are uniform distributions as default.
             def init_worker(g1_toshare, g_list_toshare):
@@ -159,56 +141,56 @@ class ConjugateGradient(RandomWalkMeta):
                 G_g1 = g1_toshare
                 G_g_list = g_list_toshare

             do_fun = self._wrapper_kernel_list_do

             def func_assign(result, var_to_assign):
                 var_to_assign[result[0]] = result[1]
             itr = range(len(g_list))
             len_itr = len(g_list)
             parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr,
                 init_worker=init_worker, glbv=(g1, g_list), method='imap_unordered',
                 n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose)
         else:  # @todo
             pass

         return kernel_list

     def _wrapper_kernel_list_do(self, itr):
         return itr, self._kernel_do(G_g1, G_g_list[itr], self._weight)

     def _compute_single_kernel_series(self, g1, g2):
         self._check_edge_weight([g1] + [g2], self._verbose)
         self._check_graphs([g1] + [g2])

         lmda = self._weight

         # Reindex nodes using consecutive integers for the convenience of kernel computation.
         g1 = nx.convert_node_labels_to_integers(g1, first_label=0, label_attribute='label_orignal')
         g2 = nx.convert_node_labels_to_integers(g2, first_label=0, label_attribute='label_orignal')

         if self._p is None and self._q is None:  # p and q are uniform distributions as default.
             kernel = self._kernel_do(g1, g2, lmda)
         else:  # @todo
             pass

         return kernel

     def _kernel_do(self, g1, g2, lmda):
         # First, compute kernels between all pairs of nodes using the method borrowed
         # from FCSP. It is faster than directly computing all edge kernels
         # when $d_1 d_2 > 2$, where $d_1$ and $d_2$ are the vertex degrees of the
         # graphs compared, which is the most common case. For very
         # sparse graphs, this would be slow.
         vk_dict = self._compute_vertex_kernels(g1, g2)

         # Compute the weight matrix of the direct product graph.
         w_times, w_dim = self._compute_weight_matrix(g1, g2, vk_dict)
         # use uniform distribution if there is no prior knowledge.
         p_times_uni = 1 / w_dim
         A = identity(w_times.shape[0]) - w_times * lmda
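Between this hunk and the next, the kernel reduces to one linear solve: with uniform p and q over the w_dim product-graph vertices, k(g1, g2) = qᵀ(I − λW)⁻¹p, presumably solved with scipy's conjugate gradient routine given the class name. A self-contained sketch of that step (hypothetical standalone function; the class gets W from `_compute_weight_matrix`):

import numpy as np
from scipy.sparse.linalg import cg

def random_walk_kernel_cg(w_times, lmda):
    # Uniform start/stop distributions over direct-product vertices.
    w_dim = w_times.shape[0]
    p_times = np.full(w_dim, 1.0 / w_dim)
    q_times = np.full(w_dim, 1.0 / w_dim)
    # Solve (I - lmda * W) x = p; CG is appropriate while the system
    # matrix stays symmetric positive definite (small enough lmda).
    A = np.eye(w_dim) - lmda * w_times
    x, info = cg(A, p_times)
    return float(q_times @ x)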
@@ -217,27 +199,27 @@ class ConjugateGradient(RandomWalkMeta):
             # use uniform distribution if there is no prior knowledge.
             q_times = np.full((1, w_dim), p_times_uni)
             return np.dot(q_times, x)

     def _wrapper_kernel_do(self, itr):
         i = itr[0]
         j = itr[1]
         return i, j, self._kernel_do(G_gn[i], G_gn[j], self._weight)

     def _func_fp(x, p_times, lmda, w_times):
         haha = w_times * x
         haha = lmda * haha
         haha = p_times + haha
         return p_times + lmda * np.dot(w_times, x)

     def _compute_vertex_kernels(self, g1, g2):
         """Compute vertex kernels between vertices of two graphs.
         """
         return compute_vertex_kernels(g1, g2, self._node_kernels, node_labels=self._node_labels, node_attrs=self._node_attrs)

     # @todo: move if out to make it faster.
     # @todo: node/edge kernels use direct function rather than dicts.
     def _compute_weight_matrix(self, g1, g2, vk_dict):
@@ -250,20 +232,20 @@ class ConjugateGradient(RandomWalkMeta):
             e1_attrs = [e1[2][ea] for ea in self._edge_attrs]
             e2_attrs = [e2[2][ea] for ea in self._edge_attrs]
             return ke(e1_labels, e2_labels, e1_attrs, e2_attrs)

         def compute_ek_10(e1, e2, ke):
             e1_labels = [e1[2][el] for el in self._edge_labels]
             e2_labels = [e2[2][el] for el in self._edge_labels]
             return ke(e1_labels, e2_labels)

         def compute_ek_01(e1, e2, ke):
             e1_attrs = [e1[2][ea] for ea in self._edge_attrs]
             e2_attrs = [e2[2][ea] for ea in self._edge_attrs]
             return ke(e1_attrs, e2_attrs)

         def compute_ek_00(e1, e2, ke):
             return 1

         # Select the proper edge kernel.
         if len(self._edge_labels) > 0:
             # edge symb and non-symb labeled
@@ -283,11 +265,11 @@ class ConjugateGradient(RandomWalkMeta):
         else:
             ke = None
             ek_temp = compute_ek_00  # @todo: check how much slower is this.

         # Compute the weight matrix.
         w_dim = nx.number_of_nodes(g1) * nx.number_of_nodes(g2)
         w_times = np.zeros((w_dim, w_dim))
         if vk_dict:  # node labeled
             if self._ds_infos['directed']:
                 for e1 in g1.edges(data=True):
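The ConjugateGradient hunks end here. The indexing scheme `_compute_weight_matrix` relies on: direct-product vertex (u, v), u from g1 and v from g2, is flattened row-major to u * |V(g2)| + v, and each compatible edge pair contributes an entry weighted by the two endpoint vertex kernels and the edge kernel. A simplified sketch for the node- and edge-labeled directed case (hypothetical helper mirroring the loop this hunk opens):

import networkx as nx
import numpy as np

def weight_matrix_sketch(g1, g2, vk_dict, ek):
    # One product vertex per pair (u in g1, v in g2): (u, v) -> u * n2 + v.
    n2 = nx.number_of_nodes(g2)
    w_dim = nx.number_of_nodes(g1) * n2
    w_times = np.zeros((w_dim, w_dim))
    for e1 in g1.edges(data=True):
        for e2 in g2.edges(data=True):
            w_idx = (e1[0] * n2 + e2[0], e1[1] * n2 + e2[1])
            # Endpoint vertex kernels times the edge kernel.
            w_times[w_idx] = (vk_dict[(e1[0], e2[0])] * ek(e1, e2)
                              * vk_dict[(e1[1], e2[1])])
    return w_times, w_dim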
@@ -5,13 +5,13 @@ Created on Thu Aug 20 16:09:51 2020

 @author: ljia

 @references:

     [1] S Vichy N Vishwanathan, Nicol N Schraudolph, Risi Kondor, and Karsten M Borgwardt. Graph kernels. Journal of Machine Learning Research, 11(Apr):1201–1242, 2010.
 """
 import sys
-from tqdm import tqdm
+from gklearn.utils import get_iters
 import numpy as np
 import networkx as nx
 from scipy import optimize
@@ -22,8 +22,8 @@ from gklearn.utils.utils import compute_vertex_kernels

 class FixedPoint(RandomWalkMeta):

     def __init__(self, **kwargs):
         super().__init__(**kwargs)
         self._node_kernels = kwargs.get('node_kernels', None)
@@ -32,33 +32,28 @@ class FixedPoint(RandomWalkMeta):
         self._edge_labels = kwargs.get('edge_labels', [])
         self._node_attrs = kwargs.get('node_attrs', [])
         self._edge_attrs = kwargs.get('edge_attrs', [])

     def _compute_gm_series(self):
         self._check_edge_weight(self._graphs, self._verbose)
         self._check_graphs(self._graphs)

         lmda = self._weight

         # Compute Gram matrix.
         gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))

         # Reindex nodes using consecutive integers for the convenience of kernel computation.
-        if self._verbose >= 2:
-            iterator = tqdm(self._graphs, desc='Reindex vertices', file=sys.stdout)
-        else:
-            iterator = self._graphs
+        iterator = get_iters(self._graphs, desc='Reindex vertices', file=sys.stdout, verbose=(self._verbose >= 2))
         self._graphs = [nx.convert_node_labels_to_integers(g, first_label=0, label_attribute='label_orignal') for g in iterator]

         if self._p is None and self._q is None:  # p and q are uniform distributions as default.
             from itertools import combinations_with_replacement
             itr = combinations_with_replacement(range(0, len(self._graphs)), 2)
-            if self._verbose >= 2:
-                iterator = tqdm(itr, desc='Computing kernels', file=sys.stdout)
-            else:
-                iterator = itr
+            len_itr = int(len(self._graphs) * (len(self._graphs) + 1) / 2)
+            iterator = get_iters(itr, desc='Computing kernels', file=sys.stdout, length=len_itr, verbose=(self._verbose >= 2))

             for i, j in iterator:
                 kernel = self._kernel_do(self._graphs[i], self._graphs[j], lmda)
                 gram_matrix[i][j] = kernel
@@ -66,92 +61,80 @@ class FixedPoint(RandomWalkMeta):
         else:  # @todo
             pass

         return gram_matrix

     def _compute_gm_imap_unordered(self):
         self._check_edge_weight(self._graphs, self._verbose)
         self._check_graphs(self._graphs)

         # Compute Gram matrix.
         gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))

         # @todo: parallel this.
         # Reindex nodes using consecutive integers for the convenience of kernel computation.
-        if self._verbose >= 2:
-            iterator = tqdm(self._graphs, desc='Reindex vertices', file=sys.stdout)
-        else:
-            iterator = self._graphs
+        iterator = get_iters(self._graphs, desc='Reindex vertices', file=sys.stdout, verbose=(self._verbose >= 2))
         self._graphs = [nx.convert_node_labels_to_integers(g, first_label=0, label_attribute='label_orignal') for g in iterator]

         if self._p is None and self._q is None:  # p and q are uniform distributions as default.
             def init_worker(gn_toshare):
                 global G_gn
                 G_gn = gn_toshare

             do_fun = self._wrapper_kernel_do
             parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker,
                 glbv=(self._graphs,), n_jobs=self._n_jobs, verbose=self._verbose)
         else:  # @todo
             pass

         return gram_matrix

     def _compute_kernel_list_series(self, g1, g_list):
         self._check_edge_weight(g_list + [g1], self._verbose)
         self._check_graphs(g_list + [g1])

         lmda = self._weight

         # compute kernel list.
         kernel_list = [None] * len(g_list)

         # Reindex nodes using consecutive integers for the convenience of kernel computation.
         g1 = nx.convert_node_labels_to_integers(g1, first_label=0, label_attribute='label_orignal')
-        if self._verbose >= 2:
-            iterator = tqdm(g_list, desc='Reindex vertices', file=sys.stdout)
-        else:
-            iterator = g_list
+        iterator = get_iters(g_list, desc='Reindex vertices', file=sys.stdout, verbose=(self._verbose >= 2))
         g_list = [nx.convert_node_labels_to_integers(g, first_label=0, label_attribute='label_orignal') for g in iterator]

         if self._p is None and self._q is None:  # p and q are uniform distributions as default.
-            if self._verbose >= 2:
-                iterator = tqdm(range(len(g_list)), desc='Computing kernels', file=sys.stdout)
-            else:
-                iterator = range(len(g_list))
+            iterator = get_iters(range(len(g_list)), desc='Computing kernels', file=sys.stdout, length=len(g_list), verbose=(self._verbose >= 2))

             for i in iterator:
                 kernel = self._kernel_do(g1, g_list[i], lmda)
                 kernel_list[i] = kernel
         else:  # @todo
             pass

         return kernel_list

     def _compute_kernel_list_imap_unordered(self, g1, g_list):
         self._check_edge_weight(g_list + [g1], self._verbose)
         self._check_graphs(g_list + [g1])

         # compute kernel list.
         kernel_list = [None] * len(g_list)

         # Reindex nodes using consecutive integers for the convenience of kernel computation.
         g1 = nx.convert_node_labels_to_integers(g1, first_label=0, label_attribute='label_orignal')
         # @todo: parallel this.
-        if self._verbose >= 2:
-            iterator = tqdm(g_list, desc='Reindex vertices', file=sys.stdout)
-        else:
-            iterator = g_list
+        iterator = get_iters(g_list, desc='Reindex vertices', file=sys.stdout, verbose=(self._verbose >= 2))
         g_list = [nx.convert_node_labels_to_integers(g, first_label=0, label_attribute='label_orignal') for g in iterator]

         if self._p is None and self._q is None:  # p and q are uniform distributions as default.
             def init_worker(g1_toshare, g_list_toshare):
@@ -159,56 +142,56 @@ class FixedPoint(RandomWalkMeta):
                 G_g1 = g1_toshare
                 G_g_list = g_list_toshare

             do_fun = self._wrapper_kernel_list_do

             def func_assign(result, var_to_assign):
                 var_to_assign[result[0]] = result[1]
             itr = range(len(g_list))
             len_itr = len(g_list)
             parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr,
                 init_worker=init_worker, glbv=(g1, g_list), method='imap_unordered',
                 n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose)
         else:  # @todo
             pass

         return kernel_list

     def _wrapper_kernel_list_do(self, itr):
         return itr, self._kernel_do(G_g1, G_g_list[itr], self._weight)

     def _compute_single_kernel_series(self, g1, g2):
         self._check_edge_weight([g1] + [g2], self._verbose)
         self._check_graphs([g1] + [g2])

         lmda = self._weight

         # Reindex nodes using consecutive integers for the convenience of kernel computation.
         g1 = nx.convert_node_labels_to_integers(g1, first_label=0, label_attribute='label_orignal')
         g2 = nx.convert_node_labels_to_integers(g2, first_label=0, label_attribute='label_orignal')

         if self._p is None and self._q is None:  # p and q are uniform distributions as default.
             kernel = self._kernel_do(g1, g2, lmda)
         else:  # @todo
             pass

         return kernel

     def _kernel_do(self, g1, g2, lmda):
         # First, compute kernels between all pairs of nodes using the method borrowed
         # from FCSP. It is faster than directly computing all edge kernels
         # when $d_1 d_2 > 2$, where $d_1$ and $d_2$ are the vertex degrees of the
         # graphs compared, which is the most common case. For very
         # sparse graphs, this would be slow.
         vk_dict = self._compute_vertex_kernels(g1, g2)

         # Compute the weight matrix of the direct product graph.
         w_times, w_dim = self._compute_weight_matrix(g1, g2, vk_dict)

         # use uniform distribution if there is no prior knowledge.
         p_times_uni = 1 / w_dim
         p_times = np.full((w_dim, 1), p_times_uni)
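Where ConjugateGradient solves the linear system directly, FixedPoint iterates x ← p + λWx until it stops changing; that update is exactly what `_func_fp` in the next hunk returns, and `scipy.optimize` (imported above) presumably drives the iteration. A compact sketch (hypothetical standalone function):

import numpy as np
from scipy import optimize

def random_walk_kernel_fp(w_times, lmda):
    w_dim = w_times.shape[0]
    p_times = np.full(w_dim, 1.0 / w_dim)
    q_times = np.full(w_dim, 1.0 / w_dim)

    # Fixed point of x = p + lmda * W x; the iteration converges
    # while lmda times the spectral radius of W is below 1.
    def func_fp(x):
        return p_times + lmda * np.dot(w_times, x)

    x = optimize.fixed_point(func_fp, p_times, xtol=1e-06, maxiter=1000)
    return float(q_times @ x)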
@@ -216,27 +199,27 @@ class FixedPoint(RandomWalkMeta):
             # use uniform distribution if there is no prior knowledge.
             q_times = np.full((1, w_dim), p_times_uni)
             return np.dot(q_times, x)

     def _wrapper_kernel_do(self, itr):
         i = itr[0]
         j = itr[1]
         return i, j, self._kernel_do(G_gn[i], G_gn[j], self._weight)

     def _func_fp(self, x, p_times, lmda, w_times):
         haha = w_times * x
         haha = lmda * haha
         haha = p_times + haha
         return p_times + lmda * np.dot(w_times, x)

     def _compute_vertex_kernels(self, g1, g2):
         """Compute vertex kernels between vertices of two graphs.
         """
         return compute_vertex_kernels(g1, g2, self._node_kernels, node_labels=self._node_labels, node_attrs=self._node_attrs)

     # @todo: move if out to make it faster.
     # @todo: node/edge kernels use direct function rather than dicts.
     def _compute_weight_matrix(self, g1, g2, vk_dict):
@@ -249,20 +232,20 @@ class FixedPoint(RandomWalkMeta):
             e1_attrs = [e1[2][ea] for ea in self._edge_attrs]
             e2_attrs = [e2[2][ea] for ea in self._edge_attrs]
             return ke(e1_labels, e2_labels, e1_attrs, e2_attrs)

         def compute_ek_10(e1, e2, ke):
             e1_labels = [e1[2][el] for el in self._edge_labels]
             e2_labels = [e2[2][el] for el in self._edge_labels]
             return ke(e1_labels, e2_labels)

         def compute_ek_01(e1, e2, ke):
             e1_attrs = [e1[2][ea] for ea in self._edge_attrs]
             e2_attrs = [e2[2][ea] for ea in self._edge_attrs]
             return ke(e1_attrs, e2_attrs)

         def compute_ek_00(e1, e2, ke):
             return 1

         # Select the proper edge kernel.
         if len(self._edge_labels) > 0:
             # edge symb and non-symb labeled
@@ -282,11 +265,11 @@ class FixedPoint(RandomWalkMeta):
         else:
             ke = None
             ek_temp = compute_ek_00  # @todo: check how much slower is this.

         # Compute the weight matrix.
         w_dim = nx.number_of_nodes(g1) * nx.number_of_nodes(g2)
         w_times = np.zeros((w_dim, w_dim))
         if vk_dict:  # node labeled
             if self._ds_infos['directed']:
                 for e1 in g1.edges(data=True):
@@ -7,19 +7,19 @@ Created on Wed Jun 3 22:22:57 2020

 @references:

     [1] H. Kashima, K. Tsuda, and A. Inokuchi. Marginalized kernels between
     labeled graphs. In Proceedings of the 20th International Conference on
     Machine Learning, Washington, DC, United States, 2003.

     [2] Pierre Mahé, Nobuhisa Ueda, Tatsuya Akutsu, Jean-Luc Perret, and
     Jean-Philippe Vert. Extensions of marginalized graph kernels. In
     Proceedings of the twenty-first international conference on Machine
     learning, page 70. ACM, 2004.
 """
 import sys
 from multiprocessing import Pool
-from tqdm import tqdm
+from gklearn.utils import get_iters
 import numpy as np
 import networkx as nx
 from gklearn.utils import SpecialLabel
@@ -30,7 +30,7 @@ from gklearn.kernels import GraphKernel

 class Marginalized(GraphKernel):

     def __init__(self, **kwargs):
         GraphKernel.__init__(self)
         self._node_labels = kwargs.get('node_labels', [])
@@ -44,35 +44,31 @@ class Marginalized(GraphKernel):
     def _compute_gm_series(self):
         self._add_dummy_labels(self._graphs)
         if self._remove_totters:
-            if self._verbose >= 2:
-                iterator = tqdm(self._graphs, desc='removing tottering', file=sys.stdout)
-            else:
-                iterator = self._graphs
+            iterator = get_iters(self._graphs, desc='removing tottering', file=sys.stdout, verbose=(self._verbose >= 2))
             # @todo: this may not work.
             self._graphs = [untotterTransformation(G, self._node_labels, self._edge_labels) for G in iterator]

         # compute Gram matrix.
         gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))

         from itertools import combinations_with_replacement
         itr = combinations_with_replacement(range(0, len(self._graphs)), 2)
-        if self._verbose >= 2:
-            iterator = tqdm(itr, desc='Computing kernels', file=sys.stdout)
-        else:
-            iterator = itr
+        len_itr = int(len(self._graphs) * (len(self._graphs) + 1) / 2)
+        iterator = get_iters(itr, desc='Computing kernels', file=sys.stdout,
+            length=len_itr, verbose=(self._verbose >= 2))
         for i, j in iterator:
             kernel = self._kernel_do(self._graphs[i], self._graphs[j])
             gram_matrix[i][j] = kernel
             gram_matrix[j][i] = kernel  # @todo: no directed graph considered?

         return gram_matrix

     def _compute_gm_imap_unordered(self):
         self._add_dummy_labels(self._graphs)
         if self._remove_totters:
             pool = Pool(self._n_jobs)
             itr = range(0, len(self._graphs))
@@ -81,57 +77,49 @@ class Marginalized(GraphKernel):
             else:
                 chunksize = 100
             remove_fun = self._wrapper_untotter
-            if self._verbose >= 2:
-                iterator = tqdm(pool.imap_unordered(remove_fun, itr, chunksize),
-                    desc='removing tottering', file=sys.stdout)
-            else:
-                iterator = pool.imap_unordered(remove_fun, itr, chunksize)
+            iterator = get_iters(pool.imap_unordered(remove_fun, itr, chunksize),
+                desc='removing tottering', file=sys.stdout,
+                length=len(self._graphs), verbose=(self._verbose >= 2))
             for i, g in iterator:
                 self._graphs[i] = g
             pool.close()
             pool.join()

         # compute Gram matrix.
         gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))

         def init_worker(gn_toshare):
             global G_gn
             G_gn = gn_toshare

         do_fun = self._wrapper_kernel_do
         parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker,
             glbv=(self._graphs,), n_jobs=self._n_jobs, verbose=self._verbose)

         return gram_matrix

     def _compute_kernel_list_series(self, g1, g_list):
         self._add_dummy_labels(g_list + [g1])
         if self._remove_totters:
             g1 = untotterTransformation(g1, self._node_labels, self._edge_labels)  # @todo: this may not work.
-            if self._verbose >= 2:
-                iterator = tqdm(g_list, desc='removing tottering', file=sys.stdout)
-            else:
-                iterator = g_list
+            iterator = get_iters(g_list, desc='removing tottering', file=sys.stdout, verbose=(self._verbose >= 2))
             # @todo: this may not work.
             g_list = [untotterTransformation(G, self._node_labels, self._edge_labels) for G in iterator]

         # compute kernel list.
         kernel_list = [None] * len(g_list)
-        if self._verbose >= 2:
-            iterator = tqdm(range(len(g_list)), desc='Computing kernels', file=sys.stdout)
-        else:
-            iterator = range(len(g_list))
+        iterator = get_iters(range(len(g_list)), desc='Computing kernels', file=sys.stdout, length=len(g_list), verbose=(self._verbose >= 2))
         for i in iterator:
             kernel = self._kernel_do(g1, g_list[i])
             kernel_list[i] = kernel

         return kernel_list

     def _compute_kernel_list_imap_unordered(self, g1, g_list):
         self._add_dummy_labels(g_list + [g1])
         if self._remove_totters:
             g1 = untotterTransformation(g1, self._node_labels, self._edge_labels)  # @todo: this may not work.
             pool = Pool(self._n_jobs)
| @@ -141,16 +129,14 @@ class Marginalized(GraphKernel): | |||||
| else: | else: | ||||
| chunksize = 100 | chunksize = 100 | ||||
| remove_fun = self._wrapper_untotter | remove_fun = self._wrapper_untotter | ||||
| if self._verbose >= 2: | |||||
| iterator = tqdm(pool.imap_unordered(remove_fun, itr, chunksize), | |||||
| desc='removing tottering', file=sys.stdout) | |||||
| else: | |||||
| iterator = pool.imap_unordered(remove_fun, itr, chunksize) | |||||
| iterator = get_iters(pool.imap_unordered(remove_fun, itr, chunksize), | |||||
| desc='removing tottering', file=sys.stdout, | |||||
| length=len(g_list), verbose=(self._verbose >= 2)) | |||||
| for i, g in iterator: | for i, g in iterator: | ||||
| g_list[i] = g | g_list[i] = g | ||||
| pool.close() | pool.close() | ||||
| pool.join() | pool.join() | ||||
| # compute kernel list. | # compute kernel list. | ||||
| kernel_list = [None] * len(g_list) | kernel_list = [None] * len(g_list) | ||||
| @@ -159,38 +145,38 @@ class Marginalized(GraphKernel): | |||||
| G_g1 = g1_toshare | G_g1 = g1_toshare | ||||
| G_g_list = g_list_toshare | G_g_list = g_list_toshare | ||||
| do_fun = self._wrapper_kernel_list_do | do_fun = self._wrapper_kernel_list_do | ||||
| def func_assign(result, var_to_assign): | |||||
| def func_assign(result, var_to_assign): | |||||
| var_to_assign[result[0]] = result[1] | var_to_assign[result[0]] = result[1] | ||||
| itr = range(len(g_list)) | itr = range(len(g_list)) | ||||
| len_itr = len(g_list) | len_itr = len(g_list) | ||||
| parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr, | parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr, | ||||
| init_worker=init_worker, glbv=(g1, g_list), method='imap_unordered', | |||||
| init_worker=init_worker, glbv=(g1, g_list), method='imap_unordered', | |||||
| n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose) | n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose) | ||||
| return kernel_list | return kernel_list | ||||
| def _wrapper_kernel_list_do(self, itr): | def _wrapper_kernel_list_do(self, itr): | ||||
| return itr, self._kernel_do(G_g1, G_g_list[itr]) | return itr, self._kernel_do(G_g1, G_g_list[itr]) | ||||
| def _compute_single_kernel_series(self, g1, g2): | def _compute_single_kernel_series(self, g1, g2): | ||||
| self._add_dummy_labels([g1] + [g2]) | self._add_dummy_labels([g1] + [g2]) | ||||
| if self._remove_totters: | if self._remove_totters: | ||||
| g1 = untotterTransformation(g1, self._node_labels, self._edge_labels) # @todo: this may not work. | g1 = untotterTransformation(g1, self._node_labels, self._edge_labels) # @todo: this may not work. | ||||
| g2 = untotterTransformation(g2, self._node_labels, self._edge_labels) | g2 = untotterTransformation(g2, self._node_labels, self._edge_labels) | ||||
| kernel = self._kernel_do(g1, g2) | kernel = self._kernel_do(g1, g2) | ||||
| return kernel | |||||
| return kernel | |||||
| def _kernel_do(self, g1, g2): | def _kernel_do(self, g1, g2): | ||||
| """Compute marginalized graph kernel between 2 graphs. | """Compute marginalized graph kernel between 2 graphs. | ||||
| Parameters | Parameters | ||||
| ---------- | ---------- | ||||
| g1, g2 : NetworkX graphs | g1, g2 : NetworkX graphs | ||||
| 2 graphs between which the kernel is computed. | 2 graphs between which the kernel is computed. | ||||
| Return | Return | ||||
| ------ | ------ | ||||
| kernel : float | kernel : float | ||||
| @@ -204,10 +190,10 @@ class Marginalized(GraphKernel): | |||||
| # (uniform distribution over |G|) | # (uniform distribution over |G|) | ||||
| p_init_G1 = 1 / num_nodes_G1 | p_init_G1 = 1 / num_nodes_G1 | ||||
| p_init_G2 = 1 / num_nodes_G2 | p_init_G2 = 1 / num_nodes_G2 | ||||
| q = self._p_quit * self._p_quit | q = self._p_quit * self._p_quit | ||||
| r1 = q | r1 = q | ||||
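| # q is the probability that both random walks halt at the same step. | # q is the probability that both random walks halt at the same step. | ||||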
| # # initial R_inf | # # initial R_inf | ||||
| # # matrix to save all the R_inf for all pairs of nodes | # # matrix to save all the R_inf for all pairs of nodes | ||||
| # R_inf = np.zeros([num_nodes_G1, num_nodes_G2]) | # R_inf = np.zeros([num_nodes_G1, num_nodes_G2]) | ||||
| @@ -229,7 +215,7 @@ class Marginalized(GraphKernel): | |||||
| # neighbor_n2 = g2[node2[0]] | # neighbor_n2 = g2[node2[0]] | ||||
| # if len(neighbor_n2) > 0: | # if len(neighbor_n2) > 0: | ||||
| # p_trans_n2 = (1 - p_quit) / len(neighbor_n2) | # p_trans_n2 = (1 - p_quit) / len(neighbor_n2) | ||||
| # | |||||
| # | |||||
| # for neighbor1 in neighbor_n1: | # for neighbor1 in neighbor_n1: | ||||
| # for neighbor2 in neighbor_n2: | # for neighbor2 in neighbor_n2: | ||||
| # t = p_trans_n1 * p_trans_n2 * \ | # t = p_trans_n1 * p_trans_n2 * \ | ||||
| @@ -238,7 +224,7 @@ class Marginalized(GraphKernel): | |||||
| # deltakernel( | # deltakernel( | ||||
| # neighbor_n1[neighbor1][edge_label], | # neighbor_n1[neighbor1][edge_label], | ||||
| # neighbor_n2[neighbor2][edge_label]) | # neighbor_n2[neighbor2][edge_label]) | ||||
| # | |||||
| # | |||||
| # R_inf_new[node1[0]][node2[0]] += t * R_inf[neighbor1][ | # R_inf_new[node1[0]][node2[0]] += t * R_inf[neighbor1][ | ||||
| # neighbor2] # ref [1] equation (8) | # neighbor2] # ref [1] equation (8) | ||||
| # R_inf[:] = R_inf_new | # R_inf[:] = R_inf_new | ||||
| @@ -249,8 +235,8 @@ class Marginalized(GraphKernel): | |||||
| # s = p_init_G1 * p_init_G2 * deltakernel( | # s = p_init_G1 * p_init_G2 * deltakernel( | ||||
| # node1[1][node_label], node2[1][node_label]) | # node1[1][node_label], node2[1][node_label]) | ||||
| # kernel += s * R_inf[node1[0]][node2[0]] # ref [1] equation (6) | # kernel += s * R_inf[node1[0]][node2[0]] # ref [1] equation (6) | ||||
| R_inf = {} # dict to save all the R_inf for all pairs of nodes | R_inf = {} # dict to save all the R_inf for all pairs of nodes | ||||
| # initial R_inf, the 1st iteration. | # initial R_inf, the 1st iteration. | ||||
| for node1 in g1.nodes(): | for node1 in g1.nodes(): | ||||
| @@ -266,7 +252,7 @@ class Marginalized(GraphKernel): | |||||
| R_inf[(node1, node2)] = self._p_quit | R_inf[(node1, node2)] = self._p_quit | ||||
| else: | else: | ||||
| R_inf[(node1, node2)] = 1 | R_inf[(node1, node2)] = 1 | ||||
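| # The values above are the 1st-iteration estimates: the probability that | # The values above are the 1st-iteration estimates: the probability that | ||||
| # both walks stop immediately, i.e. p_quit per node with neighbors and 1 | # both walks stop immediately, i.e. p_quit per node with neighbors and 1 | ||||
| # per isolated node, multiplied over the pair. | # per isolated node, multiplied over the pair. | ||||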
| # compute all transition probabilities first. | # compute all transition probabilities first. | ||||
| t_dict = {} | t_dict = {} | ||||
| if self._n_iteration > 1: | if self._n_iteration > 1: | ||||
| @@ -287,11 +273,11 @@ class Marginalized(GraphKernel): | |||||
| p_trans_n1 * p_trans_n2 * \ | p_trans_n1 * p_trans_n2 * \ | ||||
| deltakernel(tuple(g1.nodes[neighbor1][nl] for nl in self._node_labels), tuple(g2.nodes[neighbor2][nl] for nl in self._node_labels)) * \ | deltakernel(tuple(g1.nodes[neighbor1][nl] for nl in self._node_labels), tuple(g2.nodes[neighbor2][nl] for nl in self._node_labels)) * \ | ||||
| deltakernel(tuple(neighbor_n1[neighbor1][el] for el in self._edge_labels), tuple(neighbor_n2[neighbor2][el] for el in self._edge_labels)) | deltakernel(tuple(neighbor_n1[neighbor1][el] for el in self._edge_labels), tuple(neighbor_n2[neighbor2][el] for el in self._edge_labels)) | ||||
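| # t_dict caches t(node1, node2, neighbor1, neighbor2): the product of the | # t_dict caches t(node1, node2, neighbor1, neighbor2): the product of the | ||||
| # two transition probabilities and the node- and edge-label delta kernels, | # two transition probabilities and the node- and edge-label delta kernels, | ||||
| # reused in every pass of the iteration below. | # reused in every pass of the iteration below. | ||||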
| # Compute R_inf with a simple iterative method | # Compute R_inf with a simple iterative method | ||||
| for i in range(2, self._n_iteration + 1): | for i in range(2, self._n_iteration + 1): | ||||
| R_inf_old = R_inf.copy() | R_inf_old = R_inf.copy() | ||||
| # Compute R_inf for each pair of nodes | # Compute R_inf for each pair of nodes | ||||
| for node1 in g1.nodes(): | for node1 in g1.nodes(): | ||||
| neighbor_n1 = g1[node1] | neighbor_n1 = g1[node1] | ||||
| @@ -301,32 +287,32 @@ class Marginalized(GraphKernel): | |||||
| if len(neighbor_n1) > 0: | if len(neighbor_n1) > 0: | ||||
| for node2 in g2.nodes(): | for node2 in g2.nodes(): | ||||
| neighbor_n2 = g2[node2] | neighbor_n2 = g2[node2] | ||||
| if len(neighbor_n2) > 0: | |||||
| if len(neighbor_n2) > 0: | |||||
| R_inf[(node1, node2)] = r1 | R_inf[(node1, node2)] = r1 | ||||
| for neighbor1 in neighbor_n1: | for neighbor1 in neighbor_n1: | ||||
| for neighbor2 in neighbor_n2: | for neighbor2 in neighbor_n2: | ||||
| R_inf[(node1, node2)] += \ | R_inf[(node1, node2)] += \ | ||||
| (t_dict[(node1, node2, neighbor1, neighbor2)] * \ | (t_dict[(node1, node2, neighbor1, neighbor2)] * \ | ||||
| R_inf_old[(neighbor1, neighbor2)]) # ref [1] equation (8) | R_inf_old[(neighbor1, neighbor2)]) # ref [1] equation (8) | ||||
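| # Since every t term carries a (1 - p_quit) factor per graph, this update | # Since every t term carries a (1 - p_quit) factor per graph, this update | ||||
| # is a contraction for p_quit > 0, so the fixed number of n_iteration | # is a contraction for p_quit > 0, so the fixed number of n_iteration | ||||
| # steps approximates the fixed point of equation (8). | # steps approximates the fixed point of equation (8). | ||||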
| # add elements of R_inf up and compute kernel. | # add elements of R_inf up and compute kernel. | ||||
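| # i.e. kernel = sum over (n1, n2) of p_init(n1) * p_init(n2) * | # i.e. kernel = sum over (n1, n2) of p_init(n1) * p_init(n2) * | ||||
| # k_node(n1, n2) * R_inf(n1, n2). | # k_node(n1, n2) * R_inf(n1, n2). | ||||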
| for (n1, n2), value in R_inf.items(): | for (n1, n2), value in R_inf.items(): | ||||
| s = p_init_G1 * p_init_G2 * deltakernel(tuple(g1.nodes[n1][nl] for nl in self._node_labels), tuple(g2.nodes[n2][nl] for nl in self._node_labels)) | s = p_init_G1 * p_init_G2 * deltakernel(tuple(g1.nodes[n1][nl] for nl in self._node_labels), tuple(g2.nodes[n2][nl] for nl in self._node_labels)) | ||||
| kernel += s * value # ref [1] equation (6) | kernel += s * value # ref [1] equation (6) | ||||
| return kernel | return kernel | ||||
| def _wrapper_kernel_do(self, itr): | def _wrapper_kernel_do(self, itr): | ||||
| i = itr[0] | i = itr[0] | ||||
| j = itr[1] | j = itr[1] | ||||
| return i, j, self._kernel_do(G_gn[i], G_gn[j]) | return i, j, self._kernel_do(G_gn[i], G_gn[j]) | ||||
| def _wrapper_untotter(self, i): | def _wrapper_untotter(self, i): | ||||
| return i, untotterTransformation(self._graphs[i], self._node_labels, self._edge_labels) # @todo: this may not work. | return i, untotterTransformation(self._graphs[i], self._node_labels, self._edge_labels) # @todo: this may not work. | ||||
| def _add_dummy_labels(self, Gn): | def _add_dummy_labels(self, Gn): | ||||
| if len(self._node_labels) == 0 or (len(self._node_labels) == 1 and self._node_labels[0] == SpecialLabel.DUMMY): | if len(self._node_labels) == 0 or (len(self._node_labels) == 1 and self._node_labels[0] == SpecialLabel.DUMMY): | ||||
| for i in range(len(Gn)): | for i in range(len(Gn)): | ||||
| @@ -5,15 +5,15 @@ Created on Fri Apr 10 18:33:13 2020 | |||||
| @author: ljia | @author: ljia | ||||
| @references: | |||||
| @references: | |||||
| [1] Liva Ralaivola, Sanjay J Swamidass, Hiroto Saigo, and Pierre | |||||
| Baldi. Graph kernels for chemical informatics. Neural Networks, |||||
| [1] Liva Ralaivola, Sanjay J Swamidass, Hiroto Saigo, and Pierre | |||||
| Baldi. Graph kernels for chemical informatics. Neural Networks, |||||
| 18(8):1093–1110, 2005. | 18(8):1093–1110, 2005. | ||||
| """ | """ | ||||
| import sys | import sys | ||||
| from multiprocessing import Pool | from multiprocessing import Pool | ||||
| from tqdm import tqdm | |||||
| from gklearn.utils import get_iters | |||||
| import numpy as np | import numpy as np | ||||
| import networkx as nx | import networkx as nx | ||||
| from collections import Counter | from collections import Counter | ||||
| @@ -25,7 +25,7 @@ from gklearn.utils import Trie | |||||
| class PathUpToH(GraphKernel): # @todo: add function for k_func is None | class PathUpToH(GraphKernel): # @todo: add function for k_func is None | ||||
| def __init__(self, **kwargs): | def __init__(self, **kwargs): | ||||
| GraphKernel.__init__(self) | GraphKernel.__init__(self) | ||||
| self._node_labels = kwargs.get('node_labels', []) | self._node_labels = kwargs.get('node_labels', []) | ||||
| @@ -38,16 +38,14 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None | |||||
| def _compute_gm_series(self): | def _compute_gm_series(self): | ||||
| self._add_dummy_labels(self._graphs) | self._add_dummy_labels(self._graphs) | ||||
| from itertools import combinations_with_replacement | from itertools import combinations_with_replacement | ||||
| itr_kernel = combinations_with_replacement(range(0, len(self._graphs)), 2) | |||||
| if self._verbose >= 2: | |||||
| iterator_ps = tqdm(range(0, len(self._graphs)), desc='getting paths', file=sys.stdout) | |||||
| iterator_kernel = tqdm(itr_kernel, desc='Computing kernels', file=sys.stdout) | |||||
| else: | |||||
| iterator_ps = range(0, len(self._graphs)) | |||||
| iterator_kernel = itr_kernel | |||||
| itr_kernel = combinations_with_replacement(range(0, len(self._graphs)), 2) | |||||
| iterator_ps = get_iters(range(0, len(self._graphs)), desc='getting paths', file=sys.stdout, length=len(self._graphs), verbose=(self._verbose >= 2)) | |||||
| len_itr = int(len(self._graphs) * (len(self._graphs) + 1) / 2) | |||||
| iterator_kernel = get_iters(itr_kernel, desc='Computing kernels', | |||||
| file=sys.stdout, length=len_itr, verbose=(self._verbose >= 2)) | |||||
| gram_matrix = np.zeros((len(self._graphs), len(self._graphs))) | gram_matrix = np.zeros((len(self._graphs), len(self._graphs))) | ||||
| if self._compute_method == 'trie': | if self._compute_method == 'trie': | ||||
| @@ -62,13 +60,13 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None | |||||
| kernel = self._kernel_do_naive(all_paths[i], all_paths[j]) | kernel = self._kernel_do_naive(all_paths[i], all_paths[j]) | ||||
| gram_matrix[i][j] = kernel | gram_matrix[i][j] = kernel | ||||
| gram_matrix[j][i] = kernel | gram_matrix[j][i] = kernel | ||||
| return gram_matrix | return gram_matrix | ||||
| def _compute_gm_imap_unordered(self): | def _compute_gm_imap_unordered(self): | ||||
| self._add_dummy_labels(self._graphs) | self._add_dummy_labels(self._graphs) | ||||
| # get all paths of all graphs before computing kernels to save time, | # get all paths of all graphs before computing kernels to save time, | ||||
| # but this may cost a lot of memory for large datasets. | # but this may cost a lot of memory for large datasets. | ||||
| pool = Pool(self._n_jobs) | pool = Pool(self._n_jobs) | ||||
| @@ -80,23 +78,21 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None | |||||
| all_paths = [[] for _ in range(len(self._graphs))] | all_paths = [[] for _ in range(len(self._graphs))] | ||||
| if self._compute_method == 'trie' and self._k_func is not None: | if self._compute_method == 'trie' and self._k_func is not None: | ||||
| get_ps_fun = self._wrapper_find_all_path_as_trie | get_ps_fun = self._wrapper_find_all_path_as_trie | ||||
| elif self._compute_method != 'trie' and self._k_func is not None: | |||||
| get_ps_fun = partial(self._wrapper_find_all_paths_until_length, True) | |||||
| else: | |||||
| get_ps_fun = partial(self._wrapper_find_all_paths_until_length, False) | |||||
| if self._verbose >= 2: | |||||
| iterator = tqdm(pool.imap_unordered(get_ps_fun, itr, chunksize), | |||||
| desc='getting paths', file=sys.stdout) | |||||
| elif self._compute_method != 'trie' and self._k_func is not None: | |||||
| get_ps_fun = partial(self._wrapper_find_all_paths_until_length, True) | |||||
| else: | else: | ||||
| iterator = pool.imap_unordered(get_ps_fun, itr, chunksize) | |||||
| get_ps_fun = partial(self._wrapper_find_all_paths_until_length, False) | |||||
| iterator = get_iters(pool.imap_unordered(get_ps_fun, itr, chunksize), | |||||
| desc='getting paths', file=sys.stdout, | |||||
| length=len(self._graphs), verbose=(self._verbose >= 2)) | |||||
| for i, ps in iterator: | for i, ps in iterator: | ||||
| all_paths[i] = ps | all_paths[i] = ps | ||||
| pool.close() | pool.close() | ||||
| pool.join() | pool.join() | ||||
| # compute Gram matrix. | # compute Gram matrix. | ||||
| gram_matrix = np.zeros((len(self._graphs), len(self._graphs))) | gram_matrix = np.zeros((len(self._graphs), len(self._graphs))) | ||||
| if self._compute_method == 'trie' and self._k_func is not None: | if self._compute_method == 'trie' and self._k_func is not None: | ||||
| def init_worker(trie_toshare): | def init_worker(trie_toshare): | ||||
| global G_trie | global G_trie | ||||
| @@ -106,28 +102,24 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None | |||||
| def init_worker(plist_toshare): | def init_worker(plist_toshare): | ||||
| global G_plist | global G_plist | ||||
| G_plist = plist_toshare | G_plist = plist_toshare | ||||
| do_fun = self._wrapper_kernel_do_naive | |||||
| do_fun = self._wrapper_kernel_do_naive | |||||
| else: | else: | ||||
| def init_worker(plist_toshare): | def init_worker(plist_toshare): | ||||
| global G_plist | global G_plist | ||||
| G_plist = plist_toshare | G_plist = plist_toshare | ||||
| do_fun = self._wrapper_kernel_do_kernelless # @todo: what is this? | |||||
| parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker, | |||||
| glbv=(all_paths,), n_jobs=self._n_jobs, verbose=self._verbose) | |||||
| do_fun = self._wrapper_kernel_do_kernelless # @todo: what is this? | |||||
| parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker, | |||||
| glbv=(all_paths,), n_jobs=self._n_jobs, verbose=self._verbose) | |||||
| return gram_matrix | return gram_matrix | ||||
| def _compute_kernel_list_series(self, g1, g_list): | def _compute_kernel_list_series(self, g1, g_list): | ||||
| self._add_dummy_labels(g_list + [g1]) | self._add_dummy_labels(g_list + [g1]) | ||||
| if self._verbose >= 2: | |||||
| iterator_ps = tqdm(g_list, desc='getting paths', file=sys.stdout) | |||||
| iterator_kernel = tqdm(range(len(g_list)), desc='Computing kernels', file=sys.stdout) | |||||
| else: | |||||
| iterator_ps = g_list | |||||
| iterator_kernel = range(len(g_list)) | |||||
| iterator_ps = get_iters(g_list, desc='getting paths', file=sys.stdout, verbose=(self._verbose >= 2)) | |||||
| iterator_kernel = get_iters(range(len(g_list)), desc='Computing kernels', file=sys.stdout, length=len(g_list), verbose=(self._verbose >= 2)) | |||||
| kernel_list = [None] * len(g_list) | kernel_list = [None] * len(g_list) | ||||
| if self._compute_method == 'trie': | if self._compute_method == 'trie': | ||||
| @@ -142,13 +134,13 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None | |||||
| for i in iterator_kernel: | for i in iterator_kernel: | ||||
| kernel = self._kernel_do_naive(paths_g1, paths_g_list[i]) | kernel = self._kernel_do_naive(paths_g1, paths_g_list[i]) | ||||
| kernel_list[i] = kernel | kernel_list[i] = kernel | ||||
| return kernel_list | return kernel_list | ||||
| def _compute_kernel_list_imap_unordered(self, g1, g_list): | def _compute_kernel_list_imap_unordered(self, g1, g_list): | ||||
| self._add_dummy_labels(g_list + [g1]) | self._add_dummy_labels(g_list + [g1]) | ||||
| # get all paths of all graphs before computing kernels to save time, | # get all paths of all graphs before computing kernels to save time, | ||||
| # but this may cost a lot of memory for large datasets. | # but this may cost a lot of memory for large datasets. | ||||
| pool = Pool(self._n_jobs) | pool = Pool(self._n_jobs) | ||||
| @@ -162,48 +154,46 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None | |||||
| paths_g1 = self._find_all_path_as_trie(g1) | paths_g1 = self._find_all_path_as_trie(g1) | ||||
| get_ps_fun = self._wrapper_find_all_path_as_trie | get_ps_fun = self._wrapper_find_all_path_as_trie | ||||
| elif self._compute_method != 'trie' and self._k_func is not None: | elif self._compute_method != 'trie' and self._k_func is not None: | ||||
| paths_g1 = self._find_all_paths_until_length(g1) | |||||
| get_ps_fun = partial(self._wrapper_find_all_paths_until_length, True) | |||||
| paths_g1 = self._find_all_paths_until_length(g1) | |||||
| get_ps_fun = partial(self._wrapper_find_all_paths_until_length, True) | |||||
| else: | else: | ||||
| paths_g1 = self._find_all_paths_until_length(g1) | |||||
| paths_g1 = self._find_all_paths_until_length(g1) | |||||
| get_ps_fun = partial(self._wrapper_find_all_paths_until_length, False) | get_ps_fun = partial(self._wrapper_find_all_paths_until_length, False) | ||||
| if self._verbose >= 2: | |||||
| iterator = tqdm(pool.imap_unordered(get_ps_fun, itr, chunksize), | |||||
| desc='getting paths', file=sys.stdout) | |||||
| else: | |||||
| iterator = pool.imap_unordered(get_ps_fun, itr, chunksize) | |||||
| iterator = get_iters(pool.imap_unordered(get_ps_fun, itr, chunksize), | |||||
| desc='getting paths', file=sys.stdout, | |||||
| length=len(g_list), verbose=(self._verbose >= 2)) | |||||
| for i, ps in iterator: | for i, ps in iterator: | ||||
| paths_g_list[i] = ps | paths_g_list[i] = ps | ||||
| pool.close() | pool.close() | ||||
| pool.join() | pool.join() | ||||
| # compute kernel list. | # compute kernel list. | ||||
| kernel_list = [None] * len(g_list) | kernel_list = [None] * len(g_list) | ||||
| def init_worker(p1_toshare, plist_toshare): | def init_worker(p1_toshare, plist_toshare): | ||||
| global G_p1, G_plist | global G_p1, G_plist | ||||
| G_p1 = p1_toshare | G_p1 = p1_toshare | ||||
| G_plist = plist_toshare | G_plist = plist_toshare | ||||
| do_fun = self._wrapper_kernel_list_do | do_fun = self._wrapper_kernel_list_do | ||||
| def func_assign(result, var_to_assign): | |||||
| def func_assign(result, var_to_assign): | |||||
| var_to_assign[result[0]] = result[1] | var_to_assign[result[0]] = result[1] | ||||
| itr = range(len(g_list)) | itr = range(len(g_list)) | ||||
| len_itr = len(g_list) | len_itr = len(g_list) | ||||
| parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr, | parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr, | ||||
| init_worker=init_worker, glbv=(paths_g1, paths_g_list), method='imap_unordered', n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose) | init_worker=init_worker, glbv=(paths_g1, paths_g_list), method='imap_unordered', n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose) | ||||
| return kernel_list | return kernel_list | ||||
| def _wrapper_kernel_list_do(self, itr): | def _wrapper_kernel_list_do(self, itr): | ||||
| if self._compute_method == 'trie' and self._k_func is not None: | if self._compute_method == 'trie' and self._k_func is not None: | ||||
| return itr, self._kernel_do_trie(G_p1, G_plist[itr]) | return itr, self._kernel_do_trie(G_p1, G_plist[itr]) | ||||
| elif self._compute_method != 'trie' and self._k_func is not None: | elif self._compute_method != 'trie' and self._k_func is not None: | ||||
| return itr, self._kernel_do_naive(G_p1, G_plist[itr]) | |||||
| return itr, self._kernel_do_naive(G_p1, G_plist[itr]) | |||||
| else: | else: | ||||
| return itr, self._kernel_do_kernelless(G_p1, G_plist[itr]) | return itr, self._kernel_do_kernelless(G_p1, G_plist[itr]) | ||||
| def _compute_single_kernel_series(self, g1, g2): | def _compute_single_kernel_series(self, g1, g2): | ||||
| self._add_dummy_labels([g1] + [g2]) | self._add_dummy_labels([g1] + [g2]) | ||||
| if self._compute_method == 'trie': | if self._compute_method == 'trie': | ||||
| @@ -214,32 +204,32 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None | |||||
| paths_g1 = self._find_all_paths_until_length(g1) | paths_g1 = self._find_all_paths_until_length(g1) | ||||
| paths_g2 = self._find_all_paths_until_length(g2) | paths_g2 = self._find_all_paths_until_length(g2) | ||||
| kernel = self._kernel_do_naive(paths_g1, paths_g2) | kernel = self._kernel_do_naive(paths_g1, paths_g2) | ||||
| return kernel | |||||
| return kernel | |||||
| def _kernel_do_trie(self, trie1, trie2): | def _kernel_do_trie(self, trie1, trie2): | ||||
| """Compute path graph kernels up to depth d between 2 graphs using trie. | """Compute path graph kernels up to depth d between 2 graphs using trie. | ||||
| Parameters | Parameters | ||||
| ---------- | ---------- | ||||
| trie1, trie2 : list | trie1, trie2 : list | ||||
| Tries that contain all paths in the 2 graphs. | Tries that contain all paths in the 2 graphs. | ||||
| k_func : function | k_func : function | ||||
| The kernel function defining the notion of fingerprint |||||
| The kernel function defining the notion of fingerprint |||||
| similarity. | similarity. | ||||
| Return | Return | ||||
| ------ | ------ | ||||
| kernel : float | kernel : float | ||||
| Path kernel up to h between 2 graphs. | Path kernel up to h between 2 graphs. | ||||
| """ | """ | ||||
| if self._k_func == 'tanimoto': | |||||
| # traverse all paths in graph1 and search them in graph2. Depth-first |||||
| if self._k_func == 'tanimoto': | |||||
| # traverse all paths in graph1 and search them in graph2. Depth-first |||||
| # search is applied. | # search is applied. | ||||
| def traverseTrie1t(root, trie2, setlist, pcurrent=[]): | |||||
| def traverseTrie1t(root, trie2, setlist, pcurrent=[]): # @todo: no need to use value (# of occurrence of paths) in this case. | |||||
| for key, node in root['children'].items(): | for key, node in root['children'].items(): | ||||
| pcurrent.append(key) | pcurrent.append(key) | ||||
| if node['isEndOfWord']: | |||||
| if node['isEndOfWord']: | |||||
| setlist[1] += 1 | setlist[1] += 1 | ||||
| count2 = trie2.searchWord(pcurrent) | count2 = trie2.searchWord(pcurrent) | ||||
| if count2 != 0: | if count2 != 0: | ||||
| @@ -250,17 +240,17 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None | |||||
| del pcurrent[-1] | del pcurrent[-1] | ||||
| if pcurrent != []: | if pcurrent != []: | ||||
| del pcurrent[-1] | del pcurrent[-1] | ||||
| # traverse all paths in graph2 and find out those that are not in | |||||
| # graph1. Depth-first search is applied. |||||
| # traverse all paths in graph2 and find out those that are not in | |||||
| # graph1. Depth-first search is applied. |||||
| def traverseTrie2t(root, trie1, setlist, pcurrent=[]): | def traverseTrie2t(root, trie1, setlist, pcurrent=[]): | ||||
| for key, node in root['children'].items(): | for key, node in root['children'].items(): | ||||
| pcurrent.append(key) | pcurrent.append(key) | ||||
| if node['isEndOfWord']: | if node['isEndOfWord']: | ||||
| # print(node['count']) | # print(node['count']) | ||||
| count1 = trie1.searchWord(pcurrent) | count1 = trie1.searchWord(pcurrent) | ||||
| if count1 == 0: | |||||
| if count1 == 0: | |||||
| setlist[1] += 1 | setlist[1] += 1 | ||||
| if node['children'] != {}: | if node['children'] != {}: | ||||
| traverseTrie2t(node, trie1, setlist, pcurrent) | traverseTrie2t(node, trie1, setlist, pcurrent) | ||||
| @@ -268,7 +258,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None | |||||
| del pcurrent[-1] | del pcurrent[-1] | ||||
| if pcurrent != []: | if pcurrent != []: | ||||
| del pcurrent[-1] | del pcurrent[-1] | ||||
| setlist = [0, 0] # intersection and union of path sets of g1, g2. | setlist = [0, 0] # intersection and union of path sets of g1, g2. | ||||
| # print(trie1.root) | # print(trie1.root) | ||||
| # print(trie2.root) | # print(trie2.root) | ||||
| @@ -277,9 +267,9 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None | |||||
| traverseTrie2t(trie2.root, trie1, setlist) | traverseTrie2t(trie2.root, trie1, setlist) | ||||
| # print(setlist) | # print(setlist) | ||||
| kernel = setlist[0] / setlist[1] | kernel = setlist[0] / setlist[1] | ||||
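| # setlist[0] counts the paths present in both tries and setlist[1] the | # setlist[0] counts the paths present in both tries and setlist[1] the | ||||
| # paths present in either, so the ratio is the Tanimoto (Jaccard) | # paths present in either, so the ratio is the Tanimoto (Jaccard) | ||||
| # coefficient of the two path sets. | # coefficient of the two path sets. | ||||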
| elif self._k_func == 'MinMax': # MinMax kernel | |||||
| # traverse all paths in graph1 and search them in graph2. Depth-first |||||
| elif self._k_func == 'MinMax': # MinMax kernel | |||||
| # traverse all paths in graph1 and search them in graph2. Depth-first |||||
| # search is applied. | # search is applied. | ||||
| def traverseTrie1m(root, trie2, sumlist, pcurrent=[]): | def traverseTrie1m(root, trie2, sumlist, pcurrent=[]): | ||||
| for key, node in root['children'].items(): | for key, node in root['children'].items(): | ||||
| @@ -296,16 +286,16 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None | |||||
| del pcurrent[-1] | del pcurrent[-1] | ||||
| if pcurrent != []: | if pcurrent != []: | ||||
| del pcurrent[-1] | del pcurrent[-1] | ||||
| # traverse all paths in graph2 and find out those that are not in | |||||
| # graph1. Depth-first search is applied. |||||
| # traverse all paths in graph2 and find out those that are not in | |||||
| # graph1. Depth-first search is applied. |||||
| def traverseTrie2m(root, trie1, sumlist, pcurrent=[]): | def traverseTrie2m(root, trie1, sumlist, pcurrent=[]): | ||||
| for key, node in root['children'].items(): | for key, node in root['children'].items(): | ||||
| pcurrent.append(key) | pcurrent.append(key) | ||||
| if node['isEndOfWord']: | |||||
| if node['isEndOfWord']: | |||||
| # print(node['count']) | # print(node['count']) | ||||
| count1 = trie1.searchWord(pcurrent) | count1 = trie1.searchWord(pcurrent) | ||||
| if count1 == 0: | |||||
| if count1 == 0: | |||||
| sumlist[1] += node['count'] | sumlist[1] += node['count'] | ||||
| if node['children'] != {}: | if node['children'] != {}: | ||||
| traverseTrie2m(node, trie1, sumlist, pcurrent) | traverseTrie2m(node, trie1, sumlist, pcurrent) | ||||
| @@ -313,7 +303,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None | |||||
| del pcurrent[-1] | del pcurrent[-1] | ||||
| if pcurrent != []: | if pcurrent != []: | ||||
| del pcurrent[-1] | del pcurrent[-1] | ||||
| sumlist = [0, 0] # sum of mins and sum of maxs | sumlist = [0, 0] # sum of mins and sum of maxs | ||||
| # print(trie1.root) | # print(trie1.root) | ||||
| # print(trie2.root) | # print(trie2.root) | ||||
| @@ -324,37 +314,37 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None | |||||
| kernel = sumlist[0] / sumlist[1] | kernel = sumlist[0] / sumlist[1] | ||||
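| # sumlist[0] accumulates min(count1, count2) and sumlist[1] | # sumlist[0] accumulates min(count1, count2) and sumlist[1] | ||||
| # max(count1, count2) over all paths, giving the MinMax similarity of | # max(count1, count2) over all paths, giving the MinMax similarity of | ||||
| # the two path multisets. | # the two path multisets. | ||||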
| else: | else: | ||||
| raise Exception('The given "k_func" cannot be recognized. Possible choices include: "tanimoto", "MinMax".') | raise Exception('The given "k_func" cannot be recognized. Possible choices include: "tanimoto", "MinMax".') | ||||
| return kernel | return kernel | ||||
| def _wrapper_kernel_do_trie(self, itr): | def _wrapper_kernel_do_trie(self, itr): | ||||
| i = itr[0] | i = itr[0] | ||||
| j = itr[1] | j = itr[1] | ||||
| return i, j, self._kernel_do_trie(G_trie[i], G_trie[j]) | return i, j, self._kernel_do_trie(G_trie[i], G_trie[j]) | ||||
| def _kernel_do_naive(self, paths1, paths2): | def _kernel_do_naive(self, paths1, paths2): | ||||
| """Compute path graph kernels up to depth d between 2 graphs naively. | """Compute path graph kernels up to depth d between 2 graphs naively. | ||||
| Parameters | Parameters | ||||
| ---------- | ---------- | ||||
| paths1, paths2 : list | paths1, paths2 : list | ||||
| Lists of paths in the 2 graphs, where for unlabeled graphs each |||||
| path is represented by a list of nodes, while for labeled graphs each |||||
| path is represented by a string consisting of the labels of nodes and/or |||||
| Lists of paths in the 2 graphs, where for unlabeled graphs each |||||
| path is represented by a list of nodes, while for labeled graphs each |||||
| path is represented by a string consisting of the labels of nodes and/or |||||
| edges on that path. | edges on that path. | ||||
| k_func : function | k_func : function | ||||
| The kernel function defining the notion of fingerprint |||||
| The kernel function defining the notion of fingerprint |||||
| similarity. | similarity. | ||||
| Return | Return | ||||
| ------ | ------ | ||||
| kernel : float | kernel : float | ||||
| Path kernel up to h between 2 graphs. | Path kernel up to h between 2 graphs. | ||||
| """ | """ | ||||
| all_paths = list(set(paths1 + paths2)) | all_paths = list(set(paths1 + paths2)) | ||||
| if self._k_func == 'tanimoto': | if self._k_func == 'tanimoto': | ||||
| length_union = len(set(paths1 + paths2)) | length_union = len(set(paths1 + paths2)) | ||||
| kernel = (len(set(paths1)) + len(set(paths2)) - | kernel = (len(set(paths1)) + len(set(paths2)) - | ||||
| @@ -363,7 +353,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None | |||||
| # vector2 = [(1 if path in paths2 else 0) for path in all_paths] | # vector2 = [(1 if path in paths2 else 0) for path in all_paths] | ||||
| # kernel_uv = np.dot(vector1, vector2) | # kernel_uv = np.dot(vector1, vector2) | ||||
| # kernel = kernel_uv / (len(set(paths1)) + len(set(paths2)) - kernel_uv) | # kernel = kernel_uv / (len(set(paths1)) + len(set(paths2)) - kernel_uv) | ||||
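| # By inclusion-exclusion the numerator equals the size of the | # By inclusion-exclusion the numerator equals the size of the | ||||
| # intersection of the two path sets, so this is again the Tanimoto | # intersection of the two path sets, so this is again the Tanimoto | ||||
| # coefficient. | # coefficient. | ||||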
| elif self._k_func == 'MinMax': # MinMax kernel | elif self._k_func == 'MinMax': # MinMax kernel | ||||
| path_count1 = Counter(paths1) | path_count1 = Counter(paths1) | ||||
| path_count2 = Counter(paths2) | path_count2 = Counter(paths2) | ||||
| @@ -373,7 +363,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None | |||||
| for key in all_paths] | for key in all_paths] | ||||
| kernel = np.sum(np.minimum(vector1, vector2)) / \ | kernel = np.sum(np.minimum(vector1, vector2)) / \ | ||||
| np.sum(np.maximum(vector1, vector2)) | np.sum(np.maximum(vector1, vector2)) | ||||
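| # Identical path multisets yield a kernel of 1; multisets with no common | # Identical path multisets yield a kernel of 1; multisets with no common | ||||
| # path yield 0. | # path yield 0. | ||||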
| elif self._k_func is None: # no sub-kernel used; compare paths directly. | elif self._k_func is None: # no sub-kernel used; compare paths directly. | ||||
| path_count1 = Counter(paths1) | path_count1 = Counter(paths1) | ||||
| path_count2 = Counter(paths2) | path_count2 = Counter(paths2) | ||||
| @@ -382,27 +372,27 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None | |||||
| vector2 = [(path_count2[key] if (key in path_count2.keys()) else 0) | vector2 = [(path_count2[key] if (key in path_count2.keys()) else 0) | ||||
| for key in all_paths] | for key in all_paths] | ||||
| kernel = np.dot(vector1, vector2) | kernel = np.dot(vector1, vector2) | ||||
| else: | else: | ||||
| raise Exception('The given "k_func" cannot be recognized. Possible choices include: "tanimoto", "MinMax" and None.') | raise Exception('The given "k_func" cannot be recognized. Possible choices include: "tanimoto", "MinMax" and None.') | ||||
| return kernel | return kernel | ||||
| def _wrapper_kernel_do_naive(self, itr): | def _wrapper_kernel_do_naive(self, itr): | ||||
| i = itr[0] | i = itr[0] | ||||
| j = itr[1] | j = itr[1] | ||||
| return i, j, self._kernel_do_naive(G_plist[i], G_plist[j]) | return i, j, self._kernel_do_naive(G_plist[i], G_plist[j]) | ||||
| def _find_all_path_as_trie(self, G): | def _find_all_path_as_trie(self, G): | ||||
| # all_path = find_all_paths_until_length(G, length, ds_attrs, | |||||
| # all_path = find_all_paths_until_length(G, length, ds_attrs, | |||||
| # node_label=node_label, | # node_label=node_label, | ||||
| # edge_label=edge_label) | # edge_label=edge_label) | ||||
| # ptrie = Trie() | # ptrie = Trie() | ||||
| # for path in all_path: | # for path in all_path: | ||||
| # ptrie.insertWord(path) | # ptrie.insertWord(path) | ||||
| # ptrie = Trie() | # ptrie = Trie() | ||||
| # path_l = [[n] for n in G.nodes] # paths of length l | # path_l = [[n] for n in G.nodes] # paths of length l | ||||
| # path_l_str = paths2labelseqs(path_l, G, ds_attrs, node_label, edge_label) | # path_l_str = paths2labelseqs(path_l, G, ds_attrs, node_label, edge_label) | ||||
| @@ -421,15 +411,15 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None | |||||
| # path_l_str = paths2labelseqs(path_l, G, ds_attrs, node_label, edge_label) | # path_l_str = paths2labelseqs(path_l, G, ds_attrs, node_label, edge_label) | ||||
| # for p in path_l_str: | # for p in path_l_str: | ||||
| # ptrie.insertWord(p) | # ptrie.insertWord(p) | ||||
| # | |||||
| # | |||||
| # print(time.time() - time1) | # print(time.time() - time1) | ||||
| # print(ptrie.root) | # print(ptrie.root) | ||||
| # print() | # print() | ||||
| # traverse all paths up to length h in a graph and construct a trie with |||||
| # them. Depth-first search is applied. Note that the reverse of each path |||||
| # is also stored in the trie. |||||
| # traverse all paths up to length h in a graph and construct a trie with |||||
| # them. Depth-first search is applied. Note that the reverse of each path |||||
| # is also stored in the trie. |||||
| def traverseGraph(root, ptrie, G, pcurrent=[]): | def traverseGraph(root, ptrie, G, pcurrent=[]): | ||||
| if len(pcurrent) < self._depth + 1: | if len(pcurrent) < self._depth + 1: | ||||
| for neighbor in G[root]: | for neighbor in G[root]: | ||||
| @@ -439,8 +429,8 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None | |||||
| ptrie.insertWord(plstr[0]) | ptrie.insertWord(plstr[0]) | ||||
| traverseGraph(neighbor, ptrie, G, pcurrent) | traverseGraph(neighbor, ptrie, G, pcurrent) | ||||
| del pcurrent[-1] | del pcurrent[-1] | ||||
| ptrie = Trie() | ptrie = Trie() | ||||
| path_l = [[n] for n in G.nodes] # paths of length l | path_l = [[n] for n in G.nodes] # paths of length l | ||||
| path_l_str = self._paths2labelseqs(path_l, G) | path_l_str = self._paths2labelseqs(path_l, G) | ||||
| @@ -448,18 +438,18 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None | |||||
| ptrie.insertWord(p) | ptrie.insertWord(p) | ||||
| for n in G.nodes: | for n in G.nodes: | ||||
| traverseGraph(n, ptrie, G, pcurrent=[n]) | traverseGraph(n, ptrie, G, pcurrent=[n]) | ||||
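| # Traversing from every start node reaches each path from both of its | # Traversing from every start node reaches each path from both of its | ||||
| # endpoints, which is how the reverse of every path ends up in the trie, | # endpoints, which is how the reverse of every path ends up in the trie, | ||||
| # as noted above. | # as noted above. | ||||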
| # def traverseGraph(root, all_paths, length, G, ds_attrs, node_label, edge_label, | # def traverseGraph(root, all_paths, length, G, ds_attrs, node_label, edge_label, | ||||
| # pcurrent=[]): | # pcurrent=[]): | ||||
| # if len(pcurrent) < length + 1: | # if len(pcurrent) < length + 1: | ||||
| # for neighbor in G[root]: | # for neighbor in G[root]: | ||||
| # if neighbor not in pcurrent: | # if neighbor not in pcurrent: | ||||
| # pcurrent.append(neighbor) | # pcurrent.append(neighbor) | ||||
| # plstr = paths2labelseqs([pcurrent], G, ds_attrs, | |||||
| # plstr = paths2labelseqs([pcurrent], G, ds_attrs, | |||||
| # node_label, edge_label) | # node_label, edge_label) | ||||
| # all_paths.append(pcurrent[:]) | # all_paths.append(pcurrent[:]) | ||||
| # traverseGraph(neighbor, all_paths, length, G, ds_attrs, | |||||
| # traverseGraph(neighbor, all_paths, length, G, ds_attrs, | |||||
| # node_label, edge_label, pcurrent) | # node_label, edge_label, pcurrent) | ||||
| # del pcurrent[-1] | # del pcurrent[-1] | ||||
| # | # | ||||
| @@ -470,24 +460,24 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None | |||||
| ## for p in path_l_str: | ## for p in path_l_str: | ||||
| ## ptrie.insertWord(p) | ## ptrie.insertWord(p) | ||||
| # for n in G.nodes: | # for n in G.nodes: | ||||
| # traverseGraph(n, all_paths, length, G, ds_attrs, node_label, edge_label, | |||||
| # traverseGraph(n, all_paths, length, G, ds_attrs, node_label, edge_label, | |||||
| # pcurrent=[n]) | # pcurrent=[n]) | ||||
| # print(ptrie.root) | # print(ptrie.root) | ||||
| return ptrie | return ptrie | ||||
| def _wrapper_find_all_path_as_trie(self, itr_item): | def _wrapper_find_all_path_as_trie(self, itr_item): | ||||
| g = itr_item[0] | g = itr_item[0] | ||||
| i = itr_item[1] | i = itr_item[1] | ||||
| return i, self._find_all_path_as_trie(g) | return i, self._find_all_path_as_trie(g) | ||||
| # @todo: (can maybe be removed) this method finds paths repeatedly; it could be faster. | # @todo: (can maybe be removed) this method finds paths repeatedly; it could be faster. | ||||
| def _find_all_paths_until_length(self, G, tolabelseqs=True): | def _find_all_paths_until_length(self, G, tolabelseqs=True): | ||||
| """Find all paths no longer than a certain maximum length in a graph. A | |||||
| """Find all paths no longer than a certain maximum length in a graph. A | |||||
| recursive depth-first search is applied. | recursive depth-first search is applied. | ||||
| Parameters | Parameters | ||||
| ---------- | ---------- | ||||
| G : NetworkX graph | G : NetworkX graph | ||||
| @@ -500,13 +490,13 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None | |||||
| Node attribute used as label. The default node label is atom. | Node attribute used as label. The default node label is atom. | ||||
| edge_label : string | edge_label : string | ||||
| Edge attribute used as label. The default edge label is bond_type. | Edge attribute used as label. The default edge label is bond_type. | ||||
| Return | Return | ||||
| ------ | ------ | ||||
| path : list | path : list | ||||
| List of paths retrieved, where for unlabeled graphs each path is |||||
| represented by a list of nodes, while for labeled graphs each path is |||||
| represented by a list of strings consisting of the labels of nodes and/or |||||
| List of paths retrieved, where for unlabeled graphs each path is |||||
| represented by a list of nodes, while for labeled graphs each path is |||||
| represented by a list of strings consisting of the labels of nodes and/or |||||
| edges on that path. | edges on that path. | ||||
| """ | """ | ||||
| # path_l = [tuple([n]) for n in G.nodes] # paths of length l | # path_l = [tuple([n]) for n in G.nodes] # paths of length l | ||||
| @@ -519,10 +509,10 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None | |||||
| # tmp = path + (neighbor, ) | # tmp = path + (neighbor, ) | ||||
| # if tuple(tmp[::-1]) not in path_l_new: | # if tuple(tmp[::-1]) not in path_l_new: | ||||
| # path_l_new.append(tuple(tmp)) | # path_l_new.append(tuple(tmp)) | ||||
| # all_paths += path_l_new | # all_paths += path_l_new | ||||
| # path_l = path_l_new[:] | # path_l = path_l_new[:] | ||||
| path_l = [[n] for n in G.nodes] # paths of length l | path_l = [[n] for n in G.nodes] # paths of length l | ||||
| all_paths = [p.copy() for p in path_l] | all_paths = [p.copy() for p in path_l] | ||||
| for l in range(1, self._depth + 1): | for l in range(1, self._depth + 1): | ||||
| @@ -533,28 +523,28 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None | |||||
| tmp = path + [neighbor] | tmp = path + [neighbor] | ||||
| # if tmp[::-1] not in path_lplus1: | # if tmp[::-1] not in path_lplus1: | ||||
| path_lplus1.append(tmp) | path_lplus1.append(tmp) | ||||
| all_paths += path_lplus1 | all_paths += path_lplus1 | ||||
| path_l = [p.copy() for p in path_lplus1] | path_l = [p.copy() for p in path_lplus1] | ||||
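| # For example, with self._depth == 1 on the path graph 0-1-2 this yields | # For example, with self._depth == 1 on the path graph 0-1-2 this yields | ||||
| # [[0], [1], [2], [0, 1], [1, 0], [1, 2], [2, 1]]; both orientations of a | # [[0], [1], [2], [0, 1], [1, 0], [1, 2], [2, 1]]; both orientations of a | ||||
| # path are kept (the commented-out check above would deduplicate them). | # path are kept (the commented-out check above would deduplicate them). | ||||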
| # for i in range(0, self._depth + 1): | # for i in range(0, self._depth + 1): | ||||
| # new_paths = find_all_paths(G, i) | # new_paths = find_all_paths(G, i) | ||||
| # if new_paths == []: | # if new_paths == []: | ||||
| # break | # break | ||||
| # all_paths.extend(new_paths) | # all_paths.extend(new_paths) | ||||
| # consider labels | # consider labels | ||||
| # print(paths2labelseqs(all_paths, G, ds_attrs, node_label, edge_label)) | # print(paths2labelseqs(all_paths, G, ds_attrs, node_label, edge_label)) | ||||
| # print() | # print() | ||||
| return (self._paths2labelseqs(all_paths, G) if tolabelseqs else all_paths) | return (self._paths2labelseqs(all_paths, G) if tolabelseqs else all_paths) | ||||
| def _wrapper_find_all_paths_until_length(self, tolabelseqs, itr_item): | def _wrapper_find_all_paths_until_length(self, tolabelseqs, itr_item): | ||||
| g = itr_item[0] | g = itr_item[0] | ||||
| i = itr_item[1] | i = itr_item[1] | ||||
| return i, self._find_all_paths_until_length(g, tolabelseqs=tolabelseqs) | return i, self._find_all_paths_until_length(g, tolabelseqs=tolabelseqs) | ||||
| def _paths2labelseqs(self, plist, G): | def _paths2labelseqs(self, plist, G): | ||||
| if len(self._node_labels) > 0: | if len(self._node_labels) > 0: | ||||
| if len(self._edge_labels) > 0: | if len(self._edge_labels) > 0: | ||||
| @@ -589,8 +579,8 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None | |||||
| else: | else: | ||||
| return [tuple(['0' for node in path]) for path in plist] | return [tuple(['0' for node in path]) for path in plist] | ||||
| # return [tuple([len(path)]) for path in all_paths] | # return [tuple([len(path)]) for path in all_paths] | ||||
| def _add_dummy_labels(self, Gn): | def _add_dummy_labels(self, Gn): | ||||
| if self._k_func is not None: | if self._k_func is not None: | ||||
| if len(self._node_labels) == 0 or (len(self._node_labels) == 1 and self._node_labels[0] == SpecialLabel.DUMMY): | if len(self._node_labels) == 0 or (len(self._node_labels) == 1 and self._node_labels[0] == SpecialLabel.DUMMY): | ||||
| @@ -15,7 +15,7 @@ import sys | |||||
| from itertools import product | from itertools import product | ||||
| # from functools import partial | # from functools import partial | ||||
| from multiprocessing import Pool | from multiprocessing import Pool | ||||
| from tqdm import tqdm | |||||
| from gklearn.utils import get_iters | |||||
| import numpy as np | import numpy as np | ||||
| import networkx as nx | import networkx as nx | ||||
| from gklearn.utils.parallel import parallel_gm, parallel_me | from gklearn.utils.parallel import parallel_gm, parallel_me | ||||
| @@ -38,10 +38,7 @@ class ShortestPath(GraphKernel): | |||||
| def _compute_gm_series(self): | def _compute_gm_series(self): | ||||
| self._all_graphs_have_edges(self._graphs) | self._all_graphs_have_edges(self._graphs) | ||||
| # get shortest path graph of each graph. | # get shortest path graph of each graph. | ||||
| if self._verbose >= 2: | |||||
| iterator = tqdm(self._graphs, desc='getting sp graphs', file=sys.stdout) | |||||
| else: | |||||
| iterator = self._graphs | |||||
| iterator = get_iters(self._graphs, desc='getting sp graphs', file=sys.stdout, verbose=(self._verbose >= 2)) | |||||
| self._graphs = [getSPGraph(g, edge_weight=self._edge_weight) for g in iterator] | self._graphs = [getSPGraph(g, edge_weight=self._edge_weight) for g in iterator] | ||||
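| # getSPGraph builds each graph's shortest-paths graph: edges connect | # getSPGraph builds each graph's shortest-paths graph: edges connect | ||||
| # reachable node pairs and are weighted by their shortest-path distance. | # reachable node pairs and are weighted by their shortest-path distance. | ||||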
| # compute Gram matrix. | # compute Gram matrix. | ||||
| @@ -49,10 +46,9 @@ class ShortestPath(GraphKernel): | |||||
| from itertools import combinations_with_replacement | from itertools import combinations_with_replacement | ||||
| itr = combinations_with_replacement(range(0, len(self._graphs)), 2) | itr = combinations_with_replacement(range(0, len(self._graphs)), 2) | ||||
| if self._verbose >= 2: | |||||
| iterator = tqdm(itr, desc='Computing kernels', file=sys.stdout) | |||||
| else: | |||||
| iterator = itr | |||||
| len_itr = int(len(self._graphs) * (len(self._graphs) + 1) / 2) | |||||
| iterator = get_iters(itr, desc='Computing kernels', | |||||
| length=len_itr, file=sys.stdout, verbose=(self._verbose >= 2)) |||||
| for i, j in iterator: | for i, j in iterator: | ||||
| kernel = self._sp_do(self._graphs[i], self._graphs[j]) | kernel = self._sp_do(self._graphs[i], self._graphs[j]) | ||||
| gram_matrix[i][j] = kernel | gram_matrix[i][j] = kernel | ||||
| @@ -71,11 +67,9 @@ class ShortestPath(GraphKernel): | |||||
| chunksize = int(len(self._graphs) / self._n_jobs) + 1 | chunksize = int(len(self._graphs) / self._n_jobs) + 1 | ||||
| else: | else: | ||||
| chunksize = 100 | chunksize = 100 | ||||
| if self._verbose >= 2: | |||||
| iterator = tqdm(pool.imap_unordered(get_sp_graphs_fun, itr, chunksize), | |||||
| desc='getting sp graphs', file=sys.stdout) | |||||
| else: | |||||
| iterator = pool.imap_unordered(get_sp_graphs_fun, itr, chunksize) | |||||
| iterator = get_iters(pool.imap_unordered(get_sp_graphs_fun, itr, chunksize), | |||||
| desc='getting sp graphs', file=sys.stdout, | |||||
| length=len(self._graphs), verbose=(self._verbose >= 2)) | |||||
| for i, g in iterator: | for i, g in iterator: | ||||
| self._graphs[i] = g | self._graphs[i] = g | ||||
| pool.close() | pool.close() | ||||
| @@ -98,18 +92,12 @@ class ShortestPath(GraphKernel): | |||||
| self._all_graphs_have_edges([g1] + g_list) | self._all_graphs_have_edges([g1] + g_list) | ||||
| # get shortest path graphs of g1 and each graph in g_list. | # get shortest path graphs of g1 and each graph in g_list. | ||||
| g1 = getSPGraph(g1, edge_weight=self._edge_weight) | g1 = getSPGraph(g1, edge_weight=self._edge_weight) | ||||
| if self._verbose >= 2: | |||||
| iterator = tqdm(g_list, desc='getting sp graphs', file=sys.stdout) | |||||
| else: | |||||
| iterator = g_list | |||||
| iterator = get_iters(g_list, desc='getting sp graphs', file=sys.stdout, verbose=(self._verbose >= 2)) | |||||
| g_list = [getSPGraph(g, edge_weight=self._edge_weight) for g in iterator] | g_list = [getSPGraph(g, edge_weight=self._edge_weight) for g in iterator] | ||||
| # compute kernel list. | # compute kernel list. | ||||
| kernel_list = [None] * len(g_list) | kernel_list = [None] * len(g_list) | ||||
| if self._verbose >= 2: | |||||
| iterator = tqdm(range(len(g_list)), desc='Computing kernels', file=sys.stdout) | |||||
| else: | |||||
| iterator = range(len(g_list)) | |||||
| iterator = get_iters(range(len(g_list)), desc='Computing kernels', file=sys.stdout, length=len(g_list), verbose=(self._verbose >= 2)) | |||||
| for i in iterator: | for i in iterator: | ||||
| kernel = self._sp_do(g1, g_list[i]) | kernel = self._sp_do(g1, g_list[i]) | ||||
| kernel_list[i] = kernel | kernel_list[i] = kernel | ||||
| @@ -128,11 +116,9 @@ class ShortestPath(GraphKernel): | |||||
| chunksize = int(len(g_list) / self._n_jobs) + 1 | chunksize = int(len(g_list) / self._n_jobs) + 1 | ||||
| else: | else: | ||||
| chunksize = 100 | chunksize = 100 | ||||
| if self._verbose >= 2: | |||||
| iterator = tqdm(pool.imap_unordered(get_sp_graphs_fun, itr, chunksize), | |||||
| desc='getting sp graphs', file=sys.stdout) | |||||
| else: | |||||
| iterator = pool.imap_unordered(get_sp_graphs_fun, itr, chunksize) | |||||
| iterator = get_iters(pool.imap_unordered(get_sp_graphs_fun, itr, chunksize), | |||||
| desc='getting sp graphs', file=sys.stdout, | |||||
| length=len(g_list), verbose=(self._verbose >= 2)) | |||||
| for i, g in iterator: | for i, g in iterator: | ||||
| g_list[i] = g | g_list[i] = g | ||||
| pool.close() | pool.close() | ||||
| @@ -5,13 +5,13 @@ Created on Thu Aug 20 16:12:45 2020 | |||||
| @author: ljia | @author: ljia | ||||
| @references: | |||||
| @references: | |||||
| [1] S. V. N. Vishwanathan, Nicol N. Schraudolph, Risi Kondor, and Karsten M. Borgwardt. Graph kernels. Journal of Machine Learning Research, 11(Apr):1201–1242, 2010. | [1] S. V. N. Vishwanathan, Nicol N. Schraudolph, Risi Kondor, and Karsten M. Borgwardt. Graph kernels. Journal of Machine Learning Research, 11(Apr):1201–1242, 2010. | ||||
| """ | """ | ||||
| import sys | import sys | ||||
| from tqdm import tqdm | |||||
| from gklearn.utils import get_iters | |||||
| import numpy as np | import numpy as np | ||||
| import networkx as nx | import networkx as nx | ||||
| from scipy.sparse import kron | from scipy.sparse import kron | ||||
| @@ -20,12 +20,12 @@ from gklearn.kernels import RandomWalkMeta | |||||
| class SpectralDecomposition(RandomWalkMeta): | class SpectralDecomposition(RandomWalkMeta): | ||||
| def __init__(self, **kwargs): | def __init__(self, **kwargs): | ||||
| super().__init__(**kwargs) | super().__init__(**kwargs) | ||||
| self._sub_kernel = kwargs.get('sub_kernel', None) | self._sub_kernel = kwargs.get('sub_kernel', None) | ||||
| def _compute_gm_series(self): | def _compute_gm_series(self): | ||||
| self._check_edge_weight(self._graphs, self._verbose) | self._check_edge_weight(self._graphs, self._verbose) | ||||
| @@ -33,18 +33,15 @@ class SpectralDecomposition(RandomWalkMeta): | |||||
| if self._verbose >= 2: | if self._verbose >= 2: | ||||
| import warnings | import warnings | ||||
| warnings.warn('All labels are ignored. Only works for undirected graphs.') | warnings.warn('All labels are ignored. Only works for undirected graphs.') | ||||
| # compute Gram matrix. | # compute Gram matrix. | ||||
| gram_matrix = np.zeros((len(self._graphs), len(self._graphs))) | |||||
| gram_matrix = np.zeros((len(self._graphs), len(self._graphs))) | |||||
| if self._q is None: | if self._q is None: | ||||
| # precompute the spectral decomposition of each graph. | # precompute the spectral decomposition of each graph. | ||||
| P_list = [] | P_list = [] | ||||
| D_list = [] | D_list = [] | ||||
| if self._verbose >= 2: | |||||
| iterator = tqdm(self._graphs, desc='spectral decompose', file=sys.stdout) | |||||
| else: | |||||
| iterator = self._graphs | |||||
| iterator = get_iters(self._graphs, desc='spectral decompose', file=sys.stdout, verbose=(self._verbose >= 2)) | |||||
| for G in iterator: | for G in iterator: | ||||
| # don't normalize adjacency matrices if q is a uniform vector. Note that | # don't normalize adjacency matrices if q is a uniform vector. Note that | ||||
| # A is actually the transpose of the adjacency matrix. | # A is actually the transpose of the adjacency matrix. | ||||
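| # Each A is then eigendecomposed; P_list collects the eigenvector | # Each A is then eigendecomposed; P_list collects the eigenvector | ||||
| # matrices and D_list the eigenvalues, cached once and reused for every | # matrices and D_list the eigenvalues, cached once and reused for every | ||||
| # pair of graphs below. | # pair of graphs below. | ||||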
| @@ -60,42 +57,37 @@ class SpectralDecomposition(RandomWalkMeta): | |||||
| from itertools import combinations_with_replacement | from itertools import combinations_with_replacement | ||||
| itr = combinations_with_replacement(range(0, len(self._graphs)), 2) | itr = combinations_with_replacement(range(0, len(self._graphs)), 2) | ||||
| if self._verbose >= 2: | |||||
| iterator = tqdm(itr, desc='Computing kernels', file=sys.stdout) | |||||
| else: | |||||
| iterator = itr | |||||
| len_itr = int(len(self._graphs) * (len(self._graphs) + 1) / 2) | |||||
| iterator = get_iters(itr, desc='Computing kernels', file=sys.stdout, length=len_itr, verbose=(self._verbose >= 2)) | |||||
| for i, j in iterator: | for i, j in iterator: | ||||
| kernel = self._kernel_do(q_T_list[i], q_T_list[j], P_list[i], P_list[j], D_list[i], D_list[j], self._weight, self._sub_kernel) | kernel = self._kernel_do(q_T_list[i], q_T_list[j], P_list[i], P_list[j], D_list[i], D_list[j], self._weight, self._sub_kernel) | ||||
| gram_matrix[i][j] = kernel | gram_matrix[i][j] = kernel | ||||
| gram_matrix[j][i] = kernel | gram_matrix[j][i] = kernel | ||||
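| # Each pairwise value combines the two cached decompositions: the | # Each pairwise value combines the two cached decompositions: the | ||||
| # sub-kernel (e.g. exponential or geometric in the weight) is applied to | # sub-kernel (e.g. exponential or geometric in the weight) is applied to | ||||
| # the eigenvalues of the Kronecker product, following ref [1]. | # the eigenvalues of the Kronecker product, following ref [1]. | ||||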
| else: # @todo | else: # @todo | ||||
| pass | pass | ||||
| else: # @todo | else: # @todo | ||||
| pass | pass | ||||
| return gram_matrix | return gram_matrix | ||||
| def _compute_gm_imap_unordered(self): | def _compute_gm_imap_unordered(self): | ||||
| self._check_edge_weight(self._graphs, self._verbose) | self._check_edge_weight(self._graphs, self._verbose) | ||||
| self._check_graphs(self._graphs) | self._check_graphs(self._graphs) | ||||
| if self._verbose >= 2: | if self._verbose >= 2: | ||||
| import warnings | import warnings | ||||
| warnings.warn('All labels are ignored. Only works for undirected graphs.') | warnings.warn('All labels are ignored. Only works for undirected graphs.') | ||||
| # compute Gram matrix. | # compute Gram matrix. | ||||
| gram_matrix = np.zeros((len(self._graphs), len(self._graphs))) | |||||
| gram_matrix = np.zeros((len(self._graphs), len(self._graphs))) | |||||
| if self._q is None: | if self._q is None: | ||||
| # precompute the spectral decomposition of each graph. | # precompute the spectral decomposition of each graph. | ||||
| P_list = [] | P_list = [] | ||||
| D_list = [] | D_list = [] | ||||
| if self._verbose >= 2: | |||||
| iterator = tqdm(self._graphs, desc='spectral decompose', file=sys.stdout) | |||||
| else: | |||||
| iterator = self._graphs | |||||
| iterator = get_iters(self._graphs, desc='spectral decompose', file=sys.stdout, verbose=(self._verbose >= 2)) | |||||
| for G in iterator: | for G in iterator: | ||||
| # don't normalize adjacency matrices if q is a uniform vector. Note | # don't normalize adjacency matrices if q is a uniform vector. Note | ||||
| # A actually is the transpose of the adjacency matrix. | # A actually is the transpose of the adjacency matrix. | ||||
| @@ -106,45 +98,42 @@ class SpectralDecomposition(RandomWalkMeta): | |||||
| if self._p is None: # p is uniform distribution as default. | if self._p is None: # p is uniform distribution as default. | ||||
| q_T_list = [np.full((1, nx.number_of_nodes(G)), 1 / nx.number_of_nodes(G)) for G in self._graphs] # @todo: parallel? | q_T_list = [np.full((1, nx.number_of_nodes(G)), 1 / nx.number_of_nodes(G)) for G in self._graphs] # @todo: parallel? | ||||
| def init_worker(q_T_list_toshare, P_list_toshare, D_list_toshare): | def init_worker(q_T_list_toshare, P_list_toshare, D_list_toshare): | ||||
| global G_q_T_list, G_P_list, G_D_list | global G_q_T_list, G_P_list, G_D_list | ||||
| G_q_T_list = q_T_list_toshare | G_q_T_list = q_T_list_toshare | ||||
| G_P_list = P_list_toshare | G_P_list = P_list_toshare | ||||
| G_D_list = D_list_toshare | G_D_list = D_list_toshare | ||||
| do_fun = self._wrapper_kernel_do | |||||
| parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker, | |||||
| do_fun = self._wrapper_kernel_do | |||||
| parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker, | |||||
| glbv=(q_T_list, P_list, D_list), n_jobs=self._n_jobs, verbose=self._verbose) | glbv=(q_T_list, P_list, D_list), n_jobs=self._n_jobs, verbose=self._verbose) | ||||
| else: # @todo | else: # @todo | ||||
| pass | pass | ||||
| else: # @todo | else: # @todo | ||||
| pass | pass | ||||
| return gram_matrix | return gram_matrix | ||||
| def _compute_kernel_list_series(self, g1, g_list): | def _compute_kernel_list_series(self, g1, g_list): | ||||
| self._check_edge_weight(g_list + [g1], self._verbose) | self._check_edge_weight(g_list + [g1], self._verbose) | ||||
| self._check_graphs(g_list + [g1]) | self._check_graphs(g_list + [g1]) | ||||
| if self._verbose >= 2: | if self._verbose >= 2: | ||||
| import warnings | import warnings | ||||
| warnings.warn('All labels are ignored. Only works for undirected graphs.') | warnings.warn('All labels are ignored. Only works for undirected graphs.') | ||||
| # compute kernel list. | # compute kernel list. | ||||
| kernel_list = [None] * len(g_list) | kernel_list = [None] * len(g_list) | ||||
| if self._q is None: | if self._q is None: | ||||
| # precompute the spectral decomposition of each graph. | # precompute the spectral decomposition of each graph. | ||||
| A1 = nx.adjacency_matrix(g1, self._edge_weight).todense().transpose() | A1 = nx.adjacency_matrix(g1, self._edge_weight).todense().transpose() | ||||
| D1, P1 = np.linalg.eig(A1) | D1, P1 = np.linalg.eig(A1) | ||||
| P_list = [] | P_list = [] | ||||
| D_list = [] | D_list = [] | ||||
| if self._verbose >= 2: | |||||
| iterator = tqdm(g_list, desc='spectral decompose', file=sys.stdout) | |||||
| else: | |||||
| iterator = g_list | |||||
| iterator = get_iters(g_list, desc='spectral decompose', file=sys.stdout, verbose=(self._verbose >= 2)) | |||||
| for G in iterator: | for G in iterator: | ||||
| # don't normalize adjacency matrices if q is a uniform vector. Note | # don't normalize adjacency matrices if q is a uniform vector. Note | ||||
| # A actually is the transpose of the adjacency matrix. | # A actually is the transpose of the adjacency matrix. | ||||
| @@ -156,33 +145,30 @@ class SpectralDecomposition(RandomWalkMeta): | |||||
| if self._p is None: # p is uniform distribution as default. | if self._p is None: # p is uniform distribution as default. | ||||
| q_T1 = np.full((1, nx.number_of_nodes(g1)), 1 / nx.number_of_nodes(g1)) # uniform row vector, matching q_T_list below | q_T1 = np.full((1, nx.number_of_nodes(g1)), 1 / nx.number_of_nodes(g1)) # uniform row vector, matching q_T_list below | ||||
| q_T_list = [np.full((1, nx.number_of_nodes(G)), 1 / nx.number_of_nodes(G)) for G in g_list] | q_T_list = [np.full((1, nx.number_of_nodes(G)), 1 / nx.number_of_nodes(G)) for G in g_list] | ||||
| if self._verbose >= 2: | |||||
| iterator = tqdm(range(len(g_list)), desc='Computing kernels', file=sys.stdout) | |||||
| else: | |||||
| iterator = range(len(g_list)) | |||||
| iterator = get_iters(range(len(g_list)), desc='Computing kernels', file=sys.stdout, length=len(g_list), verbose=(self._verbose >= 2)) | |||||
| for i in iterator: | for i in iterator: | ||||
| kernel = self._kernel_do(q_T1, q_T_list[i], P1, P_list[i], D1, D_list[i], self._weight, self._sub_kernel) | kernel = self._kernel_do(q_T1, q_T_list[i], P1, P_list[i], D1, D_list[i], self._weight, self._sub_kernel) | ||||
| kernel_list[i] = kernel | kernel_list[i] = kernel | ||||
| else: # @todo | else: # @todo | ||||
| pass | pass | ||||
| else: # @todo | else: # @todo | ||||
| pass | pass | ||||
| return kernel_list | return kernel_list | ||||
| def _compute_kernel_list_imap_unordered(self, g1, g_list): | def _compute_kernel_list_imap_unordered(self, g1, g_list): | ||||
| self._check_edge_weight(g_list + [g1], self._verbose) | self._check_edge_weight(g_list + [g1], self._verbose) | ||||
| self._check_graphs(g_list + [g1]) | self._check_graphs(g_list + [g1]) | ||||
| if self._verbose >= 2: | if self._verbose >= 2: | ||||
| import warnings | import warnings | ||||
| warnings.warn('All labels are ignored. Only works for undirected graphs.') | warnings.warn('All labels are ignored. Only works for undirected graphs.') | ||||
| # compute kernel list. | # compute kernel list. | ||||
| kernel_list = [None] * len(g_list) | kernel_list = [None] * len(g_list) | ||||
| if self._q is None: | if self._q is None: | ||||
| # precompute the spectral decomposition of each graph. | # precompute the spectral decomposition of each graph. | ||||
| A1 = nx.adjacency_matrix(g1, self._edge_weight).todense().transpose() | A1 = nx.adjacency_matrix(g1, self._edge_weight).todense().transpose() | ||||
| @@ -204,7 +190,7 @@ class SpectralDecomposition(RandomWalkMeta): | |||||
| if self._p is None: # p is uniform distribution as default. | if self._p is None: # p is uniform distribution as default. | ||||
| q_T1 = np.full((1, nx.number_of_nodes(g1)), 1 / nx.number_of_nodes(g1)) # uniform row vector, matching q_T_list below | q_T1 = np.full((1, nx.number_of_nodes(g1)), 1 / nx.number_of_nodes(g1)) # uniform row vector, matching q_T_list below | ||||
| q_T_list = [np.full((1, nx.number_of_nodes(G)), 1 / nx.number_of_nodes(G)) for G in g_list] # @todo: parallel? | q_T_list = [np.full((1, nx.number_of_nodes(G)), 1 / nx.number_of_nodes(G)) for G in g_list] # @todo: parallel? | ||||
| def init_worker(q_T1_toshare, P1_toshare, D1_toshare, q_T_list_toshare, P_list_toshare, D_list_toshare): | def init_worker(q_T1_toshare, P1_toshare, D1_toshare, q_T_list_toshare, P_list_toshare, D_list_toshare): | ||||
| global G_q_T1, G_P1, G_D1, G_q_T_list, G_P_list, G_D_list | global G_q_T1, G_P1, G_D1, G_q_T_list, G_P_list, G_D_list | ||||
| G_q_T1 = q_T1_toshare | G_q_T1 = q_T1_toshare | ||||
| @@ -214,34 +200,34 @@ class SpectralDecomposition(RandomWalkMeta): | |||||
| G_P_list = P_list_toshare | G_P_list = P_list_toshare | ||||
| G_D_list = D_list_toshare | G_D_list = D_list_toshare | ||||
| do_fun = self._wrapper_kernel_list_do | |||||
| def func_assign(result, var_to_assign): | |||||
| do_fun = self._wrapper_kernel_list_do | |||||
| def func_assign(result, var_to_assign): | |||||
| var_to_assign[result[0]] = result[1] | var_to_assign[result[0]] = result[1] | ||||
| itr = range(len(g_list)) | itr = range(len(g_list)) | ||||
| len_itr = len(g_list) | len_itr = len(g_list) | ||||
| parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr, | parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr, | ||||
| init_worker=init_worker, glbv=(q_T1, P1, D1, q_T_list, P_list, D_list), method='imap_unordered', n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose) | init_worker=init_worker, glbv=(q_T1, P1, D1, q_T_list, P_list, D_list), method='imap_unordered', n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose) | ||||
| else: # @todo | else: # @todo | ||||
| pass | pass | ||||
| else: # @todo | else: # @todo | ||||
| pass | pass | ||||
| return kernel_list | return kernel_list | ||||
| def _wrapper_kernel_list_do(self, itr): | def _wrapper_kernel_list_do(self, itr): | ||||
| return itr, self._kernel_do(G_q_T1, G_q_T_list[itr], G_P1, G_P_list[itr], G_D1, G_D_list[itr], self._weight, self._sub_kernel) | return itr, self._kernel_do(G_q_T1, G_q_T_list[itr], G_P1, G_P_list[itr], G_D1, G_D_list[itr], self._weight, self._sub_kernel) | ||||
| def _compute_single_kernel_series(self, g1, g2): | def _compute_single_kernel_series(self, g1, g2): | ||||
| self._check_edge_weight([g1] + [g2], self._verbose) | self._check_edge_weight([g1] + [g2], self._verbose) | ||||
| self._check_graphs([g1] + [g2]) | self._check_graphs([g1] + [g2]) | ||||
| if self._verbose >= 2: | if self._verbose >= 2: | ||||
| import warnings | import warnings | ||||
| warnings.warn('All labels are ignored. Only works for undirected graphs.') | warnings.warn('All labels are ignored. Only works for undirected graphs.') | ||||
| if self._q is None: | if self._q is None: | ||||
| # precompute the spectral decomposition of each graph. | # precompute the spectral decomposition of each graph. | ||||
| A1 = nx.adjacency_matrix(g1, self._edge_weight).todense().transpose() | A1 = nx.adjacency_matrix(g1, self._edge_weight).todense().transpose() | ||||
| @@ -257,10 +243,10 @@ class SpectralDecomposition(RandomWalkMeta): | |||||
| pass | pass | ||||
| else: # @todo | else: # @todo | ||||
| pass | pass | ||||
| return kernel | |||||
| return kernel | |||||
| def _kernel_do(self, q_T1, q_T2, P1, P2, D1, D2, weight, sub_kernel): | def _kernel_do(self, q_T1, q_T2, P1, P2, D1, D2, weight, sub_kernel): | ||||
| # use uniform distribution if there is no prior knowledge. | # use uniform distribution if there is no prior knowledge. | ||||
| kl = kron(np.dot(q_T1, P1), np.dot(q_T2, P2)).todense() | kl = kron(np.dot(q_T1, P1), np.dot(q_T2, P2)).todense() | ||||
| @@ -276,7 +262,7 @@ class SpectralDecomposition(RandomWalkMeta): | |||||
| kmiddle = np.linalg.inv(kmiddle) | kmiddle = np.linalg.inv(kmiddle) | ||||
| return np.dot(np.dot(kl, kmiddle), kl.T)[0, 0] | return np.dot(np.dot(kl, kmiddle), kl.T)[0, 0] | ||||
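The return value above is the spectral form of the generalized random-walk kernel (see the Vishwanathan et al. reference below): with symmetric (undirected) adjacency matrices A_i = P_i D_i P_i^T, the flanking factor kl = (q_1^T P_1) (x) (q_2^T P_2) is a single row vector and kmiddle is diagonal, so no dense (n_1 n_2 x n_1 n_2) system ever has to be solved. A minimal numpy sketch of this equivalence for the geometric sub-kernel, assuming uniform p = q (the graphs and the weight are illustrative, not gklearn's API):

    import numpy as np
    import networkx as nx

    w = 0.1  # decay weight; needs w * lmax(A1) * lmax(A2) < 1 to converge
    A1 = nx.to_numpy_array(nx.path_graph(3))
    A2 = nx.to_numpy_array(nx.cycle_graph(4))
    n1, n2 = len(A1), len(A2)
    q1, q2 = np.full(n1, 1 / n1), np.full(n2, 1 / n2)  # uniform start/stop

    # naive evaluation on the direct product graph
    k_naive = np.kron(q1, q2) @ np.linalg.inv(
        np.eye(n1 * n2) - w * np.kron(A1, A2)) @ np.kron(q1, q2)

    # spectral evaluation: one small row vector and a diagonal inverse
    D1, P1 = np.linalg.eigh(A1)
    D2, P2 = np.linalg.eigh(A2)
    kl = np.kron(q1 @ P1, q2 @ P2)               # the code's kl, as a 1-D array
    kmiddle = 1.0 / (1.0 - w * np.kron(D1, D2))  # diagonal of inv(I - w * D1 (x) D2)
    print(np.isclose(k_naive, (kl * kmiddle) @ kl))  # True

The equality holds because kron(A1, A2) = kron(P1, P2) @ diag(kron(D1, D2)) @ kron(P1, P2).T with orthogonal P_i, which is why the warning above restricts the method to undirected graphs.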
| def _wrapper_kernel_do(self, itr): | def _wrapper_kernel_do(self, itr): | ||||
| i = itr[0] | i = itr[0] | ||||
| j = itr[1] | j = itr[1] | ||||
| @@ -5,13 +5,13 @@ Created on Wed Aug 19 17:24:46 2020 | |||||
| @author: ljia | @author: ljia | ||||
| @references: | |||||
| @references: | |||||
| [1] S. V. N. Vishwanathan, Nicol N. Schraudolph, Risi Kondor, and Karsten M. Borgwardt. Graph kernels. Journal of Machine Learning Research, 11(Apr):1201–1242, 2010. | [1] S. V. N. Vishwanathan, Nicol N. Schraudolph, Risi Kondor, and Karsten M. Borgwardt. Graph kernels. Journal of Machine Learning Research, 11(Apr):1201–1242, 2010. | ||||
| """ | """ | ||||
| import sys | import sys | ||||
| from tqdm import tqdm | |||||
| from gklearn.utils import get_iters | |||||
| import numpy as np | import numpy as np | ||||
| import networkx as nx | import networkx as nx | ||||
| from control import dlyap | from control import dlyap | ||||
| @@ -20,11 +20,11 @@ from gklearn.kernels import RandomWalkMeta | |||||
| class SylvesterEquation(RandomWalkMeta): | class SylvesterEquation(RandomWalkMeta): | ||||
| def __init__(self, **kwargs): | def __init__(self, **kwargs): | ||||
| super().__init__(**kwargs) | super().__init__(**kwargs) | ||||
| def _compute_gm_series(self): | def _compute_gm_series(self): | ||||
| self._check_edge_weight(self._graphs, self._verbose) | self._check_edge_weight(self._graphs, self._verbose) | ||||
| @@ -32,24 +32,21 @@ class SylvesterEquation(RandomWalkMeta): | |||||
| if self._verbose >= 2: | if self._verbose >= 2: | ||||
| import warnings | import warnings | ||||
| warnings.warn('All labels are ignored.') | warnings.warn('All labels are ignored.') | ||||
| lmda = self._weight | lmda = self._weight | ||||
| # compute Gram matrix. | # compute Gram matrix. | ||||
| gram_matrix = np.zeros((len(self._graphs), len(self._graphs))) | |||||
| gram_matrix = np.zeros((len(self._graphs), len(self._graphs))) | |||||
| if self._q is None: | if self._q is None: | ||||
| # don't normalize adjacency matrices if q is a uniform vector. Note | # don't normalize adjacency matrices if q is a uniform vector. Note | ||||
| # A_wave_list actually contains the transposes of the adjacency matrices. | # A_wave_list actually contains the transposes of the adjacency matrices. | ||||
| if self._verbose >= 2: | |||||
| iterator = tqdm(self._graphs, desc='compute adjacency matrices', file=sys.stdout) | |||||
| else: | |||||
| iterator = self._graphs | |||||
| iterator = get_iters(self._graphs, desc='compute adjacency matrices', file=sys.stdout, verbose=(self._verbose >= 2)) | |||||
| A_wave_list = [nx.adjacency_matrix(G, self._edge_weight).todense().transpose() for G in iterator] | A_wave_list = [nx.adjacency_matrix(G, self._edge_weight).todense().transpose() for G in iterator] | ||||
| # # normalized adjacency matrices | # # normalized adjacency matrices | ||||
| # A_wave_list = [] | # A_wave_list = [] | ||||
| # for G in tqdm(Gn, desc='compute adjacency matrices', file=sys.stdout): | # for G in tqdm(Gn, desc='compute adjacency matrices', file=sys.stdout): | ||||
| # A_tilde = nx.adjacency_matrix(G, eweight).todense().transpose() | |||||
| # A_tilde = nx.adjacency_matrix(G, eweight).todense().transpose() | |||||
| # norm = A_tilde.sum(axis=0) | # norm = A_tilde.sum(axis=0) | ||||
| # norm[norm == 0] = 1 | # norm[norm == 0] = 1 | ||||
| # A_wave_list.append(A_tilde / norm) | # A_wave_list.append(A_tilde / norm) | ||||
| @@ -57,119 +54,105 @@ class SylvesterEquation(RandomWalkMeta): | |||||
| if self._p is None: # p is uniform distribution as default. | if self._p is None: # p is uniform distribution as default. | ||||
| from itertools import combinations_with_replacement | from itertools import combinations_with_replacement | ||||
| itr = combinations_with_replacement(range(0, len(self._graphs)), 2) | itr = combinations_with_replacement(range(0, len(self._graphs)), 2) | ||||
| if self._verbose >= 2: | |||||
| iterator = tqdm(itr, desc='Computing kernels', file=sys.stdout) | |||||
| else: | |||||
| iterator = itr | |||||
| len_itr = int(len(self._graphs) * (len(self._graphs) + 1) / 2) | |||||
| iterator = get_iters(itr, desc='Computing kernels', file=sys.stdout, length=len_itr, verbose=(self._verbose >= 2)) | |||||
| for i, j in iterator: | for i, j in iterator: | ||||
| kernel = self._kernel_do(A_wave_list[i], A_wave_list[j], lmda) | kernel = self._kernel_do(A_wave_list[i], A_wave_list[j], lmda) | ||||
| gram_matrix[i][j] = kernel | gram_matrix[i][j] = kernel | ||||
| gram_matrix[j][i] = kernel | gram_matrix[j][i] = kernel | ||||
| else: # @todo | else: # @todo | ||||
| pass | pass | ||||
| else: # @todo | else: # @todo | ||||
| pass | pass | ||||
| return gram_matrix | return gram_matrix | ||||
| def _compute_gm_imap_unordered(self): | def _compute_gm_imap_unordered(self): | ||||
| self._check_edge_weight(self._graphs, self._verbose) | self._check_edge_weight(self._graphs, self._verbose) | ||||
| self._check_graphs(self._graphs) | self._check_graphs(self._graphs) | ||||
| if self._verbose >= 2: | if self._verbose >= 2: | ||||
| import warnings | import warnings | ||||
| warnings.warn('All labels are ignored.') | warnings.warn('All labels are ignored.') | ||||
| # compute Gram matrix. | # compute Gram matrix. | ||||
| gram_matrix = np.zeros((len(self._graphs), len(self._graphs))) | |||||
| gram_matrix = np.zeros((len(self._graphs), len(self._graphs))) | |||||
| if self._q is None: | if self._q is None: | ||||
| # don't normalize adjacency matrices if q is a uniform vector. Note | # don't normalize adjacency matrices if q is a uniform vector. Note | ||||
| # A_wave_list actually contains the transposes of the adjacency matrices. | # A_wave_list actually contains the transposes of the adjacency matrices. | ||||
| if self._verbose >= 2: | |||||
| iterator = tqdm(self._graphs, desc='compute adjacency matrices', file=sys.stdout) | |||||
| else: | |||||
| iterator = self._graphs | |||||
| iterator = get_iters(self._graphs, desc='compute adjacency matrices', file=sys.stdout, verbose=(self._verbose >= 2)) | |||||
| A_wave_list = [nx.adjacency_matrix(G, self._edge_weight).todense().transpose() for G in iterator] # @todo: parallel? | A_wave_list = [nx.adjacency_matrix(G, self._edge_weight).todense().transpose() for G in iterator] # @todo: parallel? | ||||
| if self._p is None: # p is uniform distribution as default. | if self._p is None: # p is uniform distribution as default. | ||||
| def init_worker(A_wave_list_toshare): | def init_worker(A_wave_list_toshare): | ||||
| global G_A_wave_list | global G_A_wave_list | ||||
| G_A_wave_list = A_wave_list_toshare | G_A_wave_list = A_wave_list_toshare | ||||
| do_fun = self._wrapper_kernel_do | do_fun = self._wrapper_kernel_do | ||||
| parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker, | |||||
| parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker, | |||||
| glbv=(A_wave_list,), n_jobs=self._n_jobs, verbose=self._verbose) | glbv=(A_wave_list,), n_jobs=self._n_jobs, verbose=self._verbose) | ||||
| else: # @todo | else: # @todo | ||||
| pass | pass | ||||
| else: # @todo | else: # @todo | ||||
| pass | pass | ||||
| return gram_matrix | return gram_matrix | ||||
| def _compute_kernel_list_series(self, g1, g_list): | def _compute_kernel_list_series(self, g1, g_list): | ||||
| self._check_edge_weight(g_list + [g1], self._verbose) | self._check_edge_weight(g_list + [g1], self._verbose) | ||||
| self._check_graphs(g_list + [g1]) | self._check_graphs(g_list + [g1]) | ||||
| if self._verbose >= 2: | if self._verbose >= 2: | ||||
| import warnings | import warnings | ||||
| warnings.warn('All labels are ignored.') | warnings.warn('All labels are ignored.') | ||||
| lmda = self._weight | lmda = self._weight | ||||
| # compute kernel list. | # compute kernel list. | ||||
| kernel_list = [None] * len(g_list) | kernel_list = [None] * len(g_list) | ||||
| if self._q is None: | if self._q is None: | ||||
| # don't normalize adjacency matrices if q is a uniform vector. Note | # don't normalize adjacency matrices if q is a uniform vector. Note | ||||
| # A_wave_list actually contains the transposes of the adjacency matrices. | # A_wave_list actually contains the transposes of the adjacency matrices. | ||||
| A_wave_1 = nx.adjacency_matrix(g1, self._edge_weight).todense().transpose() | A_wave_1 = nx.adjacency_matrix(g1, self._edge_weight).todense().transpose() | ||||
| if self._verbose >= 2: | |||||
| iterator = tqdm(g_list, desc='compute adjacency matrices', file=sys.stdout) | |||||
| else: | |||||
| iterator = g_list | |||||
| iterator = get_iters(g_list, desc='compute adjacency matrices', file=sys.stdout, verbose=(self._verbose >= 2)) | |||||
| A_wave_list = [nx.adjacency_matrix(G, self._edge_weight).todense().transpose() for G in iterator] | A_wave_list = [nx.adjacency_matrix(G, self._edge_weight).todense().transpose() for G in iterator] | ||||
| if self._p is None: # p is uniform distribution as default. | if self._p is None: # p is uniform distribution as default. | ||||
| if self._verbose >= 2: | |||||
| iterator = tqdm(range(len(g_list)), desc='Computing kernels', file=sys.stdout) | |||||
| else: | |||||
| iterator = range(len(g_list)) | |||||
| iterator = get_iters(range(len(g_list)), desc='Computing kernels', file=sys.stdout, length=len(g_list), verbose=(self._verbose >= 2)) | |||||
| for i in iterator: | for i in iterator: | ||||
| kernel = self._kernel_do(A_wave_1, A_wave_list[i], lmda) | kernel = self._kernel_do(A_wave_1, A_wave_list[i], lmda) | ||||
| kernel_list[i] = kernel | kernel_list[i] = kernel | ||||
| else: # @todo | else: # @todo | ||||
| pass | pass | ||||
| else: # @todo | else: # @todo | ||||
| pass | pass | ||||
| return kernel_list | return kernel_list | ||||
| def _compute_kernel_list_imap_unordered(self, g1, g_list): | def _compute_kernel_list_imap_unordered(self, g1, g_list): | ||||
| self._check_edge_weight(g_list + [g1], self._verbose) | self._check_edge_weight(g_list + [g1], self._verbose) | ||||
| self._check_graphs(g_list + [g1]) | self._check_graphs(g_list + [g1]) | ||||
| if self._verbose >= 2: | if self._verbose >= 2: | ||||
| import warnings | import warnings | ||||
| warnings.warn('All labels are ignored.') | warnings.warn('All labels are ignored.') | ||||
| # compute kernel list. | # compute kernel list. | ||||
| kernel_list = [None] * len(g_list) | kernel_list = [None] * len(g_list) | ||||
| if self._q is None: | if self._q is None: | ||||
| # don't normalize adjacency matrices if q is a uniform vector. Note | # don't normalize adjacency matrices if q is a uniform vector. Note | ||||
| # A_wave_list actually contains the transposes of the adjacency matrices. | # A_wave_list actually contains the transposes of the adjacency matrices. | ||||
| A_wave_1 = nx.adjacency_matrix(g1, self._edge_weight).todense().transpose() | A_wave_1 = nx.adjacency_matrix(g1, self._edge_weight).todense().transpose() | ||||
| if self._verbose >= 2: | |||||
| iterator = tqdm(g_list, desc='compute adjacency matrices', file=sys.stdout) | |||||
| else: | |||||
| iterator = g_list | |||||
| iterator = get_iters(g_list, desc='compute adjacency matrices', file=sys.stdout, verbose=(self._verbose >= 2)) | |||||
| A_wave_list = [nx.adjacency_matrix(G, self._edge_weight).todense().transpose() for G in iterator] # @todo: parallel? | A_wave_list = [nx.adjacency_matrix(G, self._edge_weight).todense().transpose() for G in iterator] # @todo: parallel? | ||||
| if self._p is None: # p is uniform distribution as default. | if self._p is None: # p is uniform distribution as default. | ||||
| @@ -178,37 +161,37 @@ class SylvesterEquation(RandomWalkMeta): | |||||
| G_A_wave_1 = A_wave_1_toshare | G_A_wave_1 = A_wave_1_toshare | ||||
| G_A_wave_list = A_wave_list_toshare | G_A_wave_list = A_wave_list_toshare | ||||
| do_fun = self._wrapper_kernel_list_do | |||||
| def func_assign(result, var_to_assign): | |||||
| do_fun = self._wrapper_kernel_list_do | |||||
| def func_assign(result, var_to_assign): | |||||
| var_to_assign[result[0]] = result[1] | var_to_assign[result[0]] = result[1] | ||||
| itr = range(len(g_list)) | itr = range(len(g_list)) | ||||
| len_itr = len(g_list) | len_itr = len(g_list) | ||||
| parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr, | parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr, | ||||
| init_worker=init_worker, glbv=(A_wave_1, A_wave_list), method='imap_unordered', | |||||
| init_worker=init_worker, glbv=(A_wave_1, A_wave_list), method='imap_unordered', | |||||
| n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose) | n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose) | ||||
| else: # @todo | else: # @todo | ||||
| pass | pass | ||||
| else: # @todo | else: # @todo | ||||
| pass | pass | ||||
| return kernel_list | return kernel_list | ||||
| def _wrapper_kernel_list_do(self, itr): | def _wrapper_kernel_list_do(self, itr): | ||||
| return itr, self._kernel_do(G_A_wave_1, G_A_wave_list[itr], self._weight) | return itr, self._kernel_do(G_A_wave_1, G_A_wave_list[itr], self._weight) | ||||
| def _compute_single_kernel_series(self, g1, g2): | def _compute_single_kernel_series(self, g1, g2): | ||||
| self._check_edge_weight([g1] + [g2], self._verbose) | self._check_edge_weight([g1] + [g2], self._verbose) | ||||
| self._check_graphs([g1] + [g2]) | self._check_graphs([g1] + [g2]) | ||||
| if self._verbose >= 2: | if self._verbose >= 2: | ||||
| import warnings | import warnings | ||||
| warnings.warn('All labels are ignored.') | warnings.warn('All labels are ignored.') | ||||
| lmda = self._weight | lmda = self._weight | ||||
| if self._q is None: | if self._q is None: | ||||
| # don't normalize adjacency matrices if q is a uniform vector. Note | # don't normalize adjacency matrices if q is a uniform vector. Note | ||||
| # A_wave_list actually contains the transposes of the adjacency matrices. | # A_wave_list actually contains the transposes of the adjacency matrices. | ||||
| @@ -220,12 +203,12 @@ class SylvesterEquation(RandomWalkMeta): | |||||
| pass | pass | ||||
| else: # @todo | else: # @todo | ||||
| pass | pass | ||||
| return kernel | |||||
| return kernel | |||||
| def _kernel_do(self, A_wave1, A_wave2, lmda): | def _kernel_do(self, A_wave1, A_wave2, lmda): | ||||
| S = lmda * A_wave2 | S = lmda * A_wave2 | ||||
| T_t = A_wave1 | T_t = A_wave1 | ||||
| # use uniform distribution if there is no prior knowledge. | # use uniform distribution if there is no prior knowledge. | ||||
| @@ -237,8 +220,8 @@ class SylvesterEquation(RandomWalkMeta): | |||||
| # use uniform distribution if there is no prior knowledge. | # use uniform distribution if there is no prior knowledge. | ||||
| q_times = np.full((1, nb_pd), p_times_uni) | q_times = np.full((1, nb_pd), p_times_uni) | ||||
| return np.dot(q_times, X) | return np.dot(q_times, X) | ||||
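For reference, the system handed to control.dlyap above is the fixed point X = S X T_t + M0 with S = lmda * A_wave2 and T_t = A_wave1; vectorized column-major (the order='F' reshape), this is exactly the geometric random-walk system (I - lmda * kron(A_wave1, A_wave2)) vec(X) = vec(M0) for undirected graphs. A numpy-only sketch under those assumptions, with a plain fixed-point iteration standing in for the dlyap solve (toy graphs and lmda are illustrative):

    import numpy as np
    import networkx as nx

    lmda = 0.1
    A_wave1 = nx.to_numpy_array(nx.path_graph(3))   # undirected: A == A.T
    A_wave2 = nx.to_numpy_array(nx.cycle_graph(4))
    n1, n2 = len(A_wave1), len(A_wave2)
    p_uni = 1.0 / (n1 * n2)
    M0 = np.full((n2, n1), p_uni)        # uniform starting distribution

    # the fixed point that dlyap solves directly
    X = M0.copy()
    for _ in range(200):                 # converges: lmda is below the spectral-radius bound
        X = lmda * A_wave2 @ X @ A_wave1 + M0

    # the same solution through the naive Kronecker system
    x = np.linalg.solve(np.eye(n1 * n2) - lmda * np.kron(A_wave1, A_wave2),
                        M0.flatten(order='F'))
    print(np.allclose(X.flatten(order='F'), x))   # True
    kernel = p_uni * x.sum()             # contraction with the uniform q, as in the code

Solving the small Sylvester equation costs O(n^3) instead of the O(n^6) of inverting the Kronecker system directly, which is the point of this formulation.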
| def _wrapper_kernel_do(self, itr): | def _wrapper_kernel_do(self, itr): | ||||
| i = itr[0] | i = itr[0] | ||||
| j = itr[1] | j = itr[1] | ||||
| @@ -5,15 +5,15 @@ Created on Mon Apr 13 18:02:46 2020 | |||||
| @author: ljia | @author: ljia | ||||
| @references: | |||||
| @references: | |||||
| [1] Benoit Gaüzère, Luc Brun, and Didier Villemin. Two new graphs kernels in | |||||
| [1] Benoit Gaüzère, Luc Brun, and Didier Villemin. Two new graphs kernels in | |||||
| chemoinformatics. Pattern Recognition Letters, 33(15):2038–2047, 2012. | chemoinformatics. Pattern Recognition Letters, 33(15):2038–2047, 2012. | ||||
| """ | """ | ||||
| import sys | import sys | ||||
| from multiprocessing import Pool | from multiprocessing import Pool | ||||
| from tqdm import tqdm | |||||
| from gklearn.utils import get_iters | |||||
| import numpy as np | import numpy as np | ||||
| import networkx as nx | import networkx as nx | ||||
| from collections import Counter | from collections import Counter | ||||
| @@ -25,7 +25,7 @@ from gklearn.kernels import GraphKernel | |||||
| class Treelet(GraphKernel): | class Treelet(GraphKernel): | ||||
| def __init__(self, **kwargs): | def __init__(self, **kwargs): | ||||
| GraphKernel.__init__(self) | GraphKernel.__init__(self) | ||||
| self._node_labels = kwargs.get('node_labels', []) | self._node_labels = kwargs.get('node_labels', []) | ||||
| @@ -38,38 +38,35 @@ class Treelet(GraphKernel): | |||||
| def _compute_gm_series(self): | def _compute_gm_series(self): | ||||
| self._add_dummy_labels(self._graphs) | self._add_dummy_labels(self._graphs) | ||||
| # get all canonical keys of all graphs before computing kernels to save | |||||
| # get all canonical keys of all graphs before computing kernels to save | |||||
| # time, but this may cost a lot of memory for large datasets. | # time, but this may cost a lot of memory for large datasets. | ||||
| canonkeys = [] | canonkeys = [] | ||||
| if self._verbose >= 2: | |||||
| iterator = tqdm(self._graphs, desc='getting canonkeys', file=sys.stdout) | |||||
| else: | |||||
| iterator = self._graphs | |||||
| iterator = get_iters(self._graphs, desc='getting canonkeys', file=sys.stdout, | |||||
| verbose=(self._verbose >= 2)) | |||||
| for g in iterator: | for g in iterator: | ||||
| canonkeys.append(self._get_canonkeys(g)) | canonkeys.append(self._get_canonkeys(g)) | ||||
| # compute Gram matrix. | # compute Gram matrix. | ||||
| gram_matrix = np.zeros((len(self._graphs), len(self._graphs))) | gram_matrix = np.zeros((len(self._graphs), len(self._graphs))) | ||||
| from itertools import combinations_with_replacement | from itertools import combinations_with_replacement | ||||
| itr = combinations_with_replacement(range(0, len(self._graphs)), 2) | itr = combinations_with_replacement(range(0, len(self._graphs)), 2) | ||||
| if self._verbose >= 2: | |||||
| iterator = tqdm(itr, desc='Computing kernels', file=sys.stdout) | |||||
| else: | |||||
| iterator = itr | |||||
| len_itr = int(len(self._graphs) * (len(self._graphs) + 1) / 2) | |||||
| iterator = get_iters(itr, desc='Computing kernels', file=sys.stdout, | |||||
| length=len_itr, verbose=(self._verbose >= 2)) | |||||
| for i, j in iterator: | for i, j in iterator: | ||||
| kernel = self._kernel_do(canonkeys[i], canonkeys[j]) | kernel = self._kernel_do(canonkeys[i], canonkeys[j]) | ||||
| gram_matrix[i][j] = kernel | gram_matrix[i][j] = kernel | ||||
| gram_matrix[j][i] = kernel # @todo: no directed graph considered? | gram_matrix[j][i] = kernel # @todo: no directed graph considered? | ||||
| return gram_matrix | return gram_matrix | ||||
| def _compute_gm_imap_unordered(self): | def _compute_gm_imap_unordered(self): | ||||
| self._add_dummy_labels(self._graphs) | self._add_dummy_labels(self._graphs) | ||||
| # get all canonical keys of all graphs before computing kernels to save | |||||
| # get all canonical keys of all graphs before computing kernels to save | |||||
| # time, but this may cost a lot of memory for large datasets. | # time, but this may cost a lot of memory for large datasets. | ||||
| pool = Pool(self._n_jobs) | pool = Pool(self._n_jobs) | ||||
| itr = zip(self._graphs, range(0, len(self._graphs))) | itr = zip(self._graphs, range(0, len(self._graphs))) | ||||
| @@ -79,60 +76,52 @@ class Treelet(GraphKernel): | |||||
| chunksize = 100 | chunksize = 100 | ||||
| canonkeys = [[] for _ in range(len(self._graphs))] | canonkeys = [[] for _ in range(len(self._graphs))] | ||||
| get_fun = self._wrapper_get_canonkeys | get_fun = self._wrapper_get_canonkeys | ||||
| if self._verbose >= 2: | |||||
| iterator = tqdm(pool.imap_unordered(get_fun, itr, chunksize), | |||||
| desc='getting canonkeys', file=sys.stdout) | |||||
| else: | |||||
| iterator = pool.imap_unordered(get_fun, itr, chunksize) | |||||
| iterator = get_iters(pool.imap_unordered(get_fun, itr, chunksize), | |||||
| desc='getting canonkeys', file=sys.stdout, | |||||
| length=len(self._graphs), verbose=(self._verbose >= 2)) | |||||
| for i, ck in iterator: | for i, ck in iterator: | ||||
| canonkeys[i] = ck | canonkeys[i] = ck | ||||
| pool.close() | pool.close() | ||||
| pool.join() | pool.join() | ||||
| # compute Gram matrix. | # compute Gram matrix. | ||||
| gram_matrix = np.zeros((len(self._graphs), len(self._graphs))) | gram_matrix = np.zeros((len(self._graphs), len(self._graphs))) | ||||
| def init_worker(canonkeys_toshare): | def init_worker(canonkeys_toshare): | ||||
| global G_canonkeys | global G_canonkeys | ||||
| G_canonkeys = canonkeys_toshare | G_canonkeys = canonkeys_toshare | ||||
| do_fun = self._wrapper_kernel_do | do_fun = self._wrapper_kernel_do | ||||
| parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker, | |||||
| parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker, | |||||
| glbv=(canonkeys,), n_jobs=self._n_jobs, verbose=self._verbose) | glbv=(canonkeys,), n_jobs=self._n_jobs, verbose=self._verbose) | ||||
| return gram_matrix | return gram_matrix | ||||
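The init_worker/glbv handshake used with parallel_gm above exists so that the precomputed canonical keys are installed once per worker process, as module-level globals, rather than being pickled into every (i, j) task. A generic sketch of this pattern (the names and the toy similarity are illustrative, not gklearn's API):

    import multiprocessing
    import numpy as np

    def init_worker(data_toshare):
        global G_data                     # one copy per worker process
        G_data = data_toshare

    def compute_pair(ij):
        i, j = ij
        # toy symmetric similarity standing in for a kernel evaluation
        return i, j, float(np.dot(G_data[i], G_data[j]))

    if __name__ == '__main__':
        data = [np.random.rand(10) for _ in range(5)]
        pairs = [(i, j) for i in range(5) for j in range(i, 5)]
        gram = np.zeros((5, 5))
        with multiprocessing.Pool(2, initializer=init_worker, initargs=(data,)) as pool:
            for i, j, k in pool.imap_unordered(compute_pair, pairs):
                gram[i, j] = gram[j, i] = k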
| def _compute_kernel_list_series(self, g1, g_list): | def _compute_kernel_list_series(self, g1, g_list): | ||||
| self._add_dummy_labels(g_list + [g1]) | self._add_dummy_labels(g_list + [g1]) | ||||
| # get all canonical keys of all graphs before computing kernels to save | |||||
| # get all canonical keys of all graphs before computing kernels to save | |||||
| # time, but this may cost a lot of memory for large datasets. | # time, but this may cost a lot of memory for large datasets. | ||||
| canonkeys_1 = self._get_canonkeys(g1) | canonkeys_1 = self._get_canonkeys(g1) | ||||
| canonkeys_list = [] | canonkeys_list = [] | ||||
| if self._verbose >= 2: | |||||
| iterator = tqdm(g_list, desc='getting canonkeys', file=sys.stdout) | |||||
| else: | |||||
| iterator = g_list | |||||
| iterator = get_iters(g_list, desc='getting canonkeys', file=sys.stdout, verbose=(self._verbose >= 2)) | |||||
| for g in iterator: | for g in iterator: | ||||
| canonkeys_list.append(self._get_canonkeys(g)) | canonkeys_list.append(self._get_canonkeys(g)) | ||||
| # compute kernel list. | # compute kernel list. | ||||
| kernel_list = [None] * len(g_list) | kernel_list = [None] * len(g_list) | ||||
| if self._verbose >= 2: | |||||
| iterator = tqdm(range(len(g_list)), desc='Computing kernels', file=sys.stdout) | |||||
| else: | |||||
| iterator = range(len(g_list)) | |||||
| iterator = get_iters(range(len(g_list)), desc='Computing kernels', file=sys.stdout, length=len(g_list), verbose=(self._verbose >= 2)) | |||||
| for i in iterator: | for i in iterator: | ||||
| kernel = self._kernel_do(canonkeys_1, canonkeys_list[i]) | kernel = self._kernel_do(canonkeys_1, canonkeys_list[i]) | ||||
| kernel_list[i] = kernel | kernel_list[i] = kernel | ||||
| return kernel_list | return kernel_list | ||||
| def _compute_kernel_list_imap_unordered(self, g1, g_list): | def _compute_kernel_list_imap_unordered(self, g1, g_list): | ||||
| self._add_dummy_labels(g_list + [g1]) | self._add_dummy_labels(g_list + [g1]) | ||||
| # get all canonical keys of all graphs before computing kernels to save | |||||
| # get all canonical keys of all graphs before computing kernels to save | |||||
| # time, but this may cost a lot of memory for large datasets. | # time, but this may cost a lot of memory for large datasets. | ||||
| canonkeys_1 = self._get_canonkeys(g1) | canonkeys_1 = self._get_canonkeys(g1) | ||||
| canonkeys_list = [[] for _ in range(len(g_list))] | canonkeys_list = [[] for _ in range(len(g_list))] | ||||
| @@ -143,16 +132,14 @@ class Treelet(GraphKernel): | |||||
| else: | else: | ||||
| chunksize = 100 | chunksize = 100 | ||||
| get_fun = self._wrapper_get_canonkeys | get_fun = self._wrapper_get_canonkeys | ||||
| if self._verbose >= 2: | |||||
| iterator = tqdm(pool.imap_unordered(get_fun, itr, chunksize), | |||||
| desc='getting canonkeys', file=sys.stdout) | |||||
| else: | |||||
| iterator = pool.imap_unordered(get_fun, itr, chunksize) | |||||
| iterator = get_iters(pool.imap_unordered(get_fun, itr, chunksize), | |||||
| desc='getting canonkeys', file=sys.stdout, | |||||
| length=len(g_list), verbose=(self._verbose >= 2)) | |||||
| for i, ck in iterator: | for i, ck in iterator: | ||||
| canonkeys_list[i] = ck | canonkeys_list[i] = ck | ||||
| pool.close() | pool.close() | ||||
| pool.join() | pool.join() | ||||
| # compute kernel list. | # compute kernel list. | ||||
| kernel_list = [None] * len(g_list) | kernel_list = [None] * len(g_list) | ||||
| @@ -161,37 +148,37 @@ class Treelet(GraphKernel): | |||||
| G_ck_1 = ck_1_toshare | G_ck_1 = ck_1_toshare | ||||
| G_ck_list = ck_list_toshare | G_ck_list = ck_list_toshare | ||||
| do_fun = self._wrapper_kernel_list_do | do_fun = self._wrapper_kernel_list_do | ||||
| def func_assign(result, var_to_assign): | |||||
| def func_assign(result, var_to_assign): | |||||
| var_to_assign[result[0]] = result[1] | var_to_assign[result[0]] = result[1] | ||||
| itr = range(len(g_list)) | itr = range(len(g_list)) | ||||
| len_itr = len(g_list) | len_itr = len(g_list) | ||||
| parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr, | parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr, | ||||
| init_worker=init_worker, glbv=(canonkeys_1, canonkeys_list), method='imap_unordered', | |||||
| init_worker=init_worker, glbv=(canonkeys_1, canonkeys_list), method='imap_unordered', | |||||
| n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose) | n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose) | ||||
| return kernel_list | return kernel_list | ||||
| def _wrapper_kernel_list_do(self, itr): | def _wrapper_kernel_list_do(self, itr): | ||||
| return itr, self._kernel_do(G_ck_1, G_ck_list[itr]) | return itr, self._kernel_do(G_ck_1, G_ck_list[itr]) | ||||
| def _compute_single_kernel_series(self, g1, g2): | def _compute_single_kernel_series(self, g1, g2): | ||||
| self._add_dummy_labels([g1] + [g2]) | self._add_dummy_labels([g1] + [g2]) | ||||
| canonkeys_1 = self._get_canonkeys(g1) | canonkeys_1 = self._get_canonkeys(g1) | ||||
| canonkeys_2 = self._get_canonkeys(g2) | canonkeys_2 = self._get_canonkeys(g2) | ||||
| kernel = self._kernel_do(canonkeys_1, canonkeys_2) | kernel = self._kernel_do(canonkeys_1, canonkeys_2) | ||||
| return kernel | |||||
| return kernel | |||||
| def _kernel_do(self, canonkey1, canonkey2): | def _kernel_do(self, canonkey1, canonkey2): | ||||
| """Compute treelet graph kernel between 2 graphs. | """Compute treelet graph kernel between 2 graphs. | ||||
| Parameters | Parameters | ||||
| ---------- | ---------- | ||||
| canonkey1, canonkey2 : dict | canonkey1, canonkey2 : dict | ||||
| Dictionaries mapping each canonical key to the number of occurrences of the corresponding treelet in the two graphs. | Dictionaries mapping each canonical key to the number of occurrences of the corresponding treelet in the two graphs. | ||||
| Return | Return | ||||
| ------ | ------ | ||||
| kernel : float | kernel : float | ||||
| @@ -199,38 +186,38 @@ class Treelet(GraphKernel): | |||||
| """ | """ | ||||
| keys = set(canonkey1.keys()) & set(canonkey2.keys()) # find same canonical keys in both graphs | keys = set(canonkey1.keys()) & set(canonkey2.keys()) # find same canonical keys in both graphs | ||||
| vector1 = np.array([(canonkey1[key] if (key in canonkey1.keys()) else 0) for key in keys]) | vector1 = np.array([(canonkey1[key] if (key in canonkey1.keys()) else 0) for key in keys]) | ||||
| vector2 = np.array([(canonkey2[key] if (key in canonkey2.keys()) else 0) for key in keys]) | |||||
| kernel = self._sub_kernel(vector1, vector2) | |||||
| vector2 = np.array([(canonkey2[key] if (key in canonkey2.keys()) else 0) for key in keys]) | |||||
| kernel = self._sub_kernel(vector1, vector2) | |||||
| return kernel | return kernel | ||||
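Concretely, _kernel_do reduces each graph to a histogram over canonical keys and compares the two histograms restricted to the keys they share. A minimal sketch, assuming a Gaussian sub-kernel on the counts (a common choice for this kernel; gamma and the toy histograms are illustrative):

    import numpy as np

    def treelet_kernel(canonkey1, canonkey2, gamma=1.0):
        keys = list(set(canonkey1) & set(canonkey2))    # shared canonical keys only
        v1 = np.array([canonkey1[k] for k in keys], dtype=float)
        v2 = np.array([canonkey2[k] for k in keys], dtype=float)
        return np.exp(-gamma * np.sum((v1 - v2) ** 2))  # Gaussian sub-kernel

    # toy histograms: canonical key -> number of occurrences of that treelet
    print(treelet_kernel({'0': 3, '1': 2}, {'0': 2, '1': 2, '2': 1}))  # exp(-1)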
| def _wrapper_kernel_do(self, itr): | def _wrapper_kernel_do(self, itr): | ||||
| i = itr[0] | i = itr[0] | ||||
| j = itr[1] | j = itr[1] | ||||
| return i, j, self._kernel_do(G_canonkeys[i], G_canonkeys[j]) | return i, j, self._kernel_do(G_canonkeys[i], G_canonkeys[j]) | ||||
| def _get_canonkeys(self, G): | def _get_canonkeys(self, G): | ||||
| """Generate canonical keys of all treelets in a graph. | """Generate canonical keys of all treelets in a graph. | ||||
| Parameters | Parameters | ||||
| ---------- | ---------- | ||||
| G : NetworkX graph | G : NetworkX graph | ||||
| The graph in which keys are generated. | The graph in which keys are generated. | ||||
| Return | Return | ||||
| ------ | ------ | ||||
| canonkey/canonkey_l : dict | canonkey/canonkey_l : dict | ||||
| For unlabeled graphs, canonkey is a dictionary recording the number of | |||||
| occurrences of every tree pattern. For labeled graphs, canonkey_l keeps | |||||
| For unlabeled graphs, canonkey is a dictionary recording the number of | |||||
| occurrences of every tree pattern. For labeled graphs, canonkey_l keeps | |||||
| track of the number of occurrences of every treelet. | track of the number of occurrences of every treelet. | ||||
| """ | """ | ||||
| patterns = {} # a dictionary which consists of lists of patterns for all graphlets. | patterns = {} # a dictionary which consists of lists of patterns for all graphlets. | ||||
| canonkey = {} # canonical key, a dictionary which records the number of occurrences of every tree pattern. | canonkey = {} # canonical key, a dictionary which records the number of occurrences of every tree pattern. | ||||
| ### structural analysis ### | ### structural analysis ### | ||||
| ### In this section, a list of patterns is generated for each graphlet, | |||||
| ### where every pattern is represented by nodes ordered by Morgan's | |||||
| ### In this section, a list of patterns is generated for each graphlet, | |||||
| ### where every pattern is represented by nodes ordered by Morgan's | |||||
| ### extended labeling. | ### extended labeling. | ||||
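Morgan's extended labeling, referenced in the comment above, iteratively replaces each node's label by the sum of its neighbours' current labels, starting from the node degrees. A one-step sketch (the helper name is illustrative, not gklearn's API):

    import networkx as nx

    def morgan_iteration(G, labels):
        # one refinement step: each node's new label is the sum of its
        # neighbours' current labels (degrees on the first pass)
        return {v: sum(labels[u] for u in G[v]) for v in G}

    G = nx.path_graph(4)
    labels = dict(G.degree())             # initial extended labels: 1, 2, 2, 1
    labels = morgan_iteration(G, labels)  # -> {0: 2, 1: 3, 2: 3, 3: 2}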
| # linear patterns | # linear patterns | ||||
| patterns['0'] = list(G.nodes()) | patterns['0'] = list(G.nodes()) | ||||
| @@ -238,16 +225,16 @@ class Treelet(GraphKernel): | |||||
| for i in range(1, 6): # for i in range(1, 6): | for i in range(1, 6): # for i in range(1, 6): | ||||
| patterns[str(i)] = find_all_paths(G, i, self._ds_infos['directed']) | patterns[str(i)] = find_all_paths(G, i, self._ds_infos['directed']) | ||||
| canonkey[str(i)] = len(patterns[str(i)]) | canonkey[str(i)] = len(patterns[str(i)]) | ||||
| # n-star patterns | # n-star patterns | ||||
| patterns['3star'] = [[node] + [neighbor for neighbor in G[node]] for node in G.nodes() if G.degree(node) == 3] | patterns['3star'] = [[node] + [neighbor for neighbor in G[node]] for node in G.nodes() if G.degree(node) == 3] | ||||
| patterns['4star'] = [[node] + [neighbor for neighbor in G[node]] for node in G.nodes() if G.degree(node) == 4] | patterns['4star'] = [[node] + [neighbor for neighbor in G[node]] for node in G.nodes() if G.degree(node) == 4] | ||||
| patterns['5star'] = [[node] + [neighbor for neighbor in G[node]] for node in G.nodes() if G.degree(node) == 5] | |||||
| patterns['5star'] = [[node] + [neighbor for neighbor in G[node]] for node in G.nodes() if G.degree(node) == 5] | |||||
| # n-star patterns | # n-star patterns | ||||
| canonkey['6'] = len(patterns['3star']) | canonkey['6'] = len(patterns['3star']) | ||||
| canonkey['8'] = len(patterns['4star']) | canonkey['8'] = len(patterns['4star']) | ||||
| canonkey['d'] = len(patterns['5star']) | canonkey['d'] = len(patterns['5star']) | ||||
| # pattern 7 | # pattern 7 | ||||
| patterns['7'] = [] # the 1st line of Table 1 in Ref [1] | patterns['7'] = [] # the 1st line of Table 1 in Ref [1] | ||||
| for pattern in patterns['3star']: | for pattern in patterns['3star']: | ||||
| @@ -261,7 +248,7 @@ class Treelet(GraphKernel): | |||||
| new_pattern = pattern_t + [neighborx] | new_pattern = pattern_t + [neighborx] | ||||
| patterns['7'].append(new_pattern) | patterns['7'].append(new_pattern) | ||||
| canonkey['7'] = len(patterns['7']) | canonkey['7'] = len(patterns['7']) | ||||
| # pattern 11 | # pattern 11 | ||||
| patterns['11'] = [] # the 4th line of Table 1 in Ref [1] | patterns['11'] = [] # the 4th line of Table 1 in Ref [1] | ||||
| for pattern in patterns['4star']: | for pattern in patterns['4star']: | ||||
| @@ -274,7 +261,7 @@ class Treelet(GraphKernel): | |||||
| new_pattern = pattern_t + [neighborx] | new_pattern = pattern_t + [neighborx] | ||||
| patterns['11'].append(new_pattern) | patterns['11'].append(new_pattern) | ||||
| canonkey['b'] = len(patterns['11']) | canonkey['b'] = len(patterns['11']) | ||||
| # pattern 12 | # pattern 12 | ||||
| patterns['12'] = [] # the 5th line of Table 1 in Ref [1] | patterns['12'] = [] # the 5th line of Table 1 in Ref [1] | ||||
| rootlist = [] # a list of root nodes, whose extended labels are 3 | rootlist = [] # a list of root nodes, whose extended labels are 3 | ||||
| @@ -294,7 +281,7 @@ class Treelet(GraphKernel): | |||||
| # new_patterns = [ pattern + [neighborx1] + [neighborx2] for neighborx1 in G[pattern[i]] if neighborx1 != pattern[0] for neighborx2 in G[pattern[i]] if (neighborx1 > neighborx2 and neighborx2 != pattern[0]) ] | # new_patterns = [ pattern + [neighborx1] + [neighborx2] for neighborx1 in G[pattern[i]] if neighborx1 != pattern[0] for neighborx2 in G[pattern[i]] if (neighborx1 > neighborx2 and neighborx2 != pattern[0]) ] | ||||
| patterns['12'].append(new_pattern) | patterns['12'].append(new_pattern) | ||||
| canonkey['c'] = int(len(patterns['12']) / 2) | canonkey['c'] = int(len(patterns['12']) / 2) | ||||
| # pattern 9 | # pattern 9 | ||||
| patterns['9'] = [] # the 2nd line of Table 1 in Ref [1] | patterns['9'] = [] # the 2nd line of Table 1 in Ref [1] | ||||
| for pattern in patterns['3star']: | for pattern in patterns['3star']: | ||||
| @@ -311,10 +298,10 @@ class Treelet(GraphKernel): | |||||
| new_pattern = pattern_t + [neighborx1] + [neighborx2] | new_pattern = pattern_t + [neighborx1] + [neighborx2] | ||||
| patterns['9'].append(new_pattern) | patterns['9'].append(new_pattern) | ||||
| canonkey['9'] = len(patterns['9']) | canonkey['9'] = len(patterns['9']) | ||||
| # pattern 10 | # pattern 10 | ||||
| patterns['10'] = [] # the 3rd line of Table 1 in Ref [1] | patterns['10'] = [] # the 3rd line of Table 1 in Ref [1] | ||||
| for pattern in patterns['3star']: | |||||
| for pattern in patterns['3star']: | |||||
| for i in range(1, len(pattern)): | for i in range(1, len(pattern)): | ||||
| if G.degree(pattern[i]) >= 2: | if G.degree(pattern[i]) >= 2: | ||||
| for neighborx in G[pattern[i]]: | for neighborx in G[pattern[i]]: | ||||
| @@ -324,20 +311,20 @@ class Treelet(GraphKernel): | |||||
| new_patterns = [ pattern_t + [neighborx] + [neighborxx] for neighborxx in G[neighborx] if neighborxx != pattern[i] ] | new_patterns = [ pattern_t + [neighborx] + [neighborxx] for neighborxx in G[neighborx] if neighborxx != pattern[i] ] | ||||
| patterns['10'].extend(new_patterns) | patterns['10'].extend(new_patterns) | ||||
| canonkey['a'] = len(patterns['10']) | canonkey['a'] = len(patterns['10']) | ||||
| ### labeling information ### | ### labeling information ### | ||||
| ### In this section, a list of canonical keys is generated for every | |||||
| ### pattern obtained in the structural analysis section above, which is a | |||||
| ### In this section, a list of canonical keys is generated for every | |||||
| ### pattern obtained in the structural analysis section above, which is a | |||||
| ### string corresponding to a unique treelet. A dictionary is built to keep | ### string corresponding to a unique treelet. A dictionary is built to keep | ||||
| ### track of the amount of every treelet. | ### track of the amount of every treelet. | ||||
| if len(self._node_labels) > 0 or len(self._edge_labels) > 0: | if len(self._node_labels) > 0 or len(self._edge_labels) > 0: | ||||
| canonkey_l = {} # canonical key, a dictionary which keeps track of the number of occurrences of every treelet. | canonkey_l = {} # canonical key, a dictionary which keeps track of the number of occurrences of every treelet. | ||||
| # linear patterns | # linear patterns | ||||
| canonkey_t = Counter(get_mlti_dim_node_attrs(G, self._node_labels)) | canonkey_t = Counter(get_mlti_dim_node_attrs(G, self._node_labels)) | ||||
| for key in canonkey_t: | for key in canonkey_t: | ||||
| canonkey_l[('0', key)] = canonkey_t[key] | canonkey_l[('0', key)] = canonkey_t[key] | ||||
| for i in range(1, 6): # for i in range(1, 6): | for i in range(1, 6): # for i in range(1, 6): | ||||
| treelet = [] | treelet = [] | ||||
| for pattern in patterns[str(i)]: | for pattern in patterns[str(i)]: | ||||
| @@ -349,7 +336,7 @@ class Treelet(GraphKernel): | |||||
| canonkey_t = canonlist if canonlist < canonlist[::-1] else canonlist[::-1] | canonkey_t = canonlist if canonlist < canonlist[::-1] else canonlist[::-1] | ||||
| treelet.append(tuple([str(i)] + canonkey_t)) | treelet.append(tuple([str(i)] + canonkey_t)) | ||||
| canonkey_l.update(Counter(treelet)) | canonkey_l.update(Counter(treelet)) | ||||
| # n-star patterns | # n-star patterns | ||||
| for i in range(3, 6): | for i in range(3, 6): | ||||
| treelet = [] | treelet = [] | ||||
| @@ -361,12 +348,12 @@ class Treelet(GraphKernel): | |||||
| canonlist.append(tuple((nlabels, elabels))) | canonlist.append(tuple((nlabels, elabels))) | ||||
| canonlist.sort() | canonlist.sort() | ||||
| canonlist = list(chain.from_iterable(canonlist)) | canonlist = list(chain.from_iterable(canonlist)) | ||||
| canonkey_t = tuple(['d' if i == 5 else str(i * 2)] + | |||||
| [tuple(G.nodes[pattern[0]][nl] for nl in self._node_labels)] | |||||
| canonkey_t = tuple(['d' if i == 5 else str(i * 2)] + | |||||
| [tuple(G.nodes[pattern[0]][nl] for nl in self._node_labels)] | |||||
| + canonlist) | + canonlist) | ||||
| treelet.append(canonkey_t) | treelet.append(canonkey_t) | ||||
| canonkey_l.update(Counter(treelet)) | canonkey_l.update(Counter(treelet)) | ||||
| # pattern 7 | # pattern 7 | ||||
| treelet = [] | treelet = [] | ||||
| for pattern in patterns['7']: | for pattern in patterns['7']: | ||||
| @@ -377,15 +364,15 @@ class Treelet(GraphKernel): | |||||
| canonlist.append(tuple((nlabels, elabels))) | canonlist.append(tuple((nlabels, elabels))) | ||||
| canonlist.sort() | canonlist.sort() | ||||
| canonlist = list(chain.from_iterable(canonlist)) | canonlist = list(chain.from_iterable(canonlist)) | ||||
| canonkey_t = tuple(['7'] | |||||
| + [tuple(G.nodes[pattern[0]][nl] for nl in self._node_labels)] + canonlist | |||||
| + [tuple(G.nodes[pattern[3]][nl] for nl in self._node_labels)] | |||||
| canonkey_t = tuple(['7'] | |||||
| + [tuple(G.nodes[pattern[0]][nl] for nl in self._node_labels)] + canonlist | |||||
| + [tuple(G.nodes[pattern[3]][nl] for nl in self._node_labels)] | |||||
| + [tuple(G[pattern[3]][pattern[0]][el] for el in self._edge_labels)] | + [tuple(G[pattern[3]][pattern[0]][el] for el in self._edge_labels)] | ||||
| + [tuple(G.nodes[pattern[4]][nl] for nl in self._node_labels)] | |||||
| + [tuple(G.nodes[pattern[4]][nl] for nl in self._node_labels)] | |||||
| + [tuple(G[pattern[4]][pattern[3]][el] for el in self._edge_labels)]) | + [tuple(G[pattern[4]][pattern[3]][el] for el in self._edge_labels)]) | ||||
| treelet.append(canonkey_t) | treelet.append(canonkey_t) | ||||
| canonkey_l.update(Counter(treelet)) | canonkey_l.update(Counter(treelet)) | ||||
| # pattern 11 | # pattern 11 | ||||
| treelet = [] | treelet = [] | ||||
| for pattern in patterns['11']: | for pattern in patterns['11']: | ||||
| @@ -396,15 +383,15 @@ class Treelet(GraphKernel): | |||||
| canonlist.append(tuple((nlabels, elabels))) | canonlist.append(tuple((nlabels, elabels))) | ||||
| canonlist.sort() | canonlist.sort() | ||||
| canonlist = list(chain.from_iterable(canonlist)) | canonlist = list(chain.from_iterable(canonlist)) | ||||
| canonkey_t = tuple(['b'] | |||||
| + [tuple(G.nodes[pattern[0]][nl] for nl in self._node_labels)] + canonlist | |||||
| + [tuple(G.nodes[pattern[4]][nl] for nl in self._node_labels)] | |||||
| canonkey_t = tuple(['b'] | |||||
| + [tuple(G.nodes[pattern[0]][nl] for nl in self._node_labels)] + canonlist | |||||
| + [tuple(G.nodes[pattern[4]][nl] for nl in self._node_labels)] | |||||
| + [tuple(G[pattern[4]][pattern[0]][el] for el in self._edge_labels)] | + [tuple(G[pattern[4]][pattern[0]][el] for el in self._edge_labels)] | ||||
| + [tuple(G.nodes[pattern[5]][nl] for nl in self._node_labels)] | |||||
| + [tuple(G.nodes[pattern[5]][nl] for nl in self._node_labels)] | |||||
| + [tuple(G[pattern[5]][pattern[4]][el] for el in self._edge_labels)]) | + [tuple(G[pattern[5]][pattern[4]][el] for el in self._edge_labels)]) | ||||
| treelet.append(canonkey_t) | treelet.append(canonkey_t) | ||||
| canonkey_l.update(Counter(treelet)) | canonkey_l.update(Counter(treelet)) | ||||
| # pattern 10 | # pattern 10 | ||||
| treelet = [] | treelet = [] | ||||
| for pattern in patterns['10']: | for pattern in patterns['10']: | ||||
| @@ -418,15 +405,15 @@ class Treelet(GraphKernel): | |||||
| canonlist.sort() | canonlist.sort() | ||||
| canonkey0 = list(chain.from_iterable(canonlist)) | canonkey0 = list(chain.from_iterable(canonlist)) | ||||
| canonkey_t = tuple(['a'] | canonkey_t = tuple(['a'] | ||||
| + [tuple(G.nodes[pattern[3]][nl] for nl in self._node_labels)] | |||||
| + [tuple(G.nodes[pattern[4]][nl] for nl in self._node_labels)] | |||||
| + [tuple(G[pattern[4]][pattern[3]][el] for el in self._edge_labels)] | |||||
| + [tuple(G.nodes[pattern[0]][nl] for nl in self._node_labels)] | |||||
| + [tuple(G[pattern[0]][pattern[3]][el] for el in self._edge_labels)] | |||||
| + [tuple(G.nodes[pattern[3]][nl] for nl in self._node_labels)] | |||||
| + [tuple(G.nodes[pattern[4]][nl] for nl in self._node_labels)] | |||||
| + [tuple(G[pattern[4]][pattern[3]][el] for el in self._edge_labels)] | |||||
| + [tuple(G.nodes[pattern[0]][nl] for nl in self._node_labels)] | |||||
| + [tuple(G[pattern[0]][pattern[3]][el] for el in self._edge_labels)] | |||||
| + canonkey4 + canonkey0) | + canonkey4 + canonkey0) | ||||
| treelet.append(canonkey_t) | treelet.append(canonkey_t) | ||||
| canonkey_l.update(Counter(treelet)) | canonkey_l.update(Counter(treelet)) | ||||
| # pattern 12 | # pattern 12 | ||||
| treelet = [] | treelet = [] | ||||
| for pattern in patterns['12']: | for pattern in patterns['12']: | ||||
| @@ -444,22 +431,22 @@ class Treelet(GraphKernel): | |||||
| canonlist3.append(tuple((nlabels, elabels))) | canonlist3.append(tuple((nlabels, elabels))) | ||||
| canonlist3.sort() | canonlist3.sort() | ||||
| canonlist3 = list(chain.from_iterable(canonlist3)) | canonlist3 = list(chain.from_iterable(canonlist3)) | ||||
| # 2 possible keys can be generated from the 2 nodes with extended label 3; | |||||
| # 2 possible keys can be generated from the 2 nodes with extended label 3; | |||||
| # select the one with the lower lexicographic order. | # select the one with the lower lexicographic order. | ||||
| canonkey_t1 = tuple(['c'] | |||||
| + [tuple(G.nodes[pattern[0]][nl] for nl in self._node_labels)] + canonlist0 | |||||
| + [tuple(G.nodes[pattern[3]][nl] for nl in self._node_labels)] | |||||
| + [tuple(G[pattern[3]][pattern[0]][el] for el in self._edge_labels)] | |||||
| canonkey_t1 = tuple(['c'] | |||||
| + [tuple(G.nodes[pattern[0]][nl] for nl in self._node_labels)] + canonlist0 | |||||
| + [tuple(G.nodes[pattern[3]][nl] for nl in self._node_labels)] | |||||
| + [tuple(G[pattern[3]][pattern[0]][el] for el in self._edge_labels)] | |||||
| + canonlist3) | + canonlist3) | ||||
| canonkey_t2 = tuple(['c'] | |||||
| + [tuple(G.nodes[pattern[3]][nl] for nl in self._node_labels)] + canonlist3 | |||||
| + [tuple(G.nodes[pattern[0]][nl] for nl in self._node_labels)] | |||||
| + [tuple(G[pattern[0]][pattern[3]][el] for el in self._edge_labels)] | |||||
| canonkey_t2 = tuple(['c'] | |||||
| + [tuple(G.nodes[pattern[3]][nl] for nl in self._node_labels)] + canonlist3 | |||||
| + [tuple(G.nodes[pattern[0]][nl] for nl in self._node_labels)] | |||||
| + [tuple(G[pattern[0]][pattern[3]][el] for el in self._edge_labels)] | |||||
| + canonlist0) | + canonlist0) | ||||
| treelet.append(canonkey_t1 if canonkey_t1 < canonkey_t2 else canonkey_t2) | treelet.append(canonkey_t1 if canonkey_t1 < canonkey_t2 else canonkey_t2) | ||||
| canonkey_l.update(Counter(treelet)) | canonkey_l.update(Counter(treelet)) | ||||
| # pattern 9 | # pattern 9 | ||||
| treelet = [] | treelet = [] | ||||
| for pattern in patterns['9']: | for pattern in patterns['9']: | ||||
| @@ -469,7 +456,7 @@ class Treelet(GraphKernel): | |||||
| tuple(G[pattern[5]][pattern[3]][el] for el in self._edge_labels)] | tuple(G[pattern[5]][pattern[3]][el] for el in self._edge_labels)] | ||||
| prekey2 = [tuple(G.nodes[pattern[2]][nl] for nl in self._node_labels), | prekey2 = [tuple(G.nodes[pattern[2]][nl] for nl in self._node_labels), | ||||
| tuple(G[pattern[2]][pattern[0]][el] for el in self._edge_labels)] | tuple(G[pattern[2]][pattern[0]][el] for el in self._edge_labels)] | ||||
| prekey3 = [tuple(G.nodes[pattern[3]][nl] for nl in self._node_labels), | |||||
| prekey3 = [tuple(G.nodes[pattern[3]][nl] for nl in self._node_labels), | |||||
| tuple(G[pattern[3]][pattern[0]][el] for el in self._edge_labels)] | tuple(G[pattern[3]][pattern[0]][el] for el in self._edge_labels)] | ||||
| if prekey2 + canonkey2 < prekey3 + canonkey3: | if prekey2 + canonkey2 < prekey3 + canonkey3: | ||||
| canonkey_t = [tuple(G.nodes[pattern[1]][nl] for nl in self._node_labels)] \ | canonkey_t = [tuple(G.nodes[pattern[1]][nl] for nl in self._node_labels)] \ | ||||
| @@ -480,21 +467,21 @@ class Treelet(GraphKernel): | |||||
| + [tuple(G[pattern[1]][pattern[0]][el] for el in self._edge_labels)] \ | + [tuple(G[pattern[1]][pattern[0]][el] for el in self._edge_labels)] \ | ||||
| + prekey3 + prekey2 + canonkey3 + canonkey2 | + prekey3 + prekey2 + canonkey3 + canonkey2 | ||||
| treelet.append(tuple(['9'] | treelet.append(tuple(['9'] | ||||
| + [tuple(G.nodes[pattern[0]][nl] for nl in self._node_labels)] | |||||
| + [tuple(G.nodes[pattern[0]][nl] for nl in self._node_labels)] | |||||
| + canonkey_t)) | + canonkey_t)) | ||||
| canonkey_l.update(Counter(treelet)) | canonkey_l.update(Counter(treelet)) | ||||
| return canonkey_l | return canonkey_l | ||||
| return canonkey | return canonkey | ||||
| def _wrapper_get_canonkeys(self, itr_item): | def _wrapper_get_canonkeys(self, itr_item): | ||||
| g = itr_item[0] | g = itr_item[0] | ||||
| i = itr_item[1] | i = itr_item[1] | ||||
| return i, self._get_canonkeys(g) | return i, self._get_canonkeys(g) | ||||
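The wrapper above exists so the per-graph computation can run under multiprocessing's imap_unordered: results arrive in completion order, so each worker echoes its input index back and the caller slots results into place. A self-contained sketch of that pattern, with a stand-in `work` function (not the library's code):

    # (index, result) pattern behind _wrapper_get_canonkeys: workers return
    # their index so out-of-order results can be reassembled.
    from multiprocessing import Pool

    def work(itr_item):
        g, i = itr_item          # same (item, index) convention as above
        return i, g * g          # stand-in for the real per-graph computation

    if __name__ == '__main__':
        data = list(range(10))
        results = [None] * len(data)
        with Pool(2) as pool:
            for i, res in pool.imap_unordered(work, [(g, i) for i, g in enumerate(data)]):
                results[i] = res  # slot each result back into its index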
| def _add_dummy_labels(self, Gn): | def _add_dummy_labels(self, Gn): | ||||
| if len(self._node_labels) == 0 or (len(self._node_labels) == 1 and self._node_labels[0] == SpecialLabel.DUMMY): | if len(self._node_labels) == 0 or (len(self._node_labels) == 1 and self._node_labels[0] == SpecialLabel.DUMMY): | ||||
| for i in range(len(Gn)): | for i in range(len(Gn)): | ||||
| @@ -555,5 +555,12 @@ if __name__ == "__main__": | |||||
| # test_RandomWalk('Acyclic', 'conjugate', None, 'imap_unordered') | # test_RandomWalk('Acyclic', 'conjugate', None, 'imap_unordered') | ||||
| # test_RandomWalk('Acyclic', 'fp', None, None) | # test_RandomWalk('Acyclic', 'fp', None, None) | ||||
| # test_RandomWalk('Acyclic', 'spectral', 'exp', 'imap_unordered') | # test_RandomWalk('Acyclic', 'spectral', 'exp', 'imap_unordered') | ||||
| # test_CommonWalk('AIDS', 0.01, 'geo') | |||||
| # test_CommonWalk('Acyclic', 0.01, 'geo') | |||||
| # test_Marginalized('Acyclic', False) | |||||
| # test_ShortestPath('Acyclic') | # test_ShortestPath('Acyclic') | ||||
| # test_PathUpToH('Acyclic', 'MinMax') | |||||
| # test_Treelet('Acyclic') | |||||
| # test_SylvesterEquation('Acyclic') | |||||
| # test_ConjugateGradient('Acyclic') | |||||
| # test_FixedPoint('Acyclic') | |||||
| # test_SpectralDecomposition('Acyclic', 'exp') | |||||
| @@ -25,3 +25,4 @@ from gklearn.utils.utils import normalize_gram_matrix, compute_distance_matrix | |||||
| from gklearn.utils.trie import Trie | from gklearn.utils.trie import Trie | ||||
| from gklearn.utils.knn import knn_cv, knn_classification | from gklearn.utils.knn import knn_cv, knn_classification | ||||
| from gklearn.utils.model_selection_precomputed import model_selection_for_precomputed_kernel | from gklearn.utils.model_selection_precomputed import model_selection_for_precomputed_kernel | ||||
| from gklearn.utils.iters import get_iters | |||||
| @@ -0,0 +1,55 @@ | |||||
| #!/usr/bin/env python3 | |||||
| # -*- coding: utf-8 -*- | |||||
| """ | |||||
| Created on Thu Dec 24 10:35:26 2020 | |||||
| @author: ljia | |||||
| """ | |||||
| from tqdm import tqdm | |||||
| import math | |||||
| def get_iters(iterable, desc=None, file=None, length=None, verbose=True, **kwargs): | |||||
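| # Wrap `iterable` in tqdm when verbose; pass `length` for iterables | |||||
| # without len(). When not verbose, return the iterable unchanged. | |||||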
| if verbose: | |||||
| if 'miniters' not in kwargs: | |||||
| if length is None: | |||||
| try: | |||||
| kwargs['miniters'] = math.ceil(len(iterable) / 100) | |||||
| except TypeError: | |||||
| # `iterable` has no len(); fall back to a fixed miniters. | |||||
| kwargs['miniters'] = 100 | |||||
| else: | |||||
| kwargs['miniters'] = math.ceil(length / 100) | |||||
| if 'maxinterval' not in kwargs: | |||||
| kwargs['maxinterval'] = 600 | |||||
| return tqdm(iterable, desc=desc, file=file, **kwargs) | |||||
| else: | |||||
| return iterable | |||||
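A quick usage sketch for the new helper (assuming the `gklearn.utils` re-export added above; a generator stands in for any iterable without `len()`):

    import sys
    from gklearn.utils import get_iters  # re-exported in gklearn/utils/__init__.py

    gen = (x * x for x in range(1000))   # generators have no len(): pass length=
    for _ in get_iters(gen, desc='squares', file=sys.stdout,
                       length=1000, verbose=True):
        pass                             # per-item work would go here

    items = get_iters([1, 2, 3], verbose=False)  # verbose off: input returned as-is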
| # class mytqdm(tqdm): | |||||
| # def __init__(self, iterable=None, desc=None, total=None, leave=True, | |||||
| # file=None, ncols=None, mininterval=0.1, maxinterval=10.0, | |||||
| # miniters=None, ascii=None, disable=False, unit='it', | |||||
| # unit_scale=False, dynamic_ncols=False, smoothing=0.3, | |||||
| # bar_format=None, initial=0, position=None, postfix=None, | |||||
| # unit_divisor=1000, write_bytes=None, lock_args=None, | |||||
| # nrows=None, | |||||
| # gui=False, **kwargs): | |||||
| # if iterable is not None: | |||||
| # miniters=math.ceil(len(iterable) / 100) | |||||
| # maxinterval=600 | |||||
| # super().__init__(iterable=iterable, desc=desc, total=total, leave=leave, | |||||
| # file=file, ncols=ncols, mininterval=mininterval, maxinterval=maxinterval, | |||||
| # miniters=miniters, ascii=ascii, disable=disable, unit=unit, | |||||
| # unit_scale=unit_scale, dynamic_ncols=dynamic_ncols, smoothing=smoothing, | |||||
| # bar_format=bar_format, initial=initial, position=position, postfix=postfix, | |||||
| # unit_divisor=unit_divisor, write_bytes=write_bytes, lock_args=lock_args, | |||||
| # nrows=nrows, | |||||
| # gui=gui, **kwargs) | |||||
| # tqdm = mytqdm | |||||