@@ -47,7 +47,7 @@ class CommonWalk(GraphKernel):
 		itr = combinations_with_replacement(range(0, len(self._graphs)), 2)
 		len_itr = int(len(self._graphs) * (len(self._graphs) + 1) / 2)
 		iterator = get_iters(itr, desc='Computing kernels', file=sys.stdout,
-					length=len_itr, verbose=(self._verbose >= 2))
+					length=len_itr, verbose=(self.verbose >= 2))
 
 		# direct product graph method - exponential
 		if self._compute_method == 'exp':
@@ -86,7 +86,7 @@ class CommonWalk(GraphKernel):
 		do_fun = self._wrapper_kernel_do_geo
 		parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=_init_worker_gm,
-					glbv=(self._graphs,), n_jobs=self._n_jobs, verbose=self._verbose)
+					glbv=(self._graphs,), n_jobs=self.n_jobs, verbose=self.verbose)
 
 		return gram_matrix
@@ -100,9 +100,9 @@ class CommonWalk(GraphKernel):
 		# compute kernel list.
 		kernel_list = [None] * len(g_list)
-		if self._verbose >= 2:
+		if self.verbose >= 2:
 			iterator = get_iters(range(len(g_list)), desc='Computing kernels',
-						file=sys.stdout, length=len(g_list), verbose=(self._verbose >= 2))
+						file=sys.stdout, length=len(g_list), verbose=(self.verbose >= 2))
 		else:
 			iterator = range(len(g_list))
@@ -148,7 +148,7 @@ class CommonWalk(GraphKernel):
 		len_itr = len(g_list)
 		parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr,
 			init_worker=_init_worker_list, glbv=(g1, g_list), method='imap_unordered',
-			n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose)
+			n_jobs=self.n_jobs, itr_desc='Computing kernels', verbose=self.verbose)
 
 		return kernel_list
@@ -35,7 +35,7 @@ class ConjugateGradient(RandomWalkMeta):
 	def _compute_gm_series(self):
-		self._check_edge_weight(self._graphs, self._verbose)
+		self._check_edge_weight(self._graphs, self.verbose)
 		self._check_graphs(self._graphs)
 		lmda = self._weight
@@ -44,7 +44,7 @@ class ConjugateGradient(RandomWalkMeta):
 		gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))
 
 		# Reindex nodes using consecutive integers for the convenience of kernel computation.
-		iterator = get_iters(self._graphs, desc='Reindex vertices', file=sys.stdout, verbose=(self._verbose >= 2))
+		iterator = get_iters(self._graphs, desc='Reindex vertices', file=sys.stdout, verbose=(self.verbose >= 2))
 		self._graphs = [nx.convert_node_labels_to_integers(g, first_label=0, label_attribute='label_orignal') for g in iterator]
 
 		if self._p is None and self._q is None: # p and q are uniform distributions as default.
@@ -52,7 +52,7 @@ class ConjugateGradient(RandomWalkMeta):
 			from itertools import combinations_with_replacement
 			itr = combinations_with_replacement(range(0, len(self._graphs)), 2)
 			len_itr = int(len(self._graphs) * (len(self._graphs) + 1) / 2)
-			iterator = get_iters(itr, desc='Computing kernels', file=sys.stdout, length=len_itr, verbose=(self._verbose >= 2))
+			iterator = get_iters(itr, desc='Computing kernels', file=sys.stdout, length=len_itr, verbose=(self.verbose >= 2))
 
 			for i, j in iterator:
 				kernel = self._kernel_do(self._graphs[i], self._graphs[j], lmda)
@@ -66,7 +66,7 @@ class ConjugateGradient(RandomWalkMeta):
 	def _compute_gm_imap_unordered(self):
-		self._check_edge_weight(self._graphs, self._verbose)
+		self._check_edge_weight(self._graphs, self.verbose)
 		self._check_graphs(self._graphs)
 
 		# Compute Gram matrix.
@@ -74,7 +74,7 @@ class ConjugateGradient(RandomWalkMeta):
 		# @todo: parallel this.
 		# Reindex nodes using consecutive integers for the convenience of kernel computation.
-		iterator = get_iters(self._graphs, desc='Reindex vertices', file=sys.stdout, verbose=(self._verbose >= 2))
+		iterator = get_iters(self._graphs, desc='Reindex vertices', file=sys.stdout, verbose=(self.verbose >= 2))
 		self._graphs = [nx.convert_node_labels_to_integers(g, first_label=0, label_attribute='label_orignal') for g in iterator]
 
 		if self._p is None and self._q is None: # p and q are uniform distributions as default.
@@ -86,7 +86,7 @@ class ConjugateGradient(RandomWalkMeta):
 			do_fun = self._wrapper_kernel_do
 			parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker,
-				glbv=(self._graphs,), n_jobs=self._n_jobs, verbose=self._verbose)
+				glbv=(self._graphs,), n_jobs=self.n_jobs, verbose=self.verbose)
 		else: # @todo
 			pass
@@ -95,7 +95,7 @@ class ConjugateGradient(RandomWalkMeta):
 	def _compute_kernel_list_series(self, g1, g_list):
-		self._check_edge_weight(g_list + [g1], self._verbose)
+		self._check_edge_weight(g_list + [g1], self.verbose)
 		self._check_graphs(g_list + [g1])
 		lmda = self._weight
@@ -105,11 +105,11 @@ class ConjugateGradient(RandomWalkMeta):
 		# Reindex nodes using consecutive integers for the convenience of kernel computation.
 		g1 = nx.convert_node_labels_to_integers(g1, first_label=0, label_attribute='label_orignal')
-		iterator = get_iters(g_list, desc='Reindex vertices', file=sys.stdout, verbose=(self._verbose >= 2))
+		iterator = get_iters(g_list, desc='Reindex vertices', file=sys.stdout, verbose=(self.verbose >= 2))
 		g_list = [nx.convert_node_labels_to_integers(g, first_label=0, label_attribute='label_orignal') for g in iterator]
 
 		if self._p is None and self._q is None: # p and q are uniform distributions as default.
-			iterator = get_iters(range(len(g_list)), desc='Computing kernels', file=sys.stdout, length=len(g_list), verbose=(self._verbose >= 2))
+			iterator = get_iters(range(len(g_list)), desc='Computing kernels', file=sys.stdout, length=len(g_list), verbose=(self.verbose >= 2))
 
 			for i in iterator:
 				kernel = self._kernel_do(g1, g_list[i], lmda)
@@ -122,7 +122,7 @@ class ConjugateGradient(RandomWalkMeta):
 	def _compute_kernel_list_imap_unordered(self, g1, g_list):
-		self._check_edge_weight(g_list + [g1], self._verbose)
+		self._check_edge_weight(g_list + [g1], self.verbose)
 		self._check_graphs(g_list + [g1])
 
 		# compute kernel list.
@@ -131,7 +131,7 @@ class ConjugateGradient(RandomWalkMeta):
 		# Reindex nodes using consecutive integers for the convenience of kernel computation.
 		g1 = nx.convert_node_labels_to_integers(g1, first_label=0, label_attribute='label_orignal')
 		# @todo: parallel this.
-		iterator = get_iters(g_list, desc='Reindex vertices', file=sys.stdout, verbose=(self._verbose >= 2))
+		iterator = get_iters(g_list, desc='Reindex vertices', file=sys.stdout, verbose=(self.verbose >= 2))
 		g_list = [nx.convert_node_labels_to_integers(g, first_label=0, label_attribute='label_orignal') for g in iterator]
 
 		if self._p is None and self._q is None: # p and q are uniform distributions as default.
@@ -149,7 +149,7 @@ class ConjugateGradient(RandomWalkMeta):
 			len_itr = len(g_list)
 			parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr,
 				init_worker=init_worker, glbv=(g1, g_list), method='imap_unordered',
-				n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose)
+				n_jobs=self.n_jobs, itr_desc='Computing kernels', verbose=self.verbose)
 		else: # @todo
 			pass
@@ -162,7 +162,7 @@ class ConjugateGradient(RandomWalkMeta):
 	def _compute_single_kernel_series(self, g1, g2):
-		self._check_edge_weight([g1] + [g2], self._verbose)
+		self._check_edge_weight([g1] + [g2], self.verbose)
 		self._check_graphs([g1] + [g2])
 		lmda = self._weight
@@ -35,7 +35,7 @@ class FixedPoint(RandomWalkMeta):
 	def _compute_gm_series(self):
-		self._check_edge_weight(self._graphs, self._verbose)
+		self._check_edge_weight(self._graphs, self.verbose)
 		self._check_graphs(self._graphs)
 		lmda = self._weight
@@ -44,7 +44,7 @@ class FixedPoint(RandomWalkMeta):
 		gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))
 
 		# Reindex nodes using consecutive integers for the convenience of kernel computation.
-		iterator = get_iters(self._graphs, desc='Reindex vertices', file=sys.stdout,verbose=(self._verbose >= 2))
+		iterator = get_iters(self._graphs, desc='Reindex vertices', file=sys.stdout,verbose=(self.verbose >= 2))
 		self._graphs = [nx.convert_node_labels_to_integers(g, first_label=0, label_attribute='label_orignal') for g in iterator]
 
 		if self._p is None and self._q is None: # p and q are uniform distributions as default.
@@ -52,7 +52,7 @@ class FixedPoint(RandomWalkMeta):
 			from itertools import combinations_with_replacement
 			itr = combinations_with_replacement(range(0, len(self._graphs)), 2)
 			len_itr = int(len(self._graphs) * (len(self._graphs) + 1) / 2)
-			iterator = get_iters(itr, desc='Computing kernels', file=sys.stdout, length=len_itr, verbose=(self._verbose >= 2))
+			iterator = get_iters(itr, desc='Computing kernels', file=sys.stdout, length=len_itr, verbose=(self.verbose >= 2))
 
 			for i, j in iterator:
 				kernel = self._kernel_do(self._graphs[i], self._graphs[j], lmda)
@@ -66,7 +66,7 @@ class FixedPoint(RandomWalkMeta):
 	def _compute_gm_imap_unordered(self):
-		self._check_edge_weight(self._graphs, self._verbose)
+		self._check_edge_weight(self._graphs, self.verbose)
 		self._check_graphs(self._graphs)
 
 		# Compute Gram matrix.
@@ -74,7 +74,7 @@ class FixedPoint(RandomWalkMeta):
 		# @todo: parallel this.
 		# Reindex nodes using consecutive integers for the convenience of kernel computation.
-		iterator = get_iters(self._graphs, desc='Reindex vertices', file=sys.stdout, verbose=(self._verbose >= 2))
+		iterator = get_iters(self._graphs, desc='Reindex vertices', file=sys.stdout, verbose=(self.verbose >= 2))
 		self._graphs = [nx.convert_node_labels_to_integers(g, first_label=0, label_attribute='label_orignal') for g in iterator]
 
 		if self._p is None and self._q is None: # p and q are uniform distributions as default.
@@ -86,7 +86,7 @@ class FixedPoint(RandomWalkMeta):
 			do_fun = self._wrapper_kernel_do
 			parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker,
-				glbv=(self._graphs,), n_jobs=self._n_jobs, verbose=self._verbose)
+				glbv=(self._graphs,), n_jobs=self.n_jobs, verbose=self.verbose)
 		else: # @todo
 			pass
@@ -95,7 +95,7 @@ class FixedPoint(RandomWalkMeta):
 	def _compute_kernel_list_series(self, g1, g_list):
-		self._check_edge_weight(g_list + [g1], self._verbose)
+		self._check_edge_weight(g_list + [g1], self.verbose)
 		self._check_graphs(g_list + [g1])
 		lmda = self._weight
@@ -105,12 +105,12 @@ class FixedPoint(RandomWalkMeta):
 		# Reindex nodes using consecutive integers for the convenience of kernel computation.
 		g1 = nx.convert_node_labels_to_integers(g1, first_label=0, label_attribute='label_orignal')
-		iterator = get_iters(g_list, desc='Reindex vertices', file=sys.stdout, verbose=(self._verbose >= 2))
+		iterator = get_iters(g_list, desc='Reindex vertices', file=sys.stdout, verbose=(self.verbose >= 2))
 		g_list = [nx.convert_node_labels_to_integers(g, first_label=0, label_attribute='label_orignal') for g in iterator]
 
 		if self._p is None and self._q is None: # p and q are uniform distributions as default.
-			iterator = get_iters(range(len(g_list)), desc='Computing kernels', file=sys.stdout, length=len(g_list), verbose=(self._verbose >= 2))
+			iterator = get_iters(range(len(g_list)), desc='Computing kernels', file=sys.stdout, length=len(g_list), verbose=(self.verbose >= 2))
 
 			for i in iterator:
 				kernel = self._kernel_do(g1, g_list[i], lmda)
@@ -123,7 +123,7 @@ class FixedPoint(RandomWalkMeta):
 	def _compute_kernel_list_imap_unordered(self, g1, g_list):
-		self._check_edge_weight(g_list + [g1], self._verbose)
+		self._check_edge_weight(g_list + [g1], self.verbose)
 		self._check_graphs(g_list + [g1])
 
 		# compute kernel list.
@@ -132,7 +132,7 @@ class FixedPoint(RandomWalkMeta):
 		# Reindex nodes using consecutive integers for the convenience of kernel computation.
 		g1 = nx.convert_node_labels_to_integers(g1, first_label=0, label_attribute='label_orignal')
 		# @todo: parallel this.
-		iterator = get_iters(g_list, desc='Reindex vertices', file=sys.stdout, verbose=(self._verbose >= 2))
+		iterator = get_iters(g_list, desc='Reindex vertices', file=sys.stdout, verbose=(self.verbose >= 2))
 		g_list = [nx.convert_node_labels_to_integers(g, first_label=0, label_attribute='label_orignal') for g in iterator]
 
 		if self._p is None and self._q is None: # p and q are uniform distributions as default.
@@ -150,7 +150,7 @@ class FixedPoint(RandomWalkMeta):
 			len_itr = len(g_list)
 			parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr,
 				init_worker=init_worker, glbv=(g1, g_list), method='imap_unordered',
-				n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose)
+				n_jobs=self.n_jobs, itr_desc='Computing kernels', verbose=self.verbose)
 		else: # @todo
 			pass
@@ -163,7 +163,7 @@ class FixedPoint(RandomWalkMeta):
 	def _compute_single_kernel_series(self, g1, g2):
-		self._check_edge_weight([g1] + [g2], self._verbose)
+		self._check_edge_weight([g1] + [g2], self.verbose)
 		self._check_graphs([g1] + [g2])
 		lmda = self._weight
@@ -9,27 +9,372 @@ import numpy as np
 import networkx as nx
 import multiprocessing
 import time
+# from abc import ABC, abstractmethod
+from sklearn.base import BaseEstimator # , TransformerMixin
+from sklearn.utils.validation import check_is_fitted # check_X_y, check_array,
+from sklearn.exceptions import NotFittedError
 from gklearn.utils import normalize_gram_matrix
 
 
-class GraphKernel(object):
-
-	def __init__(self):
-		self._graphs = None
-		self._parallel = ''
-		self._n_jobs = 0
-		self._verbose = None
-		self._normalize = True
-		self._run_time = 0
-		self._gram_matrix = None
-		self._gram_matrix_unnorm = None
+class GraphKernel(BaseEstimator): #, ABC):
+	"""The basic graph kernel class.
+
+	Attributes
+	----------
+	_graphs : list
+		Stores the input graphs on fit input data.
+		Default format of the list objects is `NetworkX` graphs.
+		**We don't guarantee that the input graphs remain unchanged during the
+		computation.**
+
+	References
+	----------
+	https://ysig.github.io/GraKeL/0.1a8/_modules/grakel/kernels/kernel.html#Kernel.
+	"""
+
+	def __init__(self, parallel=None, n_jobs=None, chunksize=None, normalize=True, verbose=2):
+		"""`__init__` for `GraphKernel` object."""
+		# @todo: the default settings of the parameters are different from those in the self.compute method.
+		# self._graphs = None
+		self.parallel = parallel
+		self.n_jobs = n_jobs
+		self.chunksize = chunksize
+		self.normalize = normalize
+		self.verbose = verbose
+		# self._run_time = 0
+		# self._gram_matrix = None
+		# self._gram_matrix_unnorm = None
+
+
+	##########################################################################
+	# The following is the 1st paradigm to compute kernel matrix, which is
+	# compatible with `scikit-learn`.
+	# -------------------------------------------------------------------
+	# Special thanks to the "GraKeL" library for providing an excellent template!
+	##########################################################################
+
+
+	def fit(self, X, y=None):
+		"""Fit a graph dataset for a transformer.
+
+		Parameters
+		----------
+		X : iterable
+			The input graphs.
+
+		y : None, optional
+			There is no need of a target in a transformer, yet the `scikit-learn`
+			pipeline API requires this parameter.
+
+		Returns
+		-------
+		object
+			Returns self.
+
+		"""
+		# self._is_tranformed = False
+
+		# Clear any prior attributes stored on the estimator. # @todo: unless warm_start is used;
+		self.clear_attributes()
+
+		# X = check_array(X, accept_sparse=True)
+
+		# Validate parameters for the transformer.
+		self.validate_parameters()
+
+		# Validate the input.
+		self._graphs = self.validate_input(X)
+
+		# self._X = X
+		# self._kernel = self._get_kernel_instance()
+
+		# Return the transformer.
+		return self
+
+
+	def transform(self, X):
+		"""Compute the graph kernel matrix between given and fitted data.
+
+		Parameters
+		----------
+		X : list of graphs
+			The target graphs.
+
+		Raises
+		------
+		ValueError
+			Raised if the input is not valid.
+
+		Returns
+		-------
+		kernel_matrix : numpy array, shape = [len(X), len(self._graphs)]
+			The computed kernel matrix.
+
+		"""
+		# Check if method "fit" had been called.
+		check_is_fitted(self, '_graphs')
+
+		# Validate the input.
+		Y = self.validate_input(X)
+
+		# Transform: compute the graph kernel matrix.
+		kernel_matrix = self.compute_kernel_matrix(Y)
+		self._Y = Y
+
+		# Self transform must appear before the diagonal call on normalization.
+		self._is_transformed = True
+		if self.normalize:
+			X_diag, Y_diag = self.diagonals()
+			kernel_matrix /= np.sqrt(np.outer(Y_diag, X_diag))
+
+		return kernel_matrix
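Reviewer note: the normalization at the end of `transform` above is plain cosine normalization against the self-kernels returned by `diagonals()`. A minimal sketch with made-up numbers (not part of the diff) of what that line computes:

```python
import numpy as np

# Assume K is the raw kernel matrix between Y (rows) and the fitted X (columns),
# and X_diag / Y_diag hold the self-kernels k(x, x) and k(y, y).
K = np.array([[2.0, 1.0], [1.0, 8.0], [0.5, 2.0]])  # shape (len(Y), len(X))
X_diag = np.array([4.0, 16.0])
Y_diag = np.array([1.0, 4.0, 1.0])

# Cosine normalization: K_norm[j, i] = K[j, i] / sqrt(k(y_j, y_j) * k(x_i, x_i)),
# so every normalized self-kernel equals 1.
K_norm = K / np.sqrt(np.outer(Y_diag, X_diag))
```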
+
+
+	def fit_transform(self, X):
+		"""Fit and transform: compute Gram matrix on the same data.
+
+		Parameters
+		----------
+		X : list of graphs
+			Input graphs.
+
+		Returns
+		-------
+		gram_matrix : numpy array, shape = [len(X), len(X)]
+			The Gram matrix of X.
+
+		"""
+		self.fit(X)
+
+		# Transform: compute Gram matrix.
+		gram_matrix = self.compute_kernel_matrix()
+
+		# Normalize.
+		self._X_diag = np.diagonal(gram_matrix).copy()
+		if self.normalize:
+			gram_matrix /= np.sqrt(np.outer(self._X_diag, self._X_diag))
+
+		return gram_matrix
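Taken together, `fit`, `transform`, and `fit_transform` give the usual estimator workflow. A hypothetical usage sketch (the exact constructor signature of a concrete subclass is not shown in this diff and may differ):

```python
# Hypothetical usage of the new scikit-learn-compatible paradigm, assuming
# `train_graphs` and `test_graphs` are lists of NetworkX graphs, and that
# `ShortestPath` accepts the base-class keyword arguments and implements
# `pairwise_kernel` for the transform path.
from gklearn.kernels import ShortestPath

kernel = ShortestPath(parallel=None, normalize=True, verbose=0)
gram_train = kernel.fit_transform(train_graphs)  # (n_train, n_train) Gram matrix
k_test = kernel.transform(test_graphs)           # (n_test, n_train) kernel matrix
```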
+
+
+	def get_params(self):
+		pass
+
+
+	def set_params(self):
+		pass
+
+
+	def clear_attributes(self):
+		if hasattr(self, '_X_diag'):
+			delattr(self, '_X_diag')
+		if hasattr(self, '_graphs'):
+			delattr(self, '_graphs')
+		if hasattr(self, '_Y'):
+			delattr(self, '_Y')
+		if hasattr(self, '_run_time'):
+			delattr(self, '_run_time')
+
+
+	def validate_parameters(self):
+		"""Validate all parameters for the transformer.
+
+		Returns
+		-------
+		None.
+
+		"""
+		if self.parallel is not None and self.parallel != 'imap_unordered':
+			raise ValueError('Parallel mode is not set correctly.')
+
+		if self.parallel == 'imap_unordered' and self.n_jobs is None:
+			self.n_jobs = multiprocessing.cpu_count()
+
+
+	def validate_input(self, X):
+		"""Validate the given input and raise errors if it is invalid.
+
+		Parameters
+		----------
+		X : list
+			The input to check. Should be a list of graphs.
+
+		Raises
+		------
+		ValueError
+			Raised if the input is not correct.
+
+		Returns
+		-------
+		X : list
+			The input. A list of graphs.
+
+		"""
+		if X is None:
+			raise ValueError('Please add graphs before computing.')
+		elif not isinstance(X, list):
+			raise ValueError('Cannot detect graphs.')
+		elif len(X) == 0:
+			raise ValueError('The graph list given is empty. No computation will be performed.')
+
+		return X
+
+
+	def compute_kernel_matrix(self, Y=None):
+		"""Compute the kernel matrix between the given target graphs (Y) and
+		the fitted graphs (X / self._graphs), or the Gram matrix for the fitted
+		graphs (X / self._graphs).
+
+		Parameters
+		----------
+		Y : list of graphs, optional
+			The target graphs. The default is None. If None, the kernel is
+			computed between X and itself.
+
+		Returns
+		-------
+		kernel_matrix : numpy array, shape = [n_targets, n_inputs]
+			The computed kernel matrix.
+
+		"""
+		if Y is None:
+			# Compute Gram matrix for self._graphs (X).
+			kernel_matrix = self._compute_gram_matrix()
+			# self._gram_matrix_unnorm = np.copy(self._gram_matrix)
+
+		else:
+			# Compute kernel matrix between Y and self._graphs (X).
+			start_time = time.time()
+
+			if self.parallel == 'imap_unordered':
+				kernel_matrix = self._compute_kernel_matrix_imap_unordered(Y)
+			elif self.parallel is None:
+				kernel_matrix = self._compute_kernel_matrix_series(Y)
+
+			self._run_time = time.time() - start_time
+			if self.verbose:
+				print('Kernel matrix of size (%d, %d) built in %s seconds.'
+					  % (len(Y), len(self._graphs), self._run_time))
+
+		return kernel_matrix
+
+
+	def _compute_kernel_matrix_series(self, Y):
+		"""Compute the kernel matrix between the given target graphs (Y) and
+		the fitted graphs (X / self._graphs) without parallelization.
+
+		Parameters
+		----------
+		Y : list of graphs
+			The target graphs.
+
+		Returns
+		-------
+		kernel_matrix : numpy array, shape = [n_targets, n_inputs]
+			The computed kernel matrix.
+
+		"""
+		kernel_matrix = np.zeros((len(Y), len(self._graphs)))
+
+		for i_y, g_y in enumerate(Y):
+			for i_x, g_x in enumerate(self._graphs):
+				kernel_matrix[i_y, i_x] = self.pairwise_kernel(g_y, g_x)
+
+		return kernel_matrix
+
+
+	def _compute_kernel_matrix_imap_unordered(self, Y):
+		"""Compute the kernel matrix between the given target graphs (Y) and
+		the fitted graphs (X / self._graphs) using imap unordered parallelization.
+
+		Parameters
+		----------
+		Y : list of graphs
+			The target graphs.
+
+		Returns
+		-------
+		kernel_matrix : numpy array, shape = [n_targets, n_inputs]
+			The computed kernel matrix.
+
+		"""
+		raise Exception('Parallelization for kernel matrix is not implemented.')
+
+
+	def diagonals(self):
+		"""Compute the kernel matrix diagonals of the fit/transformed data.
+
+		Returns
+		-------
+		X_diag : numpy array
+			The diagonal of the kernel matrix between the fitted data.
+			This consists of each element calculated with itself.
+
+		Y_diag : numpy array
+			The diagonal of the kernel matrix of the transformed data.
+			This consists of each element calculated with itself.
+
+		"""
+		# Check if method "fit" had been called.
+		check_is_fitted(self, ['_graphs'])
+
+		# Check if the diagonals of X exist.
+		try:
+			check_is_fitted(self, ['_X_diag'])
+		except NotFittedError:
+			# Compute diagonals of X.
+			self._X_diag = np.empty(shape=(len(self._graphs),))
+			for i, x in enumerate(self._graphs):
+				self._X_diag[i] = self.pairwise_kernel(x, x) # @todo: parallel?
+
+		try:
+			# If transform has happened, return both diagonals.
+			check_is_fitted(self, ['_Y'])
+			self._Y_diag = np.empty(shape=(len(self._Y),))
+			for (i, y) in enumerate(self._Y):
+				self._Y_diag[i] = self.pairwise_kernel(y, y) # @todo: parallel?
+
+			return self._X_diag, self._Y_diag
+		except NotFittedError:
+			# Else just return X_diag.
+			return self._X_diag
+
+
+	# @abstractmethod
+	def pairwise_kernel(self, x, y):
+		"""Compute the pairwise kernel between two graphs.
+
+		Parameters
+		----------
+		x, y : NetworkX Graph.
+			Graphs between which the kernel is computed.
+
+		Returns
+		-------
+		kernel : float
+			The computed kernel.
+
+		# Notes
+		# -----
+		# This method is abstract and must be implemented by a subclass.
+
+		"""
+		raise NotImplementedError('Pairwise kernel computation is not implemented!')
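Since `pairwise_kernel` is the single extension point of the unparallelized code path, a toy subclass is enough to exercise `fit`, `transform`, and `diagonals`. A sketch (`VertexCountKernel` is invented for illustration; real subclasses in this PR also override the Gram-matrix routines used by `fit_transform`):

```python
import networkx as nx
from gklearn.kernels import GraphKernel  # assumed import path

class VertexCountKernel(GraphKernel):  # hypothetical, for illustration only
	"""Toy kernel: k(x, y) = |V(x)| * |V(y)|, the product of node counts.

	This is positive semi-definite (it is the inner product of the 1-d
	feature map g -> |V(g)|), so cosine normalization is well defined.
	"""

	def pairwise_kernel(self, x, y):
		return float(nx.number_of_nodes(x) * nx.number_of_nodes(y))
```

With only this override, `VertexCountKernel().fit(Gs).transform(Hs)` runs the series path end to end; `fit_transform` would additionally need `_compute_gram_matrix` machinery from a concrete kernel.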
+
+
+	##########################################################################
+	# The following is the 2nd paradigm to compute kernel matrix. It is
+	# simplified and not compatible with `scikit-learn`.
+	##########################################################################
+
+
 	def compute(self, *graphs, **kwargs):
-		self._parallel = kwargs.get('parallel', 'imap_unordered')
-		self._n_jobs = kwargs.get('n_jobs', multiprocessing.cpu_count())
-		self._normalize = kwargs.get('normalize', True)
-		self._verbose = kwargs.get('verbose', 2)
+		self.parallel = kwargs.get('parallel', 'imap_unordered')
+		self.n_jobs = kwargs.get('n_jobs', multiprocessing.cpu_count())
+		self.normalize = kwargs.get('normalize', True)
+		self.verbose = kwargs.get('verbose', 2)
+		self.validate_parameters()
 
 		if len(graphs) == 1:
 			if not isinstance(graphs[0], list):
@@ -40,7 +385,7 @@ class GraphKernel(object):
 			self._graphs = [g.copy() for g in graphs[0]] # @todo: might be very slow.
 			self._gram_matrix = self._compute_gram_matrix()
 			self._gram_matrix_unnorm = np.copy(self._gram_matrix)
-			if self._normalize:
+			if self.normalize:
 				self._gram_matrix = normalize_gram_matrix(self._gram_matrix)
 			return self._gram_matrix, self._run_time
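For contrast with the first paradigm, a rough sketch of calling the legacy `compute` entry point after this change (keyword defaults mirror the `kwargs.get(...)` calls above; `graphs` is an assumed list of NetworkX graphs and the subclass is hypothetical):

```python
kernel = ShortestPath()  # hypothetical concrete subclass instance
# Passing a single list of graphs returns the (optionally normalized)
# Gram matrix together with the wall-clock run time.
gram_matrix, run_time = kernel.compute(graphs, parallel=None, n_jobs=1,
                                       normalize=True, verbose=0)
```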
@@ -103,15 +448,15 @@ class GraphKernel(object):
 	def _compute_gram_matrix(self):
 		start_time = time.time()
 
-		if self._parallel == 'imap_unordered':
+		if self.parallel == 'imap_unordered':
 			gram_matrix = self._compute_gm_imap_unordered()
-		elif self._parallel is None:
+		elif self.parallel is None:
 			gram_matrix = self._compute_gm_series()
 		else:
 			raise Exception('Parallel mode is not set correctly.')
 
 		self._run_time = time.time() - start_time
-		if self._verbose:
+		if self.verbose:
 			print('Gram matrix of size %d built in %s seconds.'
 				  % (len(self._graphs), self._run_time))
 
@@ -129,15 +474,15 @@ class GraphKernel(object):
 	def _compute_kernel_list(self, g1, g_list):
 		start_time = time.time()
 
-		if self._parallel == 'imap_unordered':
+		if self.parallel == 'imap_unordered':
 			kernel_list = self._compute_kernel_list_imap_unordered(g1, g_list)
-		elif self._parallel is None:
+		elif self.parallel is None:
 			kernel_list = self._compute_kernel_list_series(g1, g_list)
 		else:
 			raise Exception('Parallel mode is not set correctly.')
 
 		self._run_time = time.time() - start_time
-		if self._verbose:
+		if self.verbose:
 			print('Graph kernel bewteen a graph and a list of %d graphs built in %s seconds.'
 				  % (len(g_list), self._run_time))
 
@@ -158,7 +503,7 @@ class GraphKernel(object):
 		kernel = self._compute_single_kernel_series(g1, g2)
 
 		self._run_time = time.time() - start_time
-		if self._verbose:
+		if self.verbose:
 			print('Graph kernel bewteen two graphs built in %s seconds.' % (self._run_time))
 
 		return kernel
 
@@ -185,24 +530,24 @@ class GraphKernel(object):
 		return self._graphs
 
 
-	@property
-	def parallel(self):
-		return self._parallel
+	# @property
+	# def parallel(self):
+	# 	return self.parallel
 
 
-	@property
-	def n_jobs(self):
-		return self._n_jobs
+	# @property
+	# def n_jobs(self):
+	# 	return self.n_jobs
 
 
-	@property
-	def verbose(self):
-		return self._verbose
+	# @property
+	# def verbose(self):
+	# 	return self.verbose
 
 
-	@property
-	def normalize(self):
-		return self._normalize
+	# @property
+	# def normalize(self):
+	# 	return self.normalize
 
 
 	@property
@@ -46,7 +46,7 @@ class Marginalized(GraphKernel):
 		self._add_dummy_labels(self._graphs)
 
 		if self._remove_totters:
-			iterator = get_iters(self._graphs, desc='removing tottering', file=sys.stdout, verbose=(self._verbose >= 2))
+			iterator = get_iters(self._graphs, desc='removing tottering', file=sys.stdout, verbose=(self.verbose >= 2))
 			# @todo: this may not work.
 			self._graphs = [untotterTransformation(G, self._node_labels, self._edge_labels) for G in iterator]
@@ -57,7 +57,7 @@ class Marginalized(GraphKernel):
 		itr = combinations_with_replacement(range(0, len(self._graphs)), 2)
 		len_itr = int(len(self._graphs) * (len(self._graphs) + 1) / 2)
 		iterator = get_iters(itr, desc='Computing kernels', file=sys.stdout,
-					length=len_itr, verbose=(self._verbose >= 2))
+					length=len_itr, verbose=(self.verbose >= 2))
 		for i, j in iterator:
 			kernel = self._kernel_do(self._graphs[i], self._graphs[j])
 			gram_matrix[i][j] = kernel
@@ -70,16 +70,16 @@ class Marginalized(GraphKernel):
 		self._add_dummy_labels(self._graphs)
 
 		if self._remove_totters:
-			pool = Pool(self._n_jobs)
+			pool = Pool(self.n_jobs)
 			itr = range(0, len(self._graphs))
-			if len(self._graphs) < 100 * self._n_jobs:
-				chunksize = int(len(self._graphs) / self._n_jobs) + 1
+			if len(self._graphs) < 100 * self.n_jobs:
+				chunksize = int(len(self._graphs) / self.n_jobs) + 1
 			else:
 				chunksize = 100
 			remove_fun = self._wrapper_untotter
 			iterator = get_iters(pool.imap_unordered(remove_fun, itr, chunksize),
 						desc='removing tottering', file=sys.stdout,
-						length=len(self._graphs), verbose=(self._verbose >= 2))
+						length=len(self._graphs), verbose=(self.verbose >= 2))
 			for i, g in iterator:
 				self._graphs[i] = g
 			pool.close()
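The chunk-size heuristic in the hunk above recurs verbatim in every parallel code path touched by this change. Condensed into a helper for readability (a sketch, not code from the PR):

```python
def pick_chunksize(n_items, n_jobs):
	# Small workloads: split the items roughly evenly across the workers.
	if n_items < 100 * n_jobs:
		return n_items // n_jobs + 1
	# Large workloads: fixed chunks of 100 keep per-chunk scheduling overhead low.
	return 100
```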
@@ -93,7 +93,7 @@ class Marginalized(GraphKernel):
 			G_gn = gn_toshare
 		do_fun = self._wrapper_kernel_do
 		parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker,
-			glbv=(self._graphs,), n_jobs=self._n_jobs, verbose=self._verbose)
+			glbv=(self._graphs,), n_jobs=self.n_jobs, verbose=self.verbose)
 
 		return gram_matrix
@@ -103,13 +103,13 @@ class Marginalized(GraphKernel):
 		if self._remove_totters:
 			g1 = untotterTransformation(g1, self._node_labels, self._edge_labels) # @todo: this may not work.
-			iterator = get_iters(g_list, desc='removing tottering', file=sys.stdout, verbose=(self._verbose >= 2))
+			iterator = get_iters(g_list, desc='removing tottering', file=sys.stdout, verbose=(self.verbose >= 2))
 			# @todo: this may not work.
 			g_list = [untotterTransformation(G, self._node_labels, self._edge_labels) for G in iterator]
 
 		# compute kernel list.
 		kernel_list = [None] * len(g_list)
-		iterator = get_iters(range(len(g_list)), desc='Computing kernels', file=sys.stdout, length=len(g_list), verbose=(self._verbose >= 2))
+		iterator = get_iters(range(len(g_list)), desc='Computing kernels', file=sys.stdout, length=len(g_list), verbose=(self.verbose >= 2))
 		for i in iterator:
 			kernel = self._kernel_do(g1, g_list[i])
 			kernel_list[i] = kernel
@@ -122,16 +122,16 @@ class Marginalized(GraphKernel):
 		if self._remove_totters:
 			g1 = untotterTransformation(g1, self._node_labels, self._edge_labels) # @todo: this may not work.
-			pool = Pool(self._n_jobs)
+			pool = Pool(self.n_jobs)
 			itr = range(0, len(g_list))
-			if len(g_list) < 100 * self._n_jobs:
-				chunksize = int(len(g_list) / self._n_jobs) + 1
+			if len(g_list) < 100 * self.n_jobs:
+				chunksize = int(len(g_list) / self.n_jobs) + 1
 			else:
 				chunksize = 100
 			remove_fun = self._wrapper_untotter
 			iterator = get_iters(pool.imap_unordered(remove_fun, itr, chunksize),
 						desc='removing tottering', file=sys.stdout,
-						length=len(g_list), verbose=(self._verbose >= 2))
+						length=len(g_list), verbose=(self.verbose >= 2))
 			for i, g in iterator:
 				g_list[i] = g
 			pool.close()
@@ -151,7 +151,7 @@ class Marginalized(GraphKernel):
 		len_itr = len(g_list)
 		parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr,
 			init_worker=init_worker, glbv=(g1, g_list), method='imap_unordered',
-			n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose)
+			n_jobs=self.n_jobs, itr_desc='Computing kernels', verbose=self.verbose)
 
 		return kernel_list
@@ -41,10 +41,10 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 		from itertools import combinations_with_replacement
 		itr_kernel = combinations_with_replacement(range(0, len(self._graphs)), 2)
-		iterator_ps = get_iters(range(0, len(self._graphs)), desc='getting paths', file=sys.stdout, length=len(self._graphs), verbose=(self._verbose >= 2))
+		iterator_ps = get_iters(range(0, len(self._graphs)), desc='getting paths', file=sys.stdout, length=len(self._graphs), verbose=(self.verbose >= 2))
 		len_itr = int(len(self._graphs) * (len(self._graphs) + 1) / 2)
 		iterator_kernel = get_iters(itr_kernel, desc='Computing kernels',
-					file=sys.stdout, length=len_itr, verbose=(self._verbose >= 2))
+					file=sys.stdout, length=len_itr, verbose=(self.verbose >= 2))
 
 		gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))
@@ -69,10 +69,10 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 		# get all paths of all graphs before computing kernels to save time,
 		# but this may cost a lot of memory for large datasets.
-		pool = Pool(self._n_jobs)
+		pool = Pool(self.n_jobs)
 		itr = zip(self._graphs, range(0, len(self._graphs)))
-		if len(self._graphs) < 100 * self._n_jobs:
-			chunksize = int(len(self._graphs) / self._n_jobs) + 1
+		if len(self._graphs) < 100 * self.n_jobs:
+			chunksize = int(len(self._graphs) / self.n_jobs) + 1
 		else:
 			chunksize = 100
 		all_paths = [[] for _ in range(len(self._graphs))]
@@ -84,7 +84,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 		get_ps_fun = partial(self._wrapper_find_all_paths_until_length, False)
 		iterator = get_iters(pool.imap_unordered(get_ps_fun, itr, chunksize),
 					desc='getting paths', file=sys.stdout,
-					length=len(self._graphs), verbose=(self._verbose >= 2))
+					length=len(self._graphs), verbose=(self.verbose >= 2))
 		for i, ps in iterator:
 			all_paths[i] = ps
 		pool.close()
@@ -109,7 +109,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 			G_plist = plist_toshare
 		do_fun = self._wrapper_kernel_do_kernelless # @todo: what is this?
 		parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker,
-			glbv=(all_paths,), n_jobs=self._n_jobs, verbose=self._verbose)
+			glbv=(all_paths,), n_jobs=self.n_jobs, verbose=self.verbose)
 
 		return gram_matrix
@@ -117,8 +117,8 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 	def _compute_kernel_list_series(self, g1, g_list):
 		self._add_dummy_labels(g_list + [g1])
 
-		iterator_ps = get_iters(g_list, desc='getting paths', file=sys.stdout, verbose=(self._verbose >= 2))
-		iterator_kernel = get_iters(range(len(g_list)), desc='Computing kernels', file=sys.stdout, length=len(g_list), verbose=(self._verbose >= 2))
+		iterator_ps = get_iters(g_list, desc='getting paths', file=sys.stdout, verbose=(self.verbose >= 2))
+		iterator_kernel = get_iters(range(len(g_list)), desc='Computing kernels', file=sys.stdout, length=len(g_list), verbose=(self.verbose >= 2))
 
 		kernel_list = [None] * len(g_list)
@@ -143,10 +143,10 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 		# get all paths of all graphs before computing kernels to save time,
 		# but this may cost a lot of memory for large datasets.
-		pool = Pool(self._n_jobs)
+		pool = Pool(self.n_jobs)
 		itr = zip(g_list, range(0, len(g_list)))
-		if len(g_list) < 100 * self._n_jobs:
-			chunksize = int(len(g_list) / self._n_jobs) + 1
+		if len(g_list) < 100 * self.n_jobs:
+			chunksize = int(len(g_list) / self.n_jobs) + 1
 		else:
 			chunksize = 100
 		paths_g_list = [[] for _ in range(len(g_list))]
@@ -161,7 +161,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 		get_ps_fun = partial(self._wrapper_find_all_paths_until_length, False)
 		iterator = get_iters(pool.imap_unordered(get_ps_fun, itr, chunksize),
 					desc='getting paths', file=sys.stdout,
-					length=len(g_list), verbose=(self._verbose >= 2))
+					length=len(g_list), verbose=(self.verbose >= 2))
 		for i, ps in iterator:
 			paths_g_list[i] = ps
 		pool.close()
@@ -180,7 +180,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 		itr = range(len(g_list))
 		len_itr = len(g_list)
 		parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr,
-			init_worker=init_worker, glbv=(paths_g1, paths_g_list), method='imap_unordered', n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose)
+			init_worker=init_worker, glbv=(paths_g1, paths_g_list), method='imap_unordered', n_jobs=self.n_jobs, itr_desc='Computing kernels', verbose=self.verbose)
 
 		return kernel_list
| @@ -38,7 +38,7 @@ class ShortestPath(GraphKernel): | |||||
| def _compute_gm_series(self): | def _compute_gm_series(self): | ||||
| self._all_graphs_have_edges(self._graphs) | self._all_graphs_have_edges(self._graphs) | ||||
| # get shortest path graph of each graph. | # get shortest path graph of each graph. | ||||
| iterator = get_iters(self._graphs, desc='getting sp graphs', file=sys.stdout, verbose=(self._verbose >= 2)) | |||||
| iterator = get_iters(self._graphs, desc='getting sp graphs', file=sys.stdout, verbose=(self.verbose >= 2)) | |||||
| self._graphs = [getSPGraph(g, edge_weight=self._edge_weight) for g in iterator] | self._graphs = [getSPGraph(g, edge_weight=self._edge_weight) for g in iterator] | ||||
| # compute Gram matrix. | # compute Gram matrix. | ||||
| @@ -48,7 +48,7 @@ class ShortestPath(GraphKernel): | |||||
| itr = combinations_with_replacement(range(0, len(self._graphs)), 2) | itr = combinations_with_replacement(range(0, len(self._graphs)), 2) | ||||
| len_itr = int(len(self._graphs) * (len(self._graphs) + 1) / 2) | len_itr = int(len(self._graphs) * (len(self._graphs) + 1) / 2) | ||||
| iterator = get_iters(itr, desc='Computing kernels', | iterator = get_iters(itr, desc='Computing kernels', | ||||
| length=len_itr, file=sys.stdout,verbose=(self._verbose >= 2)) | |||||
| length=len_itr, file=sys.stdout,verbose=(self.verbose >= 2)) | |||||
| for i, j in iterator: | for i, j in iterator: | ||||
| kernel = self._sp_do(self._graphs[i], self._graphs[j]) | kernel = self._sp_do(self._graphs[i], self._graphs[j]) | ||||
| gram_matrix[i][j] = kernel | gram_matrix[i][j] = kernel | ||||
| @@ -60,16 +60,16 @@ class ShortestPath(GraphKernel): | |||||
| def _compute_gm_imap_unordered(self): | def _compute_gm_imap_unordered(self): | ||||
| self._all_graphs_have_edges(self._graphs) | self._all_graphs_have_edges(self._graphs) | ||||
| # get shortest path graph of each graph. | # get shortest path graph of each graph. | ||||
| pool = Pool(self._n_jobs) | |||||
| pool = Pool(self.n_jobs) | |||||
| get_sp_graphs_fun = self._wrapper_get_sp_graphs | get_sp_graphs_fun = self._wrapper_get_sp_graphs | ||||
| itr = zip(self._graphs, range(0, len(self._graphs))) | itr = zip(self._graphs, range(0, len(self._graphs))) | ||||
| if len(self._graphs) < 100 * self._n_jobs: | |||||
| chunksize = int(len(self._graphs) / self._n_jobs) + 1 | |||||
| if len(self._graphs) < 100 * self.n_jobs: | |||||
| chunksize = int(len(self._graphs) / self.n_jobs) + 1 | |||||
| else: | else: | ||||
| chunksize = 100 | chunksize = 100 | ||||
| iterator = get_iters(pool.imap_unordered(get_sp_graphs_fun, itr, chunksize), | iterator = get_iters(pool.imap_unordered(get_sp_graphs_fun, itr, chunksize), | ||||
| desc='getting sp graphs', file=sys.stdout, | desc='getting sp graphs', file=sys.stdout, | ||||
| length=len(self._graphs), verbose=(self._verbose >= 2)) | |||||
| length=len(self._graphs), verbose=(self.verbose >= 2)) | |||||
| for i, g in iterator: | for i, g in iterator: | ||||
| self._graphs[i] = g | self._graphs[i] = g | ||||
| pool.close() | pool.close() | ||||
| @@ -83,7 +83,7 @@ class ShortestPath(GraphKernel): | |||||
| G_gs = gs_toshare | G_gs = gs_toshare | ||||
| do_fun = self._wrapper_sp_do | do_fun = self._wrapper_sp_do | ||||
| parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker, | parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker, | ||||
| glbv=(self._graphs,), n_jobs=self._n_jobs, verbose=self._verbose) | |||||
| glbv=(self._graphs,), n_jobs=self.n_jobs, verbose=self.verbose) | |||||
| return gram_matrix | return gram_matrix | ||||
| @@ -92,12 +92,12 @@ class ShortestPath(GraphKernel): | |||||
| self._all_graphs_have_edges([g1] + g_list) | self._all_graphs_have_edges([g1] + g_list) | ||||
| # get shortest path graphs of g1 and each graph in g_list. | # get shortest path graphs of g1 and each graph in g_list. | ||||
| g1 = getSPGraph(g1, edge_weight=self._edge_weight) | g1 = getSPGraph(g1, edge_weight=self._edge_weight) | ||||
| iterator = get_iters(g_list, desc='getting sp graphs', file=sys.stdout, verbose=(self._verbose >= 2)) | |||||
| iterator = get_iters(g_list, desc='getting sp graphs', file=sys.stdout, verbose=(self.verbose >= 2)) | |||||
| g_list = [getSPGraph(g, edge_weight=self._edge_weight) for g in iterator] | g_list = [getSPGraph(g, edge_weight=self._edge_weight) for g in iterator] | ||||
| # compute kernel list. | # compute kernel list. | ||||
| kernel_list = [None] * len(g_list) | kernel_list = [None] * len(g_list) | ||||
| iterator = get_iters(range(len(g_list)), desc='Computing kernels', file=sys.stdout, length=len(g_list), verbose=(self._verbose >= 2)) | |||||
| iterator = get_iters(range(len(g_list)), desc='Computing kernels', file=sys.stdout, length=len(g_list), verbose=(self.verbose >= 2)) | |||||
| for i in iterator: | for i in iterator: | ||||
| kernel = self._sp_do(g1, g_list[i]) | kernel = self._sp_do(g1, g_list[i]) | ||||
| kernel_list[i] = kernel | kernel_list[i] = kernel | ||||
| @@ -109,16 +109,16 @@ class ShortestPath(GraphKernel): | |||||
| self._all_graphs_have_edges([g1] + g_list) | self._all_graphs_have_edges([g1] + g_list) | ||||
| # get shortest path graphs of g1 and each graph in g_list. | # get shortest path graphs of g1 and each graph in g_list. | ||||
| g1 = getSPGraph(g1, edge_weight=self._edge_weight) | g1 = getSPGraph(g1, edge_weight=self._edge_weight) | ||||
| pool = Pool(self._n_jobs) | |||||
| pool = Pool(self.n_jobs) | |||||
| get_sp_graphs_fun = self._wrapper_get_sp_graphs | get_sp_graphs_fun = self._wrapper_get_sp_graphs | ||||
| itr = zip(g_list, range(0, len(g_list))) | itr = zip(g_list, range(0, len(g_list))) | ||||
| if len(g_list) < 100 * self._n_jobs: | |||||
| chunksize = int(len(g_list) / self._n_jobs) + 1 | |||||
| if len(g_list) < 100 * self.n_jobs: | |||||
| chunksize = int(len(g_list) / self.n_jobs) + 1 | |||||
| else: | else: | ||||
| chunksize = 100 | chunksize = 100 | ||||
| iterator = get_iters(pool.imap_unordered(get_sp_graphs_fun, itr, chunksize), | iterator = get_iters(pool.imap_unordered(get_sp_graphs_fun, itr, chunksize), | ||||
| desc='getting sp graphs', file=sys.stdout, | desc='getting sp graphs', file=sys.stdout, | ||||
| length=len(g_list), verbose=(self._verbose >= 2)) | |||||
| length=len(g_list), verbose=(self.verbose >= 2)) | |||||
| for i, g in iterator: | for i, g in iterator: | ||||
| g_list[i] = g | g_list[i] = g | ||||
| pool.close() | pool.close() | ||||
| @@ -137,7 +137,7 @@ class ShortestPath(GraphKernel): | |||||
| itr = range(len(g_list)) | itr = range(len(g_list)) | ||||
| len_itr = len(g_list) | len_itr = len(g_list) | ||||
| parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr, | parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr, | ||||
| init_worker=init_worker, glbv=(g1, g_list), method='imap_unordered', n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose) | |||||
| init_worker=init_worker, glbv=(g1, g_list), method='imap_unordered', n_jobs=self.n_jobs, itr_desc='Computing kernels', verbose=self.verbose) | |||||
| return kernel_list | return kernel_list | ||||
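Editor's note: both imap_unordered code paths above size their Pool chunks with the same heuristic: below 100 items per worker, split the work roughly evenly across workers; above that, cap chunks at 100 so pool.imap_unordered streams results back often enough for the progress bar. The rule in isolation:

    def pick_chunksize(n_items, n_jobs):
        # Small workloads: one roughly equal chunk per worker.
        if n_items < 100 * n_jobs:
            return n_items // n_jobs + 1
        # Large workloads: fixed-size chunks keep results streaming back.
        return 100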
| @@ -28,9 +28,9 @@ class SpectralDecomposition(RandomWalkMeta): | |||||
| def _compute_gm_series(self): | def _compute_gm_series(self): | ||||
| self._check_edge_weight(self._graphs, self._verbose) | |||||
| self._check_edge_weight(self._graphs, self.verbose) | |||||
| self._check_graphs(self._graphs) | self._check_graphs(self._graphs) | ||||
| if self._verbose >= 2: | |||||
| if self.verbose >= 2: | |||||
| import warnings | import warnings | ||||
| warnings.warn('All labels are ignored. Only works for undirected graphs.') | warnings.warn('All labels are ignored. Only works for undirected graphs.') | ||||
| @@ -41,7 +41,7 @@ class SpectralDecomposition(RandomWalkMeta): | |||||
| # precompute the spectral decomposition of each graph. | # precompute the spectral decomposition of each graph. | ||||
| P_list = [] | P_list = [] | ||||
| D_list = [] | D_list = [] | ||||
| iterator = get_iters(self._graphs, desc='spectral decompose', file=sys.stdout, verbose=(self._verbose >= 2)) | |||||
| iterator = get_iters(self._graphs, desc='spectral decompose', file=sys.stdout, verbose=(self.verbose >= 2)) | |||||
| for G in iterator: | for G in iterator: | ||||
| # don't normalize adjacency matrices if q is a uniform vector. Note | # don't normalize adjacency matrices if q is a uniform vector. Note | ||||
| # A actually is the transpose of the adjacency matrix. | # A actually is the transpose of the adjacency matrix. | ||||
| @@ -58,7 +58,7 @@ class SpectralDecomposition(RandomWalkMeta): | |||||
| from itertools import combinations_with_replacement | from itertools import combinations_with_replacement | ||||
| itr = combinations_with_replacement(range(0, len(self._graphs)), 2) | itr = combinations_with_replacement(range(0, len(self._graphs)), 2) | ||||
| len_itr = int(len(self._graphs) * (len(self._graphs) + 1) / 2) | len_itr = int(len(self._graphs) * (len(self._graphs) + 1) / 2) | ||||
| iterator = get_iters(itr, desc='Computing kernels', file=sys.stdout, length=len_itr, verbose=(self._verbose >= 2)) | |||||
| iterator = get_iters(itr, desc='Computing kernels', file=sys.stdout, length=len_itr, verbose=(self.verbose >= 2)) | |||||
| for i, j in iterator: | for i, j in iterator: | ||||
| kernel = self._kernel_do(q_T_list[i], q_T_list[j], P_list[i], P_list[j], D_list[i], D_list[j], self._weight, self._sub_kernel) | kernel = self._kernel_do(q_T_list[i], q_T_list[j], P_list[i], P_list[j], D_list[i], D_list[j], self._weight, self._sub_kernel) | ||||
| @@ -74,9 +74,9 @@ class SpectralDecomposition(RandomWalkMeta): | |||||
| def _compute_gm_imap_unordered(self): | def _compute_gm_imap_unordered(self): | ||||
| self._check_edge_weight(self._graphs, self._verbose) | |||||
| self._check_edge_weight(self._graphs, self.verbose) | |||||
| self._check_graphs(self._graphs) | self._check_graphs(self._graphs) | ||||
| if self._verbose >= 2: | |||||
| if self.verbose >= 2: | |||||
| import warnings | import warnings | ||||
| warnings.warn('All labels are ignored. Only works for undirected graphs.') | warnings.warn('All labels are ignored. Only works for undirected graphs.') | ||||
| @@ -87,7 +87,7 @@ class SpectralDecomposition(RandomWalkMeta): | |||||
| # precompute the spectral decomposition of each graph. | # precompute the spectral decomposition of each graph. | ||||
| P_list = [] | P_list = [] | ||||
| D_list = [] | D_list = [] | ||||
| iterator = get_iters(self._graphs, desc='spectral decompose', file=sys.stdout, verbose=(self._verbose >= 2)) | |||||
| iterator = get_iters(self._graphs, desc='spectral decompose', file=sys.stdout, verbose=(self.verbose >= 2)) | |||||
| for G in iterator: | for G in iterator: | ||||
| # don't normalize adjacency matrices if q is a uniform vector. Note | # don't normalize adjacency matrices if q is a uniform vector. Note | ||||
| # A actually is the transpose of the adjacency matrix. | # A actually is the transpose of the adjacency matrix. | ||||
| @@ -107,7 +107,7 @@ class SpectralDecomposition(RandomWalkMeta): | |||||
| do_fun = self._wrapper_kernel_do | do_fun = self._wrapper_kernel_do | ||||
| parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker, | parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker, | ||||
| glbv=(q_T_list, P_list, D_list), n_jobs=self._n_jobs, verbose=self._verbose) | |||||
| glbv=(q_T_list, P_list, D_list), n_jobs=self.n_jobs, verbose=self.verbose) | |||||
| else: # @todo | else: # @todo | ||||
| pass | pass | ||||
| @@ -118,9 +118,9 @@ class SpectralDecomposition(RandomWalkMeta): | |||||
| def _compute_kernel_list_series(self, g1, g_list): | def _compute_kernel_list_series(self, g1, g_list): | ||||
| self._check_edge_weight(g_list + [g1], self._verbose) | |||||
| self._check_edge_weight(g_list + [g1], self.verbose) | |||||
| self._check_graphs(g_list + [g1]) | self._check_graphs(g_list + [g1]) | ||||
| if self._verbose >= 2: | |||||
| if self.verbose >= 2: | |||||
| import warnings | import warnings | ||||
| warnings.warn('All labels are ignored. Only works for undirected graphs.') | warnings.warn('All labels are ignored. Only works for undirected graphs.') | ||||
| @@ -133,7 +133,7 @@ class SpectralDecomposition(RandomWalkMeta): | |||||
| D1, P1 = np.linalg.eig(A1) | D1, P1 = np.linalg.eig(A1) | ||||
| P_list = [] | P_list = [] | ||||
| D_list = [] | D_list = [] | ||||
| iterator = get_iters(g_list, desc='spectral decompose', file=sys.stdout, verbose=(self._verbose >= 2)) | |||||
| iterator = get_iters(g_list, desc='spectral decompose', file=sys.stdout, verbose=(self.verbose >= 2)) | |||||
| for G in iterator: | for G in iterator: | ||||
| # don't normalize adjacency matrices if q is a uniform vector. Note | # don't normalize adjacency matrices if q is a uniform vector. Note | ||||
| # A actually is the transpose of the adjacency matrix. | # A actually is the transpose of the adjacency matrix. | ||||
| @@ -145,7 +145,7 @@ class SpectralDecomposition(RandomWalkMeta): | |||||
| if self._p is None: # p is uniform distribution as default. | if self._p is None: # p is uniform distribution as default. | ||||
| q_T1 = 1 / nx.number_of_nodes(g1) | q_T1 = 1 / nx.number_of_nodes(g1) | ||||
| q_T_list = [np.full((1, nx.number_of_nodes(G)), 1 / nx.number_of_nodes(G)) for G in g_list] | q_T_list = [np.full((1, nx.number_of_nodes(G)), 1 / nx.number_of_nodes(G)) for G in g_list] | ||||
| iterator = get_iters(range(len(g_list)), desc='Computing kernels', file=sys.stdout, length=len(g_list), verbose=(self._verbose >= 2)) | |||||
| iterator = get_iters(range(len(g_list)), desc='Computing kernels', file=sys.stdout, length=len(g_list), verbose=(self.verbose >= 2)) | |||||
| for i in iterator: | for i in iterator: | ||||
| kernel = self._kernel_do(q_T1, q_T_list[i], P1, P_list[i], D1, D_list[i], self._weight, self._sub_kernel) | kernel = self._kernel_do(q_T1, q_T_list[i], P1, P_list[i], D1, D_list[i], self._weight, self._sub_kernel) | ||||
| @@ -160,9 +160,9 @@ class SpectralDecomposition(RandomWalkMeta): | |||||
| def _compute_kernel_list_imap_unordered(self, g1, g_list): | def _compute_kernel_list_imap_unordered(self, g1, g_list): | ||||
| self._check_edge_weight(g_list + [g1], self._verbose) | |||||
| self._check_edge_weight(g_list + [g1], self.verbose) | |||||
| self._check_graphs(g_list + [g1]) | self._check_graphs(g_list + [g1]) | ||||
| if self._verbose >= 2: | |||||
| if self.verbose >= 2: | |||||
| import warnings | import warnings | ||||
| warnings.warn('All labels are ignored. Only works for undirected graphs.') | warnings.warn('All labels are ignored. Only works for undirected graphs.') | ||||
| @@ -175,8 +175,8 @@ class SpectralDecomposition(RandomWalkMeta): | |||||
| D1, P1 = np.linalg.eig(A1) | D1, P1 = np.linalg.eig(A1) | ||||
| P_list = [] | P_list = [] | ||||
| D_list = [] | D_list = [] | ||||
| if self._verbose >= 2: | |||||
| iterator = tqdm(g_list, desc='spectral decompose', file=sys.stdout) | |||||
| if self.verbose >= 2: | |||||
| iterator = get_iters(g_list, desc='spectral decompose', file=sys.stdout) | |||||
| else: | else: | ||||
| iterator = g_list | iterator = g_list | ||||
| for G in iterator: | for G in iterator: | ||||
| @@ -207,7 +207,7 @@ class SpectralDecomposition(RandomWalkMeta): | |||||
| itr = range(len(g_list)) | itr = range(len(g_list)) | ||||
| len_itr = len(g_list) | len_itr = len(g_list) | ||||
| parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr, | parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr, | ||||
| init_worker=init_worker, glbv=(q_T1, P1, D1, q_T_list, P_list, D_list), method='imap_unordered', n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose) | |||||
| init_worker=init_worker, glbv=(q_T1, P1, D1, q_T_list, P_list, D_list), method='imap_unordered', n_jobs=self.n_jobs, itr_desc='Computing kernels', verbose=self.verbose) | |||||
| else: # @todo | else: # @todo | ||||
| pass | pass | ||||
| @@ -222,9 +222,9 @@ class SpectralDecomposition(RandomWalkMeta): | |||||
| def _compute_single_kernel_series(self, g1, g2): | def _compute_single_kernel_series(self, g1, g2): | ||||
| self._check_edge_weight([g1] + [g2], self._verbose) | |||||
| self._check_edge_weight([g1] + [g2], self.verbose) | |||||
| self._check_graphs([g1] + [g2]) | self._check_graphs([g1] + [g2]) | ||||
| if self._verbose >= 2: | |||||
| if self.verbose >= 2: | |||||
| import warnings | import warnings | ||||
| warnings.warn('All labels are ignored. Only works for undirected graphs.') | warnings.warn('All labels are ignored. Only works for undirected graphs.') | ||||
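Editor's note: every SpectralDecomposition path above follows the same plan — eigendecompose each adjacency matrix once, then reuse the factors across all pairwise kernels instead of refactorizing per pair. A minimal sketch of the precompute step, assuming undirected graphs and the uniform start/stop distributions used above (the keyword form weight=... is used here, since adjacency_matrix's second positional argument is a node list):

    import numpy as np
    import networkx as nx

    def precompute_spectral(graphs, edge_weight=None):
        P_list, D_list, q_T_list = [], [], []
        for G in graphs:
            # A holds the transpose of the adjacency matrix, as in the code above.
            A = nx.adjacency_matrix(G, weight=edge_weight).todense().transpose()
            D, P = np.linalg.eig(A)  # eigenvalues D, eigenvectors P
            D_list.append(D)
            P_list.append(P)
            n = nx.number_of_nodes(G)
            q_T_list.append(np.full((1, n), 1 / n))  # uniform distribution
        return P_list, D_list, q_T_list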
| @@ -41,7 +41,7 @@ class StructuralSP(GraphKernel): | |||||
| def _compute_gm_series(self): | def _compute_gm_series(self): | ||||
| # get shortest paths of each graph in the graphs. | # get shortest paths of each graph in the graphs. | ||||
| splist = [] | splist = [] | ||||
| iterator = get_iters(self._graphs, desc='getting sp graphs', file=sys.stdout, verbose=(self._verbose >= 2)) | |||||
| iterator = get_iters(self._graphs, desc='getting sp graphs', file=sys.stdout, verbose=(self.verbose >= 2)) | |||||
| if self._compute_method == 'trie': | if self._compute_method == 'trie': | ||||
| for g in iterator: | for g in iterator: | ||||
| splist.append(self._get_sps_as_trie(g)) | splist.append(self._get_sps_as_trie(g)) | ||||
| @@ -56,7 +56,7 @@ class StructuralSP(GraphKernel): | |||||
| itr = combinations_with_replacement(range(0, len(self._graphs)), 2) | itr = combinations_with_replacement(range(0, len(self._graphs)), 2) | ||||
| len_itr = int(len(self._graphs) * (len(self._graphs) + 1) / 2) | len_itr = int(len(self._graphs) * (len(self._graphs) + 1) / 2) | ||||
| iterator = get_iters(itr, desc='Computing kernels', file=sys.stdout, | iterator = get_iters(itr, desc='Computing kernels', file=sys.stdout, | ||||
| length=len_itr, verbose=(self._verbose >= 2)) | |||||
| length=len_itr, verbose=(self.verbose >= 2)) | |||||
| if self._compute_method == 'trie': | if self._compute_method == 'trie': | ||||
| for i, j in iterator: | for i, j in iterator: | ||||
| kernel = self._ssp_do_trie(self._graphs[i], self._graphs[j], splist[i], splist[j]) | kernel = self._ssp_do_trie(self._graphs[i], self._graphs[j], splist[i], splist[j]) | ||||
| @@ -76,10 +76,10 @@ class StructuralSP(GraphKernel): | |||||
| def _compute_gm_imap_unordered(self): | def _compute_gm_imap_unordered(self): | ||||
| # get shortest paths of each graph in the graphs. | # get shortest paths of each graph in the graphs. | ||||
| splist = [None] * len(self._graphs) | splist = [None] * len(self._graphs) | ||||
| pool = Pool(self._n_jobs) | |||||
| pool = Pool(self.n_jobs) | |||||
| itr = zip(self._graphs, range(0, len(self._graphs))) | itr = zip(self._graphs, range(0, len(self._graphs))) | ||||
| if len(self._graphs) < 100 * self._n_jobs: | |||||
| chunksize = int(len(self._graphs) / self._n_jobs) + 1 | |||||
| if len(self._graphs) < 100 * self.n_jobs: | |||||
| chunksize = int(len(self._graphs) / self.n_jobs) + 1 | |||||
| else: | else: | ||||
| chunksize = 100 | chunksize = 100 | ||||
| # get shortest path graphs of self._graphs | # get shortest path graphs of self._graphs | ||||
| @@ -89,7 +89,7 @@ class StructuralSP(GraphKernel): | |||||
| get_sps_fun = self._wrapper_get_sps_naive | get_sps_fun = self._wrapper_get_sps_naive | ||||
| iterator = get_iters(pool.imap_unordered(get_sps_fun, itr, chunksize), | iterator = get_iters(pool.imap_unordered(get_sps_fun, itr, chunksize), | ||||
| desc='getting shortest paths', file=sys.stdout, | desc='getting shortest paths', file=sys.stdout, | ||||
| length=len(self._graphs), verbose=(self._verbose >= 2)) | |||||
| length=len(self._graphs), verbose=(self.verbose >= 2)) | |||||
| for i, sp in iterator: | for i, sp in iterator: | ||||
| splist[i] = sp | splist[i] = sp | ||||
| pool.close() | pool.close() | ||||
| @@ -107,7 +107,7 @@ class StructuralSP(GraphKernel): | |||||
| else: | else: | ||||
| do_fun = self._wrapper_ssp_do_naive | do_fun = self._wrapper_ssp_do_naive | ||||
| parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker, | parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker, | ||||
| glbv=(splist, self._graphs), n_jobs=self._n_jobs, verbose=self._verbose) | |||||
| glbv=(splist, self._graphs), n_jobs=self.n_jobs, verbose=self.verbose) | |||||
| return gram_matrix | return gram_matrix | ||||
| @@ -117,7 +117,7 @@ class StructuralSP(GraphKernel): | |||||
| sp1 = get_shortest_paths(g1, self._edge_weight, self._ds_infos['directed']) | sp1 = get_shortest_paths(g1, self._edge_weight, self._ds_infos['directed']) | ||||
| splist = [] | splist = [] | ||||
| iterator = get_iters(g_list, desc='getting sp graphs', file=sys.stdout, | iterator = get_iters(g_list, desc='getting sp graphs', file=sys.stdout, | ||||
| verbose=(self._verbose >= 2)) | |||||
| verbose=(self.verbose >= 2)) | |||||
| if self._compute_method == 'trie': | if self._compute_method == 'trie': | ||||
| for g in iterator: | for g in iterator: | ||||
| splist.append(self._get_sps_as_trie(g)) | splist.append(self._get_sps_as_trie(g)) | ||||
| @@ -128,7 +128,7 @@ class StructuralSP(GraphKernel): | |||||
| # compute kernel list. | # compute kernel list. | ||||
| kernel_list = [None] * len(g_list) | kernel_list = [None] * len(g_list) | ||||
| iterator = get_iters(range(len(g_list)), desc='Computing kernels', | iterator = get_iters(range(len(g_list)), desc='Computing kernels', | ||||
| file=sys.stdout, length=len(g_list), verbose=(self._verbose >= 2)) | |||||
| file=sys.stdout, length=len(g_list), verbose=(self.verbose >= 2)) | |||||
| if self._compute_method == 'trie': | if self._compute_method == 'trie': | ||||
| for i in iterator: | for i in iterator: | ||||
| kernel = self._ssp_do_trie(g1, g_list[i], sp1, splist[i]) | kernel = self._ssp_do_trie(g1, g_list[i], sp1, splist[i]) | ||||
| @@ -145,10 +145,10 @@ class StructuralSP(GraphKernel): | |||||
| # get shortest paths of g1 and each graph in g_list. | # get shortest paths of g1 and each graph in g_list. | ||||
| sp1 = get_shortest_paths(g1, self._edge_weight, self._ds_infos['directed']) | sp1 = get_shortest_paths(g1, self._edge_weight, self._ds_infos['directed']) | ||||
| splist = [None] * len(g_list) | splist = [None] * len(g_list) | ||||
| pool = Pool(self._n_jobs) | |||||
| pool = Pool(self.n_jobs) | |||||
| itr = zip(g_list, range(0, len(g_list))) | itr = zip(g_list, range(0, len(g_list))) | ||||
| if len(g_list) < 100 * self._n_jobs: | |||||
| chunksize = int(len(g_list) / self._n_jobs) + 1 | |||||
| if len(g_list) < 100 * self.n_jobs: | |||||
| chunksize = int(len(g_list) / self.n_jobs) + 1 | |||||
| else: | else: | ||||
| chunksize = 100 | chunksize = 100 | ||||
| # get shortest path graphs of g_list | # get shortest path graphs of g_list | ||||
| @@ -158,7 +158,7 @@ class StructuralSP(GraphKernel): | |||||
| get_sps_fun = self._wrapper_get_sps_naive | get_sps_fun = self._wrapper_get_sps_naive | ||||
| iterator = get_iters(pool.imap_unordered(get_sps_fun, itr, chunksize), | iterator = get_iters(pool.imap_unordered(get_sps_fun, itr, chunksize), | ||||
| desc='getting shortest paths', file=sys.stdout, | desc='getting shortest paths', file=sys.stdout, | ||||
| length=len(g_list), verbose=(self._verbose >= 2)) | |||||
| length=len(g_list), verbose=(self.verbose >= 2)) | |||||
| for i, sp in iterator: | for i, sp in iterator: | ||||
| splist[i] = sp | splist[i] = sp | ||||
| pool.close() | pool.close() | ||||
| @@ -182,7 +182,7 @@ class StructuralSP(GraphKernel): | |||||
| itr = range(len(g_list)) | itr = range(len(g_list)) | ||||
| len_itr = len(g_list) | len_itr = len(g_list) | ||||
| parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr, | parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr, | ||||
| init_worker=init_worker, glbv=(sp1, splist, g1, g_list), method='imap_unordered', n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose) | |||||
| init_worker=init_worker, glbv=(sp1, splist, g1, g_list), method='imap_unordered', n_jobs=self.n_jobs, itr_desc='Computing kernels', verbose=self.verbose) | |||||
| return kernel_list | return kernel_list | ||||
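Editor's note: a pattern worth making explicit in all the imap_unordered branches: large shared objects (graph lists, shortest-path lists, canonical keys) reach the workers once through a Pool initializer instead of being pickled into every task. That is what parallel_gm and parallel_me do with their init_worker/glbv arguments, and what the inline init_worker functions do for the explicit Pool calls. A stripped-down sketch of the mechanism, with hypothetical names:

    from multiprocessing import Pool

    _shared = None  # populated once per worker process

    def _init_worker(data):
        global _shared
        _shared = data

    def _task(i):
        # _shared is available here without being re-pickled per task.
        return i, len(_shared[i])

    if __name__ == '__main__':
        big_list = [list(range(n)) for n in (3, 5, 7)]
        with Pool(2, initializer=_init_worker, initargs=(big_list,)) as pool:
            for i, size in pool.imap_unordered(_task, range(len(big_list))):
                print(i, size)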
| @@ -27,9 +27,9 @@ class SylvesterEquation(RandomWalkMeta): | |||||
| def _compute_gm_series(self): | def _compute_gm_series(self): | ||||
| self._check_edge_weight(self._graphs, self._verbose) | |||||
| self._check_edge_weight(self._graphs, self.verbose) | |||||
| self._check_graphs(self._graphs) | self._check_graphs(self._graphs) | ||||
| if self._verbose >= 2: | |||||
| if self.verbose >= 2: | |||||
| import warnings | import warnings | ||||
| warnings.warn('All labels are ignored.') | warnings.warn('All labels are ignored.') | ||||
| @@ -41,7 +41,7 @@ class SylvesterEquation(RandomWalkMeta): | |||||
| if self._q is None: | if self._q is None: | ||||
| # don't normalize adjacency matrices if q is a uniform vector. Note | # don't normalize adjacency matrices if q is a uniform vector. Note | ||||
| # A_wave_list actually contains the transposes of the adjacency matrices. | # A_wave_list actually contains the transposes of the adjacency matrices. | ||||
| iterator = get_iters(self._graphs, desc='compute adjacency matrices', file=sys.stdout, verbose=(self._verbose >= 2)) | |||||
| iterator = get_iters(self._graphs, desc='compute adjacency matrices', file=sys.stdout, verbose=(self.verbose >= 2)) | |||||
| A_wave_list = [nx.adjacency_matrix(G, self._edge_weight).todense().transpose() for G in iterator] | A_wave_list = [nx.adjacency_matrix(G, self._edge_weight).todense().transpose() for G in iterator] | ||||
| # # normalized adjacency matrices | # # normalized adjacency matrices | ||||
| # A_wave_list = [] | # A_wave_list = [] | ||||
| @@ -55,7 +55,7 @@ class SylvesterEquation(RandomWalkMeta): | |||||
| from itertools import combinations_with_replacement | from itertools import combinations_with_replacement | ||||
| itr = combinations_with_replacement(range(0, len(self._graphs)), 2) | itr = combinations_with_replacement(range(0, len(self._graphs)), 2) | ||||
| len_itr = int(len(self._graphs) * (len(self._graphs) + 1) / 2) | len_itr = int(len(self._graphs) * (len(self._graphs) + 1) / 2) | ||||
| iterator = get_iters(itr, desc='Computing kernels', file=sys.stdout, length=len_itr, verbose=(self._verbose >= 2)) | |||||
| iterator = get_iters(itr, desc='Computing kernels', file=sys.stdout, length=len_itr, verbose=(self.verbose >= 2)) | |||||
| for i, j in iterator: | for i, j in iterator: | ||||
| kernel = self._kernel_do(A_wave_list[i], A_wave_list[j], lmda) | kernel = self._kernel_do(A_wave_list[i], A_wave_list[j], lmda) | ||||
| @@ -71,9 +71,9 @@ class SylvesterEquation(RandomWalkMeta): | |||||
| def _compute_gm_imap_unordered(self): | def _compute_gm_imap_unordered(self): | ||||
| self._check_edge_weight(self._graphs, self._verbose) | |||||
| self._check_edge_weight(self._graphs, self.verbose) | |||||
| self._check_graphs(self._graphs) | self._check_graphs(self._graphs) | ||||
| if self._verbose >= 2: | |||||
| if self.verbose >= 2: | |||||
| import warnings | import warnings | ||||
| warnings.warn('All labels are ignored.') | warnings.warn('All labels are ignored.') | ||||
| @@ -83,7 +83,7 @@ class SylvesterEquation(RandomWalkMeta): | |||||
| if self._q is None: | if self._q is None: | ||||
| # don't normalize adjacency matrices if q is a uniform vector. Note | # don't normalize adjacency matrices if q is a uniform vector. Note | ||||
| # A_wave_list actually contains the transposes of the adjacency matrices. | # A_wave_list actually contains the transposes of the adjacency matrices. | ||||
| iterator = get_iters(self._graphs, desc='compute adjacency matrices', file=sys.stdout, verbose=(self._verbose >= 2)) | |||||
| iterator = get_iters(self._graphs, desc='compute adjacency matrices', file=sys.stdout, verbose=(self.verbose >= 2)) | |||||
| A_wave_list = [nx.adjacency_matrix(G, self._edge_weight).todense().transpose() for G in iterator] # @todo: parallel? | A_wave_list = [nx.adjacency_matrix(G, self._edge_weight).todense().transpose() for G in iterator] # @todo: parallel? | ||||
| if self._p is None: # p is uniform distribution as default. | if self._p is None: # p is uniform distribution as default. | ||||
| @@ -94,7 +94,7 @@ class SylvesterEquation(RandomWalkMeta): | |||||
| do_fun = self._wrapper_kernel_do | do_fun = self._wrapper_kernel_do | ||||
| parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker, | parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker, | ||||
| glbv=(A_wave_list,), n_jobs=self._n_jobs, verbose=self._verbose) | |||||
| glbv=(A_wave_list,), n_jobs=self.n_jobs, verbose=self.verbose) | |||||
| else: # @todo | else: # @todo | ||||
| pass | pass | ||||
| @@ -105,9 +105,9 @@ class SylvesterEquation(RandomWalkMeta): | |||||
| def _compute_kernel_list_series(self, g1, g_list): | def _compute_kernel_list_series(self, g1, g_list): | ||||
| self._check_edge_weight(g_list + [g1], self._verbose) | |||||
| self._check_edge_weight(g_list + [g1], self.verbose) | |||||
| self._check_graphs(g_list + [g1]) | self._check_graphs(g_list + [g1]) | ||||
| if self._verbose >= 2: | |||||
| if self.verbose >= 2: | |||||
| import warnings | import warnings | ||||
| warnings.warn('All labels are ignored.') | warnings.warn('All labels are ignored.') | ||||
| @@ -120,11 +120,11 @@ class SylvesterEquation(RandomWalkMeta): | |||||
| # don't normalize adjacency matrices if q is a uniform vector. Note | # don't normalize adjacency matrices if q is a uniform vector. Note | ||||
| # A_wave_list actually contains the transposes of the adjacency matrices. | # A_wave_list actually contains the transposes of the adjacency matrices. | ||||
| A_wave_1 = nx.adjacency_matrix(g1, self._edge_weight).todense().transpose() | A_wave_1 = nx.adjacency_matrix(g1, self._edge_weight).todense().transpose() | ||||
| iterator = get_iters(g_list, desc='compute adjacency matrices', file=sys.stdout, verbose=(self._verbose >= 2)) | |||||
| iterator = get_iters(g_list, desc='compute adjacency matrices', file=sys.stdout, verbose=(self.verbose >= 2)) | |||||
| A_wave_list = [nx.adjacency_matrix(G, self._edge_weight).todense().transpose() for G in iterator] | A_wave_list = [nx.adjacency_matrix(G, self._edge_weight).todense().transpose() for G in iterator] | ||||
| if self._p is None: # p is uniform distribution as default. | if self._p is None: # p is uniform distribution as default. | ||||
| iterator = get_iters(range(len(g_list)), desc='Computing kernels', file=sys.stdout, length=len(g_list), verbose=(self._verbose >= 2)) | |||||
| iterator = get_iters(range(len(g_list)), desc='Computing kernels', file=sys.stdout, length=len(g_list), verbose=(self.verbose >= 2)) | |||||
| for i in iterator: | for i in iterator: | ||||
| kernel = self._kernel_do(A_wave_1, A_wave_list[i], lmda) | kernel = self._kernel_do(A_wave_1, A_wave_list[i], lmda) | ||||
| @@ -139,9 +139,9 @@ class SylvesterEquation(RandomWalkMeta): | |||||
| def _compute_kernel_list_imap_unordered(self, g1, g_list): | def _compute_kernel_list_imap_unordered(self, g1, g_list): | ||||
| self._check_edge_weight(g_list + [g1], self._verbose) | |||||
| self._check_edge_weight(g_list + [g1], self.verbose) | |||||
| self._check_graphs(g_list + [g1]) | self._check_graphs(g_list + [g1]) | ||||
| if self._verbose >= 2: | |||||
| if self.verbose >= 2: | |||||
| import warnings | import warnings | ||||
| warnings.warn('All labels are ignored.') | warnings.warn('All labels are ignored.') | ||||
| @@ -152,7 +152,7 @@ class SylvesterEquation(RandomWalkMeta): | |||||
| # don't normalize adjacency matrices if q is a uniform vector. Note | # don't normalize adjacency matrices if q is a uniform vector. Note | ||||
| # A_wave_list actually contains the transposes of the adjacency matrices. | # A_wave_list actually contains the transposes of the adjacency matrices. | ||||
| A_wave_1 = nx.adjacency_matrix(g1, self._edge_weight).todense().transpose() | A_wave_1 = nx.adjacency_matrix(g1, self._edge_weight).todense().transpose() | ||||
| iterator = get_iters(g_list, desc='compute adjacency matrices', file=sys.stdout, verbose=(self._verbose >= 2)) | |||||
| iterator = get_iters(g_list, desc='compute adjacency matrices', file=sys.stdout, verbose=(self.verbose >= 2)) | |||||
| A_wave_list = [nx.adjacency_matrix(G, self._edge_weight).todense().transpose() for G in iterator] # @todo: parallel? | A_wave_list = [nx.adjacency_matrix(G, self._edge_weight).todense().transpose() for G in iterator] # @todo: parallel? | ||||
| if self._p is None: # p is uniform distribution as default. | if self._p is None: # p is uniform distribution as default. | ||||
| @@ -169,7 +169,7 @@ class SylvesterEquation(RandomWalkMeta): | |||||
| len_itr = len(g_list) | len_itr = len(g_list) | ||||
| parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr, | parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr, | ||||
| init_worker=init_worker, glbv=(A_wave_1, A_wave_list), method='imap_unordered', | init_worker=init_worker, glbv=(A_wave_1, A_wave_list), method='imap_unordered', | ||||
| n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose) | |||||
| n_jobs=self.n_jobs, itr_desc='Computing kernels', verbose=self.verbose) | |||||
| else: # @todo | else: # @todo | ||||
| pass | pass | ||||
| @@ -184,9 +184,9 @@ class SylvesterEquation(RandomWalkMeta): | |||||
| def _compute_single_kernel_series(self, g1, g2): | def _compute_single_kernel_series(self, g1, g2): | ||||
| self._check_edge_weight([g1] + [g2], self._verbose) | |||||
| self._check_edge_weight([g1] + [g2], self.verbose) | |||||
| self._check_graphs([g1] + [g2]) | self._check_graphs([g1] + [g2]) | ||||
| if self._verbose >= 2: | |||||
| if self.verbose >= 2: | |||||
| import warnings | import warnings | ||||
| warnings.warn('All labels are ignored.') | warnings.warn('All labels are ignored.') | ||||
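Editor's note: SylvesterEquation, like the other RandomWalkMeta subclasses, is a fast solver for the geometric random-walk kernel k(G1, G2) = q^T (I - lmda*W)^{-1} p, where W is the (transposed) adjacency matrix of the direct product of the two graphs. A brute-force dense sketch of the quantity being solved for, assuming unlabeled graphs and the uniform p and q used above; it is cubic in n1*n2 and meant for intuition only:

    import numpy as np
    import networkx as nx

    def rw_geo_kernel_dense(g1, g2, lmda):
        # Transposed adjacency matrices, like A_wave in the code above.
        A1 = np.asarray(nx.adjacency_matrix(g1).todense()).T
        A2 = np.asarray(nx.adjacency_matrix(g2).todense()).T
        W = np.kron(A1, A2)            # walk matrix of the direct product graph
        n = W.shape[0]
        p = np.full(n, 1 / n)          # uniform start distribution
        q = np.full(n, 1 / n)          # uniform stop distribution
        return q @ np.linalg.solve(np.eye(n) - lmda * W, p)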
| @@ -18,6 +18,8 @@ import numpy as np | |||||
| import networkx as nx | import networkx as nx | ||||
| from collections import Counter | from collections import Counter | ||||
| from itertools import chain | from itertools import chain | ||||
| from sklearn.utils.validation import check_is_fitted | |||||
| from sklearn.exceptions import NotFittedError | |||||
| from gklearn.utils import SpecialLabel | from gklearn.utils import SpecialLabel | ||||
| from gklearn.utils.parallel import parallel_gm, parallel_me | from gklearn.utils.parallel import parallel_gm, parallel_me | ||||
| from gklearn.utils.utils import find_all_paths, get_mlti_dim_node_attrs | from gklearn.utils.utils import find_all_paths, get_mlti_dim_node_attrs | ||||
| @@ -26,14 +28,211 @@ from gklearn.kernels import GraphKernel | |||||
| class Treelet(GraphKernel): | class Treelet(GraphKernel): | ||||
| def __init__(self, **kwargs): | |||||
| GraphKernel.__init__(self) | |||||
| self._node_labels = kwargs.get('node_labels', []) | |||||
| self._edge_labels = kwargs.get('edge_labels', []) | |||||
| self._sub_kernel = kwargs.get('sub_kernel', None) | |||||
| self._ds_infos = kwargs.get('ds_infos', {}) | |||||
| if self._sub_kernel is None: | |||||
| raise Exception('Sub kernel not set.') | |||||
| def __init__(self, parallel=None, n_jobs=None, chunksize=None, normalize=True, verbose=2, precompute_canonkeys=True, save_canonkeys=False, **kwargs): | |||||
| """Initialise a treelet kernel. | |||||
| """ | |||||
| super().__init__(parallel=parallel, n_jobs=n_jobs, chunksize=chunksize, normalize=normalize, verbose=verbose) | |||||
| self.node_labels = kwargs.get('node_labels', []) | |||||
| self.edge_labels = kwargs.get('edge_labels', []) | |||||
| self.sub_kernel = kwargs.get('sub_kernel', None) | |||||
| self.ds_infos = kwargs.get('ds_infos', {}) | |||||
| self.precompute_canonkeys = precompute_canonkeys | |||||
| self.save_canonkeys = save_canonkeys | |||||
| ########################################################################## | |||||
| # The following is the 1st paradigm to compute the kernel matrix, which is | |||||
| # compatible with `scikit-learn`. | |||||
| # ------------------------------------------------------------------- | |||||
| # Special thanks to the "GraKeL" library for providing an excellent template! | |||||
| ########################################################################## | |||||
| def clear_attributes(self): | |||||
| super().clear_attributes() | |||||
| if hasattr(self, '_canonkeys'): | |||||
| delattr(self, '_canonkeys') | |||||
| if hasattr(self, '_Y_canonkeys'): | |||||
| delattr(self, '_Y_canonkeys') | |||||
| if hasattr(self, '_dummy_labels_considered'): | |||||
| delattr(self, '_dummy_labels_considered') | |||||
| def validate_parameters(self): | |||||
| """Validate all parameters for the transformer. | |||||
| Returns | |||||
| ------- | |||||
| None. | |||||
| """ | |||||
| super().validate_parameters() | |||||
| if self.sub_kernel is None: | |||||
| raise ValueError('Sub-kernel not set.') | |||||
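Editor's note: with validate_parameters rejecting a missing sub-kernel, construction now looks like the sketch below. The Gaussian sub-kernel and the label names are placeholders (gklearn ships its own sub-kernels; any callable taking two count vectors works):

    import numpy as np

    gaussian = lambda x, y: np.exp(-np.sum((x - y) ** 2) / (2 * 0.5 ** 2))

    kernel = Treelet(sub_kernel=gaussian,
                     node_labels=['atom'], edge_labels=['bond_type'],
                     ds_infos={'directed': False},
                     n_jobs=1, normalize=True, verbose=0)
    # gram = kernel.fit_transform(graphs)  # graphs: a list of nx.Graph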
| def _compute_kernel_matrix_series(self, Y): | |||||
| """Compute the kernel matrix between a given target graphs (Y) and | |||||
| the fitted graphs (X / self._graphs) without parallelization. | |||||
| Parameters | |||||
| ---------- | |||||
| Y : list of graphs | |||||
| The target graphs. | |||||
| Returns | |||||
| ------- | |||||
| kernel_matrix : numpy array, shape = [n_targets, n_inputs] | |||||
| The computed kernel matrix. | |||||
| """ | |||||
| # self._add_dummy_labels will modify the input in place. | |||||
| self._add_dummy_labels() # For self._graphs | |||||
| # Y = [g.copy() for g in Y] # @todo: ? | |||||
| self._add_dummy_labels(Y) | |||||
| # get all canonical keys of all graphs before computing kernels to save | |||||
| # time, but this may cost a lot of memory for large datasets. | |||||
| # Canonical keys for self._graphs. | |||||
| try: | |||||
| check_is_fitted(self, ['_canonkeys']) | |||||
| canonkeys_list1 = self._canonkeys | |||||
| except NotFittedError: | |||||
| canonkeys_list1 = [] | |||||
| iterator = get_iters(self._graphs, desc='getting canonkeys for X', file=sys.stdout, verbose=(self.verbose >= 2)) | |||||
| for g in iterator: | |||||
| canonkeys_list1.append(self._get_canonkeys(g)) | |||||
| if self.save_canonkeys: | |||||
| self._canonkeys = canonkeys_list1 | |||||
| # Canonical keys for Y. | |||||
| canonkeys_list2 = [] | |||||
| iterator = get_iters(Y, desc='getting canonkeys for Y', file=sys.stdout, verbose=(self.verbose >= 2)) | |||||
| for g in iterator: | |||||
| canonkeys_list2.append(self._get_canonkeys(g)) | |||||
| if self.save_canonkeys: | |||||
| self._Y_canonkeys = canonkeys_list2 | |||||
| # compute kernel matrix. | |||||
| kernel_matrix = np.zeros((len(Y), len(canonkeys_list1))) | |||||
| from itertools import product | |||||
| itr = product(range(len(Y)), range(len(canonkeys_list1))) | |||||
| len_itr = int(len(Y) * len(canonkeys_list1)) | |||||
| iterator = get_iters(itr, desc='Computing kernels', file=sys.stdout, | |||||
| length=len_itr, verbose=(self.verbose >= 2)) | |||||
| for i_y, i_x in iterator: | |||||
| kernel = self._kernel_do(canonkeys_list2[i_y], canonkeys_list1[i_x]) | |||||
| kernel_matrix[i_y][i_x] = kernel | |||||
| return kernel_matrix | |||||
| def _compute_kernel_matrix_imap_unordered(self, Y): | |||||
| """Compute the kernel matrix between a given target graphs (Y) and | |||||
| the fitted graphs (X / self._graphs) using imap unordered parallelization. | |||||
| Parameters | |||||
| ---------- | |||||
| Y : list of graphs | |||||
| The target graphs. | |||||
| Returns | |||||
| ------- | |||||
| kernel_matrix : numpy array, shape = [n_targets, n_inputs] | |||||
| The computed kernel matrix. | |||||
| """ | |||||
| raise NotImplementedError('Parallelization for the kernel matrix is not implemented.') | |||||
| def pairwise_kernel(self, x, y, are_keys=False): | |||||
| """Compute pairwise kernel between two graphs. | |||||
| Parameters | |||||
| ---------- | |||||
| x, y : NetworkX Graph. | |||||
| Graphs between which the kernel is computed. | |||||
| are_keys : boolean, optional | |||||
| If `True`, `x` and `y` are canonical keys; otherwise they are graphs. | |||||
| The default is False. | |||||
| Returns | |||||
| ------- | |||||
| kernel: float | |||||
| The computed kernel. | |||||
| """ | |||||
| if are_keys: | |||||
| # x, y are canonical keys. | |||||
| kernel = self._kernel_do(x, y) | |||||
| else: | |||||
| # x, y are graphs. | |||||
| kernel = self._compute_single_kernel_series(x, y) | |||||
| return kernel | |||||
| def diagonals(self): | |||||
| """Compute the kernel matrix diagonals of the fit/transformed data. | |||||
| Returns | |||||
| ------- | |||||
| X_diag : numpy array | |||||
| The diagonal of the kernel matrix of the fitted data. | |||||
| This consists of each element calculated with itself. | |||||
| Y_diag : numpy array | |||||
| The diagonal of the kernel matrix of the transformed data. | |||||
| This consists of each element calculated with itself. | |||||
| """ | |||||
| # Check if method "fit" had been called. | |||||
| check_is_fitted(self, ['_graphs']) | |||||
| # Check if the diagonals of X exist. | |||||
| try: | |||||
| check_is_fitted(self, ['_X_diag']) | |||||
| except NotFittedError: | |||||
| # Compute diagonals of X. | |||||
| self._X_diag = np.empty(shape=(len(self._graphs),)) | |||||
| try: | |||||
| check_is_fitted(self, ['_canonkeys']) | |||||
| for i, x in enumerate(self._canonkeys): | |||||
| self._X_diag[i] = self.pairwise_kernel(x, x, are_keys=True) # @todo: parallel? | |||||
| except NotFittedError: | |||||
| for i, x in enumerate(self._graphs): | |||||
| self._X_diag[i] = self.pairwise_kernel(x, x, are_keys=False) # @todo: parallel? | |||||
| try: | |||||
| # If transform has happened, return both diagonals. | |||||
| check_is_fitted(self, ['_Y']) | |||||
| self._Y_diag = np.empty(shape=(len(self._Y),)) | |||||
| try: | |||||
| check_is_fitted(self, ['_Y_canonkeys']) | |||||
| for (i, y) in enumerate(self._Y_canonkeys): | |||||
| self._Y_diag[i] = self.pairwise_kernel(y, y, are_keys=True) # @todo: parallel? | |||||
| except NotFittedError: | |||||
| for (i, y) in enumerate(self._Y): | |||||
| self._Y_diag[i] = self.pairwise_kernel(y, y, are_keys=False) # @todo: parallel? | |||||
| return self._X_diag, self._Y_diag | |||||
| except NotFittedError: | |||||
| # Otherwise return only X_diag. | |||||
| return self._X_diag | |||||
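Editor's note: these diagonals serve kernel normalization (the normalize=True option): each matrix entry is divided by the geometric mean of the two self-kernels, so every graph has unit self-similarity. A sketch of that step, assuming the [n_targets, n_inputs] layout returned by _compute_kernel_matrix_series:

    import numpy as np

    def normalize_kernel_matrix(K, Y_diag, X_diag):
        # K[i, j] -> K[i, j] / sqrt(Y_diag[i] * X_diag[j])
        return K / np.sqrt(np.outer(Y_diag, X_diag))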
| ########################################################################## | |||||
| # The following is the 2nd paradigm to compute the kernel matrix. It is | |||||
| # simplified and not compatible with `scikit-learn`. | |||||
| ########################################################################## | |||||
| def _compute_gm_series(self): | def _compute_gm_series(self): | ||||
| @@ -43,10 +242,13 @@ class Treelet(GraphKernel): | |||||
| # time, but this may cost a lot of memory for large datasets. | # time, but this may cost a lot of memory for large datasets. | ||||
| canonkeys = [] | canonkeys = [] | ||||
| iterator = get_iters(self._graphs, desc='getting canonkeys', file=sys.stdout, | iterator = get_iters(self._graphs, desc='getting canonkeys', file=sys.stdout, | ||||
| verbose=(self._verbose >= 2)) | |||||
| verbose=(self.verbose >= 2)) | |||||
| for g in iterator: | for g in iterator: | ||||
| canonkeys.append(self._get_canonkeys(g)) | canonkeys.append(self._get_canonkeys(g)) | ||||
| if self.save_canonkeys: | |||||
| self._canonkeys = canonkeys | |||||
| # compute Gram matrix. | # compute Gram matrix. | ||||
| gram_matrix = np.zeros((len(self._graphs), len(self._graphs))) | gram_matrix = np.zeros((len(self._graphs), len(self._graphs))) | ||||
| @@ -54,7 +256,7 @@ class Treelet(GraphKernel): | |||||
| itr = combinations_with_replacement(range(0, len(self._graphs)), 2) | itr = combinations_with_replacement(range(0, len(self._graphs)), 2) | ||||
| len_itr = int(len(self._graphs) * (len(self._graphs) + 1) / 2) | len_itr = int(len(self._graphs) * (len(self._graphs) + 1) / 2) | ||||
| iterator = get_iters(itr, desc='Computing kernels', file=sys.stdout, | iterator = get_iters(itr, desc='Computing kernels', file=sys.stdout, | ||||
| length=len_itr, verbose=(self._verbose >= 2)) | |||||
| length=len_itr, verbose=(self.verbose >= 2)) | |||||
| for i, j in iterator: | for i, j in iterator: | ||||
| kernel = self._kernel_do(canonkeys[i], canonkeys[j]) | kernel = self._kernel_do(canonkeys[i], canonkeys[j]) | ||||
| gram_matrix[i][j] = kernel | gram_matrix[i][j] = kernel | ||||
| @@ -68,22 +270,25 @@ class Treelet(GraphKernel): | |||||
| # get all canonical keys of all graphs before computing kernels to save | # get all canonical keys of all graphs before computing kernels to save | ||||
| # time, but this may cost a lot of memory for large dataset. | # time, but this may cost a lot of memory for large dataset. | ||||
| pool = Pool(self._n_jobs) | |||||
| pool = Pool(self.n_jobs) | |||||
| itr = zip(self._graphs, range(0, len(self._graphs))) | itr = zip(self._graphs, range(0, len(self._graphs))) | ||||
| if len(self._graphs) < 100 * self._n_jobs: | |||||
| chunksize = int(len(self._graphs) / self._n_jobs) + 1 | |||||
| if len(self._graphs) < 100 * self.n_jobs: | |||||
| chunksize = int(len(self._graphs) / self.n_jobs) + 1 | |||||
| else: | else: | ||||
| chunksize = 100 | chunksize = 100 | ||||
| canonkeys = [[] for _ in range(len(self._graphs))] | canonkeys = [[] for _ in range(len(self._graphs))] | ||||
| get_fun = self._wrapper_get_canonkeys | get_fun = self._wrapper_get_canonkeys | ||||
| iterator = get_iters(pool.imap_unordered(get_fun, itr, chunksize), | iterator = get_iters(pool.imap_unordered(get_fun, itr, chunksize), | ||||
| desc='getting canonkeys', file=sys.stdout, | desc='getting canonkeys', file=sys.stdout, | ||||
| length=len(self._graphs), verbose=(self._verbose >= 2)) | |||||
| length=len(self._graphs), verbose=(self.verbose >= 2)) | |||||
| for i, ck in iterator: | for i, ck in iterator: | ||||
| canonkeys[i] = ck | canonkeys[i] = ck | ||||
| pool.close() | pool.close() | ||||
| pool.join() | pool.join() | ||||
| if self.save_canonkeys: | |||||
| self._canonkeys = canonkeys | |||||
| # compute Gram matrix. | # compute Gram matrix. | ||||
| gram_matrix = np.zeros((len(self._graphs), len(self._graphs))) | gram_matrix = np.zeros((len(self._graphs), len(self._graphs))) | ||||
| @@ -92,7 +297,7 @@ class Treelet(GraphKernel): | |||||
| G_canonkeys = canonkeys_toshare | G_canonkeys = canonkeys_toshare | ||||
| do_fun = self._wrapper_kernel_do | do_fun = self._wrapper_kernel_do | ||||
| parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker, | parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker, | ||||
| glbv=(canonkeys,), n_jobs=self._n_jobs, verbose=self._verbose) | |||||
| glbv=(canonkeys,), n_jobs=self.n_jobs, verbose=self.verbose) | |||||
| return gram_matrix | return gram_matrix | ||||
| @@ -104,13 +309,13 @@ class Treelet(GraphKernel): | |||||
| # time, but this may cost a lot of memory for large datasets. | # time, but this may cost a lot of memory for large datasets. | ||||
| canonkeys_1 = self._get_canonkeys(g1) | canonkeys_1 = self._get_canonkeys(g1) | ||||
| canonkeys_list = [] | canonkeys_list = [] | ||||
| iterator = get_iters(g_list, desc='getting canonkeys', file=sys.stdout, verbose=(self._verbose >= 2)) | |||||
| iterator = get_iters(g_list, desc='getting canonkeys', file=sys.stdout, verbose=(self.verbose >= 2)) | |||||
| for g in iterator: | for g in iterator: | ||||
| canonkeys_list.append(self._get_canonkeys(g)) | canonkeys_list.append(self._get_canonkeys(g)) | ||||
| # compute kernel list. | # compute kernel list. | ||||
| kernel_list = [None] * len(g_list) | kernel_list = [None] * len(g_list) | ||||
| iterator = get_iters(range(len(g_list)), desc='Computing kernels', file=sys.stdout, length=len(g_list), verbose=(self._verbose >= 2)) | |||||
| iterator = get_iters(range(len(g_list)), desc='Computing kernels', file=sys.stdout, length=len(g_list), verbose=(self.verbose >= 2)) | |||||
| for i in iterator: | for i in iterator: | ||||
| kernel = self._kernel_do(canonkeys_1, canonkeys_list[i]) | kernel = self._kernel_do(canonkeys_1, canonkeys_list[i]) | ||||
| kernel_list[i] = kernel | kernel_list[i] = kernel | ||||
| @@ -125,16 +330,16 @@ class Treelet(GraphKernel): | |||||
| # time, but this may cost a lot of memory for large datasets. | # time, but this may cost a lot of memory for large datasets. | ||||
| canonkeys_1 = self._get_canonkeys(g1) | canonkeys_1 = self._get_canonkeys(g1) | ||||
| canonkeys_list = [[] for _ in range(len(g_list))] | canonkeys_list = [[] for _ in range(len(g_list))] | ||||
| pool = Pool(self._n_jobs) | |||||
| pool = Pool(self.n_jobs) | |||||
| itr = zip(g_list, range(0, len(g_list))) | itr = zip(g_list, range(0, len(g_list))) | ||||
| if len(g_list) < 100 * self._n_jobs: | |||||
| chunksize = int(len(g_list) / self._n_jobs) + 1 | |||||
| if len(g_list) < 100 * self.n_jobs: | |||||
| chunksize = int(len(g_list) / self.n_jobs) + 1 | |||||
| else: | else: | ||||
| chunksize = 100 | chunksize = 100 | ||||
| get_fun = self._wrapper_get_canonkeys | get_fun = self._wrapper_get_canonkeys | ||||
| iterator = get_iters(pool.imap_unordered(get_fun, itr, chunksize), | iterator = get_iters(pool.imap_unordered(get_fun, itr, chunksize), | ||||
| desc='getting canonkeys', file=sys.stdout, | desc='getting canonkeys', file=sys.stdout, | ||||
| length=len(g_list), verbose=(self._verbose >= 2)) | |||||
| length=len(g_list), verbose=(self.verbose >= 2)) | |||||
| for i, ck in iterator: | for i, ck in iterator: | ||||
| canonkeys_list[i] = ck | canonkeys_list[i] = ck | ||||
| pool.close() | pool.close() | ||||
| @@ -154,7 +359,7 @@ class Treelet(GraphKernel): | |||||
| len_itr = len(g_list) | len_itr = len(g_list) | ||||
| parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr, | parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr, | ||||
| init_worker=init_worker, glbv=(canonkeys_1, canonkeys_list), method='imap_unordered', | init_worker=init_worker, glbv=(canonkeys_1, canonkeys_list), method='imap_unordered', | ||||
| n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose) | |||||
| n_jobs=self.n_jobs, itr_desc='Computing kernels', verbose=self.verbose) | |||||
| return kernel_list | return kernel_list | ||||
| @@ -187,7 +392,7 @@ class Treelet(GraphKernel): | |||||
| keys = set(canonkey1.keys()) & set(canonkey2.keys()) # find same canonical keys in both graphs | keys = set(canonkey1.keys()) & set(canonkey2.keys()) # find same canonical keys in both graphs | ||||
| vector1 = np.array([(canonkey1[key] if (key in canonkey1.keys()) else 0) for key in keys]) | vector1 = np.array([(canonkey1[key] if (key in canonkey1.keys()) else 0) for key in keys]) | ||||
| vector2 = np.array([(canonkey2[key] if (key in canonkey2.keys()) else 0) for key in keys]) | vector2 = np.array([(canonkey2[key] if (key in canonkey2.keys()) else 0) for key in keys]) | ||||
| kernel = self._sub_kernel(vector1, vector2) | |||||
| kernel = self.sub_kernel(vector1, vector2) | |||||
| return kernel | return kernel | ||||
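Editor's note: _kernel_do compares two treelet histograms only on the canonical keys they share (so the conditional lookups in the comprehensions above always succeed), then hands the aligned count vectors to the sub-kernel. A toy run with the placeholder Gaussian sub-kernel from earlier:

    from collections import Counter
    import numpy as np

    canonkey1 = Counter({'0': 5, '1': 4, '2': 1})  # treelet counts of graph 1
    canonkey2 = Counter({'0': 4, '1': 3})          # treelet counts of graph 2

    keys = set(canonkey1) & set(canonkey2)         # shared treelets: {'0', '1'}
    v1 = np.array([canonkey1[k] for k in keys])
    v2 = np.array([canonkey2[k] for k in keys])
    kernel = float(np.exp(-np.sum((v1 - v2) ** 2) / (2 * 0.5 ** 2)))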
| @@ -223,7 +428,7 @@ class Treelet(GraphKernel): | |||||
| patterns['0'] = list(G.nodes()) | patterns['0'] = list(G.nodes()) | ||||
| canonkey['0'] = nx.number_of_nodes(G) | canonkey['0'] = nx.number_of_nodes(G) | ||||
| for i in range(1, 6): | for i in range(1, 6): | ||||
| patterns[str(i)] = find_all_paths(G, i, self._ds_infos['directed']) | |||||
| patterns[str(i)] = find_all_paths(G, i, self.ds_infos['directed']) | |||||
| canonkey[str(i)] = len(patterns[str(i)]) | canonkey[str(i)] = len(patterns[str(i)]) | ||||
| # n-star patterns | # n-star patterns | ||||
| @@ -317,11 +522,11 @@ class Treelet(GraphKernel): | |||||
| ### pattern obtained in the structural analysis section above, which is a | ### pattern obtained in the structural analysis section above, which is a | ||||
| ### string corresponding to a unique treelet. A dictionary is built to keep | ### string corresponding to a unique treelet. A dictionary is built to keep | ||||
| ### track of the count of every treelet. | ### track of the count of every treelet. | ||||
| if len(self._node_labels) > 0 or len(self._edge_labels) > 0: | |||||
| if len(self.node_labels) > 0 or len(self.edge_labels) > 0: | |||||
| canonkey_l = {} # canonical key, a dictionary which keeps track of the count of every treelet. | canonkey_l = {} # canonical key, a dictionary which keeps track of the count of every treelet. | ||||
| # linear patterns | # linear patterns | ||||
| canonkey_t = Counter(get_mlti_dim_node_attrs(G, self._node_labels)) | |||||
| canonkey_t = Counter(get_mlti_dim_node_attrs(G, self.node_labels)) | |||||
| for key in canonkey_t: | for key in canonkey_t: | ||||
| canonkey_l[('0', key)] = canonkey_t[key] | canonkey_l[('0', key)] = canonkey_t[key] | ||||
| @@ -330,9 +535,9 @@ class Treelet(GraphKernel): | |||||
| for pattern in patterns[str(i)]: | for pattern in patterns[str(i)]: | ||||
| canonlist = [] | canonlist = [] | ||||
| for idx, node in enumerate(pattern[:-1]): | for idx, node in enumerate(pattern[:-1]): | ||||
| canonlist.append(tuple(G.nodes[node][nl] for nl in self._node_labels)) | |||||
| canonlist.append(tuple(G[node][pattern[idx+1]][el] for el in self._edge_labels)) | |||||
| canonlist.append(tuple(G.nodes[pattern[-1]][nl] for nl in self._node_labels)) | |||||
| canonlist.append(tuple(G.nodes[node][nl] for nl in self.node_labels)) | |||||
| canonlist.append(tuple(G[node][pattern[idx+1]][el] for el in self.edge_labels)) | |||||
| canonlist.append(tuple(G.nodes[pattern[-1]][nl] for nl in self.node_labels)) | |||||
| canonkey_t = canonlist if canonlist < canonlist[::-1] else canonlist[::-1] | canonkey_t = canonlist if canonlist < canonlist[::-1] else canonlist[::-1] | ||||
| treelet.append(tuple([str(i)] + canonkey_t)) | treelet.append(tuple([str(i)] + canonkey_t)) | ||||
| canonkey_l.update(Counter(treelet)) | canonkey_l.update(Counter(treelet)) | ||||
| @@ -343,13 +548,13 @@ class Treelet(GraphKernel): | |||||
| for pattern in patterns[str(i) + 'star']: | for pattern in patterns[str(i) + 'star']: | ||||
| canonlist = [] | canonlist = [] | ||||
| for leaf in pattern[1:]: | for leaf in pattern[1:]: | ||||
| nlabels = tuple(G.nodes[leaf][nl] for nl in self._node_labels) | |||||
| elabels = tuple(G[leaf][pattern[0]][el] for el in self._edge_labels) | |||||
| nlabels = tuple(G.nodes[leaf][nl] for nl in self.node_labels) | |||||
| elabels = tuple(G[leaf][pattern[0]][el] for el in self.edge_labels) | |||||
| canonlist.append(tuple((nlabels, elabels))) | canonlist.append(tuple((nlabels, elabels))) | ||||
| canonlist.sort() | canonlist.sort() | ||||
| canonlist = list(chain.from_iterable(canonlist)) | canonlist = list(chain.from_iterable(canonlist)) | ||||
| canonkey_t = tuple(['d' if i == 5 else str(i * 2)] + | canonkey_t = tuple(['d' if i == 5 else str(i * 2)] + | ||||
| [tuple(G.nodes[pattern[0]][nl] for nl in self._node_labels)] | |||||
| [tuple(G.nodes[pattern[0]][nl] for nl in self.node_labels)] | |||||
| + canonlist) | + canonlist) | ||||
| treelet.append(canonkey_t) | treelet.append(canonkey_t) | ||||
| canonkey_l.update(Counter(treelet)) | canonkey_l.update(Counter(treelet)) | ||||
| @@ -359,17 +564,17 @@ class Treelet(GraphKernel): | |||||
| for pattern in patterns['7']: | for pattern in patterns['7']: | ||||
| canonlist = [] | canonlist = [] | ||||
| for leaf in pattern[1:3]: | for leaf in pattern[1:3]: | ||||
| nlabels = tuple(G.nodes[leaf][nl] for nl in self._node_labels) | |||||
| elabels = tuple(G[leaf][pattern[0]][el] for el in self._edge_labels) | |||||
| nlabels = tuple(G.nodes[leaf][nl] for nl in self.node_labels) | |||||
| elabels = tuple(G[leaf][pattern[0]][el] for el in self.edge_labels) | |||||
| canonlist.append(tuple((nlabels, elabels))) | canonlist.append(tuple((nlabels, elabels))) | ||||
| canonlist.sort() | canonlist.sort() | ||||
| canonlist = list(chain.from_iterable(canonlist)) | canonlist = list(chain.from_iterable(canonlist)) | ||||
| canonkey_t = tuple(['7'] | canonkey_t = tuple(['7'] | ||||
| + [tuple(G.nodes[pattern[0]][nl] for nl in self._node_labels)] + canonlist | |||||
| + [tuple(G.nodes[pattern[3]][nl] for nl in self._node_labels)] | |||||
| + [tuple(G[pattern[3]][pattern[0]][el] for el in self._edge_labels)] | |||||
| + [tuple(G.nodes[pattern[4]][nl] for nl in self._node_labels)] | |||||
| + [tuple(G[pattern[4]][pattern[3]][el] for el in self._edge_labels)]) | |||||
| + [tuple(G.nodes[pattern[0]][nl] for nl in self.node_labels)] + canonlist | |||||
| + [tuple(G.nodes[pattern[3]][nl] for nl in self.node_labels)] | |||||
| + [tuple(G[pattern[3]][pattern[0]][el] for el in self.edge_labels)] | |||||
| + [tuple(G.nodes[pattern[4]][nl] for nl in self.node_labels)] | |||||
| + [tuple(G[pattern[4]][pattern[3]][el] for el in self.edge_labels)]) | |||||
| treelet.append(canonkey_t) | treelet.append(canonkey_t) | ||||
| canonkey_l.update(Counter(treelet)) | canonkey_l.update(Counter(treelet)) | ||||
| @@ -378,38 +583,38 @@ class Treelet(GraphKernel): | |||||
| for pattern in patterns['11']: | for pattern in patterns['11']: | ||||
| canonlist = [] | canonlist = [] | ||||
| for leaf in pattern[1:4]: | for leaf in pattern[1:4]: | ||||
| nlabels = tuple(G.nodes[leaf][nl] for nl in self._node_labels) | |||||
| elabels = tuple(G[leaf][pattern[0]][el] for el in self._edge_labels) | |||||
| nlabels = tuple(G.nodes[leaf][nl] for nl in self.node_labels) | |||||
| elabels = tuple(G[leaf][pattern[0]][el] for el in self.edge_labels) | |||||
| canonlist.append(tuple((nlabels, elabels))) | canonlist.append(tuple((nlabels, elabels))) | ||||
| canonlist.sort() | canonlist.sort() | ||||
| canonlist = list(chain.from_iterable(canonlist)) | canonlist = list(chain.from_iterable(canonlist)) | ||||
| canonkey_t = tuple(['b'] | canonkey_t = tuple(['b'] | ||||
| + [tuple(G.nodes[pattern[0]][nl] for nl in self._node_labels)] + canonlist | |||||
| + [tuple(G.nodes[pattern[4]][nl] for nl in self._node_labels)] | |||||
| + [tuple(G[pattern[4]][pattern[0]][el] for el in self._edge_labels)] | |||||
| + [tuple(G.nodes[pattern[5]][nl] for nl in self._node_labels)] | |||||
| + [tuple(G[pattern[5]][pattern[4]][el] for el in self._edge_labels)]) | |||||
| + [tuple(G.nodes[pattern[0]][nl] for nl in self.node_labels)] + canonlist | |||||
| + [tuple(G.nodes[pattern[4]][nl] for nl in self.node_labels)] | |||||
| + [tuple(G[pattern[4]][pattern[0]][el] for el in self.edge_labels)] | |||||
| + [tuple(G.nodes[pattern[5]][nl] for nl in self.node_labels)] | |||||
| + [tuple(G[pattern[5]][pattern[4]][el] for el in self.edge_labels)]) | |||||
| treelet.append(canonkey_t) | treelet.append(canonkey_t) | ||||
| canonkey_l.update(Counter(treelet)) | canonkey_l.update(Counter(treelet)) | ||||
| # pattern 10 | # pattern 10 | ||||
| treelet = [] | treelet = [] | ||||
| for pattern in patterns['10']: | for pattern in patterns['10']: | ||||
| canonkey4 = [tuple(G.nodes[pattern[5]][nl] for nl in self._node_labels), | |||||
| tuple(G[pattern[5]][pattern[4]][el] for el in self._edge_labels)] | |||||
| canonkey4 = [tuple(G.nodes[pattern[5]][nl] for nl in self.node_labels), | |||||
| tuple(G[pattern[5]][pattern[4]][el] for el in self.edge_labels)] | |||||
| canonlist = [] | canonlist = [] | ||||
| for leaf in pattern[1:3]: | for leaf in pattern[1:3]: | ||||
| nlabels = tuple(G.nodes[leaf][nl] for nl in self._node_labels) | |||||
| elabels = tuple(G[leaf][pattern[0]][el] for el in self._edge_labels) | |||||
| nlabels = tuple(G.nodes[leaf][nl] for nl in self.node_labels) | |||||
| elabels = tuple(G[leaf][pattern[0]][el] for el in self.edge_labels) | |||||
| canonlist.append(tuple((nlabels, elabels))) | canonlist.append(tuple((nlabels, elabels))) | ||||
| canonlist.sort() | canonlist.sort() | ||||
| canonkey0 = list(chain.from_iterable(canonlist)) | canonkey0 = list(chain.from_iterable(canonlist)) | ||||
| canonkey_t = tuple(['a'] | canonkey_t = tuple(['a'] | ||||
| + [tuple(G.nodes[pattern[3]][nl] for nl in self._node_labels)] | |||||
| + [tuple(G.nodes[pattern[4]][nl] for nl in self._node_labels)] | |||||
| + [tuple(G[pattern[4]][pattern[3]][el] for el in self._edge_labels)] | |||||
| + [tuple(G.nodes[pattern[0]][nl] for nl in self._node_labels)] | |||||
| + [tuple(G[pattern[0]][pattern[3]][el] for el in self._edge_labels)] | |||||
| + [tuple(G.nodes[pattern[3]][nl] for nl in self.node_labels)] | |||||
| + [tuple(G.nodes[pattern[4]][nl] for nl in self.node_labels)] | |||||
| + [tuple(G[pattern[4]][pattern[3]][el] for el in self.edge_labels)] | |||||
| + [tuple(G.nodes[pattern[0]][nl] for nl in self.node_labels)] | |||||
| + [tuple(G[pattern[0]][pattern[3]][el] for el in self.edge_labels)] | |||||
| + canonkey4 + canonkey0) | + canonkey4 + canonkey0) | ||||
| treelet.append(canonkey_t) | treelet.append(canonkey_t) | ||||
| canonkey_l.update(Counter(treelet)) | canonkey_l.update(Counter(treelet)) | ||||
| @@ -419,15 +624,15 @@ class Treelet(GraphKernel): | |||||
| for pattern in patterns['12']: | for pattern in patterns['12']: | ||||
| canonlist0 = [] | canonlist0 = [] | ||||
| for leaf in pattern[1:3]: | for leaf in pattern[1:3]: | ||||
| nlabels = tuple(G.nodes[leaf][nl] for nl in self._node_labels) | |||||
| elabels = tuple(G[leaf][pattern[0]][el] for el in self._edge_labels) | |||||
| nlabels = tuple(G.nodes[leaf][nl] for nl in self.node_labels) | |||||
| elabels = tuple(G[leaf][pattern[0]][el] for el in self.edge_labels) | |||||
| canonlist0.append(tuple((nlabels, elabels))) | canonlist0.append(tuple((nlabels, elabels))) | ||||
| canonlist0.sort() | canonlist0.sort() | ||||
| canonlist0 = list(chain.from_iterable(canonlist0)) | canonlist0 = list(chain.from_iterable(canonlist0)) | ||||
| canonlist3 = [] | canonlist3 = [] | ||||
| for leaf in pattern[4:6]: | for leaf in pattern[4:6]: | ||||
| nlabels = tuple(G.nodes[leaf][nl] for nl in self._node_labels) | |||||
| elabels = tuple(G[leaf][pattern[3]][el] for el in self._edge_labels) | |||||
| nlabels = tuple(G.nodes[leaf][nl] for nl in self.node_labels) | |||||
| elabels = tuple(G[leaf][pattern[3]][el] for el in self.edge_labels) | |||||
| canonlist3.append(tuple((nlabels, elabels))) | canonlist3.append(tuple((nlabels, elabels))) | ||||
| canonlist3.sort() | canonlist3.sort() | ||||
| canonlist3 = list(chain.from_iterable(canonlist3)) | canonlist3 = list(chain.from_iterable(canonlist3)) | ||||
| @@ -435,14 +640,14 @@ class Treelet(GraphKernel): | |||||
| # 2 possible keys can be generated from the 2 nodes with extended label 3, | # 2 possible keys can be generated from the 2 nodes with extended label 3, | ||||
| # select the one with lower lexicographic order. | # select the one with lower lexicographic order. | ||||
| canonkey_t1 = tuple(['c'] | canonkey_t1 = tuple(['c'] | ||||
| + [tuple(G.nodes[pattern[0]][nl] for nl in self._node_labels)] + canonlist0 | |||||
| + [tuple(G.nodes[pattern[3]][nl] for nl in self._node_labels)] | |||||
| + [tuple(G[pattern[3]][pattern[0]][el] for el in self._edge_labels)] | |||||
| + [tuple(G.nodes[pattern[0]][nl] for nl in self.node_labels)] + canonlist0 | |||||
| + [tuple(G.nodes[pattern[3]][nl] for nl in self.node_labels)] | |||||
| + [tuple(G[pattern[3]][pattern[0]][el] for el in self.edge_labels)] | |||||
| + canonlist3) | + canonlist3) | ||||
| canonkey_t2 = tuple(['c'] | canonkey_t2 = tuple(['c'] | ||||
| + [tuple(G.nodes[pattern[3]][nl] for nl in self._node_labels)] + canonlist3 | |||||
| + [tuple(G.nodes[pattern[0]][nl] for nl in self._node_labels)] | |||||
| + [tuple(G[pattern[0]][pattern[3]][el] for el in self._edge_labels)] | |||||
| + [tuple(G.nodes[pattern[3]][nl] for nl in self.node_labels)] + canonlist3 | |||||
| + [tuple(G.nodes[pattern[0]][nl] for nl in self.node_labels)] | |||||
| + [tuple(G[pattern[0]][pattern[3]][el] for el in self.edge_labels)] | |||||
| + canonlist0) | + canonlist0) | ||||
| treelet.append(canonkey_t1 if canonkey_t1 < canonkey_t2 else canonkey_t2) | treelet.append(canonkey_t1 if canonkey_t1 < canonkey_t2 else canonkey_t2) | ||||
| canonkey_l.update(Counter(treelet)) | canonkey_l.update(Counter(treelet)) | ||||
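For the symmetric pattern above, the patch generates the key in both traversal directions (`canonkey_t1`, `canonkey_t2`) and keeps the lexicographically smaller one, so the two orientations of the same treelet collapse to a single canonical key. A hedged sketch of that canonicalization, with hypothetical helper and argument names:

def symmetric_canonkey(G, u, v, list_u, list_v, node_labels, edge_labels):
    # Key starting at u and its mirror starting at v; Python tuples compare
    # lexicographically, so min() yields a direction-independent key.
    nu = tuple(G.nodes[u][nl] for nl in node_labels)
    nv = tuple(G.nodes[v][nl] for nl in node_labels)
    e = tuple(G[u][v][el] for el in edge_labels)
    key_uv = tuple(['c', nu] + list_u + [nv, e] + list_v)
    key_vu = tuple(['c', nv] + list_v + [nu, e] + list_u)
    return min(key_uv, key_vu)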
| @@ -450,24 +655,24 @@ class Treelet(GraphKernel): | |||||
| # pattern 9 | # pattern 9 | ||||
| treelet = [] | treelet = [] | ||||
| for pattern in patterns['9']: | for pattern in patterns['9']: | ||||
| canonkey2 = [tuple(G.nodes[pattern[4]][nl] for nl in self._node_labels), | |||||
| tuple(G[pattern[4]][pattern[2]][el] for el in self._edge_labels)] | |||||
| canonkey3 = [tuple(G.nodes[pattern[5]][nl] for nl in self._node_labels), | |||||
| tuple(G[pattern[5]][pattern[3]][el] for el in self._edge_labels)] | |||||
| prekey2 = [tuple(G.nodes[pattern[2]][nl] for nl in self._node_labels), | |||||
| tuple(G[pattern[2]][pattern[0]][el] for el in self._edge_labels)] | |||||
| prekey3 = [tuple(G.nodes[pattern[3]][nl] for nl in self._node_labels), | |||||
| tuple(G[pattern[3]][pattern[0]][el] for el in self._edge_labels)] | |||||
| canonkey2 = [tuple(G.nodes[pattern[4]][nl] for nl in self.node_labels), | |||||
| tuple(G[pattern[4]][pattern[2]][el] for el in self.edge_labels)] | |||||
| canonkey3 = [tuple(G.nodes[pattern[5]][nl] for nl in self.node_labels), | |||||
| tuple(G[pattern[5]][pattern[3]][el] for el in self.edge_labels)] | |||||
| prekey2 = [tuple(G.nodes[pattern[2]][nl] for nl in self.node_labels), | |||||
| tuple(G[pattern[2]][pattern[0]][el] for el in self.edge_labels)] | |||||
| prekey3 = [tuple(G.nodes[pattern[3]][nl] for nl in self.node_labels), | |||||
| tuple(G[pattern[3]][pattern[0]][el] for el in self.edge_labels)] | |||||
| if prekey2 + canonkey2 < prekey3 + canonkey3: | if prekey2 + canonkey2 < prekey3 + canonkey3: | ||||
| canonkey_t = [tuple(G.nodes[pattern[1]][nl] for nl in self._node_labels)] \ | |||||
| + [tuple(G[pattern[1]][pattern[0]][el] for el in self._edge_labels)] \ | |||||
| canonkey_t = [tuple(G.nodes[pattern[1]][nl] for nl in self.node_labels)] \ | |||||
| + [tuple(G[pattern[1]][pattern[0]][el] for el in self.edge_labels)] \ | |||||
| + prekey2 + prekey3 + canonkey2 + canonkey3 | + prekey2 + prekey3 + canonkey2 + canonkey3 | ||||
| else: | else: | ||||
| canonkey_t = [tuple(G.nodes[pattern[1]][nl] for nl in self._node_labels)] \ | |||||
| + [tuple(G[pattern[1]][pattern[0]][el] for el in self._edge_labels)] \ | |||||
| canonkey_t = [tuple(G.nodes[pattern[1]][nl] for nl in self.node_labels)] \ | |||||
| + [tuple(G[pattern[1]][pattern[0]][el] for el in self.edge_labels)] \ | |||||
| + prekey3 + prekey2 + canonkey3 + canonkey2 | + prekey3 + prekey2 + canonkey3 + canonkey2 | ||||
| treelet.append(tuple(['9'] | treelet.append(tuple(['9'] | ||||
| + [tuple(G.nodes[pattern[0]][nl] for nl in self._node_labels)] | |||||
| + [tuple(G.nodes[pattern[0]][nl] for nl in self.node_labels)] | |||||
| + canonkey_t)) | + canonkey_t)) | ||||
| canonkey_l.update(Counter(treelet)) | canonkey_l.update(Counter(treelet)) | ||||
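Each `canonkey_l.update(Counter(treelet))` call accumulates the canonical keys into a per-graph Counter, i.e. the graph's treelet spectrum. A kernel value can then be computed over the keys two graphs share; the Gaussian structural sub-kernel below is one common choice for treelet kernels, shown only as an assumed example, not necessarily the sub-kernel this class configures:

from collections import Counter
import numpy as np

def treelet_kernel(canonkeys1: Counter, canonkeys2: Counter, gamma=1.0):
    # Compare occurrence counts only on treelets present in both graphs.
    shared = set(canonkeys1) & set(canonkeys2)
    diffs = np.array([canonkeys1[k] - canonkeys2[k] for k in shared])
    return float(np.sum(np.exp(-gamma * diffs ** 2)))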
| @@ -482,12 +687,33 @@ class Treelet(GraphKernel): | |||||
| return i, self._get_canonkeys(g) | return i, self._get_canonkeys(g) | ||||
| def _add_dummy_labels(self, Gn): | |||||
| if len(self._node_labels) == 0 or (len(self._node_labels) == 1 and self._node_labels[0] == SpecialLabel.DUMMY): | |||||
| for i in range(len(Gn)): | |||||
| nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY) | |||||
| self._node_labels = [SpecialLabel.DUMMY] | |||||
| if len(self._edge_labels) == 0 or (len(self._edge_labels) == 1 and self._edge_labels[0] == SpecialLabel.DUMMY): | |||||
| for i in range(len(Gn)): | |||||
| nx.set_edge_attributes(Gn[i], '0', SpecialLabel.DUMMY) | |||||
| self._edge_labels = [SpecialLabel.DUMMY] | |||||
| def _add_dummy_labels(self, Gn=None): | |||||
| def _add_dummy(Gn): | |||||
| if len(self.node_labels) == 0 or (len(self.node_labels) == 1 and self.node_labels[0] == SpecialLabel.DUMMY): | |||||
| for i in range(len(Gn)): | |||||
| nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY) | |||||
| self.node_labels = [SpecialLabel.DUMMY] | |||||
| if len(self.edge_labels) == 0 or (len(self.edge_labels) == 1 and self.edge_labels[0] == SpecialLabel.DUMMY): | |||||
| for i in range(len(Gn)): | |||||
| nx.set_edge_attributes(Gn[i], '0', SpecialLabel.DUMMY) | |||||
| self.edge_labels = [SpecialLabel.DUMMY] | |||||
| if Gn is None or Gn is self._graphs: | |||||
| # Add dummy labels for the copy of self._graphs. | |||||
| try: | |||||
| check_is_fitted(self, ['_dummy_labels_considered']) | |||||
| if not self._dummy_labels_considered: | |||||
| Gn = self._graphs # @todo: ?[g.copy() for g in self._graphs] | |||||
| _add_dummy(Gn) | |||||
| self._graphs = Gn | |||||
| self._dummy_labels_considered = True | |||||
| except NotFittedError: | |||||
| Gn = self._graphs # @todo: ?[g.copy() for g in self._graphs] | |||||
| _add_dummy(Gn) | |||||
| self._graphs = Gn | |||||
| self._dummy_labels_considered = True | |||||
| else: | |||||
| # Add dummy labels for the input. | |||||
| _add_dummy(Gn) | |||||
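The rewritten `_add_dummy_labels` guards against applying dummy labels to `self._graphs` twice across fit/transform calls. Both the fitted and not-yet-fitted branches end up doing the same work, so the guard can be stated more compactly; a sketch of that pattern, assuming scikit-learn's `check_is_fitted`/`NotFittedError` (which the patch itself appears to use) and a hypothetical free-standing helper:

from sklearn.exceptions import NotFittedError
from sklearn.utils.validation import check_is_fitted

def ensure_dummy_labels_once(estimator, add_dummy):
    # Treat "never fitted" and "fitted but not yet labeled" the same way.
    try:
        check_is_fitted(estimator, ['_dummy_labels_considered'])
        done = estimator._dummy_labels_considered
    except NotFittedError:
        done = False
    if not done:
        add_dummy(estimator._graphs)  # the patch's @todo asks whether a copy is safer
        estimator._dummy_labels_considered = True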
| @@ -33,7 +33,7 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel. | |||||
| def _compute_gm_series(self): | def _compute_gm_series(self): | ||||
| # if self._verbose >= 2: | |||||
| # if self.verbose >= 2: | |||||
| # import warnings | # import warnings | ||||
| # warnings.warn('A part of the computation is parallelized.') | # warnings.warn('A part of the computation is parallelized.') | ||||
| @@ -74,17 +74,17 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel. | |||||
| G_gn = gn_toshare | G_gn = gn_toshare | ||||
| do_fun = self._wrapper_pairwise | do_fun = self._wrapper_pairwise | ||||
| parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker, | parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker, | ||||
| glbv=(self._graphs,), n_jobs=self._n_jobs, verbose=self._verbose) | |||||
| glbv=(self._graphs,), n_jobs=self.n_jobs, verbose=self.verbose) | |||||
| return gram_matrix | return gram_matrix | ||||
| else: | else: | ||||
| if self._verbose >= 2: | |||||
| if self.verbose >= 2: | |||||
| import warnings | import warnings | ||||
| warnings.warn('This base kernel is not parallelized. The serial computation is used instead.') | warnings.warn('This base kernel is not parallelized. The serial computation is used instead.') | ||||
| return self._compute_gm_series() | return self._compute_gm_series() | ||||
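The branch above implements a warn-and-fall-back dispatch: when a parallel run is requested but the configured base kernel has no parallel path, it warns (at verbosity >= 2) and reuses the serial routine rather than failing. A minimal sketch of the shape of that dispatch; `_base_kernel_parallelizable` is an invented attribute for illustration, not part of this class:

import warnings

def compute_gm(self):
    if self.parallel == 'imap_unordered' and not self._base_kernel_parallelizable:
        if self.verbose >= 2:
            warnings.warn('This base kernel is not parallelized. '
                          'The serial computation is used instead.')
        return self._compute_gm_series()
    return self._compute_gm_parallel()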
| def _compute_kernel_list_series(self, g1, g_list): # @todo: this should be better. | def _compute_kernel_list_series(self, g1, g_list): # @todo: this should be better. | ||||
| # if self._verbose >= 2: | |||||
| # if self.verbose >= 2: | |||||
| # import warnings | # import warnings | ||||
| # warnings.warn('A part of the computation is parallelized.') | # warnings.warn('A part of the computation is parallelized.') | ||||
| @@ -126,10 +126,10 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel. | |||||
| len_itr = len(g_list) | len_itr = len(g_list) | ||||
| parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr, | parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr, | ||||
| init_worker=init_worker, glbv=(g1, g_list), method='imap_unordered', | init_worker=init_worker, glbv=(g1, g_list), method='imap_unordered', | ||||
| n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose) | |||||
| n_jobs=self.n_jobs, itr_desc='Computing kernels', verbose=self.verbose) | |||||
| return kernel_list | return kernel_list | ||||
| else: | else: | ||||
| if self._verbose >= 2: | |||||
| if self.verbose >= 2: | |||||
| import warnings | import warnings | ||||
| warnings.warn('This base kernel is not parallelized. The serial computation is used instead.') | warnings.warn('This base kernel is not parallelized. The serial computation is used instead.') | ||||
| return self._compute_kernel_list_series(g1, g_list) | return self._compute_kernel_list_series(g1, g_list) | ||||
| @@ -332,15 +332,15 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel. | |||||
| def _compute_gram_itr(self, gram_matrix, all_num_of_each_label): | def _compute_gram_itr(self, gram_matrix, all_num_of_each_label): | ||||
| """Compute Gram matrix using the base kernel. | """Compute Gram matrix using the base kernel. | ||||
| """ | """ | ||||
| # if self._parallel == 'imap_unordered': | |||||
| # if self.parallel == 'imap_unordered': | |||||
| # # compute kernels. | # # compute kernels. | ||||
| # def init_worker(alllabels_toshare): | # def init_worker(alllabels_toshare): | ||||
| # global G_alllabels | # global G_alllabels | ||||
| # G_alllabels = alllabels_toshare | # G_alllabels = alllabels_toshare | ||||
| # do_partial = partial(self._wrapper_compute_subtree_kernel, gram_matrix) | # do_partial = partial(self._wrapper_compute_subtree_kernel, gram_matrix) | ||||
| # parallel_gm(do_partial, gram_matrix, Gn, init_worker=init_worker, | # parallel_gm(do_partial, gram_matrix, Gn, init_worker=init_worker, | ||||
| # glbv=(all_num_of_each_label,), n_jobs=self._n_jobs, verbose=self._verbose) | |||||
| # elif self._parallel is None: | |||||
| # glbv=(all_num_of_each_label,), n_jobs=self.n_jobs, verbose=self.verbose) | |||||
| # elif self.parallel is None: | |||||
| for i in range(len(gram_matrix)): | for i in range(len(gram_matrix)): | ||||
| for j in range(i, len(gram_matrix)): | for j in range(i, len(gram_matrix)): | ||||
| gram_matrix[i][j] = self._compute_subtree_kernel(all_num_of_each_label[i], | gram_matrix[i][j] = self._compute_subtree_kernel(all_num_of_each_label[i], | ||||