| @@ -96,7 +96,10 @@ def untilhpathkernel(*args, | |||||
| pool.join() | pool.join() | ||||
| # for g in Gn: | # for g in Gn: | ||||
| # find_all_path_as_trie(g, depth, ds_attrs, node_label, edge_label) | |||||
| # if compute_method == 'trie': | |||||
| # find_all_path_as_trie(g, depth, ds_attrs, node_label, edge_label) | |||||
| # else: | |||||
| # find_all_paths_until_length(g, depth, ds_attrs, node_label, edge_label) | |||||
| ## size = sys.getsizeof(all_paths) | ## size = sys.getsizeof(all_paths) | ||||
| ## for item in all_paths: | ## for item in all_paths: | ||||
| @@ -112,7 +115,7 @@ def untilhpathkernel(*args, | |||||
| ## desc='getting paths', file=sys.stdout): | ## desc='getting paths', file=sys.stdout): | ||||
| ## all_paths[i] = ps | ## all_paths[i] = ps | ||||
| ## print(time.time() - ttt) | ## print(time.time() - ttt) | ||||
| # | |||||
| if compute_method == 'trie': | if compute_method == 'trie': | ||||
| def init_worker(trie_toshare): | def init_worker(trie_toshare): | ||||
| global G_trie | global G_trie | ||||
| @@ -127,24 +130,20 @@ def untilhpathkernel(*args, | |||||
| do_partial = partial(wrapper_uhpath_do_naive, k_func) | do_partial = partial(wrapper_uhpath_do_naive, k_func) | ||||
| parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, | parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, | ||||
| glbv=(all_paths,), n_jobs=n_jobs) | glbv=(all_paths,), n_jobs=n_jobs) | ||||
| # | |||||
| # | |||||
| ## # ---- direct running, normally use single CPU core. ---- | |||||
| ## all_paths = [ | |||||
| ## find_all_paths_until_length( | |||||
| ## Gn[i], | |||||
| ## depth, | |||||
| ## ds_attrs, | |||||
| ## node_label=node_label, | |||||
| ## edge_label=edge_label) for i in tqdm( | |||||
| ## range(0, len(Gn)), desc='getting paths', file=sys.stdout) | |||||
| ## ] | |||||
| ## | |||||
| # if compute_method == 'trie': | |||||
| # # build generalized suffix tree of sets of paths for each graph. | |||||
| ## all_gstree = [paths2GSuffixTree(all_paths[i]) for i in tqdm( | |||||
| ## range(0, len(Gn)), desc='getting generalized suffix trees', file=sys.stdout)] | |||||
| # # ---- direct running, normally use single CPU core. ---- | |||||
| # all_paths = [ | |||||
| # find_all_paths_until_length( | |||||
| # Gn[i], | |||||
| # depth, | |||||
| # ds_attrs, | |||||
| # node_label=node_label, | |||||
| # edge_label=edge_label) for i in tqdm( | |||||
| # range(0, len(Gn)), desc='getting paths', file=sys.stdout) | |||||
| # ] | |||||
| # | # | ||||
| # if compute_method == 'trie': | |||||
| # pbar = tqdm( | # pbar = tqdm( | ||||
| # total=((len(Gn) + 1) * len(Gn) / 2), | # total=((len(Gn) + 1) * len(Gn) / 2), | ||||
| # desc='calculating kernels', | # desc='calculating kernels', | ||||
| @@ -155,18 +154,18 @@ def untilhpathkernel(*args, | |||||
| # all_paths[j], k_func) | # all_paths[j], k_func) | ||||
| # Kmatrix[j][i] = Kmatrix[i][j] | # Kmatrix[j][i] = Kmatrix[i][j] | ||||
| # pbar.update(1) | # pbar.update(1) | ||||
| ## else: | |||||
| ## pbar = tqdm( | |||||
| ## total=((len(Gn) + 1) * len(Gn) / 2), | |||||
| ## desc='calculating kernels', | |||||
| ## file=sys.stdout) | |||||
| ## for i in range(0, len(Gn)): | |||||
| ## for j in range(i, len(Gn)): | |||||
| ## Kmatrix[i][j] = _untilhpathkernel_do_naive(all_paths[i], all_paths[j], | |||||
| ## k_func) | |||||
| ## Kmatrix[j][i] = Kmatrix[i][j] | |||||
| ## pbar.update(1) | |||||
| # | |||||
| # else: | |||||
| # pbar = tqdm( | |||||
| # total=((len(Gn) + 1) * len(Gn) / 2), | |||||
| # desc='calculating kernels', | |||||
| # file=sys.stdout) | |||||
| # for i in range(0, len(Gn)): | |||||
| # for j in range(i, len(Gn)): | |||||
| # Kmatrix[i][j] = _untilhpathkernel_do_naive(all_paths[i], all_paths[j], | |||||
| # k_func) | |||||
| # Kmatrix[j][i] = Kmatrix[i][j] | |||||
| # pbar.update(1) | |||||
| run_time = time.time() - start_time | run_time = time.time() - start_time | ||||
| print( | print( | ||||
| "\n --- kernel matrix of path kernel up to %d of size %d built in %s seconds ---" | "\n --- kernel matrix of path kernel up to %d of size %d built in %s seconds ---" | ||||
| @@ -197,8 +196,8 @@ def _untilhpathkernel_do_trie(trie1, trie2, k_func): | |||||
| # search is applied. | # search is applied. | ||||
| def traverseTrie1t(root, trie2, setlist, pcurrent=[]): | def traverseTrie1t(root, trie2, setlist, pcurrent=[]): | ||||
| for key, node in root['children'].items(): | for key, node in root['children'].items(): | ||||
| if node['isEndOfWord']: | |||||
| pcurrent.append(key) | |||||
| pcurrent.append(key) | |||||
| if node['isEndOfWord']: | |||||
| setlist[1] += 1 | setlist[1] += 1 | ||||
| count2 = trie2.searchWord(pcurrent) | count2 = trie2.searchWord(pcurrent) | ||||
| if count2 != 0: | if count2 != 0: | ||||
| @@ -215,8 +214,8 @@ def _untilhpathkernel_do_trie(trie1, trie2, k_func): | |||||
| # graph1. Deep-first search is applied. | # graph1. Deep-first search is applied. | ||||
| def traverseTrie2t(root, trie1, setlist, pcurrent=[]): | def traverseTrie2t(root, trie1, setlist, pcurrent=[]): | ||||
| for key, node in root['children'].items(): | for key, node in root['children'].items(): | ||||
| pcurrent.append(key) | |||||
| if node['isEndOfWord']: | if node['isEndOfWord']: | ||||
| pcurrent.append(key) | |||||
| # print(node['count']) | # print(node['count']) | ||||
| count1 = trie1.searchWord(pcurrent) | count1 = trie1.searchWord(pcurrent) | ||||
| if count1 == 0: | if count1 == 0: | ||||
| @@ -242,8 +241,8 @@ def _untilhpathkernel_do_trie(trie1, trie2, k_func): | |||||
| # search is applied. | # search is applied. | ||||
| def traverseTrie1m(root, trie2, sumlist, pcurrent=[]): | def traverseTrie1m(root, trie2, sumlist, pcurrent=[]): | ||||
| for key, node in root['children'].items(): | for key, node in root['children'].items(): | ||||
| pcurrent.append(key) | |||||
| if node['isEndOfWord']: | if node['isEndOfWord']: | ||||
| pcurrent.append(key) | |||||
| # print(node['count']) | # print(node['count']) | ||||
| count1 = node['count'] | count1 = node['count'] | ||||
| count2 = trie2.searchWord(pcurrent) | count2 = trie2.searchWord(pcurrent) | ||||
| @@ -260,8 +259,8 @@ def _untilhpathkernel_do_trie(trie1, trie2, k_func): | |||||
| # graph1. Deep-first search is applied. | # graph1. Deep-first search is applied. | ||||
| def traverseTrie2m(root, trie1, sumlist, pcurrent=[]): | def traverseTrie2m(root, trie1, sumlist, pcurrent=[]): | ||||
| for key, node in root['children'].items(): | for key, node in root['children'].items(): | ||||
| if node['isEndOfWord']: | |||||
| pcurrent.append(key) | |||||
| pcurrent.append(key) | |||||
| if node['isEndOfWord']: | |||||
| # print(node['count']) | # print(node['count']) | ||||
| count1 = trie1.searchWord(pcurrent) | count1 = trie1.searchWord(pcurrent) | ||||
| if count1 == 0: | if count1 == 0: | ||||
| @@ -405,6 +404,7 @@ def find_all_paths_until_length(G, | |||||
| # all_paths.extend(new_paths) | # all_paths.extend(new_paths) | ||||
| # consider labels | # consider labels | ||||
| # print(paths2labelseqs(all_paths, G, ds_attrs, node_label, edge_label)) | |||||
| return paths2labelseqs(all_paths, G, ds_attrs, node_label, edge_label) | return paths2labelseqs(all_paths, G, ds_attrs, node_label, edge_label) | ||||
| @@ -479,7 +479,32 @@ def find_all_path_as_trie(G, | |||||
| for n in G.nodes: | for n in G.nodes: | ||||
| traverseGraph(n, ptrie, length, G, ds_attrs, node_label, edge_label, | traverseGraph(n, ptrie, length, G, ds_attrs, node_label, edge_label, | ||||
| pcurrent=[n]) | pcurrent=[n]) | ||||
| # def traverseGraph(root, all_paths, length, G, ds_attrs, node_label, edge_label, | |||||
| # pcurrent=[]): | |||||
| # if len(pcurrent) < length + 1: | |||||
| # for neighbor in G[root]: | |||||
| # if neighbor not in pcurrent: | |||||
| # pcurrent.append(neighbor) | |||||
| # plstr = paths2labelseqs([pcurrent], G, ds_attrs, | |||||
| # node_label, edge_label) | |||||
| # all_paths.append(pcurrent[:]) | |||||
| # traverseGraph(neighbor, all_paths, length, G, ds_attrs, | |||||
| # node_label, edge_label, pcurrent) | |||||
| # del pcurrent[-1] | |||||
| # | |||||
| # | |||||
| # path_l = [[n] for n in G.nodes] # paths of length l | |||||
| # all_paths = path_l[:] | |||||
| # path_l_str = paths2labelseqs(path_l, G, ds_attrs, node_label, edge_label) | |||||
| ## for p in path_l_str: | |||||
| ## ptrie.insertWord(p) | |||||
| # for n in G.nodes: | |||||
| # traverseGraph(n, all_paths, length, G, ds_attrs, node_label, edge_label, | |||||
| # pcurrent=[n]) | |||||
| # print(ptrie.root) | |||||
| return ptrie | return ptrie | ||||