@@ -0,0 +1,17 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Mar 19 18:17:38 2020

@author: ljia
"""
from enum import Enum, auto


class AlgorithmState(Enum):
    """Specifies the state of an algorithm.
    """
    CALLED = auto()  # The algorithm has been called.
    INITIALIZED = auto()  # The algorithm has been initialized.
    CONVERGED = auto()  # The algorithm has converged.
    TERMINATED = auto()  # The algorithm has terminated.
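A minimal usage sketch of the enum above (illustrative only, not part of the diff). Note that the members must be created with `auto()`; bare `auto` would bind every member to the same built-in function and collapse them into aliases, which is why the calls are added above:

    from gklearn.preimage.common_types import AlgorithmState

    state = AlgorithmState.TERMINATED
    if state == AlgorithmState.TERMINATED:  # distinct members compare correctly
        state = AlgorithmState.INITIALIZED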
@@ -0,0 +1,134 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Mar 20 11:09:04 2020

@author: ljia
"""
import re


def convert_function(cpp_code):
#    f_cpp = open('cpp_code.cpp', 'r')
##    f_cpp = open('cpp_ext/src/median_graph_estimator.ipp', 'r')
#    cpp_code = f_cpp.read()
    python_code = cpp_code.replace('else if (', 'elif ')
    python_code = python_code.replace('if (', 'if ')
    python_code = python_code.replace('else {', 'else:')
    python_code = python_code.replace(') {', ':')
    python_code = python_code.replace(';\n', '\n')
    python_code = re.sub('\n(.*)}\n', '\n\n', python_code)
#    python_code = python_code.replace('}\n', '')
    python_code = python_code.replace('throw', 'raise')
    python_code = python_code.replace('error', 'Exception')
    python_code = python_code.replace('"', '\'')
    python_code = python_code.replace('\\\'', '"')
    python_code = python_code.replace('try {', 'try:')
    python_code = python_code.replace('true', 'True')
    python_code = python_code.replace('false', 'False')
    python_code = python_code.replace('catch (...', 'except')
#    python_code = re.sub('std::string\(\'(.*)\'\)', '$1', python_code)

    return python_code


#    python_code = python_code.replace('}\n', '')
#    python_code = python_code.replace('option.first', 'opt_name')
#    python_code = python_code.replace('option.second', 'opt_val')
#    python_code = python_code.replace('ged::Error', 'Exception')
#    python_code = python_code.replace('std::string(\'Invalid argument "\')', '\'Invalid argument "\'')

#    f_cpp.close()
#    f_python = open('python_code.py', 'w')
#    f_python.write(python_code)
#    f_python.close()
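
# Illustrative example (an assumption, not part of the original file): applying
# convert_function() to a small C++ snippet yields roughly Pythonic output.
#
#     convert_function('if (converged) {\n\treturn true;\n}\n')
#     # -> 'if converged:\n\treturn True\n\n'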
def convert_function_comment(cpp_fun_cmt, param_types):
    cpp_fun_cmt = cpp_fun_cmt.replace('\t', '')
    cpp_fun_cmt = cpp_fun_cmt.replace('\n * ', ' ')

    # Split the input comment at its keywords.
    param_split = None
    note = None
    cmt_split = cpp_fun_cmt.split('@brief')[1]
    brief = cmt_split
    if '@param' in cmt_split:
        cmt_split = cmt_split.split('@param')
        brief = cmt_split[0]
        param_split = cmt_split[1:]
    if '@note' in cmt_split[-1]:
        note_split = cmt_split[-1].split('@note')
        if param_split is not None:
            param_split.pop()
            param_split.append(note_split[0])
        else:
            brief = note_split[0]
        note = note_split[1]

    # Get parameters.
    if param_split is not None:
        for idx, param in enumerate(param_split):
            _, param_name, param_desc = param.split(' ', 2)
            param_name = function_comment_strip(param_name, ' *\n\t/')
            param_desc = function_comment_strip(param_desc, ' *\n\t/')
            param_split[idx] = (param_name, param_desc)

    # Strip comments.
    brief = function_comment_strip(brief, ' *\n\t/')
    if note is not None:
        note = function_comment_strip(note, ' *\n\t/')

    # Construct the Python function comment.
    python_fun_cmt = '"""'
    python_fun_cmt += brief + '\n'
    if param_split is not None and len(param_split) > 0:
        python_fun_cmt += '\nParameters\n----------'
        for idx, param in enumerate(param_split):
            python_fun_cmt += '\n' + param[0] + ' : ' + param_types[idx]
            python_fun_cmt += '\n\t' + param[1] + '\n'
    if note is not None:
        python_fun_cmt += '\nNote\n----\n' + note + '\n'
    python_fun_cmt += '"""'

    return python_fun_cmt


def function_comment_strip(comment, bad_chars):
    head_removed, tail_removed = False, False
    while not head_removed or not tail_removed:
        if comment[0] in bad_chars:
            comment = comment[1:]
            head_removed = False
        else:
            head_removed = True
        if comment[-1] in bad_chars:
            comment = comment[:-1]
            tail_removed = False
        else:
            tail_removed = True

    return comment
if __name__ == '__main__':
#    python_code = convert_function("""
#    if (print_to_stdout_ == 2) {
#        std::cout << "\n===========================================================\n";
#        std::cout << "Block gradient descent for initial median " << median_pos + 1 << " of " << medians.size() << ".\n";
#        std::cout << "-----------------------------------------------------------\n";
#    }
#    """)

    python_fun_cmt = convert_function_comment("""
    /*!
     * @brief Returns the sum of distances.
     * @param[in] state The state of the estimator.
     * @return The sum of distances of the median when the estimator was in the state @p state during the last call to run().
     */
    """, ['string', 'string'])
@@ -260,10 +260,29 @@ def update_costs(nb_cost_mat, dis_k_vec, dataset='monoterpenoides',
         nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]]
         x = cp.Variable(nb_cost_mat_new.shape[1])
         cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
-        constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])],
+        constraints = [x >= [0.001 for i in range(nb_cost_mat_new.shape[1])],
                        np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0]
         prob = cp.Problem(cp.Minimize(cost_fun), constraints)
-        prob.solve()
+        try:
+            prob.solve(verbose=True)
+        except MemoryError as error0:
+            print('\nUsing solver "OSQP" caused a memory error.')
+            print('The original error message is\n', error0)
+            print('Solver status: ', prob.status)
+            print('Trying solver "CVXOPT" instead...\n')
+            try:
+                prob.solve(solver=cp.CVXOPT, verbose=True)
+            except Exception as error1:
+                print('\nAn error occurred when using solver "CVXOPT".')
+                print('The original error message is\n', error1)
+                print('Solver status: ', prob.status)
+                print('Trying solver "MOSEK" instead. Note that this solver is commercial and a license is required.\n')
+                prob.solve(solver=cp.MOSEK, verbose=True)
+            else:
+                print('Solver status: ', prob.status)
+        else:
+            print('Solver status: ', prob.status)
+        print()
         edit_costs_new = x.value
         residual = np.sqrt(prob.value)
     elif rw_constraints == '2constraints':
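If the default solver is expected to struggle, cvxpy can also report which alternatives are installed before committing to the fallback chain above. A minimal sketch (illustrative; it uses only the public cvxpy API, nothing from this diff):

    import cvxpy as cp

    print(cp.installed_solvers())  # e.g. ['CVXOPT', 'ECOS', 'OSQP', 'SCS']
    # Pick a preferred solver only if it is actually available.
    solver = cp.CVXOPT if 'CVXOPT' in cp.installed_solvers() else None
    # prob.solve(solver=solver, verbose=True)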
@@ -14,42 +14,13 @@ from multiprocessing import Pool
 from functools import partial
 #from gedlibpy_linlin import librariesImport, gedlibpy
-from libs import *
+from gklearn.gedlib import librariesImport, gedlibpy


 def GED(g1, g2, dataset='monoterpenoides', lib='gedlibpy', cost='CHEM_1', method='IPFP',
         edit_cost_constant=[], algo_options='', stabilizer='min', repeat=50):
     """
     Compute GED for 2 graphs.
     """
-    def convertGraph(G, cost):
-        """Convert a graph to the proper NetworkX format that can be
-        recognized by library gedlibpy.
-        """
-        G_new = nx.Graph()
-        if cost == 'LETTER' or cost == 'LETTER2':
-            for nd, attrs in G.nodes(data=True):
-                G_new.add_node(str(nd), x=str(attrs['attributes'][0]),
-                               y=str(attrs['attributes'][1]))
-            for nd1, nd2, attrs in G.edges(data=True):
-                G_new.add_edge(str(nd1), str(nd2))
-        elif cost == 'NON_SYMBOLIC':
-            for nd, attrs in G.nodes(data=True):
-                G_new.add_node(str(nd))
-                for a_name in G.graph['node_attrs']:
-                    G_new.nodes[str(nd)][a_name] = str(attrs[a_name])
-            for nd1, nd2, attrs in G.edges(data=True):
-                G_new.add_edge(str(nd1), str(nd2))
-                for a_name in G.graph['edge_attrs']:
-                    G_new.edges[str(nd1), str(nd2)][a_name] = str(attrs[a_name])
-        else:
-            for nd, attrs in G.nodes(data=True):
-                G_new.add_node(str(nd), chem=attrs['atom'])
-            for nd1, nd2, attrs in G.edges(data=True):
-                G_new.add_edge(str(nd1), str(nd2), valence=attrs['bond_type'])
-#                G_new.add_edge(str(nd1), str(nd2))
-        return G_new

 #    dataset = dataset.lower()
@@ -178,6 +149,36 @@ def GED(g1, g2, dataset='monoterpenoides', lib='gedlibpy', cost='CHEM_1', method
     return dis, pi_forward, pi_backward


+def convertGraph(G, cost):
+    """Convert a graph into the NetworkX format that is
+    recognized by the library gedlibpy.
+    """
+    G_new = nx.Graph()
+    if cost == 'LETTER' or cost == 'LETTER2':
+        for nd, attrs in G.nodes(data=True):
+            G_new.add_node(str(nd), x=str(attrs['attributes'][0]),
+                           y=str(attrs['attributes'][1]))
+        for nd1, nd2, attrs in G.edges(data=True):
+            G_new.add_edge(str(nd1), str(nd2))
+    elif cost == 'NON_SYMBOLIC':
+        for nd, attrs in G.nodes(data=True):
+            G_new.add_node(str(nd))
+            for a_name in G.graph['node_attrs']:
+                G_new.nodes[str(nd)][a_name] = str(attrs[a_name])
+        for nd1, nd2, attrs in G.edges(data=True):
+            G_new.add_edge(str(nd1), str(nd2))
+            for a_name in G.graph['edge_attrs']:
+                G_new.edges[str(nd1), str(nd2)][a_name] = str(attrs[a_name])
+    else:
+        for nd, attrs in G.nodes(data=True):
+            G_new.add_node(str(nd), chem=attrs['atom'])
+        for nd1, nd2, attrs in G.edges(data=True):
+            G_new.add_edge(str(nd1), str(nd2), valence=attrs['bond_type'])
+#            G_new.add_edge(str(nd1), str(nd2))
+    return G_new

 def GED_n(Gn, lib='gedlibpy', cost='CHEM_1', method='IPFP',
           edit_cost_constant=[], stabilizer='min', repeat=50):
     """
@@ -0,0 +1,826 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Mar 16 18:04:55 2020

@author: ljia
"""
import numpy as np
from gklearn.preimage.common_types import AlgorithmState
from gklearn.preimage import misc
from gklearn.preimage.timer import Timer
from gklearn.utils.utils import graph_isIdentical
import time
from tqdm import tqdm
import sys
import networkx as nx


class MedianGraphEstimator(object):

    def __init__(self, ged_env, constant_node_costs):
        """Constructor.

        Parameters
        ----------
        ged_env : gklearn.gedlib.gedlibpy.GEDEnv
            Initialized GED environment. The edit costs must be set by the user.

        constant_node_costs : Boolean
            Set to True if the node relabeling costs are constant.
        """
        self.__ged_env = ged_env
        self.__init_method = 'BRANCH_FAST'
        self.__init_options = ''
        self.__descent_method = 'BRANCH_FAST'
        self.__descent_options = ''
        self.__refine_method = 'IPFP'
        self.__refine_options = ''
        self.__constant_node_costs = constant_node_costs
        self.__labeled_nodes = (ged_env.get_num_node_labels() > 1)
        self.__node_del_cost = ged_env.get_node_del_cost(ged_env.get_node_label(1))
        self.__node_ins_cost = ged_env.get_node_ins_cost(ged_env.get_node_label(1))
        self.__labeled_edges = (ged_env.get_num_edge_labels() > 1)
        self.__edge_del_cost = ged_env.get_edge_del_cost(ged_env.get_edge_label(1))
        self.__edge_ins_cost = ged_env.get_edge_ins_cost(ged_env.get_edge_label(1))
        self.__init_type = 'RANDOM'
        self.__num_random_inits = 10
        self.__desired_num_random_inits = 10
        self.__use_real_randomness = True
        self.__seed = 0
        self.__refine = True
        self.__time_limit_in_sec = 0
        self.__epsilon = 0.0001
        self.__max_itrs = 100
        self.__max_itrs_without_update = 3
        self.__num_inits_increase_order = 10
        self.__init_type_increase_order = 'K-MEANS++'
        self.__max_itrs_increase_order = 10
        self.__print_to_stdout = 2
        self.__median_id = np.inf  # @todo: check
        self.__median_node_id_prefix = ''  # @todo: check
        self.__node_maps_from_median = {}
        self.__sum_of_distances = 0
        self.__best_init_sum_of_distances = np.inf
        self.__converged_sum_of_distances = np.inf
        self.__runtime = None
        self.__runtime_initialized = None
        self.__runtime_converged = None
        self.__itrs = []  # @todo: check: {} ?
        self.__num_decrease_order = 0
        self.__num_increase_order = 0
        self.__num_converged_descents = 0
        self.__state = AlgorithmState.TERMINATED

        if ged_env is None:
            raise Exception('The GED environment pointer passed to the constructor of MedianGraphEstimator is null.')
        elif not ged_env.is_initialized():
            raise Exception('The GED environment is uninitialized. Call gedlibpy.GEDEnv.init() before passing it to the constructor of MedianGraphEstimator.')

    def set_options(self, options):
        """Sets the options of the estimator.

        Parameters
        ----------
        options : string
            String that specifies with which options to run the estimator.
        """
        self.__set_default_options()
        options_map = misc.options_string_to_options_map(options)
        for opt_name, opt_val in options_map.items():
            if opt_name == 'init-type':
                self.__init_type = opt_val
                if opt_val != 'MEDOID' and opt_val != 'RANDOM' and opt_val != 'MIN' and opt_val != 'MAX' and opt_val != 'MEAN':
                    raise Exception('Invalid argument ' + opt_val + ' for option init-type. Usage: options = "[--init-type RANDOM|MEDOID|EMPTY|MIN|MAX|MEAN] [...]"')
            elif opt_name == 'random-inits':
                try:
                    self.__num_random_inits = int(opt_val)
                    self.__desired_num_random_inits = self.__num_random_inits
                except:
                    raise Exception('Invalid argument "' + opt_val + '" for option random-inits. Usage: options = "[--random-inits <convertible to int greater 0>]"')
                if self.__num_random_inits <= 0:
                    raise Exception('Invalid argument "' + opt_val + '" for option random-inits. Usage: options = "[--random-inits <convertible to int greater 0>]"')
            elif opt_name == 'randomness':
                if opt_val == 'PSEUDO':
                    self.__use_real_randomness = False
                elif opt_val == 'REAL':
                    self.__use_real_randomness = True
                else:
                    raise Exception('Invalid argument "' + opt_val + '" for option randomness. Usage: options = "[--randomness REAL|PSEUDO] [...]"')
            elif opt_name == 'stdout':
                if opt_val == '0':
                    self.__print_to_stdout = 0
                elif opt_val == '1':
                    self.__print_to_stdout = 1
                elif opt_val == '2':
                    self.__print_to_stdout = 2
                else:
                    raise Exception('Invalid argument "' + opt_val + '" for option stdout. Usage: options = "[--stdout 0|1|2] [...]"')
            elif opt_name == 'refine':
                if opt_val == 'TRUE':
                    self.__refine = True
                elif opt_val == 'FALSE':
                    self.__refine = False
                else:
                    raise Exception('Invalid argument "' + opt_val + '" for option refine. Usage: options = "[--refine TRUE|FALSE] [...]"')
            elif opt_name == 'time-limit':
                try:
                    self.__time_limit_in_sec = float(opt_val)
                except:
                    raise Exception('Invalid argument "' + opt_val + '" for option time-limit. Usage: options = "[--time-limit <convertible to double>] [...]')
            elif opt_name == 'max-itrs':
                try:
                    self.__max_itrs = int(opt_val)
                except:
                    raise Exception('Invalid argument "' + opt_val + '" for option max-itrs. Usage: options = "[--max-itrs <convertible to int>] [...]')
            elif opt_name == 'max-itrs-without-update':
                try:
                    self.__max_itrs_without_update = int(opt_val)
                except:
                    raise Exception('Invalid argument "' + opt_val + '" for option max-itrs-without-update. Usage: options = "[--max-itrs-without-update <convertible to int>] [...]')
            elif opt_name == 'seed':
                try:
                    self.__seed = int(opt_val)
                except:
                    raise Exception('Invalid argument "' + opt_val + '" for option seed. Usage: options = "[--seed <convertible to int greater equal 0>] [...]')
            elif opt_name == 'epsilon':
                try:
                    self.__epsilon = float(opt_val)
                except:
                    raise Exception('Invalid argument "' + opt_val + '" for option epsilon. Usage: options = "[--epsilon <convertible to double greater 0>] [...]')
                if self.__epsilon <= 0:
                    raise Exception('Invalid argument "' + opt_val + '" for option epsilon. Usage: options = "[--epsilon <convertible to double greater 0>] [...]')
            elif opt_name == 'inits-increase-order':
                try:
                    self.__num_inits_increase_order = int(opt_val)
                except:
                    raise Exception('Invalid argument "' + opt_val + '" for option inits-increase-order. Usage: options = "[--inits-increase-order <convertible to int greater 0>]"')
                if self.__num_inits_increase_order <= 0:
                    raise Exception('Invalid argument "' + opt_val + '" for option inits-increase-order. Usage: options = "[--inits-increase-order <convertible to int greater 0>]"')
            elif opt_name == 'init-type-increase-order':
                self.__init_type_increase_order = opt_val
                if opt_val != 'CLUSTERS' and opt_val != 'K-MEANS++':
                    raise Exception('Invalid argument ' + opt_val + ' for option init-type-increase-order. Usage: options = "[--init-type-increase-order CLUSTERS|K-MEANS++] [...]"')
            elif opt_name == 'max-itrs-increase-order':
                try:
                    self.__max_itrs_increase_order = int(opt_val)
                except:
                    raise Exception('Invalid argument "' + opt_val + '" for option max-itrs-increase-order. Usage: options = "[--max-itrs-increase-order <convertible to int>] [...]')
            else:
                valid_options = '[--init-type <arg>] [--random-inits <arg>] [--randomness <arg>] [--seed <arg>] [--stdout <arg>] '
                valid_options += '[--time-limit <arg>] [--max-itrs <arg>] [--epsilon <arg>] '
                valid_options += '[--inits-increase-order <arg>] [--init-type-increase-order <arg>] [--max-itrs-increase-order <arg>]'
                raise Exception('Invalid option "' + opt_name + '". Usage: options = "' + valid_options + '"')

    def set_init_method(self, init_method, init_options=''):
        """Selects method to be used for computing the initial medoid graph.

        Parameters
        ----------
        init_method : string
            The selected method. Default: ged::Options::GEDMethod::BRANCH_UNIFORM.

        init_options : string
            The options for the selected method. Default: "".

        Notes
        -----
        Has no effect unless "--init-type MEDOID" is passed to set_options().
        """
        self.__init_method = init_method
        self.__init_options = init_options

    def set_descent_method(self, descent_method, descent_options=''):
        """Selects method to be used for block gradient descent.

        Parameters
        ----------
        descent_method : string
            The selected method. Default: ged::Options::GEDMethod::BRANCH_FAST.

        descent_options : string
            The options for the selected method. Default: "".
        """
        self.__descent_method = descent_method
        self.__descent_options = descent_options

    def set_refine_method(self, refine_method, refine_options):
        """Selects method to be used for improving the sum of distances and the node maps for the converged median.

        Parameters
        ----------
        refine_method : string
            The selected method. Default: "IPFP".

        refine_options : string
            The options for the selected method. Default: "".

        Notes
        -----
        Has no effect if "--refine FALSE" is passed to set_options().
        """
        self.__refine_method = refine_method
        self.__refine_options = refine_options

    def run(self, graph_ids, set_median_id, gen_median_id):
        """Computes a generalized median graph.

        Parameters
        ----------
        graph_ids : list[integer]
            The IDs of the graphs for which the median should be computed. Must have been added to the environment passed to the constructor.

        set_median_id : integer
            The ID of the computed set-median. A dummy graph with this ID must have been added to the environment passed to the constructor. Upon termination, the computed median can be obtained via gklearn.gedlib.gedlibpy.GEDEnv.get_graph().

        gen_median_id : integer
            The ID of the computed generalized median. Upon termination, the computed median can be obtained via gklearn.gedlib.gedlibpy.GEDEnv.get_graph().
        """
        # Sanity checks.
        if len(graph_ids) == 0:
            raise Exception('Empty vector of graph IDs, unable to compute median.')
        all_graphs_empty = True
        for graph_id in graph_ids:
            if self.__ged_env.get_graph_num_nodes(graph_id) > 0:
                self.__median_node_id_prefix = self.__ged_env.get_original_node_ids(graph_id)[0]
                all_graphs_empty = False
                break
        if all_graphs_empty:
            raise Exception('All graphs in the collection are empty.')

        # Start timer and record start time.
        start = time.time()
        timer = Timer(self.__time_limit_in_sec)
        self.__median_id = gen_median_id
        self.__state = AlgorithmState.TERMINATED

        # Get ExchangeGraph representations of the input graphs.
        graphs = {}
        for graph_id in graph_ids:
            # @todo: get_nx_graph() function may need to be modified according to the coming code.
            graphs[graph_id] = self.__ged_env.get_nx_graph(graph_id, True, True, False)
#        print(self.__ged_env.get_graph_internal_id(0))
#        print(graphs[0].graph)
#        print(graphs[0].nodes(data=True))
#        print(graphs[0].edges(data=True))
#        print(nx.adjacency_matrix(graphs[0]))

        # Construct initial medians.
        medians = []
        self.__construct_initial_medians(graph_ids, timer, medians)
        end_init = time.time()
        self.__runtime_initialized = end_init - start
#        print(medians[0].graph)
#        print(medians[0].nodes(data=True))
#        print(medians[0].edges(data=True))
#        print(nx.adjacency_matrix(medians[0]))

        # Reset information about iterations and number of times the median decreases and increases.
        self.__itrs = [0] * len(medians)
        self.__num_decrease_order = 0
        self.__num_increase_order = 0
        self.__num_converged_descents = 0

        # Initialize the best median.
        best_sum_of_distances = np.inf
        self.__best_init_sum_of_distances = np.inf
        node_maps_from_best_median = {}

        # Run block gradient descent from all initial medians.
        self.__ged_env.set_method(self.__descent_method, self.__descent_options)
        for median_pos in range(0, len(medians)):

            # Terminate if the timer has expired and at least one SOD has been computed.
            if timer.expired() and median_pos > 0:
                break

            # Print information about current iteration.
            if self.__print_to_stdout == 2:
                print('\n===========================================================')
                print('Block gradient descent for initial median', str(median_pos + 1), 'of', str(len(medians)), '.')
                print('-----------------------------------------------------------')

            # Get reference to the median.
            median = medians[median_pos]

            # Load initial median into the environment.
            self.__ged_env.load_nx_graph(median, gen_median_id)
            self.__ged_env.init(self.__ged_env.get_init_type())

            # Print information about current iteration.
            if self.__print_to_stdout == 2:
                progress = tqdm(desc='\rComputing initial node maps', total=len(graph_ids), file=sys.stdout)

            # Compute node maps and sum of distances for initial median.
            self.__sum_of_distances = 0
            self.__node_maps_from_median.clear()  # @todo
            for graph_id in graph_ids:
                self.__ged_env.run_method(gen_median_id, graph_id)
                self.__node_maps_from_median[graph_id] = self.__ged_env.get_node_map(gen_median_id, graph_id)
#                print(self.__node_maps_from_median[graph_id])
                self.__sum_of_distances += self.__ged_env.get_induced_cost(gen_median_id, graph_id)  # @todo: the C++ implementation of this function in GedLibBind.ipp calls get_node_map() once more, which is not necessary.
#                print(self.__sum_of_distances)
                # Print information about current iteration.
                if self.__print_to_stdout == 2:
                    progress.update(1)

            self.__best_init_sum_of_distances = min(self.__best_init_sum_of_distances, self.__sum_of_distances)
            self.__ged_env.load_nx_graph(median, set_median_id)
#            print(self.__best_init_sum_of_distances)

            # Print information about current iteration.
            if self.__print_to_stdout == 2:
                print('\n')

            # Run block gradient descent from initial median.
            converged = False
            itrs_without_update = 0
            while not self.__termination_criterion_met(converged, timer, self.__itrs[median_pos], itrs_without_update):

                # Print information about current iteration.
                if self.__print_to_stdout == 2:
                    print('\n===========================================================')
                    print('Iteration', str(self.__itrs[median_pos] + 1), 'for initial median', str(median_pos + 1), 'of', str(len(medians)), '.')
                    print('-----------------------------------------------------------')

                # Initialize flags that tell us what happened in the iteration.
                median_modified = False
                node_maps_modified = False
                decreased_order = False
                increased_order = False

                # Update the median.  # @todo!!!!!!!!!!!!!!!!!!!!!!
                median_modified = self.__update_median(graphs, median)
                if not median_modified or self.__itrs[median_pos] == 0:
                    decreased_order = False
                if not decreased_order or self.__itrs[median_pos] == 0:
                    increased_order = False

                # Update the number of iterations without update of the median.
                if median_modified or decreased_order or increased_order:
                    itrs_without_update = 0
                else:
                    itrs_without_update += 1

                # Print information about current iteration.
                if self.__print_to_stdout == 2:
                    print('Loading median to environment: ... ', end='')

                # Load the median into the environment.
                # @todo: should this function use the original node label?
                self.__ged_env.load_nx_graph(median, gen_median_id)
                self.__ged_env.init(self.__ged_env.get_init_type())

                # Print information about current iteration.
                if self.__print_to_stdout == 2:
                    print('done.')

                # Print information about current iteration.
                if self.__print_to_stdout == 2:
                    print('Updating induced costs: ... ', end='')

                # Compute induced costs of the old node maps w.r.t. the updated median.
                for graph_id in graph_ids:
#                    print(self.__ged_env.get_induced_cost(gen_median_id, graph_id))
                    # @todo: watch out if compute_induced_cost is correct, this may influence: increase/decrease order, induced_cost() in the following code.!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                    self.__ged_env.compute_induced_cost(gen_median_id, graph_id)
#                    print('---------------------------------------')
#                    print(self.__ged_env.get_induced_cost(gen_median_id, graph_id))

                # Print information about current iteration.
                if self.__print_to_stdout == 2:
                    print('done.')

                # Update the node maps.
                node_maps_modified = self.__update_node_maps()  # @todo

                # Update the order of the median if no improvement can be found with the current order.

                # Update the sum of distances.
                old_sum_of_distances = self.__sum_of_distances
                self.__sum_of_distances = 0
                for graph_id in self.__node_maps_from_median:
                    self.__sum_of_distances += self.__ged_env.get_induced_cost(gen_median_id, graph_id)  # @todo: see above.

                # Print information about current iteration.
                if self.__print_to_stdout == 2:
                    print('Old local SOD: ', old_sum_of_distances)
                    print('New local SOD: ', self.__sum_of_distances)
                    print('Best converged SOD: ', best_sum_of_distances)
                    print('Modified median: ', median_modified)
                    print('Modified node maps: ', node_maps_modified)
                    print('Decreased order: ', decreased_order)
                    print('Increased order: ', increased_order)
                    print('===========================================================\n')

                converged = not (median_modified or node_maps_modified or decreased_order or increased_order)

                self.__itrs[median_pos] += 1

            # Update the best median.
            if self.__sum_of_distances < best_sum_of_distances:  # compare to the best converged SOD (as in the GEDLIB original), so that best_median is always bound before it is loaded below.
                best_sum_of_distances = self.__sum_of_distances
                node_maps_from_best_median = self.__node_maps_from_median
                best_median = median

            # Update the number of converged descents.
            if converged:
                self.__num_converged_descents += 1

        # Store the best encountered median.
        self.__sum_of_distances = best_sum_of_distances
        self.__node_maps_from_median = node_maps_from_best_median
        self.__ged_env.load_nx_graph(best_median, gen_median_id)
        self.__ged_env.init(self.__ged_env.get_init_type())
        end_descent = time.time()
        self.__runtime_converged = end_descent - start

        # Refine the sum of distances and the node maps for the converged median.
        self.__converged_sum_of_distances = self.__sum_of_distances
        if self.__refine:
            self.__improve_sum_of_distances(timer)  # @todo

        # Record end time, set runtime and reset the number of initial medians.
        end = time.time()
        self.__runtime = end - start
        self.__num_random_inits = self.__desired_num_random_inits

        # Print global information.
        if self.__print_to_stdout != 0:
            print('\n===========================================================')
            print('Finished computation of generalized median graph.')
            print('-----------------------------------------------------------')
            print('Best SOD after initialization: ', self.__best_init_sum_of_distances)
            print('Converged SOD: ', self.__converged_sum_of_distances)
            if self.__refine:
                print('Refined SOD: ', self.__sum_of_distances)
            print('Overall runtime: ', self.__runtime)
            print('Runtime of initialization: ', self.__runtime_initialized)
            print('Runtime of block gradient descent: ', self.__runtime_converged - self.__runtime_initialized)
            if self.__refine:
                print('Runtime of refinement: ', self.__runtime - self.__runtime_converged)
            print('Number of initial medians: ', len(medians))
            total_itr = 0
            num_started_descents = 0
            for itr in self.__itrs:
                total_itr += itr
                if itr > 0:
                    num_started_descents += 1
            print('Size of graph collection: ', len(graph_ids))
            print('Number of started descents: ', num_started_descents)
            print('Number of converged descents: ', self.__num_converged_descents)
            print('Overall number of iterations: ', total_itr)
            print('Overall number of times the order decreased: ', self.__num_decrease_order)
            print('Overall number of times the order increased: ', self.__num_increase_order)
            print('===========================================================\n')

    def get_sum_of_distances(self, state=''):
        """Returns the sum of distances.

        Parameters
        ----------
        state : string
            The state of the estimator. Can be 'initialized' or 'converged'. Default: ""

        Returns
        -------
        float
            The sum of distances (SOD) of the median when the estimator was in the state `state` during the last call to run(). If `state` is not given, the converged SOD (without refinement) or refined SOD (with refinement) is returned.
        """
        if not self.__median_available():
            raise Exception('No median has been computed. Call run() before calling get_sum_of_distances().')
        if state == 'initialized':
            return self.__best_init_sum_of_distances
        if state == 'converged':
            return self.__converged_sum_of_distances
        return self.__sum_of_distances

    def __set_default_options(self):
        self.__init_type = 'RANDOM'
        self.__num_random_inits = 10
        self.__desired_num_random_inits = 10
        self.__use_real_randomness = True
        self.__seed = 0
        self.__refine = True
        self.__time_limit_in_sec = 0
        self.__epsilon = 0.0001
        self.__max_itrs = 100
        self.__max_itrs_without_update = 3
        self.__num_inits_increase_order = 10
        self.__init_type_increase_order = 'K-MEANS++'
        self.__max_itrs_increase_order = 10
        self.__print_to_stdout = 2

    def __construct_initial_medians(self, graph_ids, timer, initial_medians):
        # Print information about current iteration.
        if self.__print_to_stdout == 2:
            print('\n===========================================================')
            print('Constructing initial median(s).')
            print('-----------------------------------------------------------')

        # Compute or sample the initial median(s).
        initial_medians.clear()
        if self.__init_type == 'MEDOID':
            self.__compute_medoid(graph_ids, timer, initial_medians)
        elif self.__init_type == 'MAX':
            pass  # @todo
#            compute_max_order_graph_(graph_ids, initial_medians)
        elif self.__init_type == 'MIN':
            pass  # @todo
#            compute_min_order_graph_(graph_ids, initial_medians)
        elif self.__init_type == 'MEAN':
            pass  # @todo
#            compute_mean_order_graph_(graph_ids, initial_medians)
        else:
            pass  # @todo
#            sample_initial_medians_(graph_ids, initial_medians)

        # Print information about current iteration.
        if self.__print_to_stdout == 2:
            print('===========================================================')

    def __compute_medoid(self, graph_ids, timer, initial_medians):
        # Use method selected for initialization phase.
        self.__ged_env.set_method(self.__init_method, self.__init_options)

        # Print information about current iteration.
        if self.__print_to_stdout == 2:
            progress = tqdm(desc='\rComputing medoid', total=len(graph_ids), file=sys.stdout)

        # Compute the medoid.
        medoid_id = graph_ids[0]
        best_sum_of_distances = np.inf
        for g_id in graph_ids:
            if timer.expired():
                self.__state = AlgorithmState.CALLED
                break
            sum_of_distances = 0
            for h_id in graph_ids:
                self.__ged_env.run_method(g_id, h_id)
                sum_of_distances += self.__ged_env.get_upper_bound(g_id, h_id)
            if sum_of_distances < best_sum_of_distances:
                best_sum_of_distances = sum_of_distances
                medoid_id = g_id
            # Print information about current iteration.
            if self.__print_to_stdout == 2:
                progress.update(1)
        initial_medians.append(self.__ged_env.get_nx_graph(medoid_id, True, True, False))  # @todo

        # Print information about current iteration.
        if self.__print_to_stdout == 2:
            print('\n')

    def __termination_criterion_met(self, converged, timer, itr, itrs_without_update):
        if timer.expired() or (itr >= self.__max_itrs if self.__max_itrs >= 0 else False):
            if self.__state == AlgorithmState.TERMINATED:
                self.__state = AlgorithmState.INITIALIZED
            return True
        return converged or (itrs_without_update > self.__max_itrs_without_update if self.__max_itrs_without_update >= 0 else False)

    def __update_median(self, graphs, median):
        # Print information about current iteration.
        if self.__print_to_stdout == 2:
            print('Updating median: ', end='')

        # Store copy of the old median.
        old_median = median.copy()  # @todo: this is just a shallow copy.

        # Update the node labels.
        if self.__labeled_nodes:
            self.__update_node_labels(graphs, median)

        # Update the edges and their labels.
        self.__update_edges(graphs, median)

        # Print information about current iteration.
        if self.__print_to_stdout == 2:
            print('done.')

        return not self.__are_graphs_equal(median, old_median)

    def __update_node_labels(self, graphs, median):
        # Print information about current iteration.
        if self.__print_to_stdout == 2:
            print('nodes ... ', end='')

        # Iterate through all nodes of the median.
        for i in range(0, nx.number_of_nodes(median)):
#            print('i: ', i)
            # Collect the labels of the substituted nodes.
            node_labels = []
            for graph_id, graph in graphs.items():
#                print('graph_id: ', graph_id)
#                print(self.__node_maps_from_median[graph_id])
                k = self.__get_node_image_from_map(self.__node_maps_from_median[graph_id], i)
#                print('k: ', k)
                if k != np.inf:
                    node_labels.append(graph.nodes[k])

            # Compute the median label and update the median.
            if len(node_labels) > 0:
                median_label = self.__ged_env.get_median_node_label(node_labels)
                if self.__ged_env.get_node_rel_cost(median.nodes[i], median_label) > self.__epsilon:
                    nx.set_node_attributes(median, {i: median_label})

    def __update_edges(self, graphs, median):
        # Print information about current iteration.
        if self.__print_to_stdout == 2:
            print('edges ... ', end='')

        # Clear the adjacency lists of the median and reset number of edges to 0.
        median_edges = list(median.edges)
        for (head, tail) in median_edges:
            median.remove_edge(head, tail)

        # @todo: what if edge is not labeled?
        # Iterate through all possible edges (i,j) of the median.
        for i in range(0, nx.number_of_nodes(median)):
            for j in range(i + 1, nx.number_of_nodes(median)):

                # Collect the labels of the edges to which (i,j) is mapped by the node maps.
                edge_labels = []
                for graph_id, graph in graphs.items():
                    k = self.__get_node_image_from_map(self.__node_maps_from_median[graph_id], i)
                    l = self.__get_node_image_from_map(self.__node_maps_from_median[graph_id], j)
                    if k != np.inf and l != np.inf:
                        if graph.has_edge(k, l):
                            edge_labels.append(graph.edges[(k, l)])

                # Compute the median edge label and the overall edge relabeling cost.
                rel_cost = 0
                median_label = self.__ged_env.get_edge_label(1)
                if median.has_edge(i, j):
                    median_label = median.edges[(i, j)]
                if self.__labeled_edges and len(edge_labels) > 0:
                    new_median_label = self.__ged_env.median_edge_label(edge_labels)
                    if self.__ged_env.get_edge_rel_cost(median_label, new_median_label) > self.__epsilon:
                        median_label = new_median_label
                    for edge_label in edge_labels:
                        rel_cost += self.__ged_env.get_edge_rel_cost(median_label, edge_label)

                # Update the median.
                if rel_cost < (self.__edge_ins_cost + self.__edge_del_cost) * len(edge_labels) - self.__edge_del_cost * len(graphs):
                    median.add_edge(i, j, **median_label)
                else:
                    if median.has_edge(i, j):
                        median.remove_edge(i, j)

    def __update_node_maps(self):
        # Print information about current iteration.
        if self.__print_to_stdout == 2:
            progress = tqdm(desc='\rUpdating node maps', total=len(self.__node_maps_from_median), file=sys.stdout)

        # Update the node maps.
        node_maps_were_modified = False
        for graph_id in self.__node_maps_from_median:
            self.__ged_env.run_method(self.__median_id, graph_id)
            if self.__ged_env.get_upper_bound(self.__median_id, graph_id) < self.__ged_env.get_induced_cost(self.__median_id, graph_id) - self.__epsilon:  # @todo: see above.
                self.__node_maps_from_median[graph_id] = self.__ged_env.get_node_map(self.__median_id, graph_id)  # @todo: node_map may not be assigned.
                node_maps_were_modified = True
            # Print information about current iteration.
            if self.__print_to_stdout == 2:
                progress.update(1)

        # Print information about current iteration.
        if self.__print_to_stdout == 2:
            print('\n')

        # Return true if the node maps were modified.
        return node_maps_were_modified

    def __improve_sum_of_distances(self, timer):
        pass

    def __median_available(self):
        return self.__median_id != np.inf

    def __get_node_image_from_map(self, node_map, node):
        """
        Return ID of the node mapping of `node` in `node_map`.

        Parameters
        ----------
        node_map : list[tuple(int, int)]
            List of node maps where the mapping node is found.

        node : int
            The mapping node of this node is returned.

        Raises
        ------
        Exception
            If the node with ID `node` is not contained in the source nodes of the node map.

        Returns
        -------
        int
            ID of the mapping of `node`.

        Notes
        -----
        This function is not implemented in the `ged::MedianGraphEstimator` class of the `GEDLIB` library. Instead it is a Python implementation of the `ged::NodeMap::image` function.
        """
        if node < len(node_map):
            return node_map[node][1] if node_map[node][1] < len(node_map) else np.inf
        else:
            raise Exception('The node with ID ' + str(node) + ' is not contained in the source nodes of the node map.')
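
    # Illustrative note (not in the original): a node_map is a list of
    # (source, image) tuples, e.g. node_map = [(0, 1), (1, 0), (2, 2)];
    # querying node 0 above returns 1, and an image index >= len(node_map)
    # is reported as np.inf, i.e. the node is treated as deleted.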

    def __are_graphs_equal(self, g1, g2):
        """
        Check if the two graphs are equal.

        Parameters
        ----------
        g1 : NetworkX graph object
            Graph 1 to be compared.

        g2 : NetworkX graph object
            Graph 2 to be compared.

        Returns
        -------
        bool
            True if the two graphs are equal.

        Notes
        -----
        This is not an identity check. Here the two graphs are equal if and only if their original_node_ids, nodes, all node labels, edges and all edge labels are equal. This function is specifically designed for class `MedianGraphEstimator` and should not be used elsewhere.
        """
        # Check original node ids.
        if not g1.graph['original_node_ids'] == g2.graph['original_node_ids']:
            return False
        # Check nodes.
        nlist1 = [n for n in g1.nodes(data=True)]
        nlist2 = [n for n in g2.nodes(data=True)]
        if not nlist1 == nlist2:
            return False
        # Check edges.
        elist1 = [n for n in g1.edges(data=True)]
        elist2 = [n for n in g2.edges(data=True)]
        if not elist1 == elist2:
            return False

        return True

def compute_my_cost(g, h, node_map):
    cost = 0.0
    for node in g.nodes:
        cost += 0
@@ -0,0 +1,15 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Mar 26 18:27:22 2020

@author: ljia
"""
from gklearn.preimage.preimage_generator import PreimageGenerator
# from gklearn.utils.dataset import Dataset


class MedianPreimageGenerator(PreimageGenerator):

    def __init__(self, mge, dataset):
        self.__mge = mge
        self.__dataset = dataset
@@ -0,0 +1,108 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Mar 19 18:13:56 2020

@author: ljia
"""


def options_string_to_options_map(options_string):
    """Transforms an options string into an options map.

    Parameters
    ----------
    options_string : string
        Options string of the form "[--<option> <arg>] [...]".

    Returns
    -------
    options_map : dict{string : string}
        Map with one key-value pair (<option>, <arg>) for each option contained in the string.
    """
    if options_string == '':
        return {}  # return an empty map so that callers can always iterate over the result.
    options_map = {}
    words = []
    tokenize(options_string, ' ', words)
    expect_option_name = True
    for word in words:
        if expect_option_name:
            is_opt_name, word = is_option_name(word)
            if is_opt_name:
                option_name = word
                if option_name in options_map:
                    raise Exception('Multiple specification of option "' + option_name + '".')
                options_map[option_name] = ''
            else:
                raise Exception('Invalid options "' + options_string + '". Usage: options = "[--<option> <arg>] [...]"')
        else:
            is_opt_name, word = is_option_name(word)
            if is_opt_name:
                raise Exception('Invalid options "' + options_string + '". Usage: options = "[--<option> <arg>] [...]"')
            else:
                options_map[option_name] = word
        expect_option_name = not expect_option_name
    return options_map
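
# Illustrative example (an assumption, not part of the original file):
#     options_string_to_options_map('--init-type MEDOID --random-inits 8')
#     # -> {'init-type': 'MEDOID', 'random-inits': '8'}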


def tokenize(sentence, sep, words):
    """Separates a sentence into words separated by sep (unless contained in single quotes).

    Parameters
    ----------
    sentence : string
        The sentence that should be tokenized.

    sep : string
        The separator. Must be different from "'".

    words : list[string]
        The obtained words.
    """
    outside_quotes = True
    word_length = 0
    pos_word_start = 0
    for pos in range(0, len(sentence)):
        if sentence[pos] == '\'':
            if not outside_quotes and pos < len(sentence) - 1:
                if sentence[pos + 1] != sep:
                    raise Exception('Sentence contains closing single quote which is followed by a char different from ' + sep + '.')
            word_length += 1
            outside_quotes = not outside_quotes
        elif outside_quotes and sentence[pos] == sep:
            if word_length > 0:
                words.append(sentence[pos_word_start:pos_word_start + word_length])
            pos_word_start = pos + 1
            word_length = 0
        else:
            word_length += 1
    if not outside_quotes:
        raise Exception('Sentence contains unbalanced single quotes.')
    if word_length > 0:
        words.append(sentence[pos_word_start:pos_word_start + word_length])
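
# Illustrative example (an assumption, not part of the original file): separators
# inside single quotes do not split the word, and the quotes are kept.
#     words = []
#     tokenize("--edge-labels 'a b c'", ' ', words)
#     # -> words == ['--edge-labels', "'a b c'"]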


def is_option_name(word):
    """Checks whether a word is an option name and, if so, removes the leading dashes.

    Parameters
    ----------
    word : string
        Word.

    Returns
    -------
    bool
        True if word is of the form "--<option>".

    word : string
        The word without the leading dashes.
    """
    if word[0] == '\'':
        word = word[1:len(word) - 1]  # strip both surrounding quotes; the C++ original uses substr(1, size() - 2), i.e. the first and the last char are dropped.
        return False, word
    if len(word) < 3:
        return False, word
    if word[0] == '-' and word[1] == '-' and word[2] != '-':
        word = word[2:]
        return True, word
    return False, word
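Taken together, the three helpers above parse an estimator options string. A hedged end-to-end sketch (the option names mirror those accepted by MedianGraphEstimator.set_options() in this diff):

    from gklearn.preimage.misc import options_string_to_options_map

    opts = options_string_to_options_map('--init-type MEDOID --seed 1 --refine FALSE')
    assert opts == {'init-type': 'MEDOID', 'seed': '1', 'refine': 'FALSE'}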
@@ -0,0 +1,12 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Mar 26 18:26:36 2020

@author: ljia
"""


class PreimageGenerator(object):

    def __init__(self):
        pass
@@ -0,0 +1,122 @@
elif opt_name == 'random-inits':
    try:
        num_random_inits_ = std::stoul(opt_val)
        desired_num_random_inits_ = num_random_inits_
    except:
        raise Error('Invalid argument "' + opt_val + '" for option random-inits. Usage: options = "[--random-inits <convertible to int greater 0>]"')

    if num_random_inits_ <= 0:
        raise Error('Invalid argument "' + opt_val + '" for option random-inits. Usage: options = "[--random-inits <convertible to int greater 0>]"')
}
elif opt_name == 'randomness':
    if opt_val == 'PSEUDO':
        use_real_randomness_ = False
    elif opt_val == 'REAL':
        use_real_randomness_ = True
    else:
        raise Error('Invalid argument "' + opt_val + '" for option randomness. Usage: options = "[--randomness REAL|PSEUDO] [...]"')
}
elif opt_name == 'stdout':
    if opt_val == '0':
        print_to_stdout_ = 0
    elif opt_val == '1':
        print_to_stdout_ = 1
    elif opt_val == '2':
        print_to_stdout_ = 2
    else:
        raise Error('Invalid argument "' + opt_val + '" for option stdout. Usage: options = "[--stdout 0|1|2] [...]"')
}
elif opt_name == 'refine':
    if opt_val == 'TRUE':
        refine_ = True
    elif opt_val == 'FALSE':
        refine_ = False
    else:
        raise Error('Invalid argument "' + opt_val + '" for option refine. Usage: options = "[--refine TRUE|FALSE] [...]"')
}
elif opt_name == 'time-limit':
    try:
        time_limit_in_sec_ = std::stod(opt_val)
    except:
        raise Error('Invalid argument "' + opt_val + '" for option time-limit. Usage: options = "[--time-limit <convertible to double>] [...]')
}
elif opt_name == 'max-itrs':
    try:
        max_itrs_ = std::stoi(opt_val)
    except:
        raise Error('Invalid argument "' + opt_val + '" for option max-itrs. Usage: options = "[--max-itrs <convertible to int>] [...]')
}
elif opt_name == 'max-itrs-without-update':
    try:
        max_itrs_without_update_ = std::stoi(opt_val)
    except:
        raise Error('Invalid argument "' + opt_val + '" for option max-itrs-without-update. Usage: options = "[--max-itrs-without-update <convertible to int>] [...]')
}
elif opt_name == 'seed':
    try:
        seed_ = std::stoul(opt_val)
    except:
        raise Error('Invalid argument "' + opt_val + '" for option seed. Usage: options = "[--seed <convertible to int greater equal 0>] [...]')
}
elif opt_name == 'epsilon':
    try:
        epsilon_ = std::stod(opt_val)
    except:
        raise Error('Invalid argument "' + opt_val + '" for option epsilon. Usage: options = "[--epsilon <convertible to double greater 0>] [...]')

    if epsilon_ <= 0:
        raise Error('Invalid argument "' + opt_val + '" for option epsilon. Usage: options = "[--epsilon <convertible to double greater 0>] [...]')
}
elif opt_name == 'inits-increase-order':
    try:
        num_inits_increase_order_ = std::stoul(opt_val)
    except:
        raise Error('Invalid argument "' + opt_val + '" for option inits-increase-order. Usage: options = "[--inits-increase-order <convertible to int greater 0>]"')

    if num_inits_increase_order_ <= 0:
        raise Error('Invalid argument "' + opt_val + '" for option inits-increase-order. Usage: options = "[--inits-increase-order <convertible to int greater 0>]"')
}
elif opt_name == 'init-type-increase-order':
    init_type_increase_order_ = opt_val
    if opt_val != 'CLUSTERS' and opt_val != 'K-MEANS++':
        raise Exception(std::string('Invalid argument ') + opt_val + ' for option init-type-increase-order. Usage: options = "[--init-type-increase-order CLUSTERS|K-MEANS++] [...]"')
}
elif opt_name == 'max-itrs-increase-order':
    try:
        max_itrs_increase_order_ = std::stoi(opt_val)
    except:
        raise Error('Invalid argument "' + opt_val + '" for option max-itrs-increase-order. Usage: options = "[--max-itrs-increase-order <convertible to int>] [...]')
}
else:
    std::string valid_options('[--init-type <arg>] [--random-inits <arg>] [--randomness <arg>] [--seed <arg>] [--stdout <arg>] ')
    valid_options += '[--time-limit <arg>] [--max-itrs <arg>] [--epsilon <arg>] '
    valid_options += '[--inits-increase-order <arg>] [--init-type-increase-order <arg>] [--max-itrs-increase-order <arg>]'
    raise Error(std::string('Invalid option "') + opt_name + '". Usage: options = "' + valid_options + '"')
| @@ -0,0 +1,91 @@ | |||||
| #!/usr/bin/env python3 | |||||
| # -*- coding: utf-8 -*- | |||||
| """ | |||||
| Created on Mon Mar 16 17:26:40 2020 | |||||
| @author: ljia | |||||
| """ | |||||
def test_median_graph_estimator():
	from gklearn.utils.graphfiles import loadDataset
	from gklearn.preimage.median_graph_estimator import MedianGraphEstimator
	from gklearn.gedlib import librariesImport, gedlibpy
	from gklearn.preimage.utils import get_same_item_indices
	from gklearn.preimage.ged import convertGraph
	import multiprocessing

	# estimator parameters.
	init_type = 'MEDOID'
	num_inits = 1
	threads = multiprocessing.cpu_count()
	time_limit = 60000

	# algorithm parameters.
	algo = 'IPFP'
	initial_solutions = 40
	algo_options_suffix = ' --initial-solutions ' + str(initial_solutions) + ' --ratio-runs-from-initial-solutions 1'

	edit_cost_name = 'LETTER2'
	edit_cost_constants = [0.02987291, 0.0178211, 0.01431966, 0.001, 0.001]
	ds_name = 'Letter-high' # matches the dataset loaded below.

	# Load dataset.
#	dataset = '../../datasets/COIL-DEL/COIL-DEL_A.txt'
	dataset = '../../datasets/Letter-high/Letter-high_A.txt'
	Gn, y_all = loadDataset(dataset)
	y_idx = get_same_item_indices(y_all)
	for i, (y, values) in enumerate(y_idx.items()):
		Gn_i = [Gn[val] for val in values]
		break # only the first class is used in this test.

	# Set up the environment.
	ged_env = gedlibpy.GEDEnv()
#	gedlibpy.restart_env()
	ged_env.set_edit_cost(edit_cost_name, edit_cost_constant=edit_cost_constants)
	for G in Gn_i:
		ged_env.add_nx_graph(convertGraph(G, edit_cost_name), '')
	graph_ids = ged_env.get_all_graph_ids()
	set_median_id = ged_env.add_graph('set_median')
	gen_median_id = ged_env.add_graph('gen_median')
	ged_env.init(init_option='EAGER_WITHOUT_SHUFFLED_COPIES')

	# Set up the estimator.
	mge = MedianGraphEstimator(ged_env, constant_node_costs(edit_cost_name))
	mge.set_refine_method(algo, '--threads ' + str(threads) + ' --initial-solutions ' + str(initial_solutions) + ' --ratio-runs-from-initial-solutions 1')
	mge_options = '--time-limit ' + str(time_limit) + ' --stdout 2 --init-type ' + init_type
	mge_options += ' --random-inits ' + str(num_inits) + ' --seed ' + '1' + ' --refine FALSE' # @todo: std::to_string(rng())

	# Select the GED algorithm.
	algo_options = '--threads ' + str(threads) + algo_options_suffix
	mge.set_options(mge_options)
	mge.set_init_method(algo, algo_options)
	mge.set_descent_method(algo, algo_options)

	# Run the estimator.
	mge.run(graph_ids, set_median_id, gen_median_id)

	# Get SODs.
	sod_sm = mge.get_sum_of_distances('initialized')
	sod_gm = mge.get_sum_of_distances('converged')
	print('sod_sm, sod_gm: ', sod_sm, sod_gm)

	# Get median graphs.
	set_median = ged_env.get_nx_graph(set_median_id)
	gen_median = ged_env.get_nx_graph(gen_median_id)

	return set_median, gen_median


def constant_node_costs(edit_cost_name):
	# these edit costs use non-constant (attribute-dependent) node costs.
	if edit_cost_name == 'NON_SYMBOLIC' or edit_cost_name == 'LETTER2' or edit_cost_name == 'LETTER':
		return False
	return True


if __name__ == '__main__':
	set_median, gen_median = test_median_graph_estimator()
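# A quick, hedged sanity check of constant_node_costs above (assumes 'CONSTANT'
# is a valid edit cost name with constant node costs, as in GEDLIB):
assert constant_node_costs('LETTER2') is False
assert constant_node_costs('CONSTANT') is True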
@@ -0,0 +1,40 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Mar 23 09:52:50 2020

@author: ljia
"""
import time


class Timer(object):
	"""A timer class that can be used by methods that support time limits.

	Note
	----
	This is the Python implementation of `the C++ code in GEDLIB <https://github.com/dbblumenthal/gedlib/blob/master/src/env/timer.hpp>`__.
	"""

	def __init__(self, time_limit_in_sec):
		"""Constructs a timer for a given time limit.

		Parameters
		----------
		time_limit_in_sec : float
			The time limit in seconds. A limit of 0 or less means no time limit.
		"""
		self.__time_limit_in_sec = time_limit_in_sec
		self.__start_time = time.time()

	def expired(self):
		"""Checks if the time limit has expired.

		Returns
		-------
		bool
			True if the time limit has expired and False otherwise.
		"""
		if self.__time_limit_in_sec > 0:
			runtime = time.time() - self.__start_time
			return runtime >= self.__time_limit_in_sec
		return False
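# A minimal usage sketch of the Timer above; the half-second limit and the
# dummy workload are illustrative only.
if __name__ == '__main__':
	timer = Timer(0.5) # expires after half a second.
	itrs = 0
	while not timer.expired():
		itrs += 1 # stand-in for one iteration of a real algorithm.
	print('performed', itrs, 'iterations before the time limit expired.')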
@@ -12,6 +12,7 @@ from shutil import copyfile
 import networkx as nx
 import matplotlib.pyplot as plt
 import os
+import time
 from gklearn.utils.graphfiles import loadDataset, loadGXL, saveGXL
 from gklearn.preimage.test_k_closest_graphs import median_on_k_closest_graphs, reform_attributes
@@ -69,6 +70,10 @@ def get_dataset(ds_name):
 		Gn, y_all = loadDataset(dataset)
 	elif ds_name == 'Synthie':
 		pass
+	elif ds_name == 'COIL-DEL':
+		dataset = '../../datasets/COIL-DEL/COIL-DEL_A.txt'
+		graph_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/generated_datsets/COIL-DEL/'
+		Gn, y_all = loadDataset(dataset)
 	elif ds_name == 'COIL-RAG':
 		pass
 	elif ds_name == 'COLORS-3':
@@ -109,7 +114,8 @@ def init_output_file(ds_name, gkernel, fit_method, dir_output):
 def xp_fit_method_for_non_symbolic(parameters, save_results=True, initial_solutions=1,
-								   Gn_data=None, k_dis_data=None, Kmatrix=None):
+								   Gn_data=None, k_dis_data=None, Kmatrix=None,
+								   is_separate=False):
 	# 1. set parameters.
 	print('1. setting parameters...')
@@ -142,11 +148,12 @@ def xp_fit_method_for_non_symbolic(parameters, save_results=True, initial_soluti
 		dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None,
 				None, Kmatrix=Kmatrix, gkernel=gkernel)
 	else:
-		dis_mat = k_dis_data[0]
-		dis_max = k_dis_data[1]
-		dis_min = k_dis_data[2]
-		dis_mean = k_dis_data[3]
-		print('pair distances - dis_max, dis_min, dis_mean:', dis_max, dis_min, dis_mean)
+#		dis_mat = k_dis_data[0]
+#		dis_max = k_dis_data[1]
+#		dis_min = k_dis_data[2]
+#		dis_mean = k_dis_data[3]
+#		print('pair distances - dis_max, dis_min, dis_mean:', dis_max, dis_min, dis_mean)
+		pass
 	if save_results:
@@ -213,8 +220,11 @@
 		# get Gram matrix for this part of data.
 		if Kmatrix is not None:
-			Kmatrix_sub = Kmatrix[values,:]
-			Kmatrix_sub = Kmatrix_sub[:,values]
+			if is_separate:
+				Kmatrix_sub = Kmatrix[i].copy()
+			else:
+				Kmatrix_sub = Kmatrix[values,:]
+				Kmatrix_sub = Kmatrix_sub[:,values]
 		else:
 			Kmatrix_sub = None
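# A small self-contained illustration of the non-separate branch above
# (assuming Kmatrix is a numpy array and values is a list of indices): chained
# fancy indexing extracts the per-class submatrix, equivalently via np.ix_.
import numpy as np

K = np.arange(16.0).reshape(4, 4)
values = [0, 2]
sub_a = K[values, :][:, values]
sub_b = K[np.ix_(values, values)] # one-step equivalent.
assert np.array_equal(sub_a, sub_b)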
@@ -395,7 +405,48 @@ def draw_Letter_graph(graph, file_prefix):
 	plt.savefig(file_prefix + '.eps', format='eps', dpi=300)
 #	plt.show()
 	plt.clf()
+
+
+def compute_gm_for_each_class(Gn, y_all, gkernel, ds_name, parallel='imap_unordered', is_separate=True):
+	# ds_name is passed in explicitly so that the save path below does not rely
+	# on a global variable.
+	if is_separate:
+		print('the Gram matrix is computed for each class.')
+		y_idx = get_same_item_indices(y_all)
+		Kmatrix = []
+		run_time = []
+		k_dis_data = []
+		for i, (y, values) in enumerate(y_idx.items()):
+			print('class ' + str(i) + ':')
+			Gn_i = [Gn[val] for val in values]
+			time0 = time.time()
+			Kmatrix.append(compute_kernel(Gn_i, gkernel, None, None, True, parallel=parallel))
+			run_time.append(time.time() - time0)
+			k_dis_data.append(kernel_distance_matrix(Gn_i, None, None,
+				Kmatrix=Kmatrix[i], gkernel=gkernel, verbose=True))
+		np.savez('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm',
+				 Kmatrix=Kmatrix, run_time=run_time, is_separate=is_separate)
+		dis_max = np.max([item[1] for item in k_dis_data])
+		dis_min = np.min([item[2] for item in k_dis_data])
+		dis_mean = np.mean([item[3] for item in k_dis_data])
+		print('pair distances - dis_max, dis_min, dis_mean:', dis_max, dis_min,
+			  dis_mean)
+	else:
+		time0 = time.time()
+		Kmatrix = compute_kernel(Gn, gkernel, None, None, True, parallel=parallel)
+		run_time = time.time() - time0
+		np.savez('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm',
+				 Kmatrix=Kmatrix, run_time=run_time, is_separate=is_separate)
+		k_dis_data = kernel_distance_matrix(Gn, None, None,
+			Kmatrix=Kmatrix, gkernel=gkernel, verbose=True)
+		print('the Gram matrix is computed for the whole dataset.')
+		print('pair distances - dis_max, dis_min, dis_mean:', k_dis_data[1],
+			  k_dis_data[2], k_dis_data[3])
+		print('\nTime to compute Gram matrix for the whole dataset: ', run_time)
+	# each k_dis_data item is [dis_mat, dis_max, dis_min, dis_mean].
+	return Kmatrix, run_time, k_dis_data
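# For reference, a minimal sketch of what get_same_item_indices is assumed to
# return (gklearn's own implementation may differ): a dict mapping each class
# label in y_all to the list of indices carrying that label.
def get_same_item_indices_sketch(y_all):
	y_idx = {}
	for idx, y in enumerate(y_all):
		y_idx.setdefault(y, []).append(idx)
	return y_idx

# get_same_item_indices_sketch(['A', 'B', 'A']) == {'A': [0, 2], 'B': [1]}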
 if __name__ == "__main__":
 	# #### xp 1: Letter-high, spkernel.
@@ -573,7 +624,7 @@ if __name__ == "__main__":
 #							   Kmatrix=Kmatrix)
-	# #### xp 5: Fingerprint, sspkernel, using LETTER2.
+	# #### xp 5: Fingerprint, sspkernel, using LETTER2, only node attrs.
 #	# load dataset.
 #	print('getting dataset and computing kernel distance matrix first...')
 #	ds_name = 'Fingerprint'
@@ -593,17 +644,17 @@
 #			del G.edges[edge]['attributes']
 #			del G.edges[edge]['orient']
 #			del G.edges[edge]['angle']
-#	Gn = Gn[805:815]
-#	y_all = y_all[805:815]
+##	Gn = Gn[805:815]
+##	y_all = y_all[805:815]
 #	for G in Gn:
 #		G.graph['filename'] = 'graph' + str(G.graph['name']) + '.gxl'
 #
 #	# compute/read Gram matrix and pair distances.
-#	Kmatrix = compute_kernel(Gn, gkernel, None, None, True, parallel='imap_unordered')
-#	np.savez('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm',
-#			 Kmatrix=Kmatrix)
-##	gmfile = np.load('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm.npz')
-##	Kmatrix = gmfile['Kmatrix']
+##	Kmatrix = compute_kernel(Gn, gkernel, None, None, True, parallel='imap_unordered')
+##	np.savez('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm',
+##			 Kmatrix=Kmatrix)
+#	gmfile = np.load('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm.npz')
+#	Kmatrix = gmfile['Kmatrix']
 ##	run_time = gmfile['run_time']
 ##	Kmatrix = Kmatrix[[0,1,2,3,4],:]
 ##	Kmatrix = Kmatrix[:,[0,1,2,3,4]]
@@ -612,11 +663,7 @@
 #			Kmatrix=Kmatrix, gkernel=gkernel, verbose=True)
 ##	Kmatrix = np.zeros((len(Gn), len(Gn)))
 ##	dis_mat, dis_max, dis_min, dis_mean = 0, 0, 0, 0
-#
-#	# compute pair distances.
-##	dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None,
-##			Kmatrix=None, gkernel=gkernel, verbose=True)
-##	dis_mat, dis_max, dis_min, dis_mean = 0, 0, 0, 0
+#
 #	# fitting and computing.
 #	fit_methods = ['k-graphs', 'random', 'random', 'random']
 #	for fit_method in fit_methods:
@@ -627,7 +674,8 @@
 #					  'edit_cost_name': 'LETTER2',
 #					  'ged_method': 'mIPFP',
 #					  'attr_distance': 'euclidean',
-#					  'fit_method': fit_method}
+#					  'fit_method': fit_method,
+#					  'init_ecc': [1,1,1,1,1]} # [0.525, 0.525, 0.001, 0.125, 0.125]}
 #		xp_fit_method_for_non_symbolic(parameters, save_results=True,
 #									   initial_solutions=40,
 #									   Gn_data = [Gn, y_all, graph_dir],
@@ -773,38 +821,102 @@ if __name__ == "__main__":
 #							   Kmatrix=Kmatrix)
-	#### xp 9: Letter-low, spkernel.
+#	#### xp 9: Letter-low, spkernel.
+#	# load dataset.
+#	print('getting dataset and computing kernel distance matrix first...')
+#	ds_name = 'Letter-low'
+#	gkernel = 'spkernel'
+#	Gn, y_all, graph_dir = get_dataset(ds_name)
+#	# remove graphs without nodes and edges.
+#	Gn = [(idx, G) for idx, G in enumerate(Gn) if (nx.number_of_nodes(G) != 0
+#		  and nx.number_of_edges(G) != 0)]
+#	idx = [G[0] for G in Gn]
+#	Gn = [G[1] for G in Gn]
+#	y_all = [y_all[i] for i in idx]
+##	Gn = Gn[0:50]
+##	y_all = y_all[0:50]
+#
+#	# compute/read Gram matrix and pair distances.
+#	Kmatrix = compute_kernel(Gn, gkernel, None, None, True, parallel='imap_unordered')
+#	np.savez('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm',
+#			 Kmatrix=Kmatrix)
+##	gmfile = np.load('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm.npz')
+##	Kmatrix = gmfile['Kmatrix']
+##	run_time = gmfile['run_time']
+##	Kmatrix = Kmatrix[[0,1,2,3,4],:]
+##	Kmatrix = Kmatrix[:,[0,1,2,3,4]]
+##	print('\nTime to compute Gram matrix for the whole dataset: ', run_time)
+#	dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None,
+#		Kmatrix=Kmatrix, gkernel=gkernel, verbose=True)
+##	Kmatrix = np.zeros((len(Gn), len(Gn)))
+##	dis_mat, dis_max, dis_min, dis_mean = 0, 0, 0, 0
+#
+#	# fitting and computing.
+#	fit_methods = ['k-graphs', 'expert', 'random', 'random', 'random']
+#	for fit_method in fit_methods:
+#		print('\n-------------------------------------')
+#		print('fit method:', fit_method)
+#		parameters = {'ds_name': ds_name,
+#					  'gkernel': gkernel,
+#					  'edit_cost_name': 'LETTER2',
+#					  'ged_method': 'mIPFP',
+#					  'attr_distance': 'euclidean',
+#					  'fit_method': fit_method,
+#					  'init_ecc': [0.075, 0.075, 0.25, 0.075, 0.075]}
+#		print('parameters: ', parameters)
+#		xp_fit_method_for_non_symbolic(parameters, save_results=True,
+#									   initial_solutions=40,
+#									   Gn_data = [Gn, y_all, graph_dir],
+#									   k_dis_data = [dis_mat, dis_max, dis_min, dis_mean],
+#									   Kmatrix=Kmatrix)
+
+	#### xp 10: COIL-DEL, sspkernel, using LETTER2, only node attrs.
 	# load dataset.
 	print('getting dataset and computing kernel distance matrix first...')
-	ds_name = 'Letter-low'
-	gkernel = 'spkernel'
+	ds_name = 'COIL-DEL'
+	gkernel = 'structuralspkernel'
 	Gn, y_all, graph_dir = get_dataset(ds_name)
 	# remove graphs without nodes and edges.
-	Gn = [(idx, G) for idx, G in enumerate(Gn) if (nx.number_of_nodes(G) != 0
-		  and nx.number_of_edges(G) != 0)]
+	Gn = [(idx, G) for idx, G in enumerate(Gn) if nx.number_of_nodes(G) != 0]
+#		  and nx.number_of_edges(G) != 0)]
 	idx = [G[0] for G in Gn]
 	Gn = [G[1] for G in Gn]
 	y_all = [y_all[i] for i in idx]
+#	Gn = Gn[0:50]
+#	y_all = y_all[0:50]
+
+	# remove unused labels.
+	for G in Gn:
+		G.graph['edge_labels'] = []
+		for edge in G.edges:
+			del G.edges[edge]['bond_type']
+			del G.edges[edge]['valence']
+#	Gn = Gn[805:815]
+#	y_all = y_all[805:815]
+	for G in Gn:
+		G.graph['filename'] = 'graph' + str(G.graph['name']) + '.gxl'
+
 	# compute/read Gram matrix and pair distances.
-	Kmatrix = compute_kernel(Gn, gkernel, None, None, True, parallel='imap_unordered')
-	np.savez('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm',
-			 Kmatrix=Kmatrix)
+	is_separate = True
+	Kmatrix, run_time, k_dis_data = compute_gm_for_each_class(Gn,
+															  y_all,
+															  gkernel,
+															  ds_name, # passed through to the save path.
+															  parallel='imap_unordered',
+															  is_separate=is_separate)
+#	Kmatrix = compute_kernel(Gn, gkernel, None, None, True, parallel='imap_unordered')
+#	np.savez('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm',
+#			 Kmatrix=Kmatrix)
 #	gmfile = np.load('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm.npz')
 #	Kmatrix = gmfile['Kmatrix']
 #	run_time = gmfile['run_time']
 #	Kmatrix = Kmatrix[[0,1,2,3,4],:]
 #	Kmatrix = Kmatrix[:,[0,1,2,3,4]]
 #	print('\nTime to compute Gram matrix for the whole dataset: ', run_time)
-	dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None,
-		Kmatrix=Kmatrix, gkernel=gkernel, verbose=True)
+#	dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None,
+#		Kmatrix=Kmatrix, gkernel=gkernel, verbose=True)
 #	Kmatrix = np.zeros((len(Gn), len(Gn)))
 #	dis_mat, dis_max, dis_min, dis_mean = 0, 0, 0, 0

 	# fitting and computing.
-	fit_methods = ['k-graphs', 'expert', 'random', 'random', 'random']
+	fit_methods = ['k-graphs', 'random', 'random', 'random']
 	for fit_method in fit_methods:
 		print('\n-------------------------------------')
 		print('fit method:', fit_method)
@@ -814,10 +926,10 @@
 					  'ged_method': 'mIPFP',
 					  'attr_distance': 'euclidean',
 					  'fit_method': fit_method,
-					  'init_ecc': [0.075, 0.075, 0.25, 0.075, 0.075]}
-		print('parameters: ', parameters)
+					  'init_ecc': [3,3,1,3,3]} # [0.525, 0.525, 0.001, 0.125, 0.125]}
 		xp_fit_method_for_non_symbolic(parameters, save_results=True,
 									   initial_solutions=40,
-									   Gn_data = [Gn, y_all, graph_dir],
-									   k_dis_data = [dis_mat, dis_max, dis_min, dis_mean],
-									   Kmatrix=Kmatrix)
+									   Gn_data=[Gn, y_all, graph_dir],
+									   k_dis_data=k_dis_data,
+									   Kmatrix=Kmatrix,
+									   is_separate=is_separate)
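# A hedged note on reading back the file saved by compute_gm_for_each_class:
# when is_separate is True, Kmatrix is a list of per-class Gram matrices, so
# numpy stores it as an object array whenever the class sizes differ, and
# np.load then needs allow_pickle=True to recover it.
import numpy as np

gmfile = np.load('results/xp_fit_method/Kmatrix.COIL-DEL.structuralspkernel.gm.npz',
				 allow_pickle=True)
Kmatrix = list(gmfile['Kmatrix']) # one Gram matrix per class.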