New translations model_selection_precomputed.py (Chinese Simplified)

5 years ago · 72968ef4f6
--- a/lang/zh/gklearn/utils/model_selection_precomputed.py
+++ b/lang/zh/gklearn/utils/model_selection_precomputed.py
@@ -0,0 +1,959 @@
 import numpy as np
 import matplotlib
 matplotlib.use('Agg')
 from matplotlib import pyplot as plt
 from sklearn.kernel_ridge import KernelRidge
 from sklearn.svm import SVC
 from sklearn.metrics import accuracy_score, mean_squared_error
 from sklearn.model_selection import KFold, train_test_split, ParameterGrid

 #from joblib import Parallel, delayed
 from multiprocessing import Pool, Array
 from functools import partial
 import sys
 import os
 import time
 import datetime
 #from os.path import basename, splitext
 from gklearn.utils.graphfiles import loadDataset
 from tqdm import tqdm

 #from memory_profiler import profile

 #@profile
 def model_selection_for_precomputed_kernel(datafile,
 		estimator,
 		param_grid_precomputed,
 		param_grid,
 		model_type,
 		NUM_TRIALS=30,
 		datafile_y=None,
 		extra_params=None,
 		ds_name='ds-unknown',
 		output_dir='outputs/',
 		n_jobs=1,
 		read_gm_from_file=False,
 		verbose=True):
 	"""Perform model selection, fitting and testing for precomputed kernels
 	using nested CV. Print out necessary data during the process, then finally
 	the results. A log of the run is prepended to
 	``<output_dir><estimator name>/<ds_name>.output.txt``.

 	Parameters
 	----------
 	datafile : string or list of graphs
 		Path of dataset file. If it is not a string, it is used directly as
 		the dataset and `datafile_y` is used directly as the targets.
 	estimator : function
 		Kernel function used to estimate. It is called as
 		``estimator(dataset, **params, n_jobs=n_jobs, verbose=verbose)`` and
 		must return a sequence ``(gram_matrix, run_time)`` or
 		``(gram_matrix, run_time, idx_trim)``, where `idx_trim` lists the
 		indices of the graphs that were kept by the kernel.
 	param_grid_precomputed : dictionary
 		Dictionary with names (string) of parameters used to calculate gram
 		matrices as keys and lists of parameter settings to try as values. This
 		enables searching over any sequence of parameter settings. Params with
 		length 1 will be omitted.
 	param_grid : dictionary
 		Dictionary with names (string) of parameters used as penalties as keys
 		and lists of parameter settings to try as values. This enables
 		searching over any sequence of parameter settings. Params with length 1
 		will be omitted.
 	model_type : string
 		Type of the problem, can be 'regression' or 'classification'
 		(case-insensitive).
 	NUM_TRIALS : integer
 		Number of random trials of the outer CV loop. The default is 30.
 	datafile_y : string
 		Path of file storing y data. This parameter is optional depending on
 		the given dataset file.
 	extra_params : dict
 		Extra parameters for loading dataset. See function gklearn.utils.
 		graphfiles.loadDataset for detail.
 	ds_name : string
 		Name of the dataset.
 	output_dir : string
 		Prefix of the output directory. The name of the estimator is appended
 		to this string as is, so it normally ends with a path separator.
 	n_jobs : int
 		Number of jobs for parallelization. It is forwarded to the estimator
 		and used as the number of cross validation worker processes.
 	read_gm_from_file : boolean
 		Whether gram matrices are loaded from the file
 		``<ds_name>.gm.npz`` in the output directory instead of being computed.
 	verbose : boolean
 		Whether to print progress and results to stdout.

 	Returns
 	-------
 	final_performance : list
 		Average test performance for each selected combination of
 		hyper-parameters (RMSE for regression, accuracy for classification).
 	final_confidence : list
 		Sample standard deviation of the test performance over the trials for
 		the same combinations. Both lists are empty when every gram matrix
 		had to be ignored.

 	Raises
 	------
 	Exception
 		If `model_type` is neither 'regression' nor 'classification'.

 	Examples
 	--------
 	>>> import numpy as np
 	>>> from gklearn.utils.model_selection_precomputed import model_selection_for_precomputed_kernel
 	>>> from gklearn.kernels.untilHPathKernel import untilhpathkernel
 	>>>
 	>>> datafile = '../datasets/MUTAG/MUTAG_A.txt'
 	>>> estimator = untilhpathkernel
 	>>> param_grid_precomputed = {'depth': np.linspace(1, 10, 10), 'k_func':
 	...		['MinMax', 'tanimoto'], 'compute_method': ['trie']}
 	>>> # 'C' for classification problems and 'alpha' for regression problems.
 	>>> param_grid = [{'C': np.logspace(-10, 10, num=41, base=10)}, {'alpha':
 	...		np.logspace(-10, 10, num=41, base=10)}]
 	>>>
 	>>> model_selection_for_precomputed_kernel(datafile, estimator,
 	...		param_grid_precomputed, param_grid[0], 'classification', ds_name='MUTAG')
 	"""
 	tqdm.monitor_interval = 0

 	output_dir += estimator.__name__
 	os.makedirs(output_dir, exist_ok=True)
 	log_path = output_dir + '/' + ds_name + '.output.txt'
 	# a string to save all the results.
 	str_fw = '###################### log time: ' + datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + '. ######################\n\n'
 	str_fw += '# This file contains results of ' + estimator.__name__ + ' on dataset ' + ds_name + ',\n# including gram matrices, serial numbers for gram matrix figures and performance.\n\n'

 	# setup the model type
 	model_type = model_type.lower()
 	if model_type not in ('regression', 'classification'):
 		raise Exception(
 			'The model type is incorrect! Please choose from regression or classification.'
 		)
 	if verbose:
 		print()
 		print('--- This is a %s problem ---' % model_type)
 	str_fw += 'This is a %s problem.\n' % model_type

 	# Grid of parameters with a discrete number of values for each.
 	param_list = list(ParameterGrid(param_grid))

 	if not read_gm_from_file:
 		# ---- calculate gram matrices rather than read them from file. ----
 		if verbose:
 			print()
 			print('\n1. Loading dataset from file...')
 		if isinstance(datafile, str):
 			dataset, y_all = loadDataset(
 					datafile, filename_y=datafile_y, extra_params=extra_params)
 		else: # load data directly from variable.
 			dataset = datafile
 			y_all = datafile_y

 		param_list_precomputed = list(ParameterGrid(param_grid_precomputed))
 		gram_matrices = []  # gram matrices retained for cross validation
 		gram_matrix_time = []  # time to calculate each retained gram matrix
 		param_list_pre_revised = []  # parameters of the retained gram matrices
 		y = None  # targets aligned with the retained gram matrices

 		if verbose:
 			print()
 			print('2. Calculating gram matrices. This could take a while...')
 		str_fw += '\nII. Gram matrices.\n\n'
 		tts = time.time()  # start training time
 		nb_gm_ignore = 0  # the number of gram matrices that are not usable
 		for idx, params_out in enumerate(param_list_precomputed):
 			y_now = y_all[:]
 			params_out['n_jobs'] = n_jobs
 			params_out['verbose'] = verbose
 			rtn_data = estimator(dataset[:], **params_out)
 			Kmatrix = rtn_data[0]
 			current_run_time = rtn_data[1]
 			# for some kernels, some graphs in datasets may not meet the
 			# kernels' requirements for graph structure. These graphs are trimmed.
 			if len(rtn_data) == 3:
 				idx_trim = rtn_data[2]  # the index of trimmed graph list
 				y_now = [y_now[idxt] for idxt in idx_trim] # trim y accordingly

 			# remove graphs whose kernels with themselves are zeros (together
 			# with their targets) and normalize the matrix.
 			Kmatrix, y_now, nb_g_ignore = _clean_and_normalize_gram_matrix(
 					Kmatrix, y_now)

 			# params_out always holds 'n_jobs' and 'verbose' at this point, so
 			# it is never empty and the parameters are always reported.
 			if verbose:
 				print()
 				print('the gram matrix with parameters', params_out, 'is: \n\n')
 			str_fw += 'the gram matrix with parameters %s is:\n\n' % params_out

 			if len(Kmatrix) < 2:
 				reason = 'ignored, as at most only one of all its diagonal value is non-zero.'
 			elif np.isnan(Kmatrix).any():
 				reason = 'ignored, as it contains elements that are not numbers.'
 			else:
 				reason = None
 			if reason is not None:
 				nb_gm_ignore += 1
 				if verbose:
 					print(reason)
 				str_fw += reason + '\n\n'
 				continue

 			str_fw += np.array2string(Kmatrix, separator=',') + '\n\n'
 			fig_file_name = output_dir + '/GM[ds]' + ds_name + '[params]' + str(idx)
 			plt.imshow(Kmatrix)
 			plt.colorbar()
 			plt.savefig(fig_file_name + '.eps', format='eps', dpi=300)
 			plt.clf()
 			gram_matrices.append(Kmatrix)
 			gram_matrix_time.append(current_run_time)
 			param_list_pre_revised.append(params_out)
 			# NOTE: a single target vector is shared by all retained gram
 			# matrices during cross validation, so they are expected to keep
 			# the same graphs; the targets of the last retained one are used.
 			y = y_now
 			if nb_g_ignore > 0:
 				if verbose:
 					print(', where %d graphs are ignored as their graph kernels with themselves are zeros.' % nb_g_ignore)
 				str_fw += ', where %d graphs are ignored as their graph kernels with themselves are zeros.' % nb_g_ignore
 		if verbose:
 			print()
 			print(
 			'{} gram matrices are calculated, {} of which are ignored.'.format(
 				len(param_list_precomputed), nb_gm_ignore))
 		str_fw += '{} gram matrices are calculated, {} of which are ignored.\n\n'.format(len(param_list_precomputed), nb_gm_ignore)
 		str_fw += 'serial numbers of gram matrix figures and their corresponding parameters settings:\n\n'
 		str_fw += ''.join([
 			'{}: {}\n'.format(serial, params)
 			for serial, params in enumerate(param_list_precomputed)
 		])

 		if verbose:
 			print()
 		if len(gram_matrices) == 0:
 			# Nothing to cross validate: keep the log and return empty results
 			# instead of failing on undefined performance variables.
 			if verbose:
 				print('all gram matrices are ignored, no results obtained.')
 			str_fw += '\nall gram matrices are ignored, no results obtained.\n\n'
 			_prepend_to_log_file(log_path, str_fw)
 			return [], []
 	else:
 		# ---- read gram matrices from file. ----
 		if verbose:
 			print()
 			print('2. Reading gram matrices from file...')
 		str_fw += '\nII. Gram matrices.\n\nGram matrices are read from file, see last log for detail.\n'
 		# NOTE(review): if 'params' was saved as an object array, recent NumPy
 		# versions need allow_pickle=True here; only enable it for trusted files.
 		with np.load(output_dir + '/' + ds_name + '.gm.npz') as gmfile:
 			gram_matrices = gmfile['gms']  # gram matrices for all retained parameters
 			gram_matrix_time = gmfile['gmtime']  # time used to compute the gram matrices
 			param_list_pre_revised = gmfile['params']  # parameters of the gram matrices
 			y = gmfile['y'].tolist()
 		tts = time.time()  # start training time

 	if verbose:
 		print(
 		'3. Fitting and predicting using nested cross validation. This could really take a while...'
 		)
 	train_pref, val_pref, test_pref = _run_nested_cv(
 			param_list_pre_revised, param_list, gram_matrices, y, model_type,
 			NUM_TRIALS, n_jobs, verbose)

 	perf, perf_log = _summarize_cv_scores(
 			train_pref, val_pref, test_pref, param_list_pre_revised,
 			param_list, model_type, verbose)
 	str_fw += perf_log

 	if verbose:
 		print()
 	elapsed = time.time() - tts
 	(average_gram_matrix_time, std_gram_matrix_time,
 	 ave_bgmt, std_bgmt) = _gram_matrix_time_stats(
 			gram_matrix_time, perf['best_params_index'])
 	if verbose:
 		print('time to calculate gram matrix with different hyper-params: {:.2f}±{:.2f}s'
 			  .format(average_gram_matrix_time, std_gram_matrix_time))
 		print('time to calculate best gram matrix: {:.2f}±{:.2f}s'.format(
 				ave_bgmt, std_bgmt))
 	if read_gm_from_file:
 		# here "elapsed" only covers the cross validation; the time spent on
 		# the gram matrices comes from the file.
 		if verbose:
 			print(
 					'training time with hyper-param choices who did not participate in calculation of gram matrices: {:.2f}s'.format(
 							elapsed))
 			print('total training time with all hyper-param choices: {:.2f}s'.format(
 					elapsed + np.sum(gram_matrix_time)))
 		str_fw += 'training time with hyper-param choices who did not participate in calculation of gram matrices: {:.2f}s\n\n'.format(elapsed)
 	else:
 		# here "elapsed" covers gram matrix computation and cross validation.
 		if verbose:
 			print('total training time with all hyper-param choices: {:.2f}s'.format(
 					elapsed))
 		str_fw += 'time to calculate gram matrix with different hyper-params: {:.2f}±{:.2f}s\n'.format(average_gram_matrix_time, std_gram_matrix_time)
 		str_fw += 'time to calculate best gram matrix: {:.2f}±{:.2f}s\n'.format(ave_bgmt, std_bgmt)
 		str_fw += 'total training time with all hyper-param choices: {:.2f}s\n\n'.format(elapsed)

 	# print out results as table.
 	str_fw += printResultsInTable(param_list, param_list_pre_revised,
 			  perf['average_val_scores'], perf['std_val_scores'],
 			  perf['average_perf_scores'], perf['std_perf_scores'],
 			  perf['average_train_scores'], perf['std_train_scores'],
 			  gram_matrix_time, model_type, verbose)

 	# save all results for this dataset, newest log first.
 	_prepend_to_log_file(log_path, str_fw)

 	return perf['final_performance'], perf['final_confidence']


 def _init_worker_gms(gms_toshare):
 	"""Pool initializer: expose the gram matrices to a worker as global G_gms."""
 	# Defined at module level (not nested) so that it can also be sent to
 	# workers when the multiprocessing start method pickles the initializer.
 	global G_gms
 	G_gms = gms_toshare


 def _clean_and_normalize_gram_matrix(Kmatrix, y):
 	"""Drop graphs whose self-kernel is zero, then normalize the gram matrix.

 	Returns the normalized matrix, the targets trimmed accordingly and the
 	number of removed graphs.
 	"""
 	keep = Kmatrix.diagonal() != 0
 	nb_g_ignore = int(len(keep) - np.count_nonzero(keep))
 	if nb_g_ignore > 0:
 		Kmatrix = Kmatrix[keep][:, keep]
 		y = [target for target, kept in zip(y, keep) if kept]
 	# k(i, j) / sqrt(k(i, i) * k(j, j)), computed on the upper triangle and
 	# mirrored to the lower one, so the result is always symmetric.
 	# @todo: works only for undirected graph?
 	diag = Kmatrix.diagonal()
 	upper = np.triu(Kmatrix) / np.sqrt(np.outer(diag, diag))
 	Kmatrix = upper + np.triu(upper, 1).T
 	return Kmatrix, y, nb_g_ignore


 def _run_nested_cv(param_list_pre_revised, param_list, gram_matrices, y,
 		model_type, NUM_TRIALS, n_jobs, verbose):
 	"""Run the NUM_TRIALS outer trials in a process pool and collect their scores."""
 	pool = Pool(processes=n_jobs, initializer=_init_worker_gms,
 			initargs=(gram_matrices,))
 	trial_do_partial = partial(parallel_trial_do, param_list_pre_revised,
 			param_list, y, model_type)
 	train_pref = []
 	val_pref = []
 	test_pref = []
 	try:
 		iterator = pool.imap_unordered(trial_do_partial, range(NUM_TRIALS), 1)
 		if verbose:
 			iterator = tqdm(iterator, desc='cross validation', file=sys.stdout)
 		for o1, o2, o3 in iterator:
 			train_pref.append(o1)
 			val_pref.append(o2)
 			test_pref.append(o3)
 		pool.close()
 	except BaseException:
 		# do not leave worker processes behind when a trial fails.
 		pool.terminate()
 		raise
 	finally:
 		pool.join()
 	return train_pref, val_pref, test_pref


 def _summarize_cv_scores(train_pref, val_pref, test_pref,
 		param_list_pre_revised, param_list, model_type, verbose):
 	"""Average the trial scores, select the best hyper-parameters and report them.

 	Returns a dictionary with the score statistics and the selection, and the
 	text to append to the log.
 	"""
 	if verbose:
 		print()
 		print('4. Getting final performance...')
 	log = '\nIII. Performance.\n\n'
 	# averages and confidences of performances on outer trials for each combination of parameters
 	average_train_scores = np.mean(train_pref, axis=0)
 	average_val_scores = np.mean(val_pref, axis=0)
 	average_perf_scores = np.mean(test_pref, axis=0)
 	# sample std is used here
 	std_train_scores = np.std(train_pref, axis=0, ddof=1)
 	std_val_scores = np.std(val_pref, axis=0, ddof=1)
 	std_perf_scores = np.std(test_pref, axis=0, ddof=1)

 	# regression scores are errors (lower is better), classification scores
 	# are accuracies (higher is better).
 	if model_type == 'regression':
 		best_val_perf = np.amin(average_val_scores)
 	else:
 		best_val_perf = np.amax(average_val_scores)
 	# among the settings reaching the best validation performance, keep those
 	# with the smallest validation std. The std search is restricted to these
 	# candidates so that a setting with the same std but a worse performance
 	# can never be selected.
 	best_mask = average_val_scores == best_val_perf
 	min_val_std = np.amin(std_val_scores[best_mask])
 	if np.isnan(min_val_std):
 		# std is undefined (e.g. a single trial): keep all best candidates.
 		best_params_index = np.where(best_mask)
 	else:
 		best_params_index = np.where(
 				best_mask & (std_val_scores == min_val_std))
 	best_pairs = list(zip(best_params_index[0], best_params_index[1]))
 	best_params_out = [param_list_pre_revised[i] for i, _ in best_pairs]
 	best_params_in = [param_list[j] for _, j in best_pairs]
 	if verbose:
 		print('best_params_out: ', best_params_out)
 		print('best_params_in: ', best_params_in)
 		print()
 		print('best_val_perf: ', best_val_perf)
 		print('best_val_std: ', min_val_std)
 	log += 'best settings of hyper-params to build gram matrix: %s\n' % best_params_out
 	log += 'best settings of other hyper-params: %s\n\n' % best_params_in
 	log += 'best_val_perf: %s\n' % best_val_perf
 	log += 'best_val_std: %s\n' % min_val_std

 	final_performance = [average_perf_scores[i][j] for i, j in best_pairs]
 	final_confidence = [std_perf_scores[i][j] for i, j in best_pairs]
 	if verbose:
 		print('final_performance: ', final_performance)
 		print('final_confidence: ', final_confidence)
 	log += 'final_performance: %s\n' % final_performance
 	log += 'final_confidence: %s\n' % final_confidence
 	train_performance = [average_train_scores[i][j] for i, j in best_pairs]
 	train_std = [std_train_scores[i][j] for i, j in best_pairs]
 	if verbose:
 		print('train_performance: %s' % train_performance)
 		print('train_std: ', train_std)
 	log += 'train_performance: %s\n' % train_performance
 	log += 'train_std: %s\n\n' % train_std

 	perf = {
 		'average_train_scores': average_train_scores,
 		'average_val_scores': average_val_scores,
 		'average_perf_scores': average_perf_scores,
 		'std_train_scores': std_train_scores,
 		'std_val_scores': std_val_scores,
 		'std_perf_scores': std_perf_scores,
 		'best_params_index': best_params_index,
 		'final_performance': final_performance,
 		'final_confidence': final_confidence,
 	}
 	return perf, log


 def _gram_matrix_time_stats(gram_matrix_time, best_params_index):
 	"""Return mean/std of all gram matrix times and of the selected ones."""
 	average_gram_matrix_time = np.mean(gram_matrix_time)
 	std_gram_matrix_time = np.std(gram_matrix_time, ddof=1) if len(gram_matrix_time) > 1 else 0
 	best_gram_matrix_time = [
 		gram_matrix_time[i] for i in best_params_index[0]
 	]
 	ave_bgmt = np.mean(best_gram_matrix_time)
 	std_bgmt = np.std(best_gram_matrix_time, ddof=1) if len(best_gram_matrix_time) > 1 else 0
 	return average_gram_matrix_time, std_gram_matrix_time, ave_bgmt, std_bgmt


 def _prepend_to_log_file(log_path, text):
 	"""Write text to log_path, placing it before any content already there."""
 	previous = ''
 	if os.path.exists(log_path):
 		with open(log_path, 'r') as f:
 			previous = '\n\n\n' + f.read()
 	with open(log_path, 'w') as f:
 		f.write(text + previous)


 def trial_do(param_list_pre_revised, param_list, gram_matrices, y, model_type, trial): # Test set level

 #	# get gram matrices from global variables.
 #	gram_matrices = np.reshape(G_gms.copy(), G_gms_shape, order='C')
 	
 	# Arrays to store scores
 	train_pref = np.zeros((len(param_list_pre_revised), len(param_list)))
 	val_pref = np.zeros((len(param_list_pre_revised), len(param_list)))
 	test_pref = np.zeros((len(param_list_pre_revised), len(param_list)))

 	# randomness added to seeds of split function below. "high" is "size" times
 	# 10 so that at least 10 different random output will be yielded. Remove
 	# these lines if identical outputs is required.
 	rdm_out = np.random.RandomState(seed=None)
 	rdm_seed_out_l = rdm_out.uniform(high=len(param_list_pre_revised) * 10, 
 								   size=len(param_list_pre_revised))
 #	print(trial, rdm_seed_out_l)
 #	print()
 	# loop for each outer param tuple
 	for index_out, params_out in enumerate(param_list_pre_revised):
 		# get gram matrices from global variables.
 #		gm_now = G_gms[index_out * G_gms_shape[1] * G_gms_shape[2]:(index_out + 1) * G_gms_shape[1] * G_gms_shape[2]]
 #		gm_now = np.reshape(gm_now.copy(), (G_gms_shape[1], G_gms_shape[2]), order='C')
 		gm_now = gram_matrices[index_out].copy()
 	
 		# split gram matrix and y to app and test sets.
 		indices = range(len(y))
 		# The argument "random_state" in function "train_test_split" can not be
 		# set to None, because it will use RandomState instance used by 
 		# np.random, which is possible for multiple subprocesses to inherit the
 		# same seed if they forked at the same time, leading to identical 
 		# random variates for different subprocesses. Instead, we use "trial" 
 		# and "index_out" parameters to generate different seeds for different 
 		# trials/subprocesses and outer loops. "rdm_seed_out_l" is used to add 
 		# randomness into seeds, so that it yields a different output every 
 		# time the program is run. To yield identical outputs every time,
 		# remove the second line below. Same method is used to the "KFold"
 		# function in the inner loop.
 		rdm_seed_out = (trial + 1) * (index_out + 1)
 		rdm_seed_out = (rdm_seed_out + int(rdm_seed_out_l[index_out])) % (2 ** 32 - 1)
 #		print(trial, rdm_seed_out)
 		X_app, X_test, y_app, y_test, idx_app, idx_test = train_test_split(
 			gm_now, y, indices, test_size=0.1, 
 			random_state=rdm_seed_out, shuffle=True)
 #		print(trial, idx_app, idx_test)
 #		print()
 		X_app = X_app[:, idx_app]
 		X_test = X_test[:, idx_app]
 		y_app = np.array(y_app)
 		y_test = np.array(y_test)

 		rdm_seed_in_l = rdm_out.uniform(high=len(param_list) * 10, 
 								   size=len(param_list))
 		# loop for each inner param tuple
 		for index_in, params_in in enumerate(param_list):
 #			if trial == 0:
 #				print(index_out, index_in)
 #				print('params_in: ', params_in)
 #			st = time.time()
 			rdm_seed_in = (trial + 1) * (index_out + 1) * (index_in + 1)
 #			print("rdm_seed_in1: ", trial, index_in, rdm_seed_in)
 			rdm_seed_in = (rdm_seed_in + int(rdm_seed_in_l[index_in])) % (2 ** 32 - 1)
 #			print("rdm_seed_in2: ", trial, index_in, rdm_seed_in)
 			inner_cv = KFold(n_splits=10, shuffle=True, random_state=rdm_seed_in)
 			current_train_perf = []
 			current_valid_perf = []
 			current_test_perf = [] 

 			# For regression use the Kernel Ridge method
 #			try:
 			if model_type == 'regression':
 				kr = KernelRidge(kernel='precomputed', **params_in)
 				# loop for each split on validation set level
 				# validation set level
 				for train_index, valid_index in inner_cv.split(X_app):
 #					print("train_index, valid_index: ", trial, index_in, train_index, valid_index)
 #					if trial == 0:
 #						print('train_index: ', train_index)
 #						print('valid_index: ', valid_index)
 #						print('idx_test: ', idx_test)
 #						print('y_app[train_index]: ', y_app[train_index])
 #						print('X_app[train_index, :][:, train_index]: ', X_app[train_index, :][:, train_index])
 #						print('X_app[valid_index, :][:, train_index]: ', X_app[valid_index, :][:, train_index])
 					kr.fit(X_app[train_index, :][:, train_index],
 						   y_app[train_index])

 					# predict on the train, validation and test set
 					y_pred_train = kr.predict(
 						X_app[train_index, :][:, train_index])
 					y_pred_valid = kr.predict(
 						X_app[valid_index, :][:, train_index])
 #					if trial == 0:	 
 #						print('y_pred_valid: ', y_pred_valid)
 #						print()
 					y_pred_test = kr.predict(
 						X_test[:, train_index])

 					# root mean squared errors
 					current_train_perf.append(
 						np.sqrt(
 							mean_squared_error(
 								y_app[train_index], y_pred_train)))
 					current_valid_perf.append(
 						np.sqrt(
 							mean_squared_error(
 								y_app[valid_index], y_pred_valid)))
 #					if trial == 0:
 #						print(mean_squared_error(
 #								y_app[valid_index], y_pred_valid))
 					current_test_perf.append(
 						np.sqrt(
 							mean_squared_error(
 								y_test, y_pred_test)))
 			# For clcassification use SVM
 			else:
 				svc = SVC(kernel='precomputed', cache_size=200, 
 						  verbose=False, **params_in)
 				# loop for each split on validation set level
 				# validation set level
 				for train_index, valid_index in inner_cv.split(X_app):
 #						np.savez("bug.npy",X_app[train_index, :][:, train_index],y_app[train_index])
 #					if trial == 0:
 #						print('train_index: ', train_index)
 #						print('valid_index: ', valid_index)
 #						print('idx_test: ', idx_test)
 #						print('y_app[train_index]: ', y_app[train_index])
 #						print('X_app[train_index, :][:, train_index]: ', X_app[train_index, :][:, train_index])
 #						print('X_app[valid_index, :][:, train_index]: ', X_app[valid_index, :][:, train_index])
 					svc.fit(X_app[train_index, :][:, train_index],
 						   y_app[train_index])
 					
 					# predict on the train, validation and test set
 					y_pred_train = svc.predict(
 						X_app[train_index, :][:, train_index])
 					y_pred_valid = svc.predict(
 						X_app[valid_index, :][:, train_index])
 					y_pred_test = svc.predict(
 						X_test[:, train_index])

 					# accuracy scores
 					current_train_perf.append(
 						accuracy_score(y_app[train_index],
 									   y_pred_train))
 					current_valid_perf.append(
 						accuracy_score(y_app[valid_index],
 									   y_pred_valid))
 					current_test_perf.append(
 						accuracy_score(y_test, y_pred_test))
 #			except ValueError:
 #				print(sys.exc_info()[0])
 #				print(params_out, params_in)

 			# average performance on inner splits
 			train_pref[index_out][index_in] = np.mean(
 				current_train_perf)
 			val_pref[index_out][index_in] = np.mean(
 				current_valid_perf)
 			test_pref[index_out][index_in] = np.mean(
 				current_test_perf)
 #			print(time.time() - st)
 #	if trial == 0:
 #		print('val_pref: ', val_pref)
 #		print('test_pref: ', test_pref)

 	return train_pref, val_pref, test_pref

 def parallel_trial_do(param_list_pre_revised, param_list, y, model_type, trial):
 	"""Run one trial via ``trial_do`` using the module-level gram matrices.

 	``G_gms`` is not a parameter: it is looked up as a global when this
 	function runs (presumably installed per worker process before the pool
 	starts mapping trials -- confirm against the caller).

 	Returns
 	-------
 	tuple
 		``(train_pref, val_pref, test_pref)`` exactly as produced by
 		``trial_do``.
 	"""
 	perf_train, perf_valid, perf_test = trial_do(
 		param_list_pre_revised, param_list, G_gms, y, model_type, trial)
 	return perf_train, perf_valid, perf_test


 def compute_gram_matrices(dataset, y, estimator, param_list_precomputed,
 		output_dir, ds_name, n_jobs=1, str_fw='', verbose=True):
 	"""Compute, clean and normalize one gram matrix per parameter setting.

 	For every dict in ``param_list_precomputed`` the estimator is called on a
 	copy of ``dataset``. Graphs whose kernel with themselves (diagonal entry)
 	is zero are removed from the matrix, the remaining entries are normalized
 	as ``K[i][j] / sqrt(K[i][i] * K[j][j])``, and matrices left with fewer
 	than two graphs or containing NaN are ignored. Each accepted matrix is
 	appended to the log string and saved as an EPS figure.

 	Parameters
 	----------
 	dataset : sequence
 		Graphs handed to ``estimator``; ``dataset[:]`` is passed on each call.
 	y : list
 		Targets, indexed by the estimator's optional third return value.
 	estimator : callable
 		Called as ``estimator(dataset[:], **params)``. Must return
 		``(gram_matrix, run_time)`` or ``(gram_matrix, run_time, idx_trim)``.
 	param_list_precomputed : list of dict
 		Parameter settings. Each dict gets an ``'n_jobs'`` entry set in place.
 	output_dir : string
 		Directory prefix of the figure files.
 	ds_name : string
 		Dataset name used in the figure file names.
 	n_jobs : int
 		Value stored under ``'n_jobs'`` in every parameter dict.
 	str_fw : string
 		Log text to which this function appends.
 	verbose : boolean
 		Whether to also print progress messages.

 	Returns
 	-------
 	tuple
 		``(gram_matrices, gram_matrix_time, param_list_pre_revised, y,
 		str_fw)``: the accepted matrices, their run times, their parameter
 		dicts, the (possibly trimmed) targets and the extended log text.
 	"""
 	gram_matrices = []  # accepted gram matrices
 	gram_matrix_time = []  # estimator run time of each accepted matrix
 	param_list_pre_revised = []  # parameter settings of the accepted matrices

 	# The estimator always receives the full dataset, so the indices it
 	# reports in idx_trim refer to the untrimmed targets. Trimming an already
 	# trimmed y again would pick wrong labels or raise IndexError.
 	y_full = y
 	nb_gm_ignore = 0  # number of gram matrices that are rejected
 	for idx, params_out in enumerate(param_list_precomputed):
 		params_out['n_jobs'] = n_jobs
 		rtn_data = estimator(dataset[:], **params_out)
 		Kmatrix = rtn_data[0]
 		current_run_time = rtn_data[1]
 		# for some kernels, some graphs in datasets may not meet the
 		# kernels' requirements for graph structure. These graphs are trimmed.
 		if len(rtn_data) == 3:
 			idx_trim = rtn_data[2]  # the index of trimmed graph list
 			y = [y_full[idxt] for idxt in idx_trim]  # trim y accordingly

 		Kmatrix_diag = Kmatrix.diagonal().copy()
 		# remove graphs whose kernels with themselves are zeros, and drop the
 		# same positions from the diagonal so that it stays aligned with the
 		# rows/columns that survive.
 		# NOTE(review): y is not trimmed for these removed graphs -- confirm
 		# how callers keep targets and matrices aligned.
 		zero_self_kernel = Kmatrix_diag == 0
 		nb_g_ignore = int(np.count_nonzero(zero_self_kernel))
 		if nb_g_ignore > 0:
 			kept = ~zero_self_kernel
 			Kmatrix = Kmatrix[kept][:, kept]
 			Kmatrix_diag = Kmatrix_diag[kept]
 		# normalization: divide by sqrt(K_ii * K_jj) on the upper triangle and
 		# mirror it into the lower triangle.
 		norm = np.sqrt(np.outer(Kmatrix_diag, Kmatrix_diag))
 		upper = np.triu(Kmatrix / norm)
 		Kmatrix = upper + np.triu(upper, 1).T

 		if verbose:
 			print()
 		# 'n_jobs' has just been inserted, so a plain dict is never empty
 		# here; the branch is kept as it was.
 		if params_out == {}:
 			if verbose:
 				print('the gram matrix is: ')
 			str_fw += 'the gram matrix is:\n\n'
 		else:
 			if verbose:
 				print('the gram matrix with parameters', params_out, 'is: ')
 			str_fw += 'the gram matrix with parameters %s is:\n\n' % params_out

 		if len(Kmatrix) < 2:
 			nb_gm_ignore += 1
 			if verbose:
 				print('ignored, as at most only one of all its diagonal value is non-zero.')
 			str_fw += 'ignored, as at most only one of all its diagonal value is non-zero.\n\n'
 		elif np.isnan(Kmatrix).any():
 			# the matrix contains elements that are not numbers
 			nb_gm_ignore += 1
 			if verbose:
 				print('ignored, as it contains elements that are not numbers.')
 			str_fw += 'ignored, as it contains elements that are not numbers.\n\n'
 		else:
 			str_fw += np.array2string(Kmatrix, separator=',') + '\n\n'

 			fig_file_name = output_dir + '/GM[ds]' + ds_name
 			if params_out != {}:
 				fig_file_name += '[params]' + str(idx)
 			plt.imshow(Kmatrix)
 			plt.colorbar()
 			plt.savefig(fig_file_name + '.eps', format='eps', dpi=300)
 			plt.clf()  # reset the shared figure for the next matrix
 			gram_matrices.append(Kmatrix)
 			gram_matrix_time.append(current_run_time)
 			param_list_pre_revised.append(params_out)
 			if nb_g_ignore > 0:
 				if verbose:
 					print(', where %d graphs are ignored as their graph kernels with themselves are zeros.' % nb_g_ignore)
 				str_fw += ', where %d graphs are ignored as their graph kernels with themselves are zeros.' % nb_g_ignore

 	summary = '{} gram matrices are calculated, {} of which are ignored.'.format(
 		len(param_list_precomputed), nb_gm_ignore)
 	if verbose:
 		print()
 		print(summary)
 	str_fw += summary + '\n\n'
 	str_fw += 'serial numbers of gram matrix figures and their corresponding parameters settings:\n\n'
 	str_fw += ''.join(
 		'{}: {}\n'.format(idx, params_out)
 		for idx, params_out in enumerate(param_list_precomputed))

 	return gram_matrices, gram_matrix_time, param_list_pre_revised, y, str_fw


 def read_gram_matrices_from_file(output_dir, ds_name):
 	"""Load stored gram matrices, their parameter settings and the targets.

 	Reads ``<output_dir>/<ds_name>.gm.npz`` and pulls the members ``'gms'``,
 	``'params'`` and ``'y'`` out of it.

 	Parameters
 	----------
 	output_dir : string
 		Directory that contains the archive.
 	ds_name : string
 		Dataset name; the archive is named ``ds_name + '.gm.npz'``.

 	Returns
 	-------
 	tuple
 		``(gram_matrices, param_list_pre_revised, y)`` where the first two
 		are the arrays stored under ``'gms'`` and ``'params'`` and ``y`` is
 		the ``'y'`` member converted with ``tolist()``.
 	"""
 	# The object returned by np.load for an .npz archive holds the file open
 	# until it is closed; members are read into memory when accessed, so the
 	# archive can be closed as soon as the three members have been fetched.
 	with np.load(output_dir + '/' + ds_name + '.gm.npz') as gmfile:
 		gram_matrices = gmfile['gms']  # gram matrices for all kept param settings
 		param_list_pre_revised = gmfile['params']  # the kept param settings
 		y = gmfile['y'].tolist()
 	return gram_matrices, param_list_pre_revised, y


 def printResultsInTable(param_list, param_list_pre_revised, average_val_scores,
 						std_val_scores, average_perf_scores, std_perf_scores,
 						average_train_scores, std_train_scores, gram_matrix_time,
 						model_type, verbose):
 	"""Format the performance of every hyper-parameter combination as a table.

 	One row is produced per pair of an inner setting (``param_list``) and a
 	precomputed setting (``param_list_pre_revised``); the precomputed
 	settings vary fastest. Score arrays are indexed as
 	``scores[index_out][index_in]``.

 	Parameters
 	----------
 	param_list : list of dict
 		Inner settings. Must contain ``'alpha'`` when ``model_type`` is
 		``'regression'`` and ``'C'`` otherwise. The dicts are not modified.
 	param_list_pre_revised : iterable of dict
 		Settings used to compute the gram matrices.
 	average_val_scores, std_val_scores : 2-D indexable
 		Mean and standard deviation of the validation performance.
 	average_perf_scores, std_perf_scores : 2-D indexable
 		Mean and standard deviation of the test performance.
 	average_train_scores, std_train_scores : 2-D indexable
 		Mean and standard deviation of the training performance.
 	gram_matrix_time : indexable
 		Time per precomputed setting, indexed by ``index_out``.
 	model_type : string
 		``'regression'`` selects the ``'alpha'`` key, anything else ``'C'``.
 	verbose : boolean
 		If true, an empty line is printed.

 	Returns
 	-------
 	string
 		The table preceded by a title line and followed by a blank line.
 	"""
 	from collections import OrderedDict
 	from tabulate import tabulate

 	penalty_key = 'alpha' if model_type == 'regression' else 'C'
 	# Format the penalty on copies. Writing the formatted string back into
 	# the caller's dicts would corrupt the parameter grid and make a second
 	# call fail, since '{:.2e}' cannot format a string.
 	params_in_fmt = [
 		{**param_in, penalty_key: '{:.2e}'.format(param_in[penalty_key])}
 		for param_in in param_list
 	]
 	# (index_out, index_in) of every row, in row order
 	cells = [
 		(index_out, index_in)
 		for index_in, _ in enumerate(param_list)
 		for index_out, _ in enumerate(param_list_pre_revised)
 	]

 	def mean_std_column(means, stds):
 		# one 'mean±std' cell per row
 		return ['{:.2f}±{:.2f}'.format(means[index_out][index_in],
 									   stds[index_out][index_in])
 				for index_out, index_in in cells]

 	# columns are listed in the order in which they are displayed
 	table_dict = OrderedDict([
 		('params', [{**param_out, **param_in}
 					for param_in in params_in_fmt
 					for param_out in param_list_pre_revised]),
 		('train_perf', mean_std_column(average_train_scores,
 									   std_train_scores)),
 		('valid_perf', mean_std_column(average_val_scores, std_val_scores)),
 		('test_perf', mean_std_column(average_perf_scores, std_perf_scores)),
 		('gram_matrix_time', ['{:.2f}'.format(gram_matrix_time[index_out])
 							  for index_out, _ in cells]),
 	])
 	if verbose:
 		print()
 	tb_print = tabulate(table_dict, headers='keys')
 	return 'table of performance v.s. hyper-params:\n\n%s\n\n' % tb_print