| @@ -13,6 +13,7 @@ import os | |||||
| class Dataset(object): | class Dataset(object): | ||||
| def __init__(self, filename=None, filename_targets=None, **kwargs): | def __init__(self, filename=None, filename_targets=None, **kwargs): | ||||
| if filename is None: | if filename is None: | ||||
| self.__graphs = None | self.__graphs = None | ||||
| @@ -180,13 +181,13 @@ class Dataset(object): | |||||
| # return 0 | # return 0 | ||||
| def get_dataset_infos(self, keys=None): | |||||
| def get_dataset_infos(self, keys=None, params=None): | |||||
| """Computes and returns the structure and property information of the graph dataset. | """Computes and returns the structure and property information of the graph dataset. | ||||
| Parameters | Parameters | ||||
| ---------- | ---------- | ||||
| keys : list | |||||
| List of strings which indicate which informations will be returned. The | |||||
| keys : list, optional | |||||
| A list of strings which indicate which information will be returned. The | |||||
| possible choices includes: | possible choices includes: | ||||
| 'substructures': sub-structures graphs contains, including 'linear', 'non | 'substructures': sub-structures graphs contains, including 'linear', 'non | ||||
| @@ -241,7 +242,15 @@ class Dataset(object): | |||||
| 'class_number': number of classes. Only available for classification problems. | 'class_number': number of classes. Only available for classification problems. | ||||
| 'all_degree_entropy': the entropy of degree distribution of each graph. | |||||
| 'ave_degree_entropy': the average entropy of degree distribution of all graphs. | |||||
| All informations above will be returned if `keys` is not given. | All informations above will be returned if `keys` is not given. | ||||
| params : dict of dict, optional | |||||
| A dictionary which contains extra parameters for each possible | |||||
| element in ``keys``. | |||||
| Return | Return | ||||
| ------ | ------ | ||||
| @@ -276,6 +285,8 @@ class Dataset(object): | |||||
| 'node_attr_dim', | 'node_attr_dim', | ||||
| 'edge_attr_dim', | 'edge_attr_dim', | ||||
| 'class_number', | 'class_number', | ||||
| 'all_degree_entropy', | |||||
| 'ave_degree_entropy' | |||||
| ] | ] | ||||
| # dataset size | # dataset size | ||||
| @@ -420,6 +431,22 @@ class Dataset(object): | |||||
| self.__edge_attr_dim = self.__get_edge_attr_dim() | self.__edge_attr_dim = self.__get_edge_attr_dim() | ||||
| infos['edge_attr_dim'] = self.__edge_attr_dim | infos['edge_attr_dim'] = self.__edge_attr_dim | ||||
| # entropy of degree distribution. | |||||
| if 'all_degree_entropy' in keys: | |||||
| if params is not None and ('all_degree_entropy' in params) and ('base' in params['all_degree_entropy']): | |||||
| base = params['all_degree_entropy']['base'] | |||||
| else: | |||||
| base = None | |||||
| infos['all_degree_entropy'] = self.__compute_all_degree_entropy(base=base) | |||||
| if 'ave_degree_entropy' in keys: | |||||
| if params is not None and ('ave_degree_entropy' in params) and ('base' in params['ave_degree_entropy']): | |||||
| base = params['ave_degree_entropy']['base'] | |||||
| else: | |||||
| base = None | |||||
| infos['ave_degree_entropy'] = np.mean(self.__compute_all_degree_entropy(base=base)) | |||||
| return infos | return infos | ||||
| @@ -653,8 +680,7 @@ class Dataset(object): | |||||
| def __get_all_fill_factors(self): | def __get_all_fill_factors(self): | ||||
| """ | |||||
| Get fill factor, the number of non-zero entries in the adjacency matrix. | |||||
| """Get fill factor, the number of non-zero entries in the adjacency matrix. | |||||
| Returns | Returns | ||||
| ------- | ------- | ||||
| @@ -721,7 +747,30 @@ class Dataset(object): | |||||
| def __get_edge_attr_dim(self): | def __get_edge_attr_dim(self): | ||||
| return len(self.__edge_attrs) | return len(self.__edge_attrs) | ||||
| def __compute_all_degree_entropy(self, base=None): | |||||
| """Compute the entropy of degree distribution of each graph. | |||||
| Parameters | |||||
| ---------- | |||||
| base : float, optional | |||||
| The logarithmic base to use. The default is ``e`` (natural logarithm). | |||||
| Returns | |||||
| ------- | |||||
| degree_entropy : list of float | |||||
| The calculated entropy of the degree distribution of each graph. | |||||
| """ | |||||
| from gklearn.utils.stats import entropy | |||||
| degree_entropy = [] | |||||
| for g in self.__graphs: | |||||
| degrees = list(dict(g.degree()).values()) | |||||
| en = entropy(degrees, base=base) | |||||
| degree_entropy.append(en) | |||||
| return degree_entropy | |||||
| @property | @property | ||||
| def graphs(self): | def graphs(self): | ||||