diff --git a/autogl/module/feature/__init__.py b/autogl/module/feature/__init__.py index 7f3746d..dbdea86 100644 --- a/autogl/module/feature/__init__.py +++ b/autogl/module/feature/__init__.py @@ -33,6 +33,9 @@ from ._graph import ( from ._selectors import ( FilterConstant, GBDTFeatureSelector ) +from ._auto_feature import ( + IdentityFeature, OnlyConstFeature, AutoFeatureEngineer +) __all__ = [ "BaseFeatureEngineer", @@ -61,5 +64,8 @@ __all__ = [ "NXGlobalEfficiency", "NXIsEulerian", "FilterConstant", - "GBDTFeatureSelector" + "GBDTFeatureSelector", + "IdentityFeature", + "OnlyConstFeature", + "AutoFeatureEngineer" ] \ No newline at end of file diff --git a/autogl/module/feature/_auto_feature.py b/autogl/module/feature/_auto_feature.py index 113eb65..8e4e58a 100644 --- a/autogl/module/feature/_auto_feature.py +++ b/autogl/module/feature/_auto_feature.py @@ -166,7 +166,7 @@ class AutoFeatureEngineer(BaseFeatureEngineer): verbosity: int = 0, *args, **kwargs ): - super(AutoFeatureEngineer, self).__init__(multi_graph=False) + super(AutoFeatureEngineer, self).__init__() self._ops = [op_sum, op_mean, op_max, op_min] self._sim = cosine_similarity self._fixlen = fix_length @@ -208,7 +208,7 @@ class AutoFeatureEngineer(BaseFeatureEngineer): for u, v in homogeneous_static_graph.edges.connections.t().numpy(): neighbours[u].append(v) self.__neighbours: _typing.Sequence[np.ndarray] = tuple( - [np.ndarray(v) for v in neighbours] + [np.array(v) for v in neighbours] ) x: np.ndarray = _original_features.numpy() diff --git a/autogl/module/feature/_base_feature_engineer/_base_feature_engineer.py b/autogl/module/feature/_base_feature_engineer/_base_feature_engineer.py index 4920566..e3b2f97 100644 --- a/autogl/module/feature/_base_feature_engineer/_base_feature_engineer.py +++ b/autogl/module/feature/_base_feature_engineer/_base_feature_engineer.py @@ -29,7 +29,11 @@ class _ComposedFeatureEngineer(_AbstractBaseFeatureEngineer): return dataset def fit_transform(self, dataset, inplace: bool = True): - return self.fit_transform(dataset, inplace) + for fe in self.fe_components: + dataset = fe.fit(dataset) + for fe in self.fe_components: + dataset = fe.fit(dataset) + return dataset def __init__(self, feature_engineers: _typing.Iterable[_AbstractBaseFeatureEngineer]): self.__fe_components: _typing.List[_AbstractBaseFeatureEngineer] = [] diff --git a/autogl/module/feature/_base_feature_engineer/_base_feature_engineer_dgl.py b/autogl/module/feature/_base_feature_engineer/_base_feature_engineer_dgl.py index 695099b..cf29284 100644 --- a/autogl/module/feature/_base_feature_engineer/_base_feature_engineer_dgl.py +++ b/autogl/module/feature/_base_feature_engineer/_base_feature_engineer_dgl.py @@ -40,7 +40,7 @@ class BaseFeatureEngineer( with torch.no_grad(): for i, data in enumerate(dataset): dataset[i] = self.__postprocess( - self._postprocess(self._transform(self._fit(self._preprocess(self.__preprocess(data))))) + self._postprocess(self._fit(self._preprocess(self.__preprocess(data)))) ) return dataset diff --git a/autogl/module/feature/_base_feature_engineer/_base_feature_engineer_pyg.py b/autogl/module/feature/_base_feature_engineer/_base_feature_engineer_pyg.py index 5b213eb..74d42ee 100644 --- a/autogl/module/feature/_base_feature_engineer/_base_feature_engineer_pyg.py +++ b/autogl/module/feature/_base_feature_engineer/_base_feature_engineer_pyg.py @@ -25,7 +25,7 @@ class BaseFeatureEngineer( with torch.no_grad(): for i, data in enumerate(dataset): dataset[i] = self.__postprocess( - self._postprocess(self._transform(self._fit(self._preprocess(self.__preprocess(data))))) + self._postprocess(self._fit(self._preprocess(self.__preprocess(data)))) ) return dataset diff --git a/autogl/module/feature/_selectors/_basic.py b/autogl/module/feature/_selectors/_basic.py index d7d93db..413d3a2 100644 --- a/autogl/module/feature/_selectors/_basic.py +++ b/autogl/module/feature/_selectors/_basic.py @@ -16,16 +16,12 @@ class BaseFeatureSelector(BaseFeatureEngineer): ) -> GeneralStaticGraph: if ( 'x' in static_graph.nodes.data and - self._selection not in (Ellipsis, None) and - isinstance(self._selection, torch.Tensor) and - torch.is_tensor(self._selection) and self._selection.dim() == 1 + isinstance(self._selection, (torch.Tensor, np.ndarray)) ): static_graph.nodes.data['x'] = static_graph.nodes.data['x'][:, self._selection] if ( 'feat' in static_graph.nodes.data and - self._selection not in (Ellipsis, None) and - isinstance(self._selection, torch.Tensor) and - torch.is_tensor(self._selection) and self._selection.dim() == 1 + isinstance(self._selection, (torch.Tensor, np.ndarray)) ): static_graph.nodes.data['feat'] = static_graph.nodes.data['feat'][:, self._selection] return static_graph diff --git a/autogl/module/feature/_selectors/_gbdt.py b/autogl/module/feature/_selectors/_gbdt.py index 88cb9c6..f35974f 100644 --- a/autogl/module/feature/_selectors/_gbdt.py +++ b/autogl/module/feature/_selectors/_gbdt.py @@ -41,6 +41,7 @@ def _gbdt_generator( ) num_classes: int = torch.max(data.y).item() + 1 + parameters["num_class"] = num_classes __optimizer_parameters = { "num_boost_round": 100, "early_stopping_rounds": 5, @@ -78,7 +79,7 @@ def _gbdt_generator( train_x = pd.DataFrame(x, columns=feature_index, index=None) dtrain = lightgbm.Dataset(train_x, label=label) clf = lightgbm.train( - train_set=dtrain, params=params, + train_set=dtrain, params=parameters, **__optimizer_parameters )