# Review notes for this patch (commentary only; the patch text below is unchanged).
#
# Purpose: adds traceback printing to two `except Exception` handlers, changes
# the HeteroMap default `device` from "cuda:0" to "cpu", changes the fallback
# column name in `hetero_mapping` from "" to "Unknown Feature", switches
# FeatureTokenizer to positional column selection, and adds several print()
# calls to the feature extractor.
#
# NOTE(review): in this copy the line breaks inside the hunks have been
# collapsed into spaces, so the text cannot be applied as a patch as-is.
#
# NOTE(review), feature_extractor.py / FeatureTokenizer hunk:
#   * `np.random.shuffle(...)` shuffles its argument in place and returns None,
#     so when `shuffle` is true `index_cols` is None and
#     `[x.columns[i] for i in index_cols]` cannot iterate it. The removed
#     `num_cols = ... np.random.shuffle(...)` line had the same pattern.
#   * The first added `num_cols = ...` assignment is overwritten two lines
#     later by `num_cols = [x.columns[i] for i in index_cols]`.
#
# NOTE(review), feature_extractor.py / NumEmbedding and FeatureProcessor hunks:
#   the added `print(np.array(...).shape, ...)` lines (including the "?1",
#   "?2", "?3" markers) look like leftover debugging output. Presumably
#   `np.array(tensor)` also fails for tensors that require grad or live on a
#   non-CPU device -- confirm before merging.
#
# NOTE(review), searcher.py hunk: the new warning drops `{e}` from the message
#   and is an f-string with no placeholders; the error detail now only reaches
#   stderr through `traceback.print_exc()`, not the module logger.
#
# NOTE(review), organizer/__init__.py hunk: `traceback.print_exc()` writes
#   outside the module logger; the existing `logger.warning` already
#   reports `{err}`.
#
# NOTE(review), hetero_map/__init__.py hunks:
#   * The default `device` changes from "cuda:0" to "cpu"; callers relying on
#     the default will now get the CPU.
#   * Every feature index missing from `features` now gets the same label
#     "Unknown Feature", so `cols` may contain duplicate column names; the
#     move to `x.iloc(axis=1)[index_cols]` in the tokenizer presumably exists
#     to cope with that -- confirm.
#
diff --git a/learnware/market/heterogeneous/organizer/__init__.py b/learnware/market/heterogeneous/organizer/__init__.py index f397726..1e2fd23 100644 --- a/learnware/market/heterogeneous/organizer/__init__.py +++ b/learnware/market/heterogeneous/organizer/__init__.py @@ -1,4 +1,5 @@ import os +import traceback import pandas as pd from collections import defaultdict from typing import List, Tuple, Union @@ -125,6 +126,7 @@ class HeteroMapTableOrganizer(EasyOrganizer): hetero_spec.save(save_path) except Exception as err: + traceback.print_exc() logger.warning(f"Learnware {idx} generate HeteroMapTableSpecification failed! Due to {err}") def _get_hetero_learnware_ids(self, ids: Union[str, List[str]]) -> List[str]: diff --git a/learnware/market/heterogeneous/organizer/hetero_map/__init__.py b/learnware/market/heterogeneous/organizer/hetero_map/__init__.py index 97a92da..9afe1b5 100644 --- a/learnware/market/heterogeneous/organizer/hetero_map/__init__.py +++ b/learnware/market/heterogeneous/organizer/hetero_map/__init__.py @@ -39,7 +39,7 @@ class HeteroMap(nn.Module): temperature=10, base_temperature=10, activation="relu", - device="cuda:0", + device="cpu", **kwargs, ): """ @@ -174,7 +174,7 @@ class HeteroMap(nn.Module): def hetero_mapping(self, rkme_spec: RKMETableSpecification, features: dict) -> HeteroMapTableSpecification: hetero_spec = HeteroMapTableSpecification() data = rkme_spec.get_z() - cols = [features.get(str(i), "") for i in range(data.shape[1])] + cols = [features.get(str(i), "Unknown Feature") for i in range(data.shape[1])] hetero_input_df = pd.DataFrame(data=data, columns=cols) hetero_embedding = self._extract_batch_features(hetero_input_df) hetero_spec.generate_stat_spec_from_system(hetero_embedding, rkme_spec) diff --git a/learnware/market/heterogeneous/organizer/hetero_map/feature_extractor.py b/learnware/market/heterogeneous/organizer/hetero_map/feature_extractor.py index 40a019c..9b6928f 100644 --- 
a/learnware/market/heterogeneous/organizer/hetero_map/feature_extractor.py +++ b/learnware/market/heterogeneous/organizer/hetero_map/feature_extractor.py @@ -53,6 +53,7 @@ class NumEmbedding(nn.Module): x_ts : Any numerical features, (bs, emb_dim) """ + print(np.array(col_emb).shape, np.array(x_ts).shape) col_emb = col_emb.unsqueeze(0).expand((x_ts.shape[0], -1, -1)) feat_emb = col_emb * x_ts.unsqueeze(-1).float() + self.num_bias return feat_emb @@ -99,13 +100,18 @@ class FeatureTokenizer: } """ encoded_inputs = {"x_num": None, "num_col_input_ids": None} - num_cols = x.columns.tolist() if not shuffle else np.random.shuffle(x.columns.tolist()) - x_num = x[num_cols].fillna(0) + num_cols = x.columns.tolist() if not shuffle else np.random.shuffle(x.columns.tolist()) + index_cols = ( + [i for i in range(len(x.columns))] if not shuffle else np.random.shuffle([i for i in range(len(x.columns))]) + ) + num_cols = [x.columns[i] for i in index_cols] + x_num = x.iloc(axis=1)[index_cols].fillna(0) if keep_input_grad: x_num_ts = torch.tensor(x_num.values, dtype=float, requires_grad=True) # keep the grad else: x_num_ts = torch.tensor(x_num.values, dtype=float) + num_col_ts = self.tokenizer( num_cols, padding=True, @@ -195,9 +201,11 @@ class FeatureProcessor(nn.Module): **kwargs, ) -> Tensor: x_num = x_num.to(self.device) - + print("?1", np.array(x_num).shape, np.array(num_col_input_ids).shape) num_col_emb = self.word_embedding(num_col_input_ids.to(self.device)) + print("?2", np.array(x_num).shape, np.array(num_col_emb).shape) num_col_emb = self._avg_embedding_by_mask(num_col_emb, num_att_mask) + print("?3", np.array(x_num).shape, np.array(num_col_emb).shape) num_feat_embedding = self.num_embedding(num_col_emb, x_num) num_feat_embedding = self.align_layer(num_feat_embedding).float() diff --git a/learnware/market/heterogeneous/searcher.py b/learnware/market/heterogeneous/searcher.py index 3605609..5161126 100644 --- a/learnware/market/heterogeneous/searcher.py +++ 
b/learnware/market/heterogeneous/searcher.py @@ -1,5 +1,5 @@ from typing import Tuple, List - +import traceback from ...learnware import Learnware from ...logger import get_module_logger from ..base import BaseUserInfo @@ -34,9 +34,8 @@ class HeteroSearcher(EasySearcher): return True except Exception as e: - logger.warning( - f"Invalid heterogeneous search information provided. Use homogeneous search instead. Error: {e}" - ) + traceback.print_exc() + logger.warning(f"Invalid heterogeneous search information provided. Use homogeneous search instead.") return False def __call__( diff --git a/learnware/reuse/feature_augment.py b/learnware/reuse/feature_augment.py index ea3d27d..01daae3 100644 --- a/learnware/reuse/feature_augment.py +++ b/learnware/reuse/feature_augment.py @@ -12,8 +12,8 @@ class FeatureAugmentReuser(BaseReuser): FeatureAugmentReuser is a class for augmenting features using predictions of a given learnware model and applying regression or classification on the augmented dataset. This class supports two modes: - - "regression": Uses RidgeCV for regression tasks. - - "classification": Uses LogisticRegressionCV for classification tasks. + - "regression": Uses RidgeCV for regression tasks. + - "classification": Uses LogisticRegressionCV for classification tasks. """ def __init__(self, learnware_list: List[Learnware] = None, mode: str = None):