Browse Source

[FIX] fix bugs for feature_embedding

tags/v0.3.2
bxdd 2 years ago
parent
commit
5cc6c1bbcb
5 changed files with 20 additions and 11 deletions
  1. learnware/market/heterogeneous/organizer/__init__.py (+2, -0)
  2. learnware/market/heterogeneous/organizer/hetero_map/__init__.py (+2, -2)
  3. learnware/market/heterogeneous/organizer/hetero_map/feature_extractor.py (+11, -3)
  4. learnware/market/heterogeneous/searcher.py (+3, -4)
  5. learnware/reuse/feature_augment.py (+2, -2)

+ 2
- 0
learnware/market/heterogeneous/organizer/__init__.py View File

@@ -1,4 +1,5 @@
import os
import traceback
import pandas as pd
from collections import defaultdict
from typing import List, Tuple, Union
@@ -125,6 +126,7 @@ class HeteroMapTableOrganizer(EasyOrganizer):
hetero_spec.save(save_path)

except Exception as err:
traceback.print_exc()
logger.warning(f"Learnware {idx} generate HeteroMapTableSpecification failed! Due to {err}")

def _get_hetero_learnware_ids(self, ids: Union[str, List[str]]) -> List[str]:


+ 2
- 2
learnware/market/heterogeneous/organizer/hetero_map/__init__.py View File

@@ -39,7 +39,7 @@ class HeteroMap(nn.Module):
temperature=10,
base_temperature=10,
activation="relu",
device="cuda:0",
device="cpu",
**kwargs,
):
"""
@@ -174,7 +174,7 @@ class HeteroMap(nn.Module):
def hetero_mapping(self, rkme_spec: RKMETableSpecification, features: dict) -> HeteroMapTableSpecification:
hetero_spec = HeteroMapTableSpecification()
data = rkme_spec.get_z()
cols = [features.get(str(i), "") for i in range(data.shape[1])]
cols = [features.get(str(i), "Unknown Feature") for i in range(data.shape[1])]
hetero_input_df = pd.DataFrame(data=data, columns=cols)
hetero_embedding = self._extract_batch_features(hetero_input_df)
hetero_spec.generate_stat_spec_from_system(hetero_embedding, rkme_spec)


+ 11
- 3
learnware/market/heterogeneous/organizer/hetero_map/feature_extractor.py View File

@@ -53,6 +53,7 @@ class NumEmbedding(nn.Module):
x_ts : Any
numerical features, (bs, emb_dim)
"""
print(np.array(col_emb).shape, np.array(x_ts).shape)
col_emb = col_emb.unsqueeze(0).expand((x_ts.shape[0], -1, -1))
feat_emb = col_emb * x_ts.unsqueeze(-1).float() + self.num_bias
return feat_emb
@@ -99,13 +100,18 @@ class FeatureTokenizer:
}
"""
encoded_inputs = {"x_num": None, "num_col_input_ids": None}
num_cols = x.columns.tolist() if not shuffle else np.random.shuffle(x.columns.tolist())
x_num = x[num_cols].fillna(0)

num_cols = x.columns.tolist() if not shuffle else np.random.shuffle(x.columns.tolist())
index_cols = (
[i for i in range(len(x.columns))] if not shuffle else np.random.shuffle([i for i in range(len(x.columns))])
)
num_cols = [x.columns[i] for i in index_cols]
x_num = x.iloc(axis=1)[index_cols].fillna(0)
if keep_input_grad:
x_num_ts = torch.tensor(x_num.values, dtype=float, requires_grad=True) # keep the grad
else:
x_num_ts = torch.tensor(x_num.values, dtype=float)

num_col_ts = self.tokenizer(
num_cols,
padding=True,
@@ -195,9 +201,11 @@ class FeatureProcessor(nn.Module):
**kwargs,
) -> Tensor:
x_num = x_num.to(self.device)
print("?1", np.array(x_num).shape, np.array(num_col_input_ids).shape)
num_col_emb = self.word_embedding(num_col_input_ids.to(self.device))
print("?2", np.array(x_num).shape, np.array(num_col_emb).shape)
num_col_emb = self._avg_embedding_by_mask(num_col_emb, num_att_mask)
print("?3", np.array(x_num).shape, np.array(num_col_emb).shape)

num_feat_embedding = self.num_embedding(num_col_emb, x_num)
num_feat_embedding = self.align_layer(num_feat_embedding).float()


+ 3
- 4
learnware/market/heterogeneous/searcher.py View File

@@ -1,5 +1,5 @@
from typing import Tuple, List
import traceback
from ...learnware import Learnware
from ...logger import get_module_logger
from ..base import BaseUserInfo
@@ -34,9 +34,8 @@ class HeteroSearcher(EasySearcher):
return True

except Exception as e:
logger.warning(
f"Invalid heterogeneous search information provided. Use homogeneous search instead. Error: {e}"
)
traceback.print_exc()
logger.warning(f"Invalid heterogeneous search information provided. Use homogeneous search instead.")
return False

def __call__(


+ 2
- 2
learnware/reuse/feature_augment.py View File

@@ -12,8 +12,8 @@ class FeatureAugmentReuser(BaseReuser):
FeatureAugmentReuser is a class for augmenting features using predictions of a given learnware model and applying regression or classification on the augmented dataset.

This class supports two modes:
- "regression": Uses RidgeCV for regression tasks.
- "classification": Uses LogisticRegressionCV for classification tasks.
- "regression": Uses RidgeCV for regression tasks.
- "classification": Uses LogisticRegressionCV for classification tasks.
"""

def __init__(self, learnware_list: List[Learnware] = None, mode: str = None):


Loading…
Cancel
Save