diff --git a/examples/example_image/example_files/example_init.py b/examples/example_image/example_files/example_init.py index e75a116..b318ee8 100644 --- a/examples/example_image/example_files/example_init.py +++ b/examples/example_image/example_files/example_init.py @@ -8,6 +8,7 @@ import torch class Model(BaseModel): def __init__(self): + super().__init__(input_shape=(3, 32, 32), output_shape=(10,)) dir_path = os.path.dirname(os.path.abspath(__file__)) self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") self.model = ConvModel(channel=3, n_random_features=10).to(self.device) diff --git a/examples/example_image/main.py b/examples/example_image/main.py index 70589dc..757fe44 100644 --- a/examples/example_image/main.py +++ b/examples/example_image/main.py @@ -4,7 +4,7 @@ from get_data import * import os import random from utils import generate_uploader, generate_user, ImageDataLoader, train, eval_prediction -from learnware.learnware import Learnware, JobSelectorReuser, EnsembleReuser +from learnware.learnware import Learnware, JobSelectorReuser, AveragingReuser import time from learnware.market import EasyMarket, BaseUserInfo @@ -40,17 +40,17 @@ semantic_specs = [ { "Data": {"Values": ["Tabular"], "Type": "Class"}, "Task": {"Values": ["Classification"], "Type": "Class"}, - "Device": {"Values": ["GPU"], "Type": "Tag"}, + "Library": {"Values": ["Pytorch"], "Type": "Class"}, "Scenario": {"Values": ["Business"], "Type": "Tag"}, "Description": {"Values": "", "Type": "String"}, "Name": {"Values": "learnware_1", "Type": "String"}, } ] -user_senmantic = { +user_semantic = { "Data": {"Values": ["Tabular"], "Type": "Class"}, "Task": {"Values": ["Classification"], "Type": "Class"}, - "Device": {"Values": ["GPU"], "Type": "Tag"}, + "Library": {"Values": ["Pytorch"], "Type": "Class"}, "Scenario": {"Values": ["Business"], "Type": "Tag"}, "Description": {"Values": "", "Type": "String"}, "Name": {"Values": "", "Type": "String"}, @@ -133,10 +133,10 @@ def prepare_market(): def test_search(gamma=0.1, load_market=True): if load_market: - image_market = EasyMarket() + image_market = EasyMarket(market_id="image") else: prepare_market() - image_market = EasyMarket() + image_market = EasyMarket(market_id="image") logger.info("Number of items in the market: %d" % len(image_market)) select_list = [] @@ -151,7 +151,7 @@ def test_search(gamma=0.1, load_market=True): user_label = np.load(user_label_path) user_stat_spec = specification.utils.generate_rkme_spec(X=user_data, gamma=gamma, cuda_idx=0) user_info = BaseUserInfo( - id=f"user_{i}", semantic_spec=user_senmantic, stat_info={"RKMEStatSpecification": user_stat_spec} + id=f"user_{i}", semantic_spec=user_semantic, stat_info={"RKMEStatSpecification": user_stat_spec} ) logger.info("Searching Market for user: %d" % (i)) sorted_score_list, single_learnware_list, mixture_score, mixture_learnware_list = image_market.search_learnware( @@ -167,13 +167,14 @@ def test_search(gamma=0.1, load_market=True): acc_list.append(acc) logger.info("search rank: %d, score: %.3f, learnware_id: %s, acc: %.3f" % (idx, score, learnware.id, acc)) # test reuse + reuse_baseline = JobSelectorReuser(learnware_list=mixture_learnware_list) reuse_predict = reuse_baseline.predict(user_data=user_data) reuse_score = eval_prediction(reuse_predict, user_label) job_selector_score_list.append(reuse_score) print(f"mixture reuse loss: {reuse_score}\n") - reuse_ensemble = EnsembleReuser(learnware_list=mixture_learnware_list, mode="vote") + reuse_ensemble = AveragingReuser(learnware_list=mixture_learnware_list, mode="vote") ensemble_predict_y = reuse_ensemble.predict(user_data=user_data) ensemble_score = eval_prediction(ensemble_predict_y, user_label) ensemble_score_list.append(ensemble_score) @@ -198,4 +199,4 @@ def test_search(gamma=0.1, load_market=True): if __name__ == "__main__": # prepare_data() # prepare_model() - test_search(load_market=True) + test_search(load_market=False) diff --git a/examples/example_m5/example_init.py b/examples/example_m5/example_init.py index d875d96..e0aabdd 100644 --- a/examples/example_m5/example_init.py +++ b/examples/example_m5/example_init.py @@ -1,13 +1,15 @@ import os import joblib import numpy as np +import lightgbm as lgb from learnware.model import BaseModel class Model(BaseModel): def __init__(self): + super(Model, self).__init__(input_shape=(82,), output_shape=()) dir_path = os.path.dirname(os.path.abspath(__file__)) - self.model = joblib.load(os.path.join(dir_path, "model.out")) + self.model = lgb.Booster(model_file=os.path.join(dir_path, "model.out")) def fit(self, X: np.ndarray, y: np.ndarray): pass diff --git a/examples/example_m5/main.py b/examples/example_m5/main.py index 6b7544e..310bb40 100644 --- a/examples/example_m5/main.py +++ b/examples/example_m5/main.py @@ -1,13 +1,14 @@ import os import fire import zipfile +import numpy as np from tqdm import tqdm from shutil import copyfile, rmtree import learnware from learnware.market import EasyMarket, BaseUserInfo from learnware.market import database_ops -from learnware.learnware import Learnware, JobSelectorReuser +from learnware.learnware import Learnware, JobSelectorReuser, AveragingReuser import learnware.specification as specification from m5 import DataLoader @@ -16,17 +17,17 @@ semantic_specs = [ { "Data": {"Values": ["Tabular"], "Type": "Class"}, "Task": {"Values": ["Classification"], "Type": "Class"}, - "Device": {"Values": ["GPU"], "Type": "Tag"}, + "Library": {"Values": ["Scikit-learn"], "Type": "Class"}, "Scenario": {"Values": ["Business"], "Type": "Tag"}, "Description": {"Values": "", "Type": "String"}, "Name": {"Values": "learnware_1", "Type": "String"}, } ] -user_senmantic = { +user_semantic = { "Data": {"Values": ["Tabular"], "Type": "Class"}, "Task": {"Values": ["Classification"], "Type": "Class"}, - "Device": {"Values": ["GPU"], "Type": "Tag"}, + "Library": {"Values": ["Scikit-learn"], "Type": "Class"}, "Scenario": {"Values": ["Business"], "Type": "Tag"}, "Description": {"Values": "", "Type": "String"}, "Name": {"Values": "", "Type": "String"}, @@ -122,13 +123,20 @@ class M5DatasetWorkflow: m5 = DataLoader() idx_list = m5.get_idx_list() + os.makedirs("./user_spec", exist_ok=True) + single_score_list = [] + random_score_list = [] + job_selector_score_list = [] + ensemble_score_list = [] for idx in idx_list: train_x, train_y, test_x, test_y = m5.get_idx_data(idx) user_spec = specification.utils.generate_rkme_spec(X=test_x, gamma=0.1, cuda_idx=0) + user_spec_path = f"./user_spec/user_{idx}.json" + user_spec.save(user_spec_path) user_info = BaseUserInfo( - id=f"user_{idx}", semantic_spec=user_senmantic, stat_info={"RKMEStatSpecification": user_spec} + id=f"user_{idx}", semantic_spec=user_semantic, stat_info={"RKMEStatSpecification": user_spec} ) ( sorted_score_list, @@ -141,18 +149,36 @@ class M5DatasetWorkflow: print( f"single model num: {len(sorted_score_list)}, max_score: {sorted_score_list[0]}, min_score: {sorted_score_list[-1]}" ) + loss_list = [] for score, learnware in zip(sorted_score_list, single_learnware_list): pred_y = learnware.predict(test_x) - loss = m5.score(test_y, pred_y) - print(f"score: {score}, learnware_id: {learnware.id}, loss: {loss}") + loss_list.append(m5.score(test_y, pred_y)) + print( + f"Top1-score: {sorted_score_list[0]}, learnware_id: {single_learnware_list[0].id}, loss: {loss_list[0]}" + ) mixture_id = " ".join([learnware.id for learnware in mixture_learnware_list]) - print(f"mixture_learnware: {mixture_id}\n") - - reuse_baseline = JobSelectorReuser(learnware_list=mixture_learnware_list) - reuse_predict = reuse_baseline.predict(user_data=test_x) - reuse_score = m5.score(test_y, reuse_predict) - print(f"mixture reuse loss: {reuse_score}\n") + print(f"mixture_score: {mixture_score}, mixture_learnware: {mixture_id}") + + reuse_job_selector = JobSelectorReuser(learnware_list=mixture_learnware_list, use_herding=False) + job_selector_predict_y = reuse_job_selector.predict(user_data=test_x) + job_selector_score = m5.score(test_y, job_selector_predict_y) + print(f"mixture reuse loss (job selector): {job_selector_score}") + + reuse_ensemble = AveragingReuser(learnware_list=mixture_learnware_list) + ensemble_predict_y = reuse_ensemble.predict(user_data=test_x) + ensemble_score = m5.score(test_y, ensemble_predict_y) + print(f"mixture reuse loss (ensemble): {ensemble_score}\n") + + single_score_list.append(loss_list[0]) + random_score_list.append(np.mean(loss_list)) + job_selector_score_list.append(job_selector_score) + ensemble_score_list.append(ensemble_score) + + print(f"Single search score: {np.mean(single_score_list)}") + print(f"Job selector score: {np.mean(job_selector_score_list)}") + print(f"Average ensemble score: {np.mean(ensemble_score_list)}") + print(f"Random search score: {np.mean(random_score_list)}") if __name__ == "__main__": diff --git a/examples/example_market_db/example_db.py b/examples/example_market_db/example_db.py index 11eb4cc..b4e146a 100644 --- a/examples/example_market_db/example_db.py +++ b/examples/example_market_db/example_db.py @@ -15,7 +15,7 @@ semantic_specs = [ { "Data": {"Values": ["Tabular"], "Type": "Class"}, "Task": {"Values": ["Classification"], "Type": "Class"}, - "Device": {"Values": ["GPU"], "Type": "Tag"}, + "Library": {"Values": ["Scikit-learn"], "Type": "Class"}, "Scenario": {"Values": ["Nature"], "Type": "Tag"}, "Description": {"Values": "", "Type": "String"}, "Name": {"Values": "learnware_1", "Type": "String"}, @@ -23,7 +23,7 @@ semantic_specs = [ { "Data": {"Values": ["Tabular"], "Type": "Class"}, "Task": {"Values": ["Classification"], "Type": "Class"}, - "Device": {"Values": ["GPU"], "Type": "Tag"}, + "Library": {"Values": ["Scikit-learn"], "Type": "Class"}, "Scenario": {"Values": ["Business", "Nature"], "Type": "Tag"}, "Description": {"Values": "", "Type": "String"}, "Name": {"Values": "learnware_2", "Type": "String"}, @@ -31,7 +31,7 @@ semantic_specs = [ { "Data": {"Values": ["Tabular"], "Type": "Class"}, "Task": {"Values": ["Regression"], "Type": "Class"}, - "Device": {"Values": ["GPU"], "Type": "Tag"}, + "Library": {"Values": ["Scikit-learn"], "Type": "Class"}, "Scenario": {"Values": ["Business"], "Type": "Tag"}, "Description": {"Values": "", "Type": "String"}, "Name": {"Values": "learnware_3", "Type": "String"}, @@ -40,11 +40,8 @@ semantic_specs = [ user_senmantic = { "Data": {"Values": ["Tabular"], "Type": "Class"}, - "Task": { - "Values": ["Classification"], - "Type": "Class", - }, - "Device": {"Values": ["GPU"], "Type": "Tag"}, + "Task": {"Values": ["Classification"], "Type": "Class"}, + "Device": {"Values": ["GPU"], "Type": "Class"}, "Scenario": {"Values": ["Business"], "Type": "Tag"}, "Description": {"Values": "", "Type": "String"}, "Name": {"Values": "learnware", "Type": "String"}, diff --git a/examples/example_pfs/example_init.py b/examples/example_pfs/example_init.py index d875d96..88b788a 100644 --- a/examples/example_pfs/example_init.py +++ b/examples/example_pfs/example_init.py @@ -6,6 +6,7 @@ from learnware.model import BaseModel class Model(BaseModel): def __init__(self): + super(Model, self).__init__(input_shape=(31,), output_shape=()) dir_path = os.path.dirname(os.path.abspath(__file__)) self.model = joblib.load(os.path.join(dir_path, "model.out")) diff --git a/examples/example_pfs/main.py b/examples/example_pfs/main.py index 5d3ae6a..95e2662 100644 --- a/examples/example_pfs/main.py +++ b/examples/example_pfs/main.py @@ -17,17 +17,17 @@ semantic_specs = [ { "Data": {"Values": ["Tabular"], "Type": "Class"}, "Task": {"Values": ["Classification"], "Type": "Class"}, - "Device": {"Values": ["GPU"], "Type": "Tag"}, + "Library": {"Values": ["Scikit-learn"], "Type": "Class"}, "Scenario": {"Values": ["Business"], "Type": "Tag"}, "Description": {"Values": "", "Type": "String"}, "Name": {"Values": "learnware_1", "Type": "String"}, } ] -user_senmantic = { +user_semantic = { "Data": {"Values": ["Tabular"], "Type": "Class"}, "Task": {"Values": ["Classification"], "Type": "Class"}, - "Device": {"Values": ["GPU"], "Type": "Tag"}, + "Library": {"Values": ["Scikit-learn"], "Type": "Class"}, "Scenario": {"Values": ["Business"], "Type": "Tag"}, "Description": {"Values": "", "Type": "String"}, "Name": {"Values": "", "Type": "String"}, @@ -122,7 +122,7 @@ class PFSDatasetWorkflow: pfs = Dataloader() idx_list = pfs.get_idx_list() os.makedirs("./user_spec", exist_ok=True) - sinle_score_list = [] + single_score_list = [] random_score_list = [] job_selector_score_list = [] ensemble_score_list = [] @@ -134,7 +134,7 @@ class PFSDatasetWorkflow: user_spec.save(user_spec_path) user_info = BaseUserInfo( - id=f"user_{idx}", semantic_spec=user_senmantic, stat_info={"RKMEStatSpecification": user_spec} + id=f"user_{idx}", semantic_spec=user_semantic, stat_info={"RKMEStatSpecification": user_spec} ) ( sorted_score_list, @@ -152,13 +152,13 @@ class PFSDatasetWorkflow: pred_y = learnware.predict(test_x) loss_list.append(pfs.score(test_y, pred_y)) print( - f"Top1-score: {sorted_score_list[0]}, learnware_id: {single_learnware_list[0].id}, loss: {loss_list[-1]}" + f"Top1-score: {sorted_score_list[0]}, learnware_id: {single_learnware_list[0].id}, loss: {loss_list[0]}" ) mixture_id = " ".join([learnware.id for learnware in mixture_learnware_list]) print(f"mixture_score: {mixture_score}, mixture_learnware: {mixture_id}") - reuse_job_selector = JobSelectorReuser(learnware_list=mixture_learnware_list) + reuse_job_selector = JobSelectorReuser(learnware_list=mixture_learnware_list, use_herding=False) job_selector_predict_y = reuse_job_selector.predict(user_data=test_x) job_selector_score = pfs.score(test_y, job_selector_predict_y) print(f"mixture reuse loss (job selector): {job_selector_score}") @@ -168,12 +168,12 @@ class PFSDatasetWorkflow: ensemble_score = pfs.score(test_y, ensemble_predict_y) print(f"mixture reuse loss (ensemble): {ensemble_score}\n") - sinle_score_list.append(loss_list[0]) + single_score_list.append(loss_list[0]) random_score_list.append(np.mean(loss_list)) job_selector_score_list.append(job_selector_score) ensemble_score_list.append(ensemble_score) - print(f"Single search score: {np.mean(sinle_score_list)}") + print(f"Single search score: {np.mean(single_score_list)}") print(f"Job selector score: {np.mean(job_selector_score_list)}") print(f"Average ensemble score: {np.mean(ensemble_score_list)}") print(f"Random search score: {np.mean(random_score_list)}") diff --git a/examples/workflow_by_code/main.py b/examples/workflow_by_code/main.py index 01f09f9..97d2fd0 100644 --- a/examples/workflow_by_code/main.py +++ b/examples/workflow_by_code/main.py @@ -19,21 +19,21 @@ semantic_specs = [ { "Data": {"Values": ["Tabular"], "Type": "Class"}, "Task": {"Values": ["Classification"], "Type": "Class"}, - "Device": {"Values": ["GPU"], "Type": "Tag"}, + "Library": {"Values": ["Scikit-learn"], "Type": "Class"}, "Scenario": {"Values": ["Business"], "Type": "Tag"}, "Description": {"Values": "", "Type": "String"}, "Name": {"Values": "learnware_1", "Type": "String"}, } ] -user_senmantic = { +user_semantic = { "Data": {"Values": ["Tabular"], "Type": "Class"}, "Task": { "Values": ["Classification"], "Type": "Class", }, - "Device": {"Values": ["GPU"], "Type": "Tag"}, - "Scenario": {"Values": ["Business"], "Type": "Tag"}, + "Library": {"Values": ["Scikit-learn"], "Type": "Tag"}, + "Scenario": {"Values": ["Business"], "Type": "Class"}, "Description": {"Values": "", "Type": "String"}, "Name": {"Values": "", "Type": "String"}, } @@ -130,7 +130,7 @@ class LearnwareMarketWorkflow: with zipfile.ZipFile(zip_path, "r") as zip_obj: zip_obj.extractall(path=unzip_dir) - user_info = BaseUserInfo(id="user_0", semantic_spec=user_senmantic) + user_info = BaseUserInfo(id="user_0", semantic_spec=user_semantic) _, single_learnware_list, _ = easy_market.search_learnware(user_info) print("User info:", user_info.get_semantic_spec()) @@ -159,7 +159,7 @@ class LearnwareMarketWorkflow: user_spec = specification.rkme.RKMEStatSpecification() user_spec.load(os.path.join(unzip_dir, "svm.json")) user_info = BaseUserInfo( - id="user_0", semantic_spec=user_senmantic, stat_info={"RKMEStatSpecification": user_spec} + id="user_0", semantic_spec=user_semantic, stat_info={"RKMEStatSpecification": user_spec} ) ( sorted_score_list, diff --git a/learnware/config.py b/learnware/config.py index 18c2a15..be274ba 100644 --- a/learnware/config.py +++ b/learnware/config.py @@ -79,12 +79,17 @@ semantic_config = { # "Generation", "Segmentation", "Object Detection", + "Others", ], "Type": "Class", # Choose only one class }, - "Device": { - "Values": ["CPU", "GPU"], - "Type": "Tag", + # "Device": { + # "Values": ["CPU", "GPU"], + # "Type": "Tag", + # }, # Choose one or more tags + "Library": { + "Values": ["Scikit-learn", "PyTorch", "TensorFlow", "Others"], + "Type": "Class", }, # Choose one or more tags "Scenario": { "Values": [ diff --git a/learnware/learnware/reuse.py b/learnware/learnware/reuse.py index ccd1da8..97ee44e 100644 --- a/learnware/learnware/reuse.py +++ b/learnware/learnware/reuse.py @@ -17,7 +17,7 @@ logger = get_module_logger("Reuser") class JobSelectorReuser(BaseReuser): """Baseline Multiple Learnware Reuser uing Job Selector Method""" - def __init__(self, learnware_list: List[Learnware], herding_num: int = 1000): + def __init__(self, learnware_list: List[Learnware], herding_num: int = 1000, use_herding: bool = True): """The initialization method for job selector reuser Parameters @@ -29,6 +29,7 @@ class JobSelectorReuser(BaseReuser): """ super(JobSelectorReuser, self).__init__(learnware_list) self.herding_num = herding_num + self.use_herding = use_herding def predict(self, user_data: np.ndarray) -> np.ndarray: """Give prediction for user data using baseline job-selector method @@ -53,13 +54,15 @@ class JobSelectorReuser(BaseReuser): return selector_pred_y - def job_selector(self, user_data: np.ndarray): + def job_selector(self, user_data: np.ndarray, use_herding: bool = True): """Train job selector based on user's data, which predicts which learnware in the pool should be selected Parameters ---------- user_data : np.ndarray User's labeled raw data. + use_herding: bool + Whether create job selector training samples by herding """ if len(self.learnware_list) == 1: user_data_num = user_data.shape[0] @@ -69,26 +72,32 @@ class JobSelectorReuser(BaseReuser): learnware.specification.get_stat_spec_by_name("RKMEStatSpecification") for learnware in self.learnware_list ] - task_matrix = np.zeros((len(learnware_rkme_spec_list), len(learnware_rkme_spec_list))) - for i in range(len(self.learnware_list)): - task_rkme1 = learnware_rkme_spec_list[i] - for j in range(i, len(self.learnware_list)): - task_rkme2 = learnware_rkme_spec_list[j] - task_matrix[i][j] = task_matrix[j][i] = task_rkme1.inner_prod(task_rkme2) - - task_mixture_weight = self._calculate_rkme_spec_mixture_weight( - user_data, learnware_rkme_spec_list, task_matrix - ) + if self.use_herding: + task_matrix = np.zeros((len(learnware_rkme_spec_list), len(learnware_rkme_spec_list))) + for i in range(len(self.learnware_list)): + task_rkme1 = learnware_rkme_spec_list[i] + task_matrix[i][i] = task_rkme1.inner_prod(task_rkme1) + for j in range(i + 1, len(self.learnware_list)): + task_rkme2 = learnware_rkme_spec_list[j] + task_matrix[i][j] = task_matrix[j][i] = task_rkme1.inner_prod(task_rkme2) + + task_mixture_weight = self._calculate_rkme_spec_mixture_weight( + user_data, learnware_rkme_spec_list, task_matrix + ) herding_X, train_herding_X, val_herding_X = None, None, None herding_y, train_herding_y, val_herding_y = [], [], [] for i in range(len(self.learnware_list)): task_spec = learnware_rkme_spec_list[i] - task_herding_num = max(5, int(self.herding_num * task_mixture_weight[i])) + if self.use_herding: + task_herding_num = max(5, int(self.herding_num * task_mixture_weight[i])) + herding_X_i = task_spec.herding(task_herding_num).detach().cpu().numpy() + else: + herding_X_i = task_spec.z.detach().cpu().numpy() + task_herding_num = herding_X_i.shape[0] task_val_num = task_herding_num // 5 - herding_X_i = task_spec.herding(task_herding_num).detach().cpu().numpy() train_X_i = herding_X_i[:-task_val_num] val_X_i = herding_X_i[-task_val_num:] @@ -267,7 +276,7 @@ class AveragingReuser(BaseReuser): # print(pred_y.shape) if not isinstance(pred_y, np.ndarray): pred_y = pred_y.detach().cpu().numpy() - softmax_pred = softmax(pred_y, axis=1) + softmax_pred = softmax(pred_y, axis=0) if mean_pred_y is None: mean_pred_y = softmax_pred else: diff --git a/learnware/market/__init__.py b/learnware/market/__init__.py index 10654c7..1620a5e 100644 --- a/learnware/market/__init__.py +++ b/learnware/market/__init__.py @@ -3,3 +3,4 @@ from .base import BaseUserInfo, BaseMarket from .evolve_anchor import EvolvedAnchoredMarket from .evolve import EvolvedMarket from .easy import EasyMarket +from .heterogeneous_feature import HeterogeneousFeatureMarket diff --git a/learnware/market/easy.py b/learnware/market/easy.py index 7e2de88..5e7bf80 100644 --- a/learnware/market/easy.py +++ b/learnware/market/easy.py @@ -119,37 +119,38 @@ class EasyMarket(BaseMarket): """ if not os.path.exists(zip_path): logger.warning("Zip Path NOT Found! Fail to add learnware.") - return None, False + return None, self.INVALID_LEARNWARE try: if len(semantic_spec["Data"]["Values"]) == 0: logger.warning("Illegal semantic specification, please choose Data.") - return None, False + return None, self.INVALID_LEARNWARE if len(semantic_spec["Task"]["Values"]) == 0: logger.warning("Illegal semantic specification, please choose Task.") - return None, False - if len(semantic_spec["Device"]["Values"]) == 0: + return None, self.INVALID_LEARNWARE + if len(semantic_spec["Library"]["Values"]) == 0: logger.warning("Illegal semantic specification, please choose Device.") - return None, False + return None, self.INVALID_LEARNWARE if len(semantic_spec["Name"]["Values"]) == 0: logger.warning("Illegal semantic specification, please provide Name.") - return None, False + return None, self.INVALID_LEARNWARE if len(semantic_spec["Description"]["Values"]) == 0 and len(semantic_spec["Scenario"]["Values"]) == 0: logger.warning("Illegal semantic specification, please provide Scenario or Description.") - return None, False + return None, self.INVALID_LEARNWARE if ( semantic_spec["Data"]["Type"] != "Class" or semantic_spec["Task"]["Type"] != "Class" - or semantic_spec["Device"]["Type"] != "Tag" + or semantic_spec["Library"]["Type"] != "Class" or semantic_spec["Scenario"]["Type"] != "Tag" or semantic_spec["Name"]["Type"] != "String" or semantic_spec["Description"]["Type"] != "String" ): logger.warning("Illegal semantic specification, please provide the right type.") - return None, False + return None, self.INVALID_LEARNWARE except: + print(semantic_spec) logger.warning("Illegal semantic specification, some keys are missing.") - return None, False + return None, self.INVALID_LEARNWARE logger.info("Get new learnware from %s" % (zip_path)) id = "%08d" % (self.count) @@ -476,7 +477,7 @@ class EasyMarket(BaseMarket): """ learnware_num = len(learnware_list) if learnware_num == 0: - return [], [] + return None, [], [] if learnware_num < max_search_num: logger.warning("Available Learnware num less than search_num!") max_search_num = learnware_num @@ -639,7 +640,6 @@ class EasyMarket(BaseMarket): weight_list = [] mixture_learnware_list = [] - # convert dist to score if mixture_dist is None: sorted_score_list = self._convert_dist_to_score(sorted_dist_list) mixture_score = None diff --git a/learnware/market/heterogeneous_feature.py b/learnware/market/heterogeneous_feature.py new file mode 100644 index 0000000..b25e2e8 --- /dev/null +++ b/learnware/market/heterogeneous_feature.py @@ -0,0 +1,102 @@ +import numpy as np +from typing import Tuple, Any, List, Union, Dict + +from .evolve import EvolvedMarket +from ..learnware import Learnware + + +class MappingFunction: + def __init__(self) -> None: + pass + + def transform(X: np.ndarray) -> np.ndarray: + """transform the data in one feature space to another feature space. + + Parameters + ---------- + X : np.ndarray + data in one feature space + + Returns + ------- + np.ndarray + transformed data in other feature space + """ + pass + + +class HeterogeneousFeatureMarket(EvolvedMarket): + """Organize learnwares with heterogeneous feature spaces + + Parameters + ---------- + EvolvedMarket : _type_ + Market version with evolved learnwares + """ + + def __init__(self): + super(HeterogeneousFeatureMarket, self).__init__() + self.mapping_function_list = {} + + def _mapping_function_list_initialization(self, learnware_list: List[Learnware]): + """Initialize mapping functions with all submitted learnwares + + Parameters + ---------- + learnware_list : List[Learnware] + list of learnwares + """ + self.mapping_function_list = self.learn_mapping_functions(learnware_list) + + def learn_mapping_functions(self, learnware_list: List[Learnware]) -> List[MappingFunction]: + """Use all statistical specifications of submitted learnwares to generate mapping functions from each original feature space to subsapce and vice verse. + + Parameters + ---------- + learnware_list : List[Learnware] + list of learnwares + + Returns + ------- + List[MappingFunction] + list of mapping functions + """ + pass + + def transform_original_to_subspace( + self, original_feature_space_idx: int, original_feature: np.ndarray + ) -> np.ndarray: + """Transform feature in a original feature space to the subspace. + + Parameters + ---------- + original_feature_space_idx : int + index of the original feature space + original_feature : np.ndarray + data in the original feature space + + Returns + ------- + np.ndarray + mapped data in the subspace + """ + pass + + def transform_subspace_to_original( + self, original_feature_space_idx: int, subspace_feature: np.ndarray + ) -> np.ndarray: + """Transform feature in the subspace to a original feature space. + + Parameters + ---------- + original_feature_space_idx : int + index of the original feature space + subspace_feature : np.ndarray + data in the subspace + + Returns + ------- + np.ndarray + mapped data in the original feature space + """ + pass