From 9873a6b04a52490e3a9417317349971662f213c9 Mon Sep 17 00:00:00 2001 From: chenzx Date: Fri, 21 Apr 2023 16:15:05 +0800 Subject: [PATCH 01/23] [MNT] update image example --- examples/example_image/main.py | 11 +++++++---- learnware/market/easy.py | 1 - 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/examples/example_image/main.py b/examples/example_image/main.py index 70589dc..6b7a878 100644 --- a/examples/example_image/main.py +++ b/examples/example_image/main.py @@ -157,6 +157,7 @@ def test_search(gamma=0.1, load_market=True): sorted_score_list, single_learnware_list, mixture_score, mixture_learnware_list = image_market.search_learnware( user_info ) + print(sorted_score_list) l = len(sorted_score_list) acc_list = [] for idx in range(l): @@ -167,11 +168,13 @@ def test_search(gamma=0.1, load_market=True): acc_list.append(acc) logger.info("search rank: %d, score: %.3f, learnware_id: %s, acc: %.3f" % (idx, score, learnware.id, acc)) # test reuse + """ reuse_baseline = JobSelectorReuser(learnware_list=mixture_learnware_list) reuse_predict = reuse_baseline.predict(user_data=user_data) reuse_score = eval_prediction(reuse_predict, user_label) job_selector_score_list.append(reuse_score) print(f"mixture reuse loss: {reuse_score}\n") + """ reuse_ensemble = EnsembleReuser(learnware_list=mixture_learnware_list, mode="vote") ensemble_predict_y = reuse_ensemble.predict(user_data=user_data) @@ -186,10 +189,10 @@ def test_search(gamma=0.1, load_market=True): % (np.mean(select_list), np.std(select_list), np.mean(avg_list), np.std(avg_list)) ) logger.info("Average performance improvement: %.3f" % (np.mean(improve_list))) - logger.info( - "Average Job Selector Reuse Performance: %.3f +/- %.3f" - % (np.mean(job_selector_score_list), np.std(job_selector_score_list)) - ) + # logger.info( + # "Average Job Selector Reuse Performance: %.3f +/- %.3f" + # % (np.mean(job_selector_score_list), np.std(job_selector_score_list)) + # ) logger.info( "Ensemble Reuse Performance: %.3f +/- %.3f" % (np.mean(ensemble_score_list), np.std(ensemble_score_list)) ) diff --git a/learnware/market/easy.py b/learnware/market/easy.py index adcc5da..ed86e87 100644 --- a/learnware/market/easy.py +++ b/learnware/market/easy.py @@ -639,7 +639,6 @@ class EasyMarket(BaseMarket): weight_list = [] mixture_learnware_list = [] - # convert dist to score if mixture_dist is None: sorted_score_list = self._convert_dist_to_score(sorted_dist_list) mixture_score = None From 877a7770f735e202f41d1d477405ec3466abc0c8 Mon Sep 17 00:00:00 2001 From: liuht Date: Fri, 21 Apr 2023 16:16:01 +0800 Subject: [PATCH 02/23] [MNT] Add use_herding --- learnware/learnware/reuse.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/learnware/learnware/reuse.py b/learnware/learnware/reuse.py index ccd1da8..8d1d0fe 100644 --- a/learnware/learnware/reuse.py +++ b/learnware/learnware/reuse.py @@ -17,7 +17,7 @@ logger = get_module_logger("Reuser") class JobSelectorReuser(BaseReuser): """Baseline Multiple Learnware Reuser uing Job Selector Method""" - def __init__(self, learnware_list: List[Learnware], herding_num: int = 1000): + def __init__(self, learnware_list: List[Learnware], herding_num: int = 1000, use_herding: bool = True): """The initialization method for job selector reuser Parameters @@ -29,6 +29,7 @@ class JobSelectorReuser(BaseReuser): """ super(JobSelectorReuser, self).__init__(learnware_list) self.herding_num = herding_num + self.use_herding = use_herding def predict(self, user_data: np.ndarray) -> np.ndarray: """Give prediction for user data using baseline job-selector method @@ -53,13 +54,15 @@ class JobSelectorReuser(BaseReuser): return selector_pred_y - def job_selector(self, user_data: np.ndarray): + def job_selector(self, user_data: np.ndarray, use_herding: bool): """Train job selector based on user's data, which predicts which learnware in the pool should be selected Parameters ---------- user_data : np.ndarray User's labeled raw data. + use_herding: bool + Whether create job selector training samples by herding """ if len(self.learnware_list) == 1: user_data_num = user_data.shape[0] @@ -69,11 +72,12 @@ class JobSelectorReuser(BaseReuser): learnware.specification.get_stat_spec_by_name("RKMEStatSpecification") for learnware in self.learnware_list ] - task_matrix = np.zeros((len(learnware_rkme_spec_list), len(learnware_rkme_spec_list))) + task_matrix = np.zeros((len(learnware_rkme_spec_list), len(learnware_rkme_spec_list))) for i in range(len(self.learnware_list)): task_rkme1 = learnware_rkme_spec_list[i] - for j in range(i, len(self.learnware_list)): + task_matrix[i][i] = task_rkme1.inner_prod(task_rkme1) + for j in range(i + 1, len(self.learnware_list)): task_rkme2 = learnware_rkme_spec_list[j] task_matrix[i][j] = task_matrix[j][i] = task_rkme1.inner_prod(task_rkme2) @@ -87,8 +91,12 @@ class JobSelectorReuser(BaseReuser): task_spec = learnware_rkme_spec_list[i] task_herding_num = max(5, int(self.herding_num * task_mixture_weight[i])) task_val_num = task_herding_num // 5 + + if self.use_herding: + herding_X_i = task_spec.herding(task_herding_num).detach().cpu().numpy() + else: + herding_X_i = task_spec.z.detach().cpu().numpy() - herding_X_i = task_spec.herding(task_herding_num).detach().cpu().numpy() train_X_i = herding_X_i[:-task_val_num] val_X_i = herding_X_i[-task_val_num:] From 0d209ed92dbb37e985a8f3cba92fc84ab0a907c5 Mon Sep 17 00:00:00 2001 From: chenzx Date: Fri, 21 Apr 2023 16:18:07 +0800 Subject: [PATCH 03/23] [MNT] Update image example --- examples/example_image/main.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/examples/example_image/main.py b/examples/example_image/main.py index 6b7a878..e96094a 100644 --- a/examples/example_image/main.py +++ b/examples/example_image/main.py @@ -4,7 +4,7 @@ from get_data import * import os import random from utils import generate_uploader, generate_user, ImageDataLoader, train, eval_prediction -from learnware.learnware import Learnware, JobSelectorReuser, EnsembleReuser +from learnware.learnware import Learnware, JobSelectorReuser, AveragingReuser import time from learnware.market import EasyMarket, BaseUserInfo @@ -157,7 +157,6 @@ def test_search(gamma=0.1, load_market=True): sorted_score_list, single_learnware_list, mixture_score, mixture_learnware_list = image_market.search_learnware( user_info ) - print(sorted_score_list) l = len(sorted_score_list) acc_list = [] for idx in range(l): @@ -176,7 +175,7 @@ def test_search(gamma=0.1, load_market=True): print(f"mixture reuse loss: {reuse_score}\n") """ - reuse_ensemble = EnsembleReuser(learnware_list=mixture_learnware_list, mode="vote") + reuse_ensemble = AveragingReuser(learnware_list=mixture_learnware_list, mode="vote") ensemble_predict_y = reuse_ensemble.predict(user_data=user_data) ensemble_score = eval_prediction(ensemble_predict_y, user_label) ensemble_score_list.append(ensemble_score) From d00e3094d0b019e278119167054d6bc7d0e746bd Mon Sep 17 00:00:00 2001 From: liuht Date: Fri, 21 Apr 2023 16:21:12 +0800 Subject: [PATCH 04/23] [FIX] add default value for use_herding --- learnware/learnware/reuse.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/learnware/learnware/reuse.py b/learnware/learnware/reuse.py index 8d1d0fe..6bb075f 100644 --- a/learnware/learnware/reuse.py +++ b/learnware/learnware/reuse.py @@ -54,7 +54,7 @@ class JobSelectorReuser(BaseReuser): return selector_pred_y - def job_selector(self, user_data: np.ndarray, use_herding: bool): + def job_selector(self, user_data: np.ndarray, use_herding: bool = True): """Train job selector based on user's data, which predicts which learnware in the pool should be selected Parameters From 4e249efab0e2efd337dfaa400177ca88070d7ee5 Mon Sep 17 00:00:00 2001 From: Gene Date: Fri, 21 Apr 2023 16:55:20 +0800 Subject: [PATCH 05/23] [FIX] Fix bugs in M5 --- examples/example_image/main.py | 6 +++--- examples/example_m5/example_init.py | 3 ++- examples/example_pfs/example_init.py | 1 + 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/examples/example_image/main.py b/examples/example_image/main.py index 70589dc..90a34e5 100644 --- a/examples/example_image/main.py +++ b/examples/example_image/main.py @@ -4,7 +4,7 @@ from get_data import * import os import random from utils import generate_uploader, generate_user, ImageDataLoader, train, eval_prediction -from learnware.learnware import Learnware, JobSelectorReuser, EnsembleReuser +from learnware.learnware import Learnware, JobSelectorReuser, AveragingReuser import time from learnware.market import EasyMarket, BaseUserInfo @@ -173,7 +173,7 @@ def test_search(gamma=0.1, load_market=True): job_selector_score_list.append(reuse_score) print(f"mixture reuse loss: {reuse_score}\n") - reuse_ensemble = EnsembleReuser(learnware_list=mixture_learnware_list, mode="vote") + reuse_ensemble = AveragingReuser(learnware_list=mixture_learnware_list, mode="vote") ensemble_predict_y = reuse_ensemble.predict(user_data=user_data) ensemble_score = eval_prediction(ensemble_predict_y, user_label) ensemble_score_list.append(ensemble_score) @@ -198,4 +198,4 @@ def test_search(gamma=0.1, load_market=True): if __name__ == "__main__": # prepare_data() # prepare_model() - test_search(load_market=True) + test_search(load_market=False) diff --git a/examples/example_m5/example_init.py b/examples/example_m5/example_init.py index d875d96..70f366d 100644 --- a/examples/example_m5/example_init.py +++ b/examples/example_m5/example_init.py @@ -1,13 +1,14 @@ import os import joblib import numpy as np +import lightgbm as lgb from learnware.model import BaseModel class Model(BaseModel): def __init__(self): dir_path = os.path.dirname(os.path.abspath(__file__)) - self.model = joblib.load(os.path.join(dir_path, "model.out")) + self.model = lgb.Booster(model_file=os.path.join(dir_path, "model.out")) def fit(self, X: np.ndarray, y: np.ndarray): pass diff --git a/examples/example_pfs/example_init.py b/examples/example_pfs/example_init.py index d875d96..88b788a 100644 --- a/examples/example_pfs/example_init.py +++ b/examples/example_pfs/example_init.py @@ -6,6 +6,7 @@ from learnware.model import BaseModel class Model(BaseModel): def __init__(self): + super(Model, self).__init__(input_shape=(31,), output_shape=()) dir_path = os.path.dirname(os.path.abspath(__file__)) self.model = joblib.load(os.path.join(dir_path, "model.out")) From 1289b6d8632d29a77f7bfc8fae1566d2b3d3dcab Mon Sep 17 00:00:00 2001 From: liuht Date: Fri, 21 Apr 2023 16:56:50 +0800 Subject: [PATCH 06/23] [MNT] Add input_shape and output_shape --- examples/example_m5/example_init.py | 1 + examples/example_m5/main.py | 46 ++++++++++++++++++++++------- 2 files changed, 37 insertions(+), 10 deletions(-) diff --git a/examples/example_m5/example_init.py b/examples/example_m5/example_init.py index d875d96..c5d26d1 100644 --- a/examples/example_m5/example_init.py +++ b/examples/example_m5/example_init.py @@ -6,6 +6,7 @@ from learnware.model import BaseModel class Model(BaseModel): def __init__(self): + super(Model, self).__init__(input_shape=(82,), output_shape=()) dir_path = os.path.dirname(os.path.abspath(__file__)) self.model = joblib.load(os.path.join(dir_path, "model.out")) diff --git a/examples/example_m5/main.py b/examples/example_m5/main.py index 6b7544e..a0853c4 100644 --- a/examples/example_m5/main.py +++ b/examples/example_m5/main.py @@ -1,13 +1,14 @@ import os import fire import zipfile +import numpy as np from tqdm import tqdm from shutil import copyfile, rmtree import learnware from learnware.market import EasyMarket, BaseUserInfo from learnware.market import database_ops -from learnware.learnware import Learnware, JobSelectorReuser +from learnware.learnware import Learnware, JobSelectorReuser, AveragingReuser import learnware.specification as specification from m5 import DataLoader @@ -114,7 +115,7 @@ class M5DatasetWorkflow: rmtree(dir_path) def test(self, regenerate_flag=False): - self.prepare_learnware(regenerate_flag) + #self.prepare_learnware(regenerate_flag) self._init_learnware_market() easy_market = EasyMarket() @@ -122,10 +123,17 @@ class M5DatasetWorkflow: m5 = DataLoader() idx_list = m5.get_idx_list() + os.makedirs("./user_spec", exist_ok=True) + sinle_score_list = [] + random_score_list = [] + job_selector_score_list = [] + ensemble_score_list = [] for idx in idx_list: train_x, train_y, test_x, test_y = m5.get_idx_data(idx) user_spec = specification.utils.generate_rkme_spec(X=test_x, gamma=0.1, cuda_idx=0) + user_spec_path = f"./user_spec/user_{idx}.json" + user_spec.save(user_spec_path) user_info = BaseUserInfo( id=f"user_{idx}", semantic_spec=user_senmantic, stat_info={"RKMEStatSpecification": user_spec} @@ -141,18 +149,36 @@ class M5DatasetWorkflow: print( f"single model num: {len(sorted_score_list)}, max_score: {sorted_score_list[0]}, min_score: {sorted_score_list[-1]}" ) + loss_list = [] for score, learnware in zip(sorted_score_list, single_learnware_list): pred_y = learnware.predict(test_x) - loss = m5.score(test_y, pred_y) - print(f"score: {score}, learnware_id: {learnware.id}, loss: {loss}") + loss_list.append(m5.score(test_y, pred_y)) + print( + f"Top1-score: {sorted_score_list[0]}, learnware_id: {single_learnware_list[0].id}, loss: {loss_list[-1]}" + ) mixture_id = " ".join([learnware.id for learnware in mixture_learnware_list]) - print(f"mixture_learnware: {mixture_id}\n") - - reuse_baseline = JobSelectorReuser(learnware_list=mixture_learnware_list) - reuse_predict = reuse_baseline.predict(user_data=test_x) - reuse_score = m5.score(test_y, reuse_predict) - print(f"mixture reuse loss: {reuse_score}\n") + print(f"mixture_score: {mixture_score}, mixture_learnware: {mixture_id}") + + reuse_job_selector = JobSelectorReuser(learnware_list=mixture_learnware_list) + job_selector_predict_y = reuse_job_selector.predict(user_data=test_x) + job_selector_score = m5.score(test_y, job_selector_predict_y) + print(f"mixture reuse loss (job selector): {job_selector_score}") + + reuse_ensemble = AveragingReuser(learnware_list=mixture_learnware_list) + ensemble_predict_y = reuse_ensemble.predict(user_data=test_x) + ensemble_score = m5.score(test_y, ensemble_predict_y) + print(f"mixture reuse loss (ensemble): {ensemble_score}\n") + + sinle_score_list.append(loss_list[0]) + random_score_list.append(np.mean(loss_list)) + job_selector_score_list.append(job_selector_score) + ensemble_score_list.append(ensemble_score) + + print(f"Single search score: {np.mean(sinle_score_list)}") + print(f"Job selector score: {np.mean(job_selector_score_list)}") + print(f"Average ensemble score: {np.mean(ensemble_score_list)}") + print(f"Random search score: {np.mean(random_score_list)}") if __name__ == "__main__": From 1ad8bc5643b29927ac3a690aef52286232a7d2fb Mon Sep 17 00:00:00 2001 From: chenzx Date: Fri, 21 Apr 2023 17:02:39 +0800 Subject: [PATCH 07/23] [MNT] Update image example --- .../example_image/example_files/example_init.py | 1 + examples/example_image/main.py | 15 +++++++-------- learnware/learnware/reuse.py | 2 +- learnware/market/easy.py | 2 +- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/examples/example_image/example_files/example_init.py b/examples/example_image/example_files/example_init.py index e75a116..b318ee8 100644 --- a/examples/example_image/example_files/example_init.py +++ b/examples/example_image/example_files/example_init.py @@ -8,6 +8,7 @@ import torch class Model(BaseModel): def __init__(self): + super().__init__(input_shape=(3, 32, 32), output_shape=(10,)) dir_path = os.path.dirname(os.path.abspath(__file__)) self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") self.model = ConvModel(channel=3, n_random_features=10).to(self.device) diff --git a/examples/example_image/main.py b/examples/example_image/main.py index e96094a..ed1fb69 100644 --- a/examples/example_image/main.py +++ b/examples/example_image/main.py @@ -167,13 +167,12 @@ def test_search(gamma=0.1, load_market=True): acc_list.append(acc) logger.info("search rank: %d, score: %.3f, learnware_id: %s, acc: %.3f" % (idx, score, learnware.id, acc)) # test reuse - """ + reuse_baseline = JobSelectorReuser(learnware_list=mixture_learnware_list) reuse_predict = reuse_baseline.predict(user_data=user_data) reuse_score = eval_prediction(reuse_predict, user_label) job_selector_score_list.append(reuse_score) print(f"mixture reuse loss: {reuse_score}\n") - """ reuse_ensemble = AveragingReuser(learnware_list=mixture_learnware_list, mode="vote") ensemble_predict_y = reuse_ensemble.predict(user_data=user_data) @@ -188,10 +187,10 @@ def test_search(gamma=0.1, load_market=True): % (np.mean(select_list), np.std(select_list), np.mean(avg_list), np.std(avg_list)) ) logger.info("Average performance improvement: %.3f" % (np.mean(improve_list))) - # logger.info( - # "Average Job Selector Reuse Performance: %.3f +/- %.3f" - # % (np.mean(job_selector_score_list), np.std(job_selector_score_list)) - # ) + logger.info( + "Average Job Selector Reuse Performance: %.3f +/- %.3f" + % (np.mean(job_selector_score_list), np.std(job_selector_score_list)) + ) logger.info( "Ensemble Reuse Performance: %.3f +/- %.3f" % (np.mean(ensemble_score_list), np.std(ensemble_score_list)) ) @@ -199,5 +198,5 @@ def test_search(gamma=0.1, load_market=True): if __name__ == "__main__": # prepare_data() - # prepare_model() - test_search(load_market=True) + prepare_model() + test_search(load_market=False) diff --git a/learnware/learnware/reuse.py b/learnware/learnware/reuse.py index 8d1d0fe..fbdcc44 100644 --- a/learnware/learnware/reuse.py +++ b/learnware/learnware/reuse.py @@ -91,7 +91,7 @@ class JobSelectorReuser(BaseReuser): task_spec = learnware_rkme_spec_list[i] task_herding_num = max(5, int(self.herding_num * task_mixture_weight[i])) task_val_num = task_herding_num // 5 - + if self.use_herding: herding_X_i = task_spec.herding(task_herding_num).detach().cpu().numpy() else: diff --git a/learnware/market/easy.py b/learnware/market/easy.py index b8d9e1d..4fbe859 100644 --- a/learnware/market/easy.py +++ b/learnware/market/easy.py @@ -476,7 +476,7 @@ class EasyMarket(BaseMarket): """ learnware_num = len(learnware_list) if learnware_num == 0: - return [], [] + return None, [], [] if learnware_num < max_search_num: logger.warning("Available Learnware num less than search_num!") max_search_num = learnware_num From e2c67fcf14be903ad22281ad28a8a529a0498f4f Mon Sep 17 00:00:00 2001 From: liuht Date: Fri, 21 Apr 2023 18:03:14 +0800 Subject: [PATCH 08/23] [FIX] fix typos --- examples/example_m5/main.py | 8 ++++---- examples/example_pfs/main.py | 6 +++--- examples/workflow_by_code/main.py | 6 +++--- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/examples/example_m5/main.py b/examples/example_m5/main.py index a0853c4..3759bb7 100644 --- a/examples/example_m5/main.py +++ b/examples/example_m5/main.py @@ -24,7 +24,7 @@ semantic_specs = [ } ] -user_senmantic = { +user_semantic = { "Data": {"Values": ["Tabular"], "Type": "Class"}, "Task": {"Values": ["Classification"], "Type": "Class"}, "Device": {"Values": ["GPU"], "Type": "Tag"}, @@ -115,7 +115,7 @@ class M5DatasetWorkflow: rmtree(dir_path) def test(self, regenerate_flag=False): - #self.prepare_learnware(regenerate_flag) + self.prepare_learnware(regenerate_flag) self._init_learnware_market() easy_market = EasyMarket() @@ -136,7 +136,7 @@ class M5DatasetWorkflow: user_spec.save(user_spec_path) user_info = BaseUserInfo( - id=f"user_{idx}", semantic_spec=user_senmantic, stat_info={"RKMEStatSpecification": user_spec} + id=f"user_{idx}", semantic_spec=user_semantic, stat_info={"RKMEStatSpecification": user_spec} ) ( sorted_score_list, @@ -160,7 +160,7 @@ class M5DatasetWorkflow: mixture_id = " ".join([learnware.id for learnware in mixture_learnware_list]) print(f"mixture_score: {mixture_score}, mixture_learnware: {mixture_id}") - reuse_job_selector = JobSelectorReuser(learnware_list=mixture_learnware_list) + reuse_job_selector = JobSelectorReuser(learnware_list=mixture_learnware_list, use_herding=False) job_selector_predict_y = reuse_job_selector.predict(user_data=test_x) job_selector_score = m5.score(test_y, job_selector_predict_y) print(f"mixture reuse loss (job selector): {job_selector_score}") diff --git a/examples/example_pfs/main.py b/examples/example_pfs/main.py index 5d3ae6a..f42b3a9 100644 --- a/examples/example_pfs/main.py +++ b/examples/example_pfs/main.py @@ -24,7 +24,7 @@ semantic_specs = [ } ] -user_senmantic = { +user_semantic = { "Data": {"Values": ["Tabular"], "Type": "Class"}, "Task": {"Values": ["Classification"], "Type": "Class"}, "Device": {"Values": ["GPU"], "Type": "Tag"}, @@ -134,7 +134,7 @@ class PFSDatasetWorkflow: user_spec.save(user_spec_path) user_info = BaseUserInfo( - id=f"user_{idx}", semantic_spec=user_senmantic, stat_info={"RKMEStatSpecification": user_spec} + id=f"user_{idx}", semantic_spec=user_semantic, stat_info={"RKMEStatSpecification": user_spec} ) ( sorted_score_list, @@ -158,7 +158,7 @@ class PFSDatasetWorkflow: mixture_id = " ".join([learnware.id for learnware in mixture_learnware_list]) print(f"mixture_score: {mixture_score}, mixture_learnware: {mixture_id}") - reuse_job_selector = JobSelectorReuser(learnware_list=mixture_learnware_list) + reuse_job_selector = JobSelectorReuser(learnware_list=mixture_learnware_list, use_herding=False) job_selector_predict_y = reuse_job_selector.predict(user_data=test_x) job_selector_score = pfs.score(test_y, job_selector_predict_y) print(f"mixture reuse loss (job selector): {job_selector_score}") diff --git a/examples/workflow_by_code/main.py b/examples/workflow_by_code/main.py index 01f09f9..1208c52 100644 --- a/examples/workflow_by_code/main.py +++ b/examples/workflow_by_code/main.py @@ -26,7 +26,7 @@ semantic_specs = [ } ] -user_senmantic = { +user_semantic = { "Data": {"Values": ["Tabular"], "Type": "Class"}, "Task": { "Values": ["Classification"], @@ -130,7 +130,7 @@ class LearnwareMarketWorkflow: with zipfile.ZipFile(zip_path, "r") as zip_obj: zip_obj.extractall(path=unzip_dir) - user_info = BaseUserInfo(id="user_0", semantic_spec=user_senmantic) + user_info = BaseUserInfo(id="user_0", semantic_spec=user_semantic) _, single_learnware_list, _ = easy_market.search_learnware(user_info) print("User info:", user_info.get_semantic_spec()) @@ -159,7 +159,7 @@ class LearnwareMarketWorkflow: user_spec = specification.rkme.RKMEStatSpecification() user_spec.load(os.path.join(unzip_dir, "svm.json")) user_info = BaseUserInfo( - id="user_0", semantic_spec=user_senmantic, stat_info={"RKMEStatSpecification": user_spec} + id="user_0", semantic_spec=user_semantic, stat_info={"RKMEStatSpecification": user_spec} ) ( sorted_score_list, From 7045b5eaaacd5c36537aac332efcb63e88800663 Mon Sep 17 00:00:00 2001 From: xiey Date: Fri, 21 Apr 2023 19:06:03 +0800 Subject: [PATCH 09/23] [MNT] Add market_id in example_image --- examples/example_image/main.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/example_image/main.py b/examples/example_image/main.py index e96094a..50ea37f 100644 --- a/examples/example_image/main.py +++ b/examples/example_image/main.py @@ -133,10 +133,10 @@ def prepare_market(): def test_search(gamma=0.1, load_market=True): if load_market: - image_market = EasyMarket() + image_market = EasyMarket(market_id="image") else: prepare_market() - image_market = EasyMarket() + image_market = EasyMarket(market_id="image") logger.info("Number of items in the market: %d" % len(image_market)) select_list = [] @@ -200,4 +200,4 @@ def test_search(gamma=0.1, load_market=True): if __name__ == "__main__": # prepare_data() # prepare_model() - test_search(load_market=True) + test_search(load_market=False) From 49a639d46e0f8feedcbbcdc49c135f0173aade3c Mon Sep 17 00:00:00 2001 From: liuht Date: Fri, 21 Apr 2023 19:36:09 +0800 Subject: [PATCH 10/23] [FIX] fix bugs in use_herding --- learnware/learnware/reuse.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/learnware/learnware/reuse.py b/learnware/learnware/reuse.py index b16a1b9..b4639a9 100644 --- a/learnware/learnware/reuse.py +++ b/learnware/learnware/reuse.py @@ -89,13 +89,13 @@ class JobSelectorReuser(BaseReuser): herding_y, train_herding_y, val_herding_y = [], [], [] for i in range(len(self.learnware_list)): task_spec = learnware_rkme_spec_list[i] - task_herding_num = max(5, int(self.herding_num * task_mixture_weight[i])) - task_val_num = task_herding_num // 5 - if self.use_herding: + task_herding_num = max(5, int(self.herding_num * task_mixture_weight[i])) herding_X_i = task_spec.herding(task_herding_num).detach().cpu().numpy() else: herding_X_i = task_spec.z.detach().cpu().numpy() + task_herding_num = herding_X_i.shape[0] + task_val_num = task_herding_num // 5 train_X_i = herding_X_i[:-task_val_num] val_X_i = herding_X_i[-task_val_num:] From e9a360281f01458fc9bbe37e91152c4d952df171 Mon Sep 17 00:00:00 2001 From: xiey Date: Fri, 21 Apr 2023 19:37:06 +0800 Subject: [PATCH 11/23] [MNT] Change semantic specs --- examples/example_image/main.py | 4 ++-- examples/example_m5/main.py | 4 ++-- examples/example_market_db/example_db.py | 6 +++--- examples/example_pfs/main.py | 4 ++-- examples/workflow_by_code/main.py | 4 ++-- learnware/config.py | 9 +++++++-- learnware/market/easy.py | 5 +++-- 7 files changed, 21 insertions(+), 15 deletions(-) diff --git a/examples/example_image/main.py b/examples/example_image/main.py index 9bc7252..9f4279a 100644 --- a/examples/example_image/main.py +++ b/examples/example_image/main.py @@ -40,7 +40,7 @@ semantic_specs = [ { "Data": {"Values": ["Tabular"], "Type": "Class"}, "Task": {"Values": ["Classification"], "Type": "Class"}, - "Device": {"Values": ["GPU"], "Type": "Tag"}, + "Library": {"Values": ["Pytorch"], "Type": "Tag"}, "Scenario": {"Values": ["Business"], "Type": "Tag"}, "Description": {"Values": "", "Type": "String"}, "Name": {"Values": "learnware_1", "Type": "String"}, @@ -50,7 +50,7 @@ semantic_specs = [ user_senmantic = { "Data": {"Values": ["Tabular"], "Type": "Class"}, "Task": {"Values": ["Classification"], "Type": "Class"}, - "Device": {"Values": ["GPU"], "Type": "Tag"}, + "Library": {"Values": ["Pytorch"], "Type": "Tag"}, "Scenario": {"Values": ["Business"], "Type": "Tag"}, "Description": {"Values": "", "Type": "String"}, "Name": {"Values": "", "Type": "String"}, diff --git a/examples/example_m5/main.py b/examples/example_m5/main.py index 3759bb7..a19b14a 100644 --- a/examples/example_m5/main.py +++ b/examples/example_m5/main.py @@ -17,7 +17,7 @@ semantic_specs = [ { "Data": {"Values": ["Tabular"], "Type": "Class"}, "Task": {"Values": ["Classification"], "Type": "Class"}, - "Device": {"Values": ["GPU"], "Type": "Tag"}, + "Library": {"Values": ["Scikit-learn"], "Type": "Tag"}, "Scenario": {"Values": ["Business"], "Type": "Tag"}, "Description": {"Values": "", "Type": "String"}, "Name": {"Values": "learnware_1", "Type": "String"}, @@ -27,7 +27,7 @@ semantic_specs = [ user_semantic = { "Data": {"Values": ["Tabular"], "Type": "Class"}, "Task": {"Values": ["Classification"], "Type": "Class"}, - "Device": {"Values": ["GPU"], "Type": "Tag"}, + "Library": {"Values": ["Scikit-learn"], "Type": "Tag"}, "Scenario": {"Values": ["Business"], "Type": "Tag"}, "Description": {"Values": "", "Type": "String"}, "Name": {"Values": "", "Type": "String"}, diff --git a/examples/example_market_db/example_db.py b/examples/example_market_db/example_db.py index 11eb4cc..89f7f33 100644 --- a/examples/example_market_db/example_db.py +++ b/examples/example_market_db/example_db.py @@ -15,7 +15,7 @@ semantic_specs = [ { "Data": {"Values": ["Tabular"], "Type": "Class"}, "Task": {"Values": ["Classification"], "Type": "Class"}, - "Device": {"Values": ["GPU"], "Type": "Tag"}, + "Library": {"Values": ["Scikit-learn"], "Type": "Tag"}, "Scenario": {"Values": ["Nature"], "Type": "Tag"}, "Description": {"Values": "", "Type": "String"}, "Name": {"Values": "learnware_1", "Type": "String"}, @@ -23,7 +23,7 @@ semantic_specs = [ { "Data": {"Values": ["Tabular"], "Type": "Class"}, "Task": {"Values": ["Classification"], "Type": "Class"}, - "Device": {"Values": ["GPU"], "Type": "Tag"}, + "Library": {"Values": ["Scikit-learn"], "Type": "Tag"}, "Scenario": {"Values": ["Business", "Nature"], "Type": "Tag"}, "Description": {"Values": "", "Type": "String"}, "Name": {"Values": "learnware_2", "Type": "String"}, @@ -31,7 +31,7 @@ semantic_specs = [ { "Data": {"Values": ["Tabular"], "Type": "Class"}, "Task": {"Values": ["Regression"], "Type": "Class"}, - "Device": {"Values": ["GPU"], "Type": "Tag"}, + "Library": {"Values": ["Scikit-learn"], "Type": "Tag"}, "Scenario": {"Values": ["Business"], "Type": "Tag"}, "Description": {"Values": "", "Type": "String"}, "Name": {"Values": "learnware_3", "Type": "String"}, diff --git a/examples/example_pfs/main.py b/examples/example_pfs/main.py index f42b3a9..f6038dc 100644 --- a/examples/example_pfs/main.py +++ b/examples/example_pfs/main.py @@ -17,7 +17,7 @@ semantic_specs = [ { "Data": {"Values": ["Tabular"], "Type": "Class"}, "Task": {"Values": ["Classification"], "Type": "Class"}, - "Device": {"Values": ["GPU"], "Type": "Tag"}, + "Library": {"Values": ["Scikit-learn"], "Type": "Tag"}, "Scenario": {"Values": ["Business"], "Type": "Tag"}, "Description": {"Values": "", "Type": "String"}, "Name": {"Values": "learnware_1", "Type": "String"}, @@ -27,7 +27,7 @@ semantic_specs = [ user_semantic = { "Data": {"Values": ["Tabular"], "Type": "Class"}, "Task": {"Values": ["Classification"], "Type": "Class"}, - "Device": {"Values": ["GPU"], "Type": "Tag"}, + "Library": {"Values": ["Scikit-learn"], "Type": "Tag"}, "Scenario": {"Values": ["Business"], "Type": "Tag"}, "Description": {"Values": "", "Type": "String"}, "Name": {"Values": "", "Type": "String"}, diff --git a/examples/workflow_by_code/main.py b/examples/workflow_by_code/main.py index 1208c52..204cb07 100644 --- a/examples/workflow_by_code/main.py +++ b/examples/workflow_by_code/main.py @@ -19,7 +19,7 @@ semantic_specs = [ { "Data": {"Values": ["Tabular"], "Type": "Class"}, "Task": {"Values": ["Classification"], "Type": "Class"}, - "Device": {"Values": ["GPU"], "Type": "Tag"}, + "Library": {"Values": ["Scikit-learn"], "Type": "Tag"}, "Scenario": {"Values": ["Business"], "Type": "Tag"}, "Description": {"Values": "", "Type": "String"}, "Name": {"Values": "learnware_1", "Type": "String"}, @@ -32,7 +32,7 @@ user_semantic = { "Values": ["Classification"], "Type": "Class", }, - "Device": {"Values": ["GPU"], "Type": "Tag"}, + "Library": {"Values": ["Scikit-learn"], "Type": "Tag"}, "Scenario": {"Values": ["Business"], "Type": "Tag"}, "Description": {"Values": "", "Type": "String"}, "Name": {"Values": "", "Type": "String"}, diff --git a/learnware/config.py b/learnware/config.py index 18c2a15..2c79f5a 100644 --- a/learnware/config.py +++ b/learnware/config.py @@ -79,11 +79,16 @@ semantic_config = { # "Generation", "Segmentation", "Object Detection", + "Others", ], "Type": "Class", # Choose only one class }, - "Device": { - "Values": ["CPU", "GPU"], + # "Device": { + # "Values": ["CPU", "GPU"], + # "Type": "Tag", + # }, # Choose one or more tags + "Library": { + "Values": ["Scikit-learn", "PyTorch", "TensorFlow", "Others"], "Type": "Tag", }, # Choose one or more tags "Scenario": { diff --git a/learnware/market/easy.py b/learnware/market/easy.py index 4fbe859..1a04f9e 100644 --- a/learnware/market/easy.py +++ b/learnware/market/easy.py @@ -128,7 +128,7 @@ class EasyMarket(BaseMarket): if len(semantic_spec["Task"]["Values"]) == 0: logger.warning("Illegal semantic specification, please choose Task.") return None, False - if len(semantic_spec["Device"]["Values"]) == 0: + if len(semantic_spec["Library"]["Values"]) == 0: logger.warning("Illegal semantic specification, please choose Device.") return None, False if len(semantic_spec["Name"]["Values"]) == 0: @@ -140,7 +140,7 @@ class EasyMarket(BaseMarket): if ( semantic_spec["Data"]["Type"] != "Class" or semantic_spec["Task"]["Type"] != "Class" - or semantic_spec["Device"]["Type"] != "Tag" + or semantic_spec["Library"]["Type"] != "Tag" or semantic_spec["Scenario"]["Type"] != "Tag" or semantic_spec["Name"]["Type"] != "String" or semantic_spec["Description"]["Type"] != "String" @@ -148,6 +148,7 @@ class EasyMarket(BaseMarket): logger.warning("Illegal semantic specification, please provide the right type.") return None, False except: + print(semantic_spec) logger.warning("Illegal semantic specification, some keys are missing.") return None, False From d77a37c975c3d69886e915e3d88796f77a2f6583 Mon Sep 17 00:00:00 2001 From: tanp Date: Fri, 21 Apr 2023 20:01:20 +0800 Subject: [PATCH 12/23] [MNT] add the interface of the heterogeneous feature space market. --- learnware/market/heterogeneous_feature.py | 51 +++++++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 learnware/market/heterogeneous_feature.py diff --git a/learnware/market/heterogeneous_feature.py b/learnware/market/heterogeneous_feature.py new file mode 100644 index 0000000..a7768e2 --- /dev/null +++ b/learnware/market/heterogeneous_feature.py @@ -0,0 +1,51 @@ +import numpy as np +from typing import Tuple, Any, List, Union, Dict + +from .evolve import EvolvedMarket +from ..learnware import Learnware + + +class HeterogeneousFeatureMarket(EvolvedMarket): + """Organize learnwares with heterogeneous feature spaces + + Parameters + ---------- + EvolvedMarket : _type_ + Market version with evolved learnwares + """ + + def __init__(self): + super(EvolvedMarket, self).__init__() + + def learn_mapping_functions(self, learnware_list: List[Learnware]): + """Use all statistical specifications of submitted learnwares to generate mapping functions from each original feature space to subsapce and vice verse. + + Parameters + ---------- + learnware_list : List[Learnware] + list of learnwares + """ + + def transform_original_to_subspace(self, original_feature_space_idx: int, original_feature: np.ndarray): + """Transform feature in a original feature space to the subspace. + + Parameters + ---------- + original_feature_space_idx: int + index of the original feature space + original_feature: np.ndarray + data in the original feature space + """ + pass + + def transform_subspace_to_original(self, original_feature_space_idx: int, subspace_feature: np.ndarray): + """Transform feature in a original feature space to the subspace. + + Parameters + ---------- + original_feature_space_idx: int + index of the original feature space + subspace_feature: np.ndarray + data in the subspace + """ + pass \ No newline at end of file From fa1c6d1b59d76d6f2490a7dc98923829764c4c30 Mon Sep 17 00:00:00 2001 From: tanp Date: Fri, 21 Apr 2023 20:03:24 +0800 Subject: [PATCH 13/23] [MNT] add HeterogeneousFeatureMarket to the __init__.py --- learnware/market/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/learnware/market/__init__.py b/learnware/market/__init__.py index 10654c7..c1827e4 100644 --- a/learnware/market/__init__.py +++ b/learnware/market/__init__.py @@ -3,3 +3,4 @@ from .base import BaseUserInfo, BaseMarket from .evolve_anchor import EvolvedAnchoredMarket from .evolve import EvolvedMarket from .easy import EasyMarket +from .heterogeneous_feature import HeterogeneousFeatureMarket \ No newline at end of file From cfc76b7318adc335b8cd5cfa1d77edd0f89b4fff Mon Sep 17 00:00:00 2001 From: tanp Date: Fri, 21 Apr 2023 20:13:36 +0800 Subject: [PATCH 14/23] [FIX] Fix the typos, add the pass in learn_mapping_functions func. --- learnware/market/__init__.py | 2 +- learnware/market/heterogeneous_feature.py | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/learnware/market/__init__.py b/learnware/market/__init__.py index c1827e4..1620a5e 100644 --- a/learnware/market/__init__.py +++ b/learnware/market/__init__.py @@ -3,4 +3,4 @@ from .base import BaseUserInfo, BaseMarket from .evolve_anchor import EvolvedAnchoredMarket from .evolve import EvolvedMarket from .easy import EasyMarket -from .heterogeneous_feature import HeterogeneousFeatureMarket \ No newline at end of file +from .heterogeneous_feature import HeterogeneousFeatureMarket diff --git a/learnware/market/heterogeneous_feature.py b/learnware/market/heterogeneous_feature.py index a7768e2..5a6b105 100644 --- a/learnware/market/heterogeneous_feature.py +++ b/learnware/market/heterogeneous_feature.py @@ -25,6 +25,7 @@ class HeterogeneousFeatureMarket(EvolvedMarket): learnware_list : List[Learnware] list of learnwares """ + pass def transform_original_to_subspace(self, original_feature_space_idx: int, original_feature: np.ndarray): """Transform feature in a original feature space to the subspace. @@ -34,7 +35,7 @@ class HeterogeneousFeatureMarket(EvolvedMarket): original_feature_space_idx: int index of the original feature space original_feature: np.ndarray - data in the original feature space + data in the original feature space """ pass @@ -46,6 +47,6 @@ class HeterogeneousFeatureMarket(EvolvedMarket): original_feature_space_idx: int index of the original feature space subspace_feature: np.ndarray - data in the subspace + data in the subspace """ - pass \ No newline at end of file + pass From 13feff89232dde1b46ce760c3c814a0f9d301574 Mon Sep 17 00:00:00 2001 From: tanp Date: Fri, 21 Apr 2023 20:24:52 +0800 Subject: [PATCH 15/23] [FIX] Fix the bug in the __init__ --- learnware/market/heterogeneous_feature.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/learnware/market/heterogeneous_feature.py b/learnware/market/heterogeneous_feature.py index 5a6b105..f9dd094 100644 --- a/learnware/market/heterogeneous_feature.py +++ b/learnware/market/heterogeneous_feature.py @@ -15,7 +15,7 @@ class HeterogeneousFeatureMarket(EvolvedMarket): """ def __init__(self): - super(EvolvedMarket, self).__init__() + super(HeterogeneousFeatureMarket, self).__init__() def learn_mapping_functions(self, learnware_list: List[Learnware]): """Use all statistical specifications of submitted learnwares to generate mapping functions from each original feature space to subsapce and vice verse. From 5c98554c86eb4a0a7da295b14ff2b424f44bd2a8 Mon Sep 17 00:00:00 2001 From: xiey Date: Fri, 21 Apr 2023 20:48:31 +0800 Subject: [PATCH 16/23] [MNT] Change semantic spec: library from tag to class --- learnware/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/learnware/config.py b/learnware/config.py index 2c79f5a..be274ba 100644 --- a/learnware/config.py +++ b/learnware/config.py @@ -89,7 +89,7 @@ semantic_config = { # }, # Choose one or more tags "Library": { "Values": ["Scikit-learn", "PyTorch", "TensorFlow", "Others"], - "Type": "Tag", + "Type": "Class", }, # Choose one or more tags "Scenario": { "Values": [ From 4c959100a5f9013343a25aedd9bdecb0a82cbd87 Mon Sep 17 00:00:00 2001 From: xiey Date: Fri, 21 Apr 2023 20:50:51 +0800 Subject: [PATCH 17/23] [FIX] fix a bug in semantic check --- learnware/market/easy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/learnware/market/easy.py b/learnware/market/easy.py index 1a04f9e..96c1a5f 100644 --- a/learnware/market/easy.py +++ b/learnware/market/easy.py @@ -140,7 +140,7 @@ class EasyMarket(BaseMarket): if ( semantic_spec["Data"]["Type"] != "Class" or semantic_spec["Task"]["Type"] != "Class" - or semantic_spec["Library"]["Type"] != "Tag" + or semantic_spec["Library"]["Type"] != "Class" or semantic_spec["Scenario"]["Type"] != "Tag" or semantic_spec["Name"]["Type"] != "String" or semantic_spec["Description"]["Type"] != "String" From 688e4978e66b8ee744ea38566e7a2412745bcaa5 Mon Sep 17 00:00:00 2001 From: xiey Date: Fri, 21 Apr 2023 21:50:26 +0800 Subject: [PATCH 18/23] [FIX] Change semantic check return value --- learnware/market/easy.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/learnware/market/easy.py b/learnware/market/easy.py index 96c1a5f..413147b 100644 --- a/learnware/market/easy.py +++ b/learnware/market/easy.py @@ -119,7 +119,7 @@ class EasyMarket(BaseMarket): """ if not os.path.exists(zip_path): logger.warning("Zip Path NOT Found! Fail to add learnware.") - return None, False + return None, self.INVALID_LEARNWARE try: if len(semantic_spec["Data"]["Values"]) == 0: @@ -127,16 +127,16 @@ class EasyMarket(BaseMarket): return None, False if len(semantic_spec["Task"]["Values"]) == 0: logger.warning("Illegal semantic specification, please choose Task.") - return None, False + return None, self.INVALID_LEARNWARE if len(semantic_spec["Library"]["Values"]) == 0: logger.warning("Illegal semantic specification, please choose Device.") - return None, False + return None, self.INVALID_LEARNWARE if len(semantic_spec["Name"]["Values"]) == 0: logger.warning("Illegal semantic specification, please provide Name.") - return None, False + return None, self.INVALID_LEARNWARE if len(semantic_spec["Description"]["Values"]) == 0 and len(semantic_spec["Scenario"]["Values"]) == 0: logger.warning("Illegal semantic specification, please provide Scenario or Description.") - return None, False + return None, self.INVALID_LEARNWARE if ( semantic_spec["Data"]["Type"] != "Class" or semantic_spec["Task"]["Type"] != "Class" @@ -146,11 +146,11 @@ class EasyMarket(BaseMarket): or semantic_spec["Description"]["Type"] != "String" ): logger.warning("Illegal semantic specification, please provide the right type.") - return None, False + return None, self.INVALID_LEARNWARE except: print(semantic_spec) logger.warning("Illegal semantic specification, some keys are missing.") - return None, False + return None, self.INVALID_LEARNWARE logger.info("Get new learnware from %s" % (zip_path)) id = "%08d" % (self.count) From 7e8f4c34d8cbd07d6510fd19d375694e1094e42c Mon Sep 17 00:00:00 2001 From: xiey Date: Fri, 21 Apr 2023 22:05:18 +0800 Subject: [PATCH 19/23] [MNT] fix a bug in semantic check --- learnware/market/easy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/learnware/market/easy.py b/learnware/market/easy.py index 413147b..5e7bf80 100644 --- a/learnware/market/easy.py +++ b/learnware/market/easy.py @@ -124,7 +124,7 @@ class EasyMarket(BaseMarket): try: if len(semantic_spec["Data"]["Values"]) == 0: logger.warning("Illegal semantic specification, please choose Data.") - return None, False + return None, self.INVALID_LEARNWARE if len(semantic_spec["Task"]["Values"]) == 0: logger.warning("Illegal semantic specification, please choose Task.") return None, self.INVALID_LEARNWARE From 6aab14340700630472fc1c5c6358be4a4fc9f0d5 Mon Sep 17 00:00:00 2001 From: tanp Date: Fri, 21 Apr 2023 22:40:08 +0800 Subject: [PATCH 20/23] [MNT] add the mapping function class, modify the HeterogeneousFeatureMarket class --- learnware/market/heterogeneous_feature.py | 55 +++++++++++++++++++---- 1 file changed, 47 insertions(+), 8 deletions(-) diff --git a/learnware/market/heterogeneous_feature.py b/learnware/market/heterogeneous_feature.py index f9dd094..b31659a 100644 --- a/learnware/market/heterogeneous_feature.py +++ b/learnware/market/heterogeneous_feature.py @@ -5,6 +5,26 @@ from .evolve import EvolvedMarket from ..learnware import Learnware +class MappingFunction: + def __init__(self) -> None: + pass + + def transform(X: np.ndarray) -> np.ndarray: + """transform the data in one feature space to another feature space. + + Parameters + ---------- + X : np.ndarray + data in one feature space + + Returns + ------- + np.ndarray + transformed data in other feature space + """ + pass + + class HeterogeneousFeatureMarket(EvolvedMarket): """Organize learnwares with heterogeneous feature spaces @@ -16,37 +36,56 @@ class HeterogeneousFeatureMarket(EvolvedMarket): def __init__(self): super(HeterogeneousFeatureMarket, self).__init__() + self.mapping_function_list={} - def learn_mapping_functions(self, learnware_list: List[Learnware]): + def _mapping_function_list_initialization(self, learnware_list: List[Learnware]): + self.mapping_function_list=self.learn_mapping_functions(learnware_list) + + def learn_mapping_functions(self, learnware_list: List[Learnware])-> List[MappingFunction]: """Use all statistical specifications of submitted learnwares to generate mapping functions from each original feature space to subsapce and vice verse. Parameters ---------- learnware_list : List[Learnware] list of learnwares + + Returns + ------- + List[MappingFunction] + list of mapping functions """ pass - def transform_original_to_subspace(self, original_feature_space_idx: int, original_feature: np.ndarray): + def transform_original_to_subspace(self, original_feature_space_idx: int, original_feature: np.ndarray) -> np.ndarray: """Transform feature in a original feature space to the subspace. Parameters ---------- - original_feature_space_idx: int + original_feature_space_idx : int index of the original feature space - original_feature: np.ndarray + original_feature : np.ndarray data in the original feature space + + Returns + ------- + np.ndarray + mapped data in the subspace """ pass - def transform_subspace_to_original(self, original_feature_space_idx: int, subspace_feature: np.ndarray): - """Transform feature in a original feature space to the subspace. + def transform_subspace_to_original(self, original_feature_space_idx: int, subspace_feature: np.ndarray) -> np.ndarray: + """Transform feature in the subspace to a original feature space. Parameters ---------- - original_feature_space_idx: int + original_feature_space_idx : int index of the original feature space - subspace_feature: np.ndarray + subspace_feature : np.ndarray data in the subspace + + Returns + ------- + np.ndarray + mapped data in the original feature space """ pass From e5b58b594055fc0f1dfec276383a0f0d8976d85f Mon Sep 17 00:00:00 2001 From: tanp Date: Fri, 21 Apr 2023 22:41:31 +0800 Subject: [PATCH 21/23] [FIX] modify the docstring --- learnware/market/heterogeneous_feature.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/learnware/market/heterogeneous_feature.py b/learnware/market/heterogeneous_feature.py index b31659a..de52ebc 100644 --- a/learnware/market/heterogeneous_feature.py +++ b/learnware/market/heterogeneous_feature.py @@ -39,6 +39,13 @@ class HeterogeneousFeatureMarket(EvolvedMarket): self.mapping_function_list={} def _mapping_function_list_initialization(self, learnware_list: List[Learnware]): + """Initialize mapping functions with all submitted learnwares + + Parameters + ---------- + learnware_list : List[Learnware] + list of learnwares + """ self.mapping_function_list=self.learn_mapping_functions(learnware_list) def learn_mapping_functions(self, learnware_list: List[Learnware])-> List[MappingFunction]: From 6d1f88bc64debd60a86406ea271ce6868313dc87 Mon Sep 17 00:00:00 2001 From: liuht Date: Sat, 22 Apr 2023 00:36:29 +0800 Subject: [PATCH 22/23] [FIX] fix typos, expand job-selector param range --- examples/example_m5/main.py | 8 ++++---- examples/example_pfs/main.py | 8 ++++---- learnware/learnware/reuse.py | 25 +++++++++++++------------ 3 files changed, 21 insertions(+), 20 deletions(-) diff --git a/examples/example_m5/main.py b/examples/example_m5/main.py index a19b14a..7d4f979 100644 --- a/examples/example_m5/main.py +++ b/examples/example_m5/main.py @@ -124,7 +124,7 @@ class M5DatasetWorkflow: m5 = DataLoader() idx_list = m5.get_idx_list() os.makedirs("./user_spec", exist_ok=True) - sinle_score_list = [] + single_score_list = [] random_score_list = [] job_selector_score_list = [] ensemble_score_list = [] @@ -154,7 +154,7 @@ class M5DatasetWorkflow: pred_y = learnware.predict(test_x) loss_list.append(m5.score(test_y, pred_y)) print( - f"Top1-score: {sorted_score_list[0]}, learnware_id: {single_learnware_list[0].id}, loss: {loss_list[-1]}" + f"Top1-score: {sorted_score_list[0]}, learnware_id: {single_learnware_list[0].id}, loss: {loss_list[0]}" ) mixture_id = " ".join([learnware.id for learnware in mixture_learnware_list]) @@ -170,12 +170,12 @@ class M5DatasetWorkflow: ensemble_score = m5.score(test_y, ensemble_predict_y) print(f"mixture reuse loss (ensemble): {ensemble_score}\n") - sinle_score_list.append(loss_list[0]) + single_score_list.append(loss_list[0]) random_score_list.append(np.mean(loss_list)) job_selector_score_list.append(job_selector_score) ensemble_score_list.append(ensemble_score) - print(f"Single search score: {np.mean(sinle_score_list)}") + print(f"Single search score: {np.mean(single_score_list)}") print(f"Job selector score: {np.mean(job_selector_score_list)}") print(f"Average ensemble score: {np.mean(ensemble_score_list)}") print(f"Random search score: {np.mean(random_score_list)}") diff --git a/examples/example_pfs/main.py b/examples/example_pfs/main.py index f6038dc..25da85b 100644 --- a/examples/example_pfs/main.py +++ b/examples/example_pfs/main.py @@ -122,7 +122,7 @@ class PFSDatasetWorkflow: pfs = Dataloader() idx_list = pfs.get_idx_list() os.makedirs("./user_spec", exist_ok=True) - sinle_score_list = [] + single_score_list = [] random_score_list = [] job_selector_score_list = [] ensemble_score_list = [] @@ -152,7 +152,7 @@ class PFSDatasetWorkflow: pred_y = learnware.predict(test_x) loss_list.append(pfs.score(test_y, pred_y)) print( - f"Top1-score: {sorted_score_list[0]}, learnware_id: {single_learnware_list[0].id}, loss: {loss_list[-1]}" + f"Top1-score: {sorted_score_list[0]}, learnware_id: {single_learnware_list[0].id}, loss: {loss_list[0]}" ) mixture_id = " ".join([learnware.id for learnware in mixture_learnware_list]) @@ -168,12 +168,12 @@ class PFSDatasetWorkflow: ensemble_score = pfs.score(test_y, ensemble_predict_y) print(f"mixture reuse loss (ensemble): {ensemble_score}\n") - sinle_score_list.append(loss_list[0]) + single_score_list.append(loss_list[0]) random_score_list.append(np.mean(loss_list)) job_selector_score_list.append(job_selector_score) ensemble_score_list.append(ensemble_score) - print(f"Single search score: {np.mean(sinle_score_list)}") + print(f"Single search score: {np.mean(single_score_list)}") print(f"Job selector score: {np.mean(job_selector_score_list)}") print(f"Average ensemble score: {np.mean(ensemble_score_list)}") print(f"Random search score: {np.mean(random_score_list)}") diff --git a/learnware/learnware/reuse.py b/learnware/learnware/reuse.py index b4639a9..97ee44e 100644 --- a/learnware/learnware/reuse.py +++ b/learnware/learnware/reuse.py @@ -73,17 +73,18 @@ class JobSelectorReuser(BaseReuser): for learnware in self.learnware_list ] - task_matrix = np.zeros((len(learnware_rkme_spec_list), len(learnware_rkme_spec_list))) - for i in range(len(self.learnware_list)): - task_rkme1 = learnware_rkme_spec_list[i] - task_matrix[i][i] = task_rkme1.inner_prod(task_rkme1) - for j in range(i + 1, len(self.learnware_list)): - task_rkme2 = learnware_rkme_spec_list[j] - task_matrix[i][j] = task_matrix[j][i] = task_rkme1.inner_prod(task_rkme2) - - task_mixture_weight = self._calculate_rkme_spec_mixture_weight( - user_data, learnware_rkme_spec_list, task_matrix - ) + if self.use_herding: + task_matrix = np.zeros((len(learnware_rkme_spec_list), len(learnware_rkme_spec_list))) + for i in range(len(self.learnware_list)): + task_rkme1 = learnware_rkme_spec_list[i] + task_matrix[i][i] = task_rkme1.inner_prod(task_rkme1) + for j in range(i + 1, len(self.learnware_list)): + task_rkme2 = learnware_rkme_spec_list[j] + task_matrix[i][j] = task_matrix[j][i] = task_rkme1.inner_prod(task_rkme2) + + task_mixture_weight = self._calculate_rkme_spec_mixture_weight( + user_data, learnware_rkme_spec_list, task_matrix + ) herding_X, train_herding_X, val_herding_X = None, None, None herding_y, train_herding_y, val_herding_y = [], [], [] @@ -275,7 +276,7 @@ class AveragingReuser(BaseReuser): # print(pred_y.shape) if not isinstance(pred_y, np.ndarray): pred_y = pred_y.detach().cpu().numpy() - softmax_pred = softmax(pred_y, axis=1) + softmax_pred = softmax(pred_y, axis=0) if mean_pred_y is None: mean_pred_y = softmax_pred else: From 7b72739f949cc0d4493f1ba2b9e9c08f08608081 Mon Sep 17 00:00:00 2001 From: xiey Date: Sat, 22 Apr 2023 10:07:24 +0800 Subject: [PATCH 23/23] [MNT] change user semantic in examples --- examples/example_image/main.py | 8 ++++---- examples/example_m5/main.py | 4 ++-- examples/example_market_db/example_db.py | 13 +++++-------- examples/example_pfs/main.py | 4 ++-- examples/workflow_by_code/main.py | 4 ++-- learnware/market/heterogeneous_feature.py | 14 +++++++++----- 6 files changed, 24 insertions(+), 23 deletions(-) diff --git a/examples/example_image/main.py b/examples/example_image/main.py index 9f4279a..757fe44 100644 --- a/examples/example_image/main.py +++ b/examples/example_image/main.py @@ -40,17 +40,17 @@ semantic_specs = [ { "Data": {"Values": ["Tabular"], "Type": "Class"}, "Task": {"Values": ["Classification"], "Type": "Class"}, - "Library": {"Values": ["Pytorch"], "Type": "Tag"}, + "Library": {"Values": ["Pytorch"], "Type": "Class"}, "Scenario": {"Values": ["Business"], "Type": "Tag"}, "Description": {"Values": "", "Type": "String"}, "Name": {"Values": "learnware_1", "Type": "String"}, } ] -user_senmantic = { +user_semantic = { "Data": {"Values": ["Tabular"], "Type": "Class"}, "Task": {"Values": ["Classification"], "Type": "Class"}, - "Library": {"Values": ["Pytorch"], "Type": "Tag"}, + "Library": {"Values": ["Pytorch"], "Type": "Class"}, "Scenario": {"Values": ["Business"], "Type": "Tag"}, "Description": {"Values": "", "Type": "String"}, "Name": {"Values": "", "Type": "String"}, @@ -151,7 +151,7 @@ def test_search(gamma=0.1, load_market=True): user_label = np.load(user_label_path) user_stat_spec = specification.utils.generate_rkme_spec(X=user_data, gamma=gamma, cuda_idx=0) user_info = BaseUserInfo( - id=f"user_{i}", semantic_spec=user_senmantic, stat_info={"RKMEStatSpecification": user_stat_spec} + id=f"user_{i}", semantic_spec=user_semantic, stat_info={"RKMEStatSpecification": user_stat_spec} ) logger.info("Searching Market for user: %d" % (i)) sorted_score_list, single_learnware_list, mixture_score, mixture_learnware_list = image_market.search_learnware( diff --git a/examples/example_m5/main.py b/examples/example_m5/main.py index 7d4f979..310bb40 100644 --- a/examples/example_m5/main.py +++ b/examples/example_m5/main.py @@ -17,7 +17,7 @@ semantic_specs = [ { "Data": {"Values": ["Tabular"], "Type": "Class"}, "Task": {"Values": ["Classification"], "Type": "Class"}, - "Library": {"Values": ["Scikit-learn"], "Type": "Tag"}, + "Library": {"Values": ["Scikit-learn"], "Type": "Class"}, "Scenario": {"Values": ["Business"], "Type": "Tag"}, "Description": {"Values": "", "Type": "String"}, "Name": {"Values": "learnware_1", "Type": "String"}, @@ -27,7 +27,7 @@ semantic_specs = [ user_semantic = { "Data": {"Values": ["Tabular"], "Type": "Class"}, "Task": {"Values": ["Classification"], "Type": "Class"}, - "Library": {"Values": ["Scikit-learn"], "Type": "Tag"}, + "Library": {"Values": ["Scikit-learn"], "Type": "Class"}, "Scenario": {"Values": ["Business"], "Type": "Tag"}, "Description": {"Values": "", "Type": "String"}, "Name": {"Values": "", "Type": "String"}, diff --git a/examples/example_market_db/example_db.py b/examples/example_market_db/example_db.py index 89f7f33..b4e146a 100644 --- a/examples/example_market_db/example_db.py +++ b/examples/example_market_db/example_db.py @@ -15,7 +15,7 @@ semantic_specs = [ { "Data": {"Values": ["Tabular"], "Type": "Class"}, "Task": {"Values": ["Classification"], "Type": "Class"}, - "Library": {"Values": ["Scikit-learn"], "Type": "Tag"}, + "Library": {"Values": ["Scikit-learn"], "Type": "Class"}, "Scenario": {"Values": ["Nature"], "Type": "Tag"}, "Description": {"Values": "", "Type": "String"}, "Name": {"Values": "learnware_1", "Type": "String"}, @@ -23,7 +23,7 @@ semantic_specs = [ { "Data": {"Values": ["Tabular"], "Type": "Class"}, "Task": {"Values": ["Classification"], "Type": "Class"}, - "Library": {"Values": ["Scikit-learn"], "Type": "Tag"}, + "Library": {"Values": ["Scikit-learn"], "Type": "Class"}, "Scenario": {"Values": ["Business", "Nature"], "Type": "Tag"}, "Description": {"Values": "", "Type": "String"}, "Name": {"Values": "learnware_2", "Type": "String"}, @@ -31,7 +31,7 @@ semantic_specs = [ { "Data": {"Values": ["Tabular"], "Type": "Class"}, "Task": {"Values": ["Regression"], "Type": "Class"}, - "Library": {"Values": ["Scikit-learn"], "Type": "Tag"}, + "Library": {"Values": ["Scikit-learn"], "Type": "Class"}, "Scenario": {"Values": ["Business"], "Type": "Tag"}, "Description": {"Values": "", "Type": "String"}, "Name": {"Values": "learnware_3", "Type": "String"}, @@ -40,11 +40,8 @@ semantic_specs = [ user_senmantic = { "Data": {"Values": ["Tabular"], "Type": "Class"}, - "Task": { - "Values": ["Classification"], - "Type": "Class", - }, - "Device": {"Values": ["GPU"], "Type": "Tag"}, + "Task": {"Values": ["Classification"], "Type": "Class"}, + "Device": {"Values": ["GPU"], "Type": "Class"}, "Scenario": {"Values": ["Business"], "Type": "Tag"}, "Description": {"Values": "", "Type": "String"}, "Name": {"Values": "learnware", "Type": "String"}, diff --git a/examples/example_pfs/main.py b/examples/example_pfs/main.py index 25da85b..95e2662 100644 --- a/examples/example_pfs/main.py +++ b/examples/example_pfs/main.py @@ -17,7 +17,7 @@ semantic_specs = [ { "Data": {"Values": ["Tabular"], "Type": "Class"}, "Task": {"Values": ["Classification"], "Type": "Class"}, - "Library": {"Values": ["Scikit-learn"], "Type": "Tag"}, + "Library": {"Values": ["Scikit-learn"], "Type": "Class"}, "Scenario": {"Values": ["Business"], "Type": "Tag"}, "Description": {"Values": "", "Type": "String"}, "Name": {"Values": "learnware_1", "Type": "String"}, @@ -27,7 +27,7 @@ semantic_specs = [ user_semantic = { "Data": {"Values": ["Tabular"], "Type": "Class"}, "Task": {"Values": ["Classification"], "Type": "Class"}, - "Library": {"Values": ["Scikit-learn"], "Type": "Tag"}, + "Library": {"Values": ["Scikit-learn"], "Type": "Class"}, "Scenario": {"Values": ["Business"], "Type": "Tag"}, "Description": {"Values": "", "Type": "String"}, "Name": {"Values": "", "Type": "String"}, diff --git a/examples/workflow_by_code/main.py b/examples/workflow_by_code/main.py index 204cb07..97d2fd0 100644 --- a/examples/workflow_by_code/main.py +++ b/examples/workflow_by_code/main.py @@ -19,7 +19,7 @@ semantic_specs = [ { "Data": {"Values": ["Tabular"], "Type": "Class"}, "Task": {"Values": ["Classification"], "Type": "Class"}, - "Library": {"Values": ["Scikit-learn"], "Type": "Tag"}, + "Library": {"Values": ["Scikit-learn"], "Type": "Class"}, "Scenario": {"Values": ["Business"], "Type": "Tag"}, "Description": {"Values": "", "Type": "String"}, "Name": {"Values": "learnware_1", "Type": "String"}, @@ -33,7 +33,7 @@ user_semantic = { "Type": "Class", }, "Library": {"Values": ["Scikit-learn"], "Type": "Tag"}, - "Scenario": {"Values": ["Business"], "Type": "Tag"}, + "Scenario": {"Values": ["Business"], "Type": "Class"}, "Description": {"Values": "", "Type": "String"}, "Name": {"Values": "", "Type": "String"}, } diff --git a/learnware/market/heterogeneous_feature.py b/learnware/market/heterogeneous_feature.py index de52ebc..b25e2e8 100644 --- a/learnware/market/heterogeneous_feature.py +++ b/learnware/market/heterogeneous_feature.py @@ -36,7 +36,7 @@ class HeterogeneousFeatureMarket(EvolvedMarket): def __init__(self): super(HeterogeneousFeatureMarket, self).__init__() - self.mapping_function_list={} + self.mapping_function_list = {} def _mapping_function_list_initialization(self, learnware_list: List[Learnware]): """Initialize mapping functions with all submitted learnwares @@ -46,9 +46,9 @@ class HeterogeneousFeatureMarket(EvolvedMarket): learnware_list : List[Learnware] list of learnwares """ - self.mapping_function_list=self.learn_mapping_functions(learnware_list) + self.mapping_function_list = self.learn_mapping_functions(learnware_list) - def learn_mapping_functions(self, learnware_list: List[Learnware])-> List[MappingFunction]: + def learn_mapping_functions(self, learnware_list: List[Learnware]) -> List[MappingFunction]: """Use all statistical specifications of submitted learnwares to generate mapping functions from each original feature space to subsapce and vice verse. Parameters @@ -63,7 +63,9 @@ class HeterogeneousFeatureMarket(EvolvedMarket): """ pass - def transform_original_to_subspace(self, original_feature_space_idx: int, original_feature: np.ndarray) -> np.ndarray: + def transform_original_to_subspace( + self, original_feature_space_idx: int, original_feature: np.ndarray + ) -> np.ndarray: """Transform feature in a original feature space to the subspace. Parameters @@ -80,7 +82,9 @@ class HeterogeneousFeatureMarket(EvolvedMarket): """ pass - def transform_subspace_to_original(self, original_feature_space_idx: int, subspace_feature: np.ndarray) -> np.ndarray: + def transform_subspace_to_original( + self, original_feature_space_idx: int, subspace_feature: np.ndarray + ) -> np.ndarray: """Transform feature in the subspace to a original feature space. Parameters