diff --git a/examples/example_image/main.py b/examples/example_image/main.py index 2c68dbb..9df532a 100644 --- a/examples/example_image/main.py +++ b/examples/example_image/main.py @@ -38,45 +38,17 @@ os.makedirs(model_save_root, exist_ok=True) semantic_specs = [ { "Data": {"Values": ["Tabular"], "Type": "Class"}, - "Task": { - "Values": ["Classification"], - "Type": "Class", - }, - "Device": {"Values": ["GPU"], "Type": "Tag"}, - "Scenario": {"Values": ["Nature"], "Type": "Tag"}, - "Description": {"Values": "", "Type": "String"}, - "Name": {"Values": "learnware_1", "Type": "String"}, - }, - { - "Data": {"Values": ["Tabular"], "Type": "Class"}, - "Task": { - "Values": ["Classification"], - "Type": "Class", - }, - "Device": {"Values": ["GPU"], "Type": "Tag"}, - "Scenario": {"Values": ["Business", "Nature"], "Type": "Tag"}, - "Description": {"Values": "", "Type": "String"}, - "Name": {"Values": "learnware_2", "Type": "String"}, - }, - { - "Data": {"Values": ["Tabular"], "Type": "Class"}, - "Task": { - "Values": ["Classification"], - "Type": "Class", - }, + "Task": {"Values": ["Classification"], "Type": "Class"}, "Device": {"Values": ["GPU"], "Type": "Tag"}, "Scenario": {"Values": ["Business"], "Type": "Tag"}, "Description": {"Values": "", "Type": "String"}, - "Name": {"Values": "learnware_3", "Type": "String"}, - }, + "Name": {"Values": "learnware_1", "Type": "String"}, + } ] user_senmantic = { "Data": {"Values": ["Tabular"], "Type": "Class"}, - "Task": { - "Values": ["Classification"], - "Type": "Class", - }, + "Task": {"Values": ["Classification"], "Type": "Class"}, "Device": {"Values": ["GPU"], "Type": "Tag"}, "Scenario": {"Values": ["Business"], "Type": "Tag"}, "Description": {"Values": "", "Type": "String"}, @@ -144,7 +116,7 @@ def prepare_market(): new_learnware_path = prepare_learnware( data_path, model_path, init_file_path, yaml_file_path, tmp_dir, "%s_%d" % (dataset, i) ) - semantic_spec = semantic_specs[i % 3] + semantic_spec = semantic_specs[0] 
semantic_spec["Name"]["Values"] = "learnware_%d" % (i) semantic_spec["Description"]["Values"] = "test_learnware_number_%d" % (i) image_market.add_learnware(new_learnware_path, semantic_spec) diff --git a/examples/example_m5/main.py b/examples/example_m5/main.py index e3e4390..761582c 100644 --- a/examples/example_m5/main.py +++ b/examples/example_m5/main.py @@ -15,45 +15,17 @@ from m5 import DataLoader semantic_specs = [ { "Data": {"Values": ["Tabular"], "Type": "Class"}, - "Task": { - "Values": ["Classification"], - "Type": "Class", - }, - "Device": {"Values": ["GPU"], "Type": "Tag"}, - "Scenario": {"Values": ["Nature"], "Type": "Tag"}, - "Description": {"Values": "", "Type": "String"}, - "Name": {"Values": "learnware_1", "Type": "String"}, - }, - { - "Data": {"Values": ["Tabular"], "Type": "Class"}, - "Task": { - "Values": ["Classification"], - "Type": "Class", - }, - "Device": {"Values": ["GPU"], "Type": "Tag"}, - "Scenario": {"Values": ["Business", "Nature"], "Type": "Tag"}, - "Description": {"Values": "", "Type": "String"}, - "Name": {"Values": "learnware_2", "Type": "String"}, - }, - { - "Data": {"Values": ["Tabular"], "Type": "Class"}, - "Task": { - "Values": ["Classification"], - "Type": "Class", - }, + "Task": {"Values": ["Classification"], "Type": "Class"}, "Device": {"Values": ["GPU"], "Type": "Tag"}, "Scenario": {"Values": ["Business"], "Type": "Tag"}, "Description": {"Values": "", "Type": "String"}, - "Name": {"Values": "learnware_3", "Type": "String"}, - }, + "Name": {"Values": "learnware_1", "Type": "String"}, + } ] user_senmantic = { "Data": {"Values": ["Tabular"], "Type": "Class"}, - "Task": { - "Values": ["Classification"], - "Type": "Class", - }, + "Task": {"Values": ["Classification"], "Type": "Class"}, "Device": {"Values": ["GPU"], "Type": "Tag"}, "Scenario": {"Values": ["Business"], "Type": "Tag"}, "Description": {"Values": "", "Type": "String"}, @@ -86,7 +58,7 @@ class M5DatasetWorkflow: zip_path_list.append(os.path.join(curr_root, zip_path)) 
for idx, zip_path in enumerate(zip_path_list): - semantic_spec = semantic_specs[idx % 3] + semantic_spec = semantic_specs[0] semantic_spec["Name"]["Values"] = "learnware_%d" % (idx) semantic_spec["Description"]["Values"] = "test_learnware_number_%d" % (idx) easy_market.add_learnware(zip_path, semantic_spec) @@ -101,7 +73,7 @@ class M5DatasetWorkflow: m5 = DataLoader() idx_list = m5.get_idx_list() - algo_list = ["ridge", "lgb"] + algo_list = ["lgb"] # algo_list = ["ridge", "lgb"] curr_root = os.path.dirname(os.path.abspath(__file__)) curr_root = os.path.join(curr_root, "learnware_pool") @@ -161,6 +133,9 @@ class M5DatasetWorkflow: sorted_score_list, single_learnware_list, mixture_learnware_list = easy_market.search_learnware(user_info) print(f"search result of user{idx}:") + print( + f"single model num: {len(sorted_score_list)}, max_score: {sorted_score_list[0]}, min_score: {sorted_score_list[-1]}" + ) for score, learnware in zip(sorted_score_list, single_learnware_list): pred_y = learnware.predict(test_x) loss = m5.score(test_y, pred_y) @@ -169,11 +144,10 @@ class M5DatasetWorkflow: mixture_id = " ".join([learnware.id for learnware in mixture_learnware_list]) print(f"mixture_learnware: {mixture_id}\n") - # TODO: model reuse score reuse_baseline = JobSelectorReuser(learnware_list=mixture_learnware_list) reuse_predict = reuse_baseline.predict(user_data=test_x) reuse_score = m5.score(test_y, reuse_predict) - print(f"mixture reuse score: {reuse_score}\n") + print(f"mixture reuse loss: {reuse_score}\n") if __name__ == "__main__":
diff --git a/examples/example_m5/upload.py b/examples/example_m5/upload.py new file mode 100644 index 0000000..0c9e209 --- /dev/null +++ b/examples/example_m5/upload.py @@ -0,0 +1,86 @@ +import hashlib +import requests +import os +import random +import json +import time +from tqdm import tqdm + +email = "tanzh@lamda.nju.edu.cn" +password = hashlib.md5(b"Qwerty123").hexdigest() +login_url = "http://210.28.134.201:8089/auth/login" +submit_url = "http://210.28.134.201:8089/user/add_learnware" +all_data_type = ["Table", "Image", "Video", "Text", "Audio"] +all_task_type = [ + "Classification", + "Regression", + "Clustering", + "Feature Extraction", + "Generation", + "Segmentation", + "Object Detection", +] +all_device_type = ["CPU", "GPU"] +all_scenario = [ + "Business", + "Financial", + "Health", + "Politics", + "Computer", + "Internet", + "Traffic", + "Nature", + "Fashion", + "Industry", + "Agriculture", + "Education", + "Entertainment", + "Architecture", +] + +# ############### +# Nothing above this line needs to be modified # +# ############### + + +def main(): + """Upload every learnware file found in ./learnware_pool to the market server. + + Logs in with the module-level credentials, then posts each learnware file together + with its semantic specification and asserts that the server replies with code 0. + """ + session = requests.Session() + res = session.post(login_url, json={"email": email, "password": password}) + + # Change this path to the folder that holds the learnware files. + pool_dir = os.path.join(os.path.abspath("."), "learnware_pool") + learnware_pool = os.listdir(pool_dir) + + for learnware in learnware_pool: + # Adjust the semantic specification below to describe the learnware. + name = "M5_Shop" + "%02d" % int(learnware.split(".")[0].split("_")[1]) + name = name + "_" + time.strftime("%Y%m%d%H%M%S", time.localtime()) + semantic_specification = { + "Data": {"Values": ["Table"], "Type": "Class"}, + "Task": {"Values": ["Regression"], "Type": "Class"}, + "Device": {"Values": ["CPU"], "Type": "Tag"}, + "Scenario": {"Values": ["Business"], "Type": "Tag"}, + "Description": {"Values": "A sales-forecasting model from Walmart store", "Type": "String"}, + "Name": {"Values": name, "Type": "String"}, + } + # Use a with-block so each learnware file handle is closed after its upload + # instead of leaking one open handle per learnware. + with open(os.path.join(pool_dir, learnware), "rb") as learnware_file: + res = session.post( + submit_url, + data={ + "semantic_specification": json.dumps(semantic_specification), + }, + files={"learnware_file": learnware_file}, + ) + # Any reply whose JSON "code" is not 0 is treated as a failed upload. + assert json.loads(res.text)["code"] == 0, "Upload error" + + +if __name__ == "__main__": + main()
diff --git a/examples/example_pfs/main.py b/examples/example_pfs/main.py index 3d0fd3e..580ce8d 100644 --- a/examples/example_pfs/main.py +++ b/examples/example_pfs/main.py @@ -15,45 +15,17 @@ from pfs import Dataloader semantic_specs = [ { "Data": {"Values": ["Tabular"], "Type": "Class"}, - "Task": { - "Values": ["Classification"], - "Type": "Class", - }, - "Device": {"Values": ["GPU"], "Type": "Tag"}, - "Scenario": {"Values": ["Nature"], "Type": "Tag"}, - "Description": {"Values": "", "Type": "String"}, - "Name": {"Values": "learnware_1", "Type": "String"}, - }, - { - "Data": {"Values": ["Tabular"], "Type": "Class"}, - "Task": { - "Values": ["Classification"], - "Type": "Class", - }, - "Device": {"Values": ["GPU"], "Type": "Tag"}, - "Scenario": {"Values": ["Business", "Nature"], "Type": "Tag"}, - "Description": {"Values": "", "Type": "String"}, - "Name": {"Values": "learnware_2", "Type": "String"}, - }, - { - "Data": {"Values": ["Tabular"], "Type": "Class"}, - "Task": { - "Values": ["Classification"], - "Type": "Class", - }, + "Task": {"Values": ["Classification"], "Type": "Class"}, "Device": {"Values": ["GPU"], "Type": "Tag"}, "Scenario": {"Values": ["Business"], "Type": "Tag"}, "Description": {"Values": "", "Type": "String"}, - "Name": {"Values": "learnware_3", "Type": "String"}, - }, + "Name": {"Values": "learnware_1", "Type": "String"}, + } ] user_senmantic = { "Data": {"Values": ["Tabular"], "Type": "Class"}, - "Task": { - "Values": ["Classification"], - "Type": "Class", - }, + "Task": {"Values": ["Classification"], "Type": "Class"}, "Device": {"Values": ["GPU"], "Type": "Tag"}, "Scenario": {"Values": ["Business"], "Type": "Tag"}, "Description": {"Values": "", "Type": "String"}, @@ -86,7 +58,7 @@ class PFSDatasetWorkflow: zip_path_list.append(os.path.join(curr_root, zip_path)) for idx, zip_path in enumerate(zip_path_list): - semantic_spec = semantic_specs[idx % 3] + semantic_spec = semantic_specs[0] semantic_spec["Name"]["Values"] = "learnware_%d" % (idx) 
semantic_spec["Description"]["Values"] = "test_learnware_number_%d" % (idx) easy_market.add_learnware(zip_path, semantic_spec) @@ -142,8 +114,8 @@ class PFSDatasetWorkflow: rmtree(dir_path) def test(self, regenerate_flag=False): - # self.prepare_learnware(regenerate_flag) - # self._init_learnware_market() + self.prepare_learnware(regenerate_flag) + self._init_learnware_market() easy_market = EasyMarket() print("Total Item:", len(easy_market))
diff --git a/examples/example_pfs/upload.py b/examples/example_pfs/upload.py new file mode 100644 index 0000000..ed8449f --- /dev/null +++ b/examples/example_pfs/upload.py @@ -0,0 +1,89 @@ +import hashlib +import requests +import os +import random +import json +import time +from tqdm import tqdm + +email = "tanzh@lamda.nju.edu.cn" +password = hashlib.md5(b"Qwerty123").hexdigest() +login_url = "http://210.28.134.201:8089/auth/login" +submit_url = "http://210.28.134.201:8089/user/add_learnware" +all_data_type = ["Table", "Image", "Video", "Text", "Audio"] +all_task_type = [ + "Classification", + "Regression", + "Clustering", + "Feature Extraction", + "Generation", + "Segmentation", + "Object Detection", +] +all_device_type = ["CPU", "GPU"] +all_scenario = [ + "Business", + "Financial", + "Health", + "Politics", + "Computer", + "Internet", + "Traffic", + "Nature", + "Fashion", + "Industry", + "Agriculture", + "Education", + "Entertainment", + "Architecture", +] + +# ############### +# Nothing above this line needs to be modified # +# ############### + + +def main(): + """Upload every learnware file found in ./learnware_pool to the market server. + + Logs in with the module-level credentials, then posts each learnware file together + with its semantic specification and asserts that the server replies with code 0. + """ + session = requests.Session() + res = session.post(login_url, json={"email": email, "password": password}) + + # Change this path to the folder that holds the learnware files. + pool_dir = os.path.join(os.path.abspath("."), "learnware_pool") + learnware_pool = os.listdir(pool_dir) + + for learnware in learnware_pool: + # Adjust the semantic specification below to describe the learnware. + name = "PFS_Shop" + "%02d" % int(learnware.split(".")[0].split("_")[1]) + name = name + "_" + time.strftime("%Y%m%d%H%M%S", time.localtime()) + semantic_specification = { + "Data": {"Values": ["Table"], "Type": "Class"}, + "Task": {"Values": ["Regression"], "Type": "Class"}, + "Device": {"Values": ["CPU"], "Type": "Tag"}, + "Scenario": {"Values": ["Business"], "Type": "Tag"}, + "Description": { + "Values": "A sales-forecasting model from Predict Future Sales Competition on Kaggle", + "Type": "String", + }, + "Name": {"Values": name, "Type": "String"}, + } + # Use a with-block so each learnware file handle is closed after its upload + # instead of leaking one open handle per learnware. + with open(os.path.join(pool_dir, learnware), "rb") as learnware_file: + res = session.post( + submit_url, + data={ + "semantic_specification": json.dumps(semantic_specification), + }, + files={"learnware_file": learnware_file}, + ) + # Any reply whose JSON "code" is not 0 is treated as a failed upload. + assert json.loads(res.text)["code"] == 0, "Upload error" + + +if __name__ == "__main__": + main()
diff --git a/examples/workflow_by_code/main.py b/examples/workflow_by_code/main.py index 2b72e27..d2baedc 100644 --- a/examples/workflow_by_code/main.py +++ b/examples/workflow_by_code/main.py @@ -18,37 +18,12 @@ curr_root = os.path.dirname(os.path.abspath(__file__)) semantic_specs = [ { "Data": {"Values": ["Tabular"], "Type": "Class"}, - "Task": { - "Values": ["Classification"], - "Type": "Class", - }, - "Device": {"Values": ["GPU"], "Type": "Tag"}, - "Scenario": {"Values": ["Nature"], "Type": "Tag"}, - "Description": {"Values": "", "Type": "String"}, - "Name": {"Values": "learnware_1", "Type": "String"}, - }, - { - "Data": {"Values": ["Tabular"], "Type": "Class"}, - "Task": { - "Values": ["Classification"], - "Type": "Class", - }, - "Device": {"Values": ["GPU"], "Type": "Tag"}, - "Scenario": {"Values": ["Business", "Nature"], "Type": "Tag"}, - "Description": {"Values": "", "Type": "String"}, - "Name": {"Values": "learnware_2", "Type": "String"}, - }, - { - "Data": {"Values": ["Tabular"], "Type": "Class"}, - "Task": { - "Values": 
["Classification"], - "Type": "Class", - }, + "Task": {"Values": ["Classification"], "Type": "Class"}, "Device": {"Values": ["GPU"], "Type": "Tag"}, "Scenario": {"Values": ["Business"], "Type": "Tag"}, "Description": {"Values": "", "Type": "String"}, - "Name": {"Values": "learnware_3", "Type": "String"}, - }, + "Name": {"Values": "learnware_1", "Type": "String"}, + } ] user_senmantic = { @@ -118,7 +93,7 @@ class LearnwareMarketWorkflow: print("Total Item:", len(easy_market)) for idx, zip_path in enumerate(self.zip_path_list): - semantic_spec = semantic_specs[idx % 3] + semantic_spec = semantic_specs[0] semantic_spec["Name"]["Values"] = "learnware_%d" % (idx) semantic_spec["Description"]["Values"] = "test_learnware_number_%d" % (idx) easy_market.add_learnware(zip_path, semantic_spec) diff --git a/learnware/market/easy.py b/learnware/market/easy.py index dd549c1..0133fa1 100644 --- a/learnware/market/easy.py +++ b/learnware/market/easy.py @@ -213,7 +213,7 @@ class EasyMarket(BaseMarket): else: max_score = (max_dist - min_dist) / (max_dist - dist_epsilon) - if max_dist < dist_epsilon or max_score > 1: + if min_dist < dist_epsilon: dist_epsilon = min_dist elif max_score < min_score: dist_epsilon = max_dist - (max_dist - min_dist) / min_score @@ -333,7 +333,7 @@ class EasyMarket(BaseMarket): learnware_list: List[Learnware], user_rkme: RKMEStatSpecification, max_search_num: int, - weight_cutoff: float = 0.9, + weight_cutoff: float = 0.95, ) -> Tuple[List[float], List[Learnware]]: """Select learnwares based on a total mixture ratio, then recalculate their mixture weights @@ -362,19 +362,25 @@ class EasyMarket(BaseMarket): max_search_num = learnware_num weight, _ = self._calculate_rkme_spec_mixture_weight(learnware_list, user_rkme) - sort_by_weight_idx_list = sorted(range(learnware_num), key=lambda k: weight[k]) + sort_by_weight_idx_list = sorted(range(learnware_num), key=lambda k: weight[k], reverse=True) weight_sum = 0 mixture_list = [] for idx in 
sort_by_weight_idx_list: - weight_sum += sort_by_weight_idx_list[idx] + weight_sum += weight[idx] if weight_sum <= weight_cutoff: mixture_list.append(learnware_list[idx]) + else: + break - if len(mixture_list) > max_search_num: - mixture_list = mixture_list[:max_search_num] + if len(mixture_list) <= 1: + mixture_list = [learnware_list[sort_by_weight_idx_list[0]]] + mixture_weight = [1] + else: + if len(mixture_list) > max_search_num: + mixture_list = mixture_list[:max_search_num] + mixture_weight, _ = self._calculate_rkme_spec_mixture_weight(mixture_list, user_rkme) - mixture_weight, _ = self._calculate_rkme_spec_mixture_weight(mixture_list, user_rkme) return mixture_weight, mixture_list def _filter_by_rkme_spec_single( @@ -438,7 +444,7 @@ class EasyMarket(BaseMarket): return filtered_learnware_list - def _search_by_rkme_spec_mixture( + def _search_by_rkme_spec_mixture_greedy( self, learnware_list: List[Learnware], user_rkme: RKMEStatSpecification, @@ -578,7 +584,7 @@ class EasyMarket(BaseMarket): return match_learnwares def search_learnware( - self, user_info: BaseUserInfo, max_search_num=5 + self, user_info: BaseUserInfo, max_search_num: int = 5, search_method: str = "greedy" ) -> Tuple[List[float], List[Learnware], List[Learnware]]: """Search learnwares based on user_info @@ -612,9 +618,18 @@ class EasyMarket(BaseMarket): sorted_score_list, single_learnware_list = self._filter_by_rkme_spec_single( sorted_score_list, single_learnware_list ) - weight_list, mixture_learnware_list = self._search_by_rkme_spec_mixture( - learnware_list, user_rkme, max_search_num - ) + if search_method == "auto": + weight_list, mixture_learnware_list = self._search_by_rkme_spec_mixture_auto( + learnware_list, user_rkme, max_search_num + ) + elif search_method == "greedy": + weight_list, mixture_learnware_list = self._search_by_rkme_spec_mixture_greedy( + learnware_list, user_rkme, max_search_num + ) + else: + # Unsupported search_method: warn and fall back to an empty mixture so the return below never reads an unassigned name. + logger.warning(f"{search_method} not supported!") + mixture_learnware_list = [] return sorted_score_list, 
single_learnware_list, mixture_learnware_list def delete_learnware(self, id: str) -> bool: