| @@ -38,45 +38,17 @@ os.makedirs(model_save_root, exist_ok=True) | |||
| semantic_specs = [ | |||
| { | |||
| "Data": {"Values": ["Tabular"], "Type": "Class"}, | |||
| "Task": { | |||
| "Values": ["Classification"], | |||
| "Type": "Class", | |||
| }, | |||
| "Device": {"Values": ["GPU"], "Type": "Tag"}, | |||
| "Scenario": {"Values": ["Nature"], "Type": "Tag"}, | |||
| "Description": {"Values": "", "Type": "String"}, | |||
| "Name": {"Values": "learnware_1", "Type": "String"}, | |||
| }, | |||
| { | |||
| "Data": {"Values": ["Tabular"], "Type": "Class"}, | |||
| "Task": { | |||
| "Values": ["Classification"], | |||
| "Type": "Class", | |||
| }, | |||
| "Device": {"Values": ["GPU"], "Type": "Tag"}, | |||
| "Scenario": {"Values": ["Business", "Nature"], "Type": "Tag"}, | |||
| "Description": {"Values": "", "Type": "String"}, | |||
| "Name": {"Values": "learnware_2", "Type": "String"}, | |||
| }, | |||
| { | |||
| "Data": {"Values": ["Tabular"], "Type": "Class"}, | |||
| "Task": { | |||
| "Values": ["Classification"], | |||
| "Type": "Class", | |||
| }, | |||
| "Task": {"Values": ["Classification"], "Type": "Class"}, | |||
| "Device": {"Values": ["GPU"], "Type": "Tag"}, | |||
| "Scenario": {"Values": ["Business"], "Type": "Tag"}, | |||
| "Description": {"Values": "", "Type": "String"}, | |||
| "Name": {"Values": "learnware_3", "Type": "String"}, | |||
| }, | |||
| "Name": {"Values": "learnware_1", "Type": "String"}, | |||
| } | |||
| ] | |||
| user_senmantic = { | |||
| "Data": {"Values": ["Tabular"], "Type": "Class"}, | |||
| "Task": { | |||
| "Values": ["Classification"], | |||
| "Type": "Class", | |||
| }, | |||
| "Task": {"Values": ["Classification"], "Type": "Class"}, | |||
| "Device": {"Values": ["GPU"], "Type": "Tag"}, | |||
| "Scenario": {"Values": ["Business"], "Type": "Tag"}, | |||
| "Description": {"Values": "", "Type": "String"}, | |||
| @@ -144,7 +116,7 @@ def prepare_market(): | |||
| new_learnware_path = prepare_learnware( | |||
| data_path, model_path, init_file_path, yaml_file_path, tmp_dir, "%s_%d" % (dataset, i) | |||
| ) | |||
| semantic_spec = semantic_specs[i % 3] | |||
| semantic_spec = semantic_specs[0] | |||
| semantic_spec["Name"]["Values"] = "learnware_%d" % (i) | |||
| semantic_spec["Description"]["Values"] = "test_learnware_number_%d" % (i) | |||
| image_market.add_learnware(new_learnware_path, semantic_spec) | |||
| @@ -15,45 +15,17 @@ from m5 import DataLoader | |||
| semantic_specs = [ | |||
| { | |||
| "Data": {"Values": ["Tabular"], "Type": "Class"}, | |||
| "Task": { | |||
| "Values": ["Classification"], | |||
| "Type": "Class", | |||
| }, | |||
| "Device": {"Values": ["GPU"], "Type": "Tag"}, | |||
| "Scenario": {"Values": ["Nature"], "Type": "Tag"}, | |||
| "Description": {"Values": "", "Type": "String"}, | |||
| "Name": {"Values": "learnware_1", "Type": "String"}, | |||
| }, | |||
| { | |||
| "Data": {"Values": ["Tabular"], "Type": "Class"}, | |||
| "Task": { | |||
| "Values": ["Classification"], | |||
| "Type": "Class", | |||
| }, | |||
| "Device": {"Values": ["GPU"], "Type": "Tag"}, | |||
| "Scenario": {"Values": ["Business", "Nature"], "Type": "Tag"}, | |||
| "Description": {"Values": "", "Type": "String"}, | |||
| "Name": {"Values": "learnware_2", "Type": "String"}, | |||
| }, | |||
| { | |||
| "Data": {"Values": ["Tabular"], "Type": "Class"}, | |||
| "Task": { | |||
| "Values": ["Classification"], | |||
| "Type": "Class", | |||
| }, | |||
| "Task": {"Values": ["Classification"], "Type": "Class"}, | |||
| "Device": {"Values": ["GPU"], "Type": "Tag"}, | |||
| "Scenario": {"Values": ["Business"], "Type": "Tag"}, | |||
| "Description": {"Values": "", "Type": "String"}, | |||
| "Name": {"Values": "learnware_3", "Type": "String"}, | |||
| }, | |||
| "Name": {"Values": "learnware_1", "Type": "String"}, | |||
| } | |||
| ] | |||
| user_senmantic = { | |||
| "Data": {"Values": ["Tabular"], "Type": "Class"}, | |||
| "Task": { | |||
| "Values": ["Classification"], | |||
| "Type": "Class", | |||
| }, | |||
| "Task": {"Values": ["Classification"], "Type": "Class"}, | |||
| "Device": {"Values": ["GPU"], "Type": "Tag"}, | |||
| "Scenario": {"Values": ["Business"], "Type": "Tag"}, | |||
| "Description": {"Values": "", "Type": "String"}, | |||
| @@ -86,7 +58,7 @@ class M5DatasetWorkflow: | |||
| zip_path_list.append(os.path.join(curr_root, zip_path)) | |||
| for idx, zip_path in enumerate(zip_path_list): | |||
| semantic_spec = semantic_specs[idx % 3] | |||
| semantic_spec = semantic_specs[0] | |||
| semantic_spec["Name"]["Values"] = "learnware_%d" % (idx) | |||
| semantic_spec["Description"]["Values"] = "test_learnware_number_%d" % (idx) | |||
| easy_market.add_learnware(zip_path, semantic_spec) | |||
| @@ -101,7 +73,7 @@ class M5DatasetWorkflow: | |||
| m5 = DataLoader() | |||
| idx_list = m5.get_idx_list() | |||
| algo_list = ["ridge", "lgb"] | |||
| algo_list = ["lgb"] # algo_list = ["ridge", "lgb"] | |||
| curr_root = os.path.dirname(os.path.abspath(__file__)) | |||
| curr_root = os.path.join(curr_root, "learnware_pool") | |||
| @@ -161,6 +133,9 @@ class M5DatasetWorkflow: | |||
| sorted_score_list, single_learnware_list, mixture_learnware_list = easy_market.search_learnware(user_info) | |||
| print(f"search result of user{idx}:") | |||
| print( | |||
| f"single model num: {len(sorted_score_list)}, max_score: {sorted_score_list[0]}, min_score: {sorted_score_list[-1]}" | |||
| ) | |||
| for score, learnware in zip(sorted_score_list, single_learnware_list): | |||
| pred_y = learnware.predict(test_x) | |||
| loss = m5.score(test_y, pred_y) | |||
| @@ -169,11 +144,10 @@ class M5DatasetWorkflow: | |||
| mixture_id = " ".join([learnware.id for learnware in mixture_learnware_list]) | |||
| print(f"mixture_learnware: {mixture_id}\n") | |||
| # TODO: model reuse score | |||
| reuse_baseline = JobSelectorReuser(learnware_list=mixture_learnware_list) | |||
| reuse_predict = reuse_baseline.predict(user_data=test_x) | |||
| reuse_score = m5.score(test_y, reuse_predict) | |||
| print(f"mixture reuse score: {reuse_score}\n") | |||
| print(f"mixture reuse loss: {reuse_score}\n") | |||
| if __name__ == "__main__": | |||
| @@ -0,0 +1,86 @@ | |||
| import hashlib | |||
| import requests | |||
| import os | |||
| import random | |||
| import json | |||
| import time | |||
| from tqdm import tqdm | |||
# Connection settings for the learnware market upload script.
#
# Each value can be overridden through an environment variable so the script
# can target another account or server without editing the source; the
# original hard-coded values remain the defaults.
#
# NOTE(security): the fallback e-mail/password are committed in plain text and
# the server URL uses plain HTTP. Prefer supplying LEARNWARE_EMAIL and
# LEARNWARE_PASSWORD from the environment and rotating the committed password.
email = os.environ.get("LEARNWARE_EMAIL", "tanzh@lamda.nju.edu.cn")
# The login request sends the hex MD5 digest of the password, not the password
# itself (unsalted MD5 is weak; kept because it is what the script has always sent).
password = hashlib.md5(os.environ.get("LEARNWARE_PASSWORD", "Qwerty123").encode("utf-8")).hexdigest()

_base_url = os.environ.get("LEARNWARE_BASE_URL", "http://210.28.134.201:8089").rstrip("/")
login_url = _base_url + "/auth/login"
submit_url = _base_url + "/user/add_learnware"

# Candidate values for the semantic-specification fields.
# NOTE(review): presumably the vocabulary accepted by the market server - confirm.
all_data_type = ["Table", "Image", "Video", "Text", "Audio"]
all_task_type = [
    "Classification",
    "Regression",
    "Clustering",
    "Feature Extraction",
    "Generation",
    "Segmentation",
    "Object Detection",
]
all_device_type = ["CPU", "GPU"]
all_scenario = [
    "Business",
    "Financial",
    "Health",
    "Politics",
    "Computer",
    "Internet",
    "Traffic",
    "Nature",
    "Fashion",
    "Industry",
    "Agriculture",
    "Education",
    "Entertainment",
    "Architecture",
]
| # ############################################## | |||
| # Nothing above this line needs to be modified. # | |||
| # ############################################## | |||
def main():
    """Log in to the learnware market and upload every learnware in ``./learnware_pool``.

    Entries of the pool directory are expected to be named
    ``<prefix>_<index>.<ext>``; the index becomes part of the uploaded
    learnware's name. Entries that do not follow this pattern are skipped
    with a message instead of aborting the whole run.

    Raises:
        requests.HTTPError: if the login request returns an HTTP error status.
        RuntimeError: if the server answers an upload with a non-zero ``code``.
    """
    session = requests.Session()
    login_res = session.post(login_url, json={"email": email, "password": password})
    # Fail early on a rejected login rather than on the first upload.
    login_res.raise_for_status()

    # Change this path to the folder that holds the learnware files.
    pool_dir = os.path.join(os.path.abspath("."), "learnware_pool")

    # Sorted so that the upload order is deterministic across runs.
    for learnware in sorted(os.listdir(pool_dir)):
        try:
            shop_idx = int(learnware.split(".")[0].split("_")[1])
        except (IndexError, ValueError):
            print(f"Skipping {learnware!r}: name does not match '<prefix>_<index>.<ext>'")
            continue

        # Adjust the semantic specification below to match the learnware.
        timestamp = time.strftime("%Y%m%d%H%M%S", time.localtime())
        name = f"M5_Shop{shop_idx:02d}_{timestamp}"
        semantic_specification = {
            "Data": {"Values": ["Table"], "Type": "Class"},
            "Task": {"Values": ["Regression"], "Type": "Class"},
            "Device": {"Values": ["CPU"], "Type": "Tag"},
            "Scenario": {"Values": ["Business"], "Type": "Tag"},
            "Description": {"Values": "A sales-forecasting model from Walmart store", "Type": "String"},
            "Name": {"Values": name, "Type": "String"},
        }

        # The context manager guarantees the file handle is closed even if
        # the request raises.
        with open(os.path.join(pool_dir, learnware), "rb") as learnware_file:
            res = session.post(
                submit_url,
                data={"semantic_specification": json.dumps(semantic_specification)},
                files={"learnware_file": learnware_file},
            )

        # Explicit check instead of ``assert`` so it still runs under ``python -O``.
        if json.loads(res.text)["code"] != 0:
            raise RuntimeError(f"Upload error for {learnware!r}: {res.text}")


if __name__ == "__main__":
    main()
| @@ -15,45 +15,17 @@ from pfs import Dataloader | |||
| semantic_specs = [ | |||
| { | |||
| "Data": {"Values": ["Tabular"], "Type": "Class"}, | |||
| "Task": { | |||
| "Values": ["Classification"], | |||
| "Type": "Class", | |||
| }, | |||
| "Device": {"Values": ["GPU"], "Type": "Tag"}, | |||
| "Scenario": {"Values": ["Nature"], "Type": "Tag"}, | |||
| "Description": {"Values": "", "Type": "String"}, | |||
| "Name": {"Values": "learnware_1", "Type": "String"}, | |||
| }, | |||
| { | |||
| "Data": {"Values": ["Tabular"], "Type": "Class"}, | |||
| "Task": { | |||
| "Values": ["Classification"], | |||
| "Type": "Class", | |||
| }, | |||
| "Device": {"Values": ["GPU"], "Type": "Tag"}, | |||
| "Scenario": {"Values": ["Business", "Nature"], "Type": "Tag"}, | |||
| "Description": {"Values": "", "Type": "String"}, | |||
| "Name": {"Values": "learnware_2", "Type": "String"}, | |||
| }, | |||
| { | |||
| "Data": {"Values": ["Tabular"], "Type": "Class"}, | |||
| "Task": { | |||
| "Values": ["Classification"], | |||
| "Type": "Class", | |||
| }, | |||
| "Task": {"Values": ["Classification"], "Type": "Class"}, | |||
| "Device": {"Values": ["GPU"], "Type": "Tag"}, | |||
| "Scenario": {"Values": ["Business"], "Type": "Tag"}, | |||
| "Description": {"Values": "", "Type": "String"}, | |||
| "Name": {"Values": "learnware_3", "Type": "String"}, | |||
| }, | |||
| "Name": {"Values": "learnware_1", "Type": "String"}, | |||
| } | |||
| ] | |||
| user_senmantic = { | |||
| "Data": {"Values": ["Tabular"], "Type": "Class"}, | |||
| "Task": { | |||
| "Values": ["Classification"], | |||
| "Type": "Class", | |||
| }, | |||
| "Task": {"Values": ["Classification"], "Type": "Class"}, | |||
| "Device": {"Values": ["GPU"], "Type": "Tag"}, | |||
| "Scenario": {"Values": ["Business"], "Type": "Tag"}, | |||
| "Description": {"Values": "", "Type": "String"}, | |||
| @@ -86,7 +58,7 @@ class PFSDatasetWorkflow: | |||
| zip_path_list.append(os.path.join(curr_root, zip_path)) | |||
| for idx, zip_path in enumerate(zip_path_list): | |||
| semantic_spec = semantic_specs[idx % 3] | |||
| semantic_spec = semantic_specs[0] | |||
| semantic_spec["Name"]["Values"] = "learnware_%d" % (idx) | |||
| semantic_spec["Description"]["Values"] = "test_learnware_number_%d" % (idx) | |||
| easy_market.add_learnware(zip_path, semantic_spec) | |||
| @@ -142,8 +114,8 @@ class PFSDatasetWorkflow: | |||
| rmtree(dir_path) | |||
| def test(self, regenerate_flag=False): | |||
| # self.prepare_learnware(regenerate_flag) | |||
| # self._init_learnware_market() | |||
| self.prepare_learnware(regenerate_flag) | |||
| self._init_learnware_market() | |||
| easy_market = EasyMarket() | |||
| print("Total Item:", len(easy_market)) | |||
| @@ -0,0 +1,89 @@ | |||
| import hashlib | |||
| import requests | |||
| import os | |||
| import random | |||
| import json | |||
| import time | |||
| from tqdm import tqdm | |||
# Connection settings for the learnware market upload script.
#
# Each value can be overridden through an environment variable so the script
# can target another account or server without editing the source; the
# original hard-coded values remain the defaults.
#
# NOTE(security): the fallback e-mail/password are committed in plain text and
# the server URL uses plain HTTP. Prefer supplying LEARNWARE_EMAIL and
# LEARNWARE_PASSWORD from the environment and rotating the committed password.
email = os.environ.get("LEARNWARE_EMAIL", "tanzh@lamda.nju.edu.cn")
# The login request sends the hex MD5 digest of the password, not the password
# itself (unsalted MD5 is weak; kept because it is what the script has always sent).
password = hashlib.md5(os.environ.get("LEARNWARE_PASSWORD", "Qwerty123").encode("utf-8")).hexdigest()

_base_url = os.environ.get("LEARNWARE_BASE_URL", "http://210.28.134.201:8089").rstrip("/")
login_url = _base_url + "/auth/login"
submit_url = _base_url + "/user/add_learnware"

# Candidate values for the semantic-specification fields.
# NOTE(review): presumably the vocabulary accepted by the market server - confirm.
all_data_type = ["Table", "Image", "Video", "Text", "Audio"]
all_task_type = [
    "Classification",
    "Regression",
    "Clustering",
    "Feature Extraction",
    "Generation",
    "Segmentation",
    "Object Detection",
]
all_device_type = ["CPU", "GPU"]
all_scenario = [
    "Business",
    "Financial",
    "Health",
    "Politics",
    "Computer",
    "Internet",
    "Traffic",
    "Nature",
    "Fashion",
    "Industry",
    "Agriculture",
    "Education",
    "Entertainment",
    "Architecture",
]
| # ############################################## | |||
| # Nothing above this line needs to be modified. # | |||
| # ############################################## | |||
def main():
    """Log in to the learnware market and upload every learnware in ``./learnware_pool``.

    Entries of the pool directory are expected to be named
    ``<prefix>_<index>.<ext>``; the index becomes part of the uploaded
    learnware's name. Entries that do not follow this pattern are skipped
    with a message instead of aborting the whole run.

    Raises:
        requests.HTTPError: if the login request returns an HTTP error status.
        RuntimeError: if the server answers an upload with a non-zero ``code``.
    """
    session = requests.Session()
    login_res = session.post(login_url, json={"email": email, "password": password})
    # Fail early on a rejected login rather than on the first upload.
    login_res.raise_for_status()

    # Change this path to the folder that holds the learnware files.
    pool_dir = os.path.join(os.path.abspath("."), "learnware_pool")

    # Sorted so that the upload order is deterministic across runs.
    for learnware in sorted(os.listdir(pool_dir)):
        try:
            shop_idx = int(learnware.split(".")[0].split("_")[1])
        except (IndexError, ValueError):
            print(f"Skipping {learnware!r}: name does not match '<prefix>_<index>.<ext>'")
            continue

        # Adjust the semantic specification below to match the learnware.
        timestamp = time.strftime("%Y%m%d%H%M%S", time.localtime())
        name = f"PFS_Shop{shop_idx:02d}_{timestamp}"
        semantic_specification = {
            "Data": {"Values": ["Table"], "Type": "Class"},
            "Task": {"Values": ["Regression"], "Type": "Class"},
            "Device": {"Values": ["CPU"], "Type": "Tag"},
            "Scenario": {"Values": ["Business"], "Type": "Tag"},
            "Description": {
                "Values": "A sales-forecasting model from Predict Future Sales Competition on Kaggle",
                "Type": "String",
            },
            "Name": {"Values": name, "Type": "String"},
        }

        # The context manager guarantees the file handle is closed even if
        # the request raises.
        with open(os.path.join(pool_dir, learnware), "rb") as learnware_file:
            res = session.post(
                submit_url,
                data={"semantic_specification": json.dumps(semantic_specification)},
                files={"learnware_file": learnware_file},
            )

        # Explicit check instead of ``assert`` so it still runs under ``python -O``.
        if json.loads(res.text)["code"] != 0:
            raise RuntimeError(f"Upload error for {learnware!r}: {res.text}")


if __name__ == "__main__":
    main()
| @@ -18,37 +18,12 @@ curr_root = os.path.dirname(os.path.abspath(__file__)) | |||
| semantic_specs = [ | |||
| { | |||
| "Data": {"Values": ["Tabular"], "Type": "Class"}, | |||
| "Task": { | |||
| "Values": ["Classification"], | |||
| "Type": "Class", | |||
| }, | |||
| "Device": {"Values": ["GPU"], "Type": "Tag"}, | |||
| "Scenario": {"Values": ["Nature"], "Type": "Tag"}, | |||
| "Description": {"Values": "", "Type": "String"}, | |||
| "Name": {"Values": "learnware_1", "Type": "String"}, | |||
| }, | |||
| { | |||
| "Data": {"Values": ["Tabular"], "Type": "Class"}, | |||
| "Task": { | |||
| "Values": ["Classification"], | |||
| "Type": "Class", | |||
| }, | |||
| "Device": {"Values": ["GPU"], "Type": "Tag"}, | |||
| "Scenario": {"Values": ["Business", "Nature"], "Type": "Tag"}, | |||
| "Description": {"Values": "", "Type": "String"}, | |||
| "Name": {"Values": "learnware_2", "Type": "String"}, | |||
| }, | |||
| { | |||
| "Data": {"Values": ["Tabular"], "Type": "Class"}, | |||
| "Task": { | |||
| "Values": ["Classification"], | |||
| "Type": "Class", | |||
| }, | |||
| "Task": {"Values": ["Classification"], "Type": "Class"}, | |||
| "Device": {"Values": ["GPU"], "Type": "Tag"}, | |||
| "Scenario": {"Values": ["Business"], "Type": "Tag"}, | |||
| "Description": {"Values": "", "Type": "String"}, | |||
| "Name": {"Values": "learnware_3", "Type": "String"}, | |||
| }, | |||
| "Name": {"Values": "learnware_1", "Type": "String"}, | |||
| } | |||
| ] | |||
| user_senmantic = { | |||
| @@ -118,7 +93,7 @@ class LearnwareMarketWorkflow: | |||
| print("Total Item:", len(easy_market)) | |||
| for idx, zip_path in enumerate(self.zip_path_list): | |||
| semantic_spec = semantic_specs[idx % 3] | |||
| semantic_spec = semantic_specs[0] | |||
| semantic_spec["Name"]["Values"] = "learnware_%d" % (idx) | |||
| semantic_spec["Description"]["Values"] = "test_learnware_number_%d" % (idx) | |||
| easy_market.add_learnware(zip_path, semantic_spec) | |||
| @@ -213,7 +213,7 @@ class EasyMarket(BaseMarket): | |||
| else: | |||
| max_score = (max_dist - min_dist) / (max_dist - dist_epsilon) | |||
| if max_dist < dist_epsilon or max_score > 1: | |||
| if min_dist < dist_epsilon: | |||
| dist_epsilon = min_dist | |||
| elif max_score < min_score: | |||
| dist_epsilon = max_dist - (max_dist - min_dist) / min_score | |||
| @@ -333,7 +333,7 @@ class EasyMarket(BaseMarket): | |||
| learnware_list: List[Learnware], | |||
| user_rkme: RKMEStatSpecification, | |||
| max_search_num: int, | |||
| weight_cutoff: float = 0.9, | |||
| weight_cutoff: float = 0.95, | |||
| ) -> Tuple[List[float], List[Learnware]]: | |||
| """Select learnwares based on a total mixture ratio, then recalculate their mixture weights | |||
| @@ -362,19 +362,25 @@ class EasyMarket(BaseMarket): | |||
| max_search_num = learnware_num | |||
| weight, _ = self._calculate_rkme_spec_mixture_weight(learnware_list, user_rkme) | |||
| sort_by_weight_idx_list = sorted(range(learnware_num), key=lambda k: weight[k]) | |||
| sort_by_weight_idx_list = sorted(range(learnware_num), key=lambda k: weight[k], reverse=True) | |||
| weight_sum = 0 | |||
| mixture_list = [] | |||
| for idx in sort_by_weight_idx_list: | |||
| weight_sum += sort_by_weight_idx_list[idx] | |||
| weight_sum += weight[idx] | |||
| if weight_sum <= weight_cutoff: | |||
| mixture_list.append(learnware_list[idx]) | |||
| else: | |||
| break | |||
| if len(mixture_list) > max_search_num: | |||
| mixture_list = mixture_list[:max_search_num] | |||
| if len(mixture_list) <= 1: | |||
| mixture_list = [learnware_list[sort_by_weight_idx_list[0]]] | |||
| mixture_weight = [1] | |||
| else: | |||
| if len(mixture_list) > max_search_num: | |||
| mixture_list = mixture_list[:max_search_num] | |||
| mixture_weight, _ = self._calculate_rkme_spec_mixture_weight(mixture_list, user_rkme) | |||
| mixture_weight, _ = self._calculate_rkme_spec_mixture_weight(mixture_list, user_rkme) | |||
| return mixture_weight, mixture_list | |||
| def _filter_by_rkme_spec_single( | |||
| @@ -438,7 +444,7 @@ class EasyMarket(BaseMarket): | |||
| return filtered_learnware_list | |||
| def _search_by_rkme_spec_mixture( | |||
| def _search_by_rkme_spec_mixture_greedy( | |||
| self, | |||
| learnware_list: List[Learnware], | |||
| user_rkme: RKMEStatSpecification, | |||
| @@ -578,7 +584,7 @@ class EasyMarket(BaseMarket): | |||
| return match_learnwares | |||
| def search_learnware( | |||
| self, user_info: BaseUserInfo, max_search_num=5 | |||
| self, user_info: BaseUserInfo, max_search_num: int = 5, search_method: str = "greedy" | |||
| ) -> Tuple[List[float], List[Learnware], List[Learnware]]: | |||
| """Search learnwares based on user_info | |||
| @@ -612,9 +618,16 @@ class EasyMarket(BaseMarket): | |||
| sorted_score_list, single_learnware_list = self._filter_by_rkme_spec_single( | |||
| sorted_score_list, single_learnware_list | |||
| ) | |||
| weight_list, mixture_learnware_list = self._search_by_rkme_spec_mixture( | |||
| learnware_list, user_rkme, max_search_num | |||
| ) | |||
| if search_method == "auto": | |||
| weight_list, mixture_learnware_list = self._search_by_rkme_spec_mixture_auto( | |||
| learnware_list, user_rkme, max_search_num | |||
| ) | |||
| elif search_method == "greedy": | |||
| weight_list, mixture_learnware_list = self._search_by_rkme_spec_mixture_greedy( | |||
| learnware_list, user_rkme, max_search_num | |||
| ) | |||
| else: | |||
| logger.warning("f{search_method} not supported!") | |||
| return sorted_score_list, single_learnware_list, mixture_learnware_list | |||
| def delete_learnware(self, id: str) -> bool: | |||