Merge branch 'dev' of git.nju.edu.cn:learnware/learnware-market into dev

3 years ago · 7402d4252b
--- a/examples/example_image/main.py
+++ b/examples/example_image/main.py
@@ -38,45 +38,17 @@ os.makedirs(model_save_root, exist_ok=True)
 semantic_specs = [
    {
        "Data": {"Values": ["Tabular"], "Type": "Class"},
        "Task": {
            "Values": ["Classification"],
            "Type": "Class",
        },
        "Device": {"Values": ["GPU"], "Type": "Tag"},
        "Scenario": {"Values": ["Nature"], "Type": "Tag"},
        "Description": {"Values": "", "Type": "String"},
        "Name": {"Values": "learnware_1", "Type": "String"},
    },
    {
        "Data": {"Values": ["Tabular"], "Type": "Class"},
        "Task": {
            "Values": ["Classification"],
            "Type": "Class",
        },
        "Device": {"Values": ["GPU"], "Type": "Tag"},
        "Scenario": {"Values": ["Business", "Nature"], "Type": "Tag"},
        "Description": {"Values": "", "Type": "String"},
        "Name": {"Values": "learnware_2", "Type": "String"},
    },
    {
        "Data": {"Values": ["Tabular"], "Type": "Class"},
        "Task": {
            "Values": ["Classification"],
            "Type": "Class",
        },
        "Task": {"Values": ["Classification"], "Type": "Class"},
        "Device": {"Values": ["GPU"], "Type": "Tag"},
        "Scenario": {"Values": ["Business"], "Type": "Tag"},
        "Description": {"Values": "", "Type": "String"},
        "Name": {"Values": "learnware_3", "Type": "String"},
    },
        "Name": {"Values": "learnware_1", "Type": "String"},
    }
 ]

 user_senmantic = {
    "Data": {"Values": ["Tabular"], "Type": "Class"},
    "Task": {
        "Values": ["Classification"],
        "Type": "Class",
    },
    "Task": {"Values": ["Classification"], "Type": "Class"},
    "Device": {"Values": ["GPU"], "Type": "Tag"},
    "Scenario": {"Values": ["Business"], "Type": "Tag"},
    "Description": {"Values": "", "Type": "String"},
@@ -144,7 +116,7 @@ def prepare_market():
        new_learnware_path = prepare_learnware(
            data_path, model_path, init_file_path, yaml_file_path, tmp_dir, "%s_%d" % (dataset, i)
        )
        semantic_spec = semantic_specs[i % 3]
        semantic_spec = semantic_specs[0]
        semantic_spec["Name"]["Values"] = "learnware_%d" % (i)
        semantic_spec["Description"]["Values"] = "test_learnware_number_%d" % (i)
        image_market.add_learnware(new_learnware_path, semantic_spec)
--- a/examples/example_m5/main.py
+++ b/examples/example_m5/main.py
@@ -15,45 +15,17 @@ from m5 import DataLoader
 semantic_specs = [
    {
        "Data": {"Values": ["Tabular"], "Type": "Class"},
        "Task": {
            "Values": ["Classification"],
            "Type": "Class",
        },
        "Device": {"Values": ["GPU"], "Type": "Tag"},
        "Scenario": {"Values": ["Nature"], "Type": "Tag"},
        "Description": {"Values": "", "Type": "String"},
        "Name": {"Values": "learnware_1", "Type": "String"},
    },
    {
        "Data": {"Values": ["Tabular"], "Type": "Class"},
        "Task": {
            "Values": ["Classification"],
            "Type": "Class",
        },
        "Device": {"Values": ["GPU"], "Type": "Tag"},
        "Scenario": {"Values": ["Business", "Nature"], "Type": "Tag"},
        "Description": {"Values": "", "Type": "String"},
        "Name": {"Values": "learnware_2", "Type": "String"},
    },
    {
        "Data": {"Values": ["Tabular"], "Type": "Class"},
        "Task": {
            "Values": ["Classification"],
            "Type": "Class",
        },
        "Task": {"Values": ["Classification"], "Type": "Class"},
        "Device": {"Values": ["GPU"], "Type": "Tag"},
        "Scenario": {"Values": ["Business"], "Type": "Tag"},
        "Description": {"Values": "", "Type": "String"},
        "Name": {"Values": "learnware_3", "Type": "String"},
    },
        "Name": {"Values": "learnware_1", "Type": "String"},
    }
 ]

 user_senmantic = {
    "Data": {"Values": ["Tabular"], "Type": "Class"},
    "Task": {
        "Values": ["Classification"],
        "Type": "Class",
    },
    "Task": {"Values": ["Classification"], "Type": "Class"},
    "Device": {"Values": ["GPU"], "Type": "Tag"},
    "Scenario": {"Values": ["Business"], "Type": "Tag"},
    "Description": {"Values": "", "Type": "String"},
@@ -86,7 +58,7 @@ class M5DatasetWorkflow:
            zip_path_list.append(os.path.join(curr_root, zip_path))

        for idx, zip_path in enumerate(zip_path_list):
            semantic_spec = semantic_specs[idx % 3]
            semantic_spec = semantic_specs[0]
            semantic_spec["Name"]["Values"] = "learnware_%d" % (idx)
            semantic_spec["Description"]["Values"] = "test_learnware_number_%d" % (idx)
            easy_market.add_learnware(zip_path, semantic_spec)
@@ -101,7 +73,7 @@ class M5DatasetWorkflow:

        m5 = DataLoader()
        idx_list = m5.get_idx_list()
        algo_list = ["ridge", "lgb"]
        algo_list = ["lgb"]  # algo_list = ["ridge", "lgb"]

        curr_root = os.path.dirname(os.path.abspath(__file__))
        curr_root = os.path.join(curr_root, "learnware_pool")
@@ -161,6 +133,9 @@ class M5DatasetWorkflow:
            sorted_score_list, single_learnware_list, mixture_learnware_list = easy_market.search_learnware(user_info)

            print(f"search result of user{idx}:")
            print(
                f"single model num: {len(sorted_score_list)}, max_score: {sorted_score_list[0]}, min_score: {sorted_score_list[-1]}"
            )
            for score, learnware in zip(sorted_score_list, single_learnware_list):
                pred_y = learnware.predict(test_x)
                loss = m5.score(test_y, pred_y)
@@ -169,11 +144,10 @@ class M5DatasetWorkflow:
            mixture_id = " ".join([learnware.id for learnware in mixture_learnware_list])
            print(f"mixture_learnware: {mixture_id}\n")

            # TODO: model reuse score
            reuse_baseline = JobSelectorReuser(learnware_list=mixture_learnware_list)
            reuse_predict = reuse_baseline.predict(user_data=test_x)
            reuse_score = m5.score(test_y, reuse_predict)
            print(f"mixture reuse score: {reuse_score}\n")
            print(f"mixture reuse loss: {reuse_score}\n")


 if __name__ == "__main__":
--- a/examples/example_m5/upload.py
+++ b/examples/example_m5/upload.py
@@ -0,0 +1,86 @@
 import hashlib
 import requests
 import os
 import random
 import json
 import time
 from tqdm import tqdm

 # Account credentials sent in the login request.
 # NOTE(review): a real-looking e-mail address and a plaintext password literal are
 # committed here; consider loading them from the environment or a local config instead.
 email = "tanzh@lamda.nju.edu.cn"
 # The password is transmitted as the hex MD5 digest of the literal below.
 password = hashlib.md5(b"Qwerty123").hexdigest()
 # Server endpoints for logging in and for submitting a learnware.
 # NOTE(review): both use plain http, so the credentials above are not protected by TLS.
 login_url = "http://210.28.134.201:8089/auth/login"
 submit_url = "http://210.28.134.201:8089/user/add_learnware"
 # Vocabulary lists for the semantic-specification fields; presumably the values the
 # server accepts for "Data", "Task", "Device" and "Scenario" -- TODO confirm against the
 # server. The specification built in main() hard-codes its own values.
 all_data_type = ["Table", "Image", "Video", "Text", "Audio"]
 all_task_type = [
    "Classification",
    "Regression",
    "Clustering",
    "Feature Extraction",
    "Generation",
    "Segmentation",
    "Object Detection",
 ]
 all_device_type = ["CPU", "GPU"]
 all_scenario = [
    "Business",
    "Financial",
    "Health",
    "Politics",
    "Computer",
    "Internet",
    "Traffic",
    "Nature",
    "Fashion",
    "Industry",
    "Agriculture",
    "Education",
    "Entertainment",
    "Architecture",
 ]

 # ###############
 # The section above does not need to be modified
 # ###############


 def main():
    """Log in to the market server and upload every file found in ./learnware_pool.

    The pool folder is resolved relative to the current working directory. Each
    file name is expected to look like ``<prefix>_<number>.<ext>``; the number is
    used to build the learnware name ``M5_Shop<NN>_<timestamp>``.

    Raises:
        AssertionError: if the server's JSON reply to an upload has a non-zero "code".
        IndexError, ValueError: if a file name in the pool does not match the
            expected ``<prefix>_<number>`` pattern.
    """
    session = requests.Session()
    # NOTE(review): the login reply is not inspected; a failed login only surfaces
    # later as an upload error.
    session.post(login_url, json={"email": email, "password": password})

    # Change this path to point at your own learnware folder.
    pool_dir = os.path.join(os.path.abspath("."), "learnware_pool")

    for learnware in os.listdir(pool_dir):
        # Adjust the semantic specification below to describe your learnware.
        shop_id = int(learnware.split(".")[0].split("_")[1])
        # The timestamp suffix makes the name differ between runs.
        name = "M5_Shop%02d_%s" % (shop_id, time.strftime("%Y%m%d%H%M%S", time.localtime()))
        semantic_specification = {
            "Data": {"Values": ["Table"], "Type": "Class"},
            "Task": {"Values": ["Regression"], "Type": "Class"},
            "Device": {"Values": ["CPU"], "Type": "Tag"},
            "Scenario": {"Values": ["Business"], "Type": "Tag"},
            "Description": {"Values": "A sales-forecasting model from Walmart store", "Type": "String"},
            "Name": {"Values": name, "Type": "String"},
        }
        # Close the file as soon as the request finishes instead of leaking one
        # open handle per uploaded learnware.
        with open(os.path.join(pool_dir, learnware), "rb") as learnware_file:
            res = session.post(
                submit_url,
                data={
                    "semantic_specification": json.dumps(semantic_specification),
                },
                files={"learnware_file": learnware_file},
            )
        # Raised explicitly (same exception type as before) so the check still
        # runs when Python is started with -O, which strips assert statements.
        if json.loads(res.text)["code"] != 0:
            raise AssertionError("Upload error")


 if __name__ == "__main__":
    main()
--- a/examples/example_pfs/main.py
+++ b/examples/example_pfs/main.py
@@ -15,45 +15,17 @@ from pfs import Dataloader
 semantic_specs = [
    {
        "Data": {"Values": ["Tabular"], "Type": "Class"},
        "Task": {
            "Values": ["Classification"],
            "Type": "Class",
        },
        "Device": {"Values": ["GPU"], "Type": "Tag"},
        "Scenario": {"Values": ["Nature"], "Type": "Tag"},
        "Description": {"Values": "", "Type": "String"},
        "Name": {"Values": "learnware_1", "Type": "String"},
    },
    {
        "Data": {"Values": ["Tabular"], "Type": "Class"},
        "Task": {
            "Values": ["Classification"],
            "Type": "Class",
        },
        "Device": {"Values": ["GPU"], "Type": "Tag"},
        "Scenario": {"Values": ["Business", "Nature"], "Type": "Tag"},
        "Description": {"Values": "", "Type": "String"},
        "Name": {"Values": "learnware_2", "Type": "String"},
    },
    {
        "Data": {"Values": ["Tabular"], "Type": "Class"},
        "Task": {
            "Values": ["Classification"],
            "Type": "Class",
        },
        "Task": {"Values": ["Classification"], "Type": "Class"},
        "Device": {"Values": ["GPU"], "Type": "Tag"},
        "Scenario": {"Values": ["Business"], "Type": "Tag"},
        "Description": {"Values": "", "Type": "String"},
        "Name": {"Values": "learnware_3", "Type": "String"},
    },
        "Name": {"Values": "learnware_1", "Type": "String"},
    }
 ]

 user_senmantic = {
    "Data": {"Values": ["Tabular"], "Type": "Class"},
    "Task": {
        "Values": ["Classification"],
        "Type": "Class",
    },
    "Task": {"Values": ["Classification"], "Type": "Class"},
    "Device": {"Values": ["GPU"], "Type": "Tag"},
    "Scenario": {"Values": ["Business"], "Type": "Tag"},
    "Description": {"Values": "", "Type": "String"},
@@ -86,7 +58,7 @@ class PFSDatasetWorkflow:
            zip_path_list.append(os.path.join(curr_root, zip_path))

        for idx, zip_path in enumerate(zip_path_list):
            semantic_spec = semantic_specs[idx % 3]
            semantic_spec = semantic_specs[0]
            semantic_spec["Name"]["Values"] = "learnware_%d" % (idx)
            semantic_spec["Description"]["Values"] = "test_learnware_number_%d" % (idx)
            easy_market.add_learnware(zip_path, semantic_spec)
@@ -142,8 +114,8 @@ class PFSDatasetWorkflow:
                rmtree(dir_path)

    def test(self, regenerate_flag=False):
        # self.prepare_learnware(regenerate_flag)
        # self._init_learnware_market()
        self.prepare_learnware(regenerate_flag)
        self._init_learnware_market()

        easy_market = EasyMarket()
        print("Total Item:", len(easy_market))
--- a/examples/example_pfs/upload.py
+++ b/examples/example_pfs/upload.py
@@ -0,0 +1,89 @@
 import hashlib
 import requests
 import os
 import random
 import json
 import time
 from tqdm import tqdm

 # Account credentials sent in the login request.
 # NOTE(review): a real-looking e-mail address and a plaintext password literal are
 # committed here; consider loading them from the environment or a local config instead.
 email = "tanzh@lamda.nju.edu.cn"
 # The password is transmitted as the hex MD5 digest of the literal below.
 password = hashlib.md5(b"Qwerty123").hexdigest()
 # Server endpoints for logging in and for submitting a learnware.
 # NOTE(review): both use plain http, so the credentials above are not protected by TLS.
 login_url = "http://210.28.134.201:8089/auth/login"
 submit_url = "http://210.28.134.201:8089/user/add_learnware"
 # Vocabulary lists for the semantic-specification fields; presumably the values the
 # server accepts for "Data", "Task", "Device" and "Scenario" -- TODO confirm against the
 # server. The specification built in main() hard-codes its own values.
 all_data_type = ["Table", "Image", "Video", "Text", "Audio"]
 all_task_type = [
    "Classification",
    "Regression",
    "Clustering",
    "Feature Extraction",
    "Generation",
    "Segmentation",
    "Object Detection",
 ]
 all_device_type = ["CPU", "GPU"]
 all_scenario = [
    "Business",
    "Financial",
    "Health",
    "Politics",
    "Computer",
    "Internet",
    "Traffic",
    "Nature",
    "Fashion",
    "Industry",
    "Agriculture",
    "Education",
    "Entertainment",
    "Architecture",
 ]

 # ###############
 # The section above does not need to be modified
 # ###############


 def main():
    """Log in to the market server and upload every file found in ./learnware_pool.

    The pool folder is resolved relative to the current working directory. Each
    file name is expected to look like ``<prefix>_<number>.<ext>``; the number is
    used to build the learnware name ``PFS_Shop<NN>_<timestamp>``.

    Raises:
        AssertionError: if the server's JSON reply to an upload has a non-zero "code".
        IndexError, ValueError: if a file name in the pool does not match the
            expected ``<prefix>_<number>`` pattern.
    """
    session = requests.Session()
    # NOTE(review): the login reply is not inspected; a failed login only surfaces
    # later as an upload error.
    session.post(login_url, json={"email": email, "password": password})

    # Change this path to point at your own learnware folder.
    pool_dir = os.path.join(os.path.abspath("."), "learnware_pool")

    for learnware in os.listdir(pool_dir):
        # Adjust the semantic specification below to describe your learnware.
        shop_id = int(learnware.split(".")[0].split("_")[1])
        # The timestamp suffix makes the name differ between runs.
        name = "PFS_Shop%02d_%s" % (shop_id, time.strftime("%Y%m%d%H%M%S", time.localtime()))
        semantic_specification = {
            "Data": {"Values": ["Table"], "Type": "Class"},
            "Task": {"Values": ["Regression"], "Type": "Class"},
            "Device": {"Values": ["CPU"], "Type": "Tag"},
            "Scenario": {"Values": ["Business"], "Type": "Tag"},
            "Description": {
                "Values": "A sales-forecasting model from Predict Future Sales Competition on Kaggle",
                "Type": "String",
            },
            "Name": {"Values": name, "Type": "String"},
        }
        # Close the file as soon as the request finishes instead of leaking one
        # open handle per uploaded learnware.
        with open(os.path.join(pool_dir, learnware), "rb") as learnware_file:
            res = session.post(
                submit_url,
                data={
                    "semantic_specification": json.dumps(semantic_specification),
                },
                files={"learnware_file": learnware_file},
            )
        # Raised explicitly (same exception type as before) so the check still
        # runs when Python is started with -O, which strips assert statements.
        if json.loads(res.text)["code"] != 0:
            raise AssertionError("Upload error")


 if __name__ == "__main__":
    main()
--- a/examples/workflow_by_code/main.py
+++ b/examples/workflow_by_code/main.py
@@ -18,37 +18,12 @@ curr_root = os.path.dirname(os.path.abspath(__file__))
 semantic_specs = [
    {
        "Data": {"Values": ["Tabular"], "Type": "Class"},
        "Task": {
            "Values": ["Classification"],
            "Type": "Class",
        },
        "Device": {"Values": ["GPU"], "Type": "Tag"},
        "Scenario": {"Values": ["Nature"], "Type": "Tag"},
        "Description": {"Values": "", "Type": "String"},
        "Name": {"Values": "learnware_1", "Type": "String"},
    },
    {
        "Data": {"Values": ["Tabular"], "Type": "Class"},
        "Task": {
            "Values": ["Classification"],
            "Type": "Class",
        },
        "Device": {"Values": ["GPU"], "Type": "Tag"},
        "Scenario": {"Values": ["Business", "Nature"], "Type": "Tag"},
        "Description": {"Values": "", "Type": "String"},
        "Name": {"Values": "learnware_2", "Type": "String"},
    },
    {
        "Data": {"Values": ["Tabular"], "Type": "Class"},
        "Task": {
            "Values": ["Classification"],
            "Type": "Class",
        },
        "Task": {"Values": ["Classification"], "Type": "Class"},
        "Device": {"Values": ["GPU"], "Type": "Tag"},
        "Scenario": {"Values": ["Business"], "Type": "Tag"},
        "Description": {"Values": "", "Type": "String"},
        "Name": {"Values": "learnware_3", "Type": "String"},
    },
        "Name": {"Values": "learnware_1", "Type": "String"},
    }
 ]

 user_senmantic = {
@@ -118,7 +93,7 @@ class LearnwareMarketWorkflow:
        print("Total Item:", len(easy_market))

        for idx, zip_path in enumerate(self.zip_path_list):
            semantic_spec = semantic_specs[idx % 3]
            semantic_spec = semantic_specs[0]
            semantic_spec["Name"]["Values"] = "learnware_%d" % (idx)
            semantic_spec["Description"]["Values"] = "test_learnware_number_%d" % (idx)
            easy_market.add_learnware(zip_path, semantic_spec)
--- a/learnware/market/easy.py
+++ b/learnware/market/easy.py
@@ -213,7 +213,7 @@ class EasyMarket(BaseMarket):
        else:
            max_score = (max_dist - min_dist) / (max_dist - dist_epsilon)

            if max_dist < dist_epsilon or max_score > 1:
            if min_dist < dist_epsilon:
                dist_epsilon = min_dist
            elif max_score < min_score:
                dist_epsilon = max_dist - (max_dist - min_dist) / min_score
@@ -333,7 +333,7 @@ class EasyMarket(BaseMarket):
        learnware_list: List[Learnware],
        user_rkme: RKMEStatSpecification,
        max_search_num: int,
        weight_cutoff: float = 0.9,
        weight_cutoff: float = 0.95,
    ) -> Tuple[List[float], List[Learnware]]:
        """Select learnwares based on a total mixture ratio, then recalculate their mixture weights

@@ -362,19 +362,25 @@ class EasyMarket(BaseMarket):
            max_search_num = learnware_num

        weight, _ = self._calculate_rkme_spec_mixture_weight(learnware_list, user_rkme)
        sort_by_weight_idx_list = sorted(range(learnware_num), key=lambda k: weight[k])
        sort_by_weight_idx_list = sorted(range(learnware_num), key=lambda k: weight[k], reverse=True)

        weight_sum = 0
        mixture_list = []
        for idx in sort_by_weight_idx_list:
            weight_sum += sort_by_weight_idx_list[idx]
            weight_sum += weight[idx]
            if weight_sum <= weight_cutoff:
                mixture_list.append(learnware_list[idx])
            else:
                break

        if len(mixture_list) > max_search_num:
            mixture_list = mixture_list[:max_search_num]
        if len(mixture_list) <= 1:
            mixture_list = [learnware_list[sort_by_weight_idx_list[0]]]
            mixture_weight = [1]
        else:
            if len(mixture_list) > max_search_num:
                mixture_list = mixture_list[:max_search_num]
            mixture_weight, _ = self._calculate_rkme_spec_mixture_weight(mixture_list, user_rkme)

        mixture_weight, _ = self._calculate_rkme_spec_mixture_weight(mixture_list, user_rkme)
        return mixture_weight, mixture_list

    def _filter_by_rkme_spec_single(
@@ -438,7 +444,7 @@ class EasyMarket(BaseMarket):

        return filtered_learnware_list

    def _search_by_rkme_spec_mixture(
    def _search_by_rkme_spec_mixture_greedy(
        self,
        learnware_list: List[Learnware],
        user_rkme: RKMEStatSpecification,
@@ -578,7 +584,7 @@ class EasyMarket(BaseMarket):
        return match_learnwares

    def search_learnware(
        self, user_info: BaseUserInfo, max_search_num=5
        self, user_info: BaseUserInfo, max_search_num: int = 5, search_method: str = "greedy"
    ) -> Tuple[List[float], List[Learnware], List[Learnware]]:
        """Search learnwares based on user_info

@@ -612,9 +618,16 @@ class EasyMarket(BaseMarket):
            sorted_score_list, single_learnware_list = self._filter_by_rkme_spec_single(
                sorted_score_list, single_learnware_list
            )
            weight_list, mixture_learnware_list = self._search_by_rkme_spec_mixture(
                learnware_list, user_rkme, max_search_num
            )
            if search_method == "auto":
                weight_list, mixture_learnware_list = self._search_by_rkme_spec_mixture_auto(
                    learnware_list, user_rkme, max_search_num
                )
            elif search_method == "greedy":
                weight_list, mixture_learnware_list = self._search_by_rkme_spec_mixture_greedy(
                    learnware_list, user_rkme, max_search_num
                )
            else:
                logger.warning("f{search_method} not supported!")
            return sorted_score_list, single_learnware_list, mixture_learnware_list

    def delete_learnware(self, id: str) -> bool: