[MNT] Add the example of pfs

2 years ago · dcbdd8d76f
--- a/examples/example_pfs/example.yaml
+++ b/examples/example_pfs/example.yaml
@@ -0,0 +1,8 @@
 # Learnware package description. main.py copies this file into every generated
 # package directory under the name "learnware.yaml".
 model:
  # Presumably the name of the model class exposed by the package's __init__.py
  # (example_init.py, which defines `Model`, is copied there) -- confirm against
  # the learnware loader.
  class_name: Model
  kwargs: {}
 stat_specifications:
  - module_path: learnware.specification
    class_name: RKMEStatSpecification
    # Must match the file name main.py uses when saving the RKME specification
    # into the package directory ("rkme.json").
    file_name: rkme.json
    kwargs: {}  
--- a/examples/example_pfs/example_init.py
+++ b/examples/example_pfs/example_init.py
@@ -0,0 +1,19 @@
 import os
 import joblib
 import numpy as np
 from learnware.model import BaseModel


 class Model(BaseModel):
    """Learnware wrapper around a pre-trained estimator stored next to this file.

    The estimator is read with ``joblib`` from ``model.out`` in the directory
    containing this module; only ``predict`` does any work.
    """

    def __init__(self):
        # Resolve "model.out" relative to this module, not the working directory.
        package_dir = os.path.dirname(os.path.abspath(__file__))
        model_file = os.path.join(package_dir, "model.out")
        self.model = joblib.load(model_file)

    def fit(self, X: np.ndarray, y: np.ndarray):
        """Do nothing: the wrapped estimator is loaded already trained."""

    def predict(self, X: np.ndarray) -> np.ndarray:
        """Return the wrapped estimator's predictions for ``X``."""
        predictions = self.model.predict(X)
        return predictions

    def finetune(self, X: np.ndarray, y: np.ndarray):
        """Do nothing: fine-tuning is not implemented for this example."""
--- a/examples/example_pfs/main.py
+++ b/examples/example_pfs/main.py
@@ -1,12 +1,163 @@
 import os
 import fire
 import zipfile
 from tqdm import tqdm
 from shutil import copyfile, rmtree

 import learnware
 from learnware.market import EasyMarket, BaseUserInfo
 from learnware.market import database_ops
 from learnware.learnware import Learnware
 import learnware.specification as specification
 from pfs import Dataloader


 # Semantic specification templates for the learnwares added to the market.
 # The three templates differ only in their "Scenario" tags and their default
 # "Name" (learnware_1 .. learnware_3); every entry is built from fresh dict and
 # list objects, so the templates do not share any nested state.
 semantic_specs = [
    {
        "Data": {"Values": ["Tabular"], "Type": "Class"},
        "Task": {"Values": ["Classification"], "Type": "Class"},
        "Device": {"Values": ["GPU"], "Type": "Tag"},
        "Scenario": {"Values": scenario_tags, "Type": "Tag"},
        "Description": {"Values": "", "Type": "Description"},
        "Name": {"Values": "learnware_%d" % number, "Type": "Name"},
    }
    for number, scenario_tags in enumerate(
        (["Nature"], ["Business", "Nature"], ["Business"]), start=1
    )
 ]

 # Semantic specification describing the user's requirement; it is handed to
 # BaseUserInfo when searching the market. The identifier's spelling is kept
 # as-is because other code in this file refers to it by this name.
 user_senmantic = {
    "Data": {
        "Values": ["Tabular"],
        "Type": "Class",
    },
    "Task": {
        "Values": ["Classification"],
        "Type": "Class",
    },
    "Device": {
        "Values": ["GPU"],
        "Type": "Tag",
    },
    "Scenario": {
        "Values": ["Business"],
        "Type": "Tag",
    },
    # Description and Name are left empty on the user side.
    "Description": {
        "Values": "",
        "Type": "Description",
    },
    "Name": {
        "Values": "",
        "Type": "Name",
    },
 }


 class PFSDatasetWorkflow:
    """Workflow for the PFS example: build learnware packages, load them into a
    market, and search the market with per-task user specifications.

    The class is exposed on the command line through ``fire``; the public
    entry points are ``prepare_learnware`` and ``test``.
    """

    # Algorithms whose trained PFS models are packaged as learnwares.
    _ALGO_LIST = ("ridge", "lgb")

    @staticmethod
    def _script_dir():
        """Return the absolute directory that contains this script."""
        return os.path.dirname(os.path.abspath(__file__))

    @staticmethod
    def _pool_dir():
        """Return the absolute path of the ``learnware_pool`` directory next to this script."""
        return os.path.join(os.path.dirname(os.path.abspath(__file__)), "learnware_pool")

    @staticmethod
    def _list_zip_paths(pool_dir):
        """Return the full paths of all ``.zip`` files in ``pool_dir``, sorted by name.

        ``os.listdir`` yields bare names in arbitrary order, so the names are
        joined with ``pool_dir`` and sorted to make the result usable from any
        working directory and reproducible between runs.
        """
        return sorted(
            os.path.join(pool_dir, name) for name in os.listdir(pool_dir) if name.endswith(".zip")
        )

    @staticmethod
    def _zip_flat(dir_path, zip_file):
        """Write every file below ``dir_path`` into ``zip_file``, uncompressed.

        Files are stored under their base name only, i.e. the archive has no
        directory structure.
        """
        with zipfile.ZipFile(zip_file, "w") as zip_obj:
            for foldername, _, filenames in os.walk(dir_path):
                for filename in filenames:
                    zip_info = zipfile.ZipInfo(filename)
                    zip_info.compress_type = zipfile.ZIP_STORED
                    with open(os.path.join(foldername, filename), "rb") as file:
                        zip_obj.writestr(zip_info, file.read())

    def _init_pfs_dataset(self):
        """Regenerate the PFS data and retrain the models of every algorithm."""
        pfs = Dataloader()
        pfs.regenerate_data()

        for algo in self._ALGO_LIST:
            pfs.set_algo(algo)
            pfs.retrain_models()

    def _init_learnware_market(self):
        """Clear the learnware table and add every packaged learnware to a fresh market."""
        import copy  # local import: only this method needs it

        database_ops.clear_learnware_table()
        learnware.init()

        easy_market = EasyMarket()
        print("Total Item:", len(easy_market))

        # Full paths are required: bare names from os.listdir would be resolved
        # against the current working directory instead of the pool directory.
        zip_path_list = self._list_zip_paths(self._pool_dir())

        for idx, zip_path in enumerate(zip_path_list):
            # Work on a copy so the module-level templates are never mutated and
            # learnwares built from the same template do not share one dict.
            semantic_spec = copy.deepcopy(semantic_specs[idx % len(semantic_specs)])
            semantic_spec["Name"]["Values"] = "learnware_%d" % (idx)
            semantic_spec["Description"]["Values"] = "test_learnware_number_%d" % (idx)
            easy_market.add_learnware(zip_path, semantic_spec)

        print("Total Item:", len(easy_market))
        curr_inds = easy_market._get_ids()
        print("Available ids:", curr_inds)

    def prepare_learnware(self, regenerate_flag=False):
        """Package one learnware zip per (algorithm, task index) into ``learnware_pool``.

        Each package contains the RKME specification generated from the task's
        training features (``rkme.json``), the trained model (``model.out``),
        ``example_init.py`` as ``__init__.py`` and ``example.yaml`` as
        ``learnware.yaml``.

        Parameters
        ----------
        regenerate_flag : bool
            When true, regenerate the PFS data and retrain all models first.
        """
        if regenerate_flag:
            self._init_pfs_dataset()

        pfs = Dataloader()
        idx_list = pfs.get_idx_list()

        # Resolve the template files next to this script so the command works
        # regardless of the current working directory.
        script_dir = self._script_dir()
        init_template = os.path.join(script_dir, "example_init.py")
        yaml_template = os.path.join(script_dir, "example.yaml")

        pool_dir = self._pool_dir()
        os.makedirs(pool_dir, exist_ok=True)

        for idx in tqdm(idx_list):
            train_x, _, _, _ = pfs.get_idx_data(idx)
            # The specification depends only on the data, so it is generated
            # once per task and saved into the package of every algorithm.
            spec = specification.utils.generate_rkme_spec(X=train_x, gamma=0.1, cuda_idx=0)

            for algo in self._ALGO_LIST:
                pfs.set_algo(algo)
                dir_path = os.path.join(pool_dir, f"{algo}_{idx}")
                os.makedirs(dir_path, exist_ok=True)

                spec.save(os.path.join(dir_path, "rkme.json"))
                copyfile(pfs.get_model_path(idx), os.path.join(dir_path, "model.out"))
                copyfile(init_template, os.path.join(dir_path, "__init__.py"))
                copyfile(yaml_template, os.path.join(dir_path, "learnware.yaml"))

                self._zip_flat(dir_path, dir_path + ".zip")
                # Only the archive is kept; the staging directory is removed.
                rmtree(dir_path)

    def test(self, regenerate_flag=False):
        """Build the learnware pool and market, then search it for every task.

        For each task index an RKME specification is generated from the test
        features and used as the user's statistical information. The score,
        id and loss of every returned single learnware are printed, followed
        by the ids of the returned mixture.

        Parameters
        ----------
        regenerate_flag : bool
            Forwarded to ``prepare_learnware``.
        """
        self.prepare_learnware(regenerate_flag)
        self._init_learnware_market()

        easy_market = EasyMarket()
        print("Total Item:", len(easy_market))

        pfs = Dataloader()
        idx_list = pfs.get_idx_list()

        for idx in idx_list:
            _, _, test_x, test_y = pfs.get_idx_data(idx)
            user_spec = specification.utils.generate_rkme_spec(X=test_x, gamma=0.1, cuda_idx=0)

            user_info = BaseUserInfo(
                id=f"user_{idx}", semantic_spec=user_senmantic, stat_info={"RKMEStatSpecification": user_spec}
            )
            sorted_score_list, single_learnware_list, mixture_learnware_list = easy_market.search_learnware(user_info)

            print(f"search result of user{idx}:")
            # The loop variable is deliberately not called ``learnware`` so it
            # does not shadow the imported ``learnware`` module.
            for score, single_learnware in zip(sorted_score_list, single_learnware_list):
                pred_y = single_learnware.predict(test_x)
                loss = pfs.score(test_y, pred_y)[0]
                print(f"score: {score}, learnware_id: {single_learnware.id}, loss: {loss}")

            mixture_id = " ".join([mixture_item.id for mixture_item in mixture_learnware_list])
            print(f"mixture_learnware: {mixture_id}\n")
            # TODO: model reuse score


 if __name__ == "__main__":
    # Retrain the models of both algorithms, then hand control to Fire so the
    # workflow methods can be invoked from the command line.
    # NOTE(review): this retraining runs on every invocation, before any
    # command is dispatched -- confirm that this is intended.
    pfs = Dataloader()
    # pfs.regenerate_data()
    for algo in ("ridge", "lgb"):
        pfs.set_algo(algo)
        pfs.retrain_models()

    fire.Fire(PFSDatasetWorkflow)
--- a/examples/example_pfs/pfs/init.py
+++ b/examples/example_pfs/pfs/init.py
@@ -33,6 +33,11 @@ class Dataloader:
        test_xs, test_ys, _, _ = load_pfs_data(fpath)
        return train_xs, train_ys, test_xs, test_ys

    def get_model_path(self, idx):
        """Return the path of the model file for the ``idx``-th retained shop.

        Shop ids run over ``range(60)`` with ids 0, 1, 8, 11, 23, 36 and 40
        left out; ``idx`` indexes into the remaining ids. The file name is
        ``<algo>_Shop<id>.out`` with the id zero-padded to two digits, located
        in ``model_dir``.
        """
        skipped_ids = {0, 1, 40, 8, 11, 23, 36}
        kept_ids = [shop for shop in range(60) if shop not in skipped_ids]
        file_name = "{}_Shop{:0>2d}.out".format(self.algo, kept_ids[idx])
        return os.path.join(model_dir, file_name)
    
    def retrain_models(self):
        algo = self.algo
        errs = get_errors(algo=algo)
--- a/learnware/specification/utils.py
+++ b/learnware/specification/utils.py
@@ -44,6 +44,7 @@ def generate_rkme_spec(
    RKMEStatSpecification
            A RKMEStatSpecification object
    """
    X = np.ascontiguousarray(X).astype(np.float32)
    rkme_spec = RKMEStatSpecification(gamma=gamma, cuda_idx=cuda_idx)
    rkme_spec.generate_stat_spec_from_data(X, K, step_size, steps, nonnegative_beta, reduce)
    return rkme_spec