beimingwu
/
learnware

 
			
							import os
import fire
import zipfile
from tqdm import tqdm
from shutil import copyfile, rmtree

import learnware
from learnware.market import EasyMarket, BaseUserInfo
from learnware.market import database_ops
from learnware.learnware import Learnware
import learnware.specification as specification
from pfs import Dataloader


# Semantic-specification templates cycled over when learnwares are added to the
# market. The three entries are identical except for their scenario tags and
# their (1-based) placeholder name, so they are generated from the scenario
# list; every entry gets its own freshly built dictionaries and lists.
semantic_specs = [
    {
        "Data": {"Values": ["Tabular"], "Type": "Class"},
        "Task": {"Values": ["Classification"], "Type": "Class"},
        "Device": {"Values": ["GPU"], "Type": "Tag"},
        "Scenario": {"Values": scenario_tags, "Type": "Tag"},
        "Description": {"Values": "", "Type": "Description"},
        "Name": {"Values": f"learnware_{position}", "Type": "Name"},
    }
    for position, scenario_tags in enumerate(
        (["Nature"], ["Business", "Nature"], ["Business"]),
        start=1,
    )
]

# Semantic specification submitted on behalf of every simulated user when the
# market is searched. The identifier keeps its original spelling because other
# code in this file refers to it by this exact name.
user_senmantic = dict(
    Data=dict(Values=["Tabular"], Type="Class"),
    Task=dict(Values=["Classification"], Type="Class"),
    Device=dict(Values=["GPU"], Type="Tag"),
    Scenario=dict(Values=["Business"], Type="Tag"),
    Description=dict(Values="", Type="Description"),
    Name=dict(Values="", Type="Name"),
)


class PFSDatasetWorkflow:
    """Workflow that packages PFS models as learnwares, loads them into a market and searches it.

    The public methods (``prepare_learnware`` and ``test``) are the entry
    points; the underscore-prefixed methods are internal steps.
    """

    @staticmethod
    def _learnware_pool_dir():
        """Return the absolute path of the ``learnware_pool`` directory beside this script."""
        script_dir = os.path.dirname(os.path.abspath(__file__))
        return os.path.join(script_dir, "learnware_pool")

    def _init_pfs_dataset(self):
        """Regenerate the PFS data and retrain the models of every algorithm."""
        pfs = Dataloader()
        pfs.regenerate_data()

        algo_list = ["ridge", "lgb"]
        for algo in algo_list:
            pfs.set_algo(algo)
            pfs.retrain_models()

    def _init_learnware_market(self):
        """Reset the learnware market and add every zip archive found in the pool.

        The learnware table is cleared first, then each ``.zip`` file in the
        pool directory is added with a semantic specification derived from
        ``semantic_specs`` (cycled by position).
        """
        database_ops.clear_learnware_table()
        learnware.init()

        easy_market = EasyMarket()
        print("Total Item:", len(easy_market))

        pool_dir = self._learnware_pool_dir()
        # os.listdir yields bare entry names, so each one is joined with the
        # pool directory; otherwise the paths only resolve when the process
        # happens to run from inside the pool. Sorting makes the index given
        # to each archive reproducible, and the suffix filter skips anything
        # in the pool that is not a packaged learnware.
        zip_path_list = sorted(
            os.path.join(pool_dir, entry) for entry in os.listdir(pool_dir) if entry.endswith(".zip")
        )

        for idx, zip_path in enumerate(zip_path_list):
            template = semantic_specs[idx % len(semantic_specs)]
            # Copy each per-field dict so that setting the name/description
            # below neither rewrites the shared module-level template nor
            # aliases the specification of a previously added learnware.
            # The nested "Values" lists are only read here and stay shared.
            semantic_spec = {field: dict(entry) for field, entry in template.items()}
            semantic_spec["Name"]["Values"] = "learnware_%d" % (idx)
            semantic_spec["Description"]["Values"] = "test_learnware_number_%d" % (idx)
            easy_market.add_learnware(zip_path, semantic_spec)

        print("Total Item:", len(easy_market))
        curr_inds = easy_market._get_ids()
        print("Available ids:", curr_inds)

    def prepare_learnware(self, regenerate_flag=False):
        """Package one learnware zip per (algorithm, index) pair into the pool directory.

        Args:
            regenerate_flag: When true, the PFS data is regenerated and the
                models are retrained before packaging.

        Each archive holds, at its top level, the RKME specification
        (``rkme.json``), the model file (``model.out``), ``__init__.py`` and
        ``learnware.yaml``. The staging directory is removed once the archive
        has been written.
        """
        if regenerate_flag:
            self._init_pfs_dataset()

        pfs = Dataloader()
        idx_list = pfs.get_idx_list()
        algo_list = ["ridge", "lgb"]

        pool_dir = self._learnware_pool_dir()
        os.makedirs(pool_dir, exist_ok=True)

        for idx in tqdm(idx_list):
            # Only the training features are needed to build the specification.
            train_x, _, _, _ = pfs.get_idx_data(idx)
            # NOTE(review): cuda_idx=0 presumably selects the first CUDA device - confirm.
            spec = specification.utils.generate_rkme_spec(X=train_x, gamma=0.1, cuda_idx=0)

            for algo in algo_list:
                # The algorithm is selected before the model path is requested.
                pfs.set_algo(algo)
                dir_path = os.path.join(pool_dir, f"{algo}_{idx}")
                os.makedirs(dir_path, exist_ok=True)

                spec_path = os.path.join(dir_path, "rkme.json")
                spec.save(spec_path)

                model_path = pfs.get_model_path(idx)
                model_file = os.path.join(dir_path, "model.out")
                copyfile(model_path, model_file)

                # NOTE(review): the two template files below are resolved
                # against the current working directory, unlike the pool
                # directory which is resolved against this script's location.
                init_file = os.path.join(dir_path, "__init__.py")
                copyfile("example_init.py", init_file)

                yaml_file = os.path.join(dir_path, "learnware.yaml")
                copyfile("example.yaml", yaml_file)

                zip_file = dir_path + ".zip"
                with zipfile.ZipFile(zip_file, "w") as zip_obj:
                    for foldername, _subfolders, filenames in os.walk(dir_path):
                        for filename in filenames:
                            file_path = os.path.join(foldername, filename)
                            # Members are stored uncompressed under their bare
                            # file name, i.e. at the top level of the archive.
                            zip_info = zipfile.ZipInfo(filename)
                            zip_info.compress_type = zipfile.ZIP_STORED
                            with open(file_path, "rb") as file:
                                zip_obj.writestr(zip_info, file.read())

                rmtree(dir_path)

    def test(self, regenerate_flag=False):
        """Build the learnware pool, initialize the market and run a search for every index.

        Args:
            regenerate_flag: Forwarded to ``prepare_learnware``.

        For each index an RKME specification is generated from the test
        features and used, together with ``user_senmantic``, to search the
        market. Every single learnware returned is evaluated on the test data
        and its search score, id and loss are printed, followed by the ids of
        the returned mixture.
        """
        self.prepare_learnware(regenerate_flag)
        self._init_learnware_market()

        easy_market = EasyMarket()
        print("Total Item:", len(easy_market))

        pfs = Dataloader()
        idx_list = pfs.get_idx_list()

        for idx in idx_list:
            _, _, test_x, test_y = pfs.get_idx_data(idx)
            user_spec = specification.utils.generate_rkme_spec(X=test_x, gamma=0.1, cuda_idx=0)

            user_info = BaseUserInfo(
                id=f"user_{idx}", semantic_spec=user_senmantic, stat_info={"RKMEStatSpecification": user_spec}
            )
            sorted_score_list, single_learnware_list, mixture_learnware_list = easy_market.search_learnware(user_info)

            print(f"search result of user{idx}:")
            # The loop variable is deliberately not called "learnware" so that
            # it does not shadow the imported learnware module.
            for score, candidate in zip(sorted_score_list, single_learnware_list):
                pred_y = candidate.predict(test_x)
                # The first element of the value returned by pfs.score is reported as the loss.
                loss = pfs.score(test_y, pred_y)[0]
                print(f"score: {score}, learnware_id: {candidate.id}, loss: {loss}")

            mixture_id = " ".join(member.id for member in mixture_learnware_list)
            print(f"mixture_learnware: {mixture_id}\n")
            # TODO: model reuse score


if __name__ == "__main__":
    # Hand the workflow class to python-fire, which builds the command-line
    # interface from it when this file is executed as a script.
    fire.Fire(component=PFSDatasetWorkflow)