"""Workflow that packages PFS dataset models as learnwares, uploads them to an EasyMarket
instance, and then searches the market with user RKME specifications."""

import os
import fire
import zipfile
from tqdm import tqdm
from shutil import copyfile, rmtree

import learnware
from learnware.market import EasyMarket, BaseUserInfo
from learnware.market import database_ops
from learnware.learnware import Learnware
import learnware.specification as specification

from pfs import Dataloader

semantic_specs = [
    {
        "Data": {"Values": ["Tabular"], "Type": "Class"},
        "Task": {"Values": ["Classification"], "Type": "Class"},
        "Device": {"Values": ["GPU"], "Type": "Tag"},
        "Scenario": {"Values": ["Nature"], "Type": "Tag"},
        "Description": {"Values": "", "Type": "Description"},
        "Name": {"Values": "learnware_1", "Type": "Name"},
    },
    {
        "Data": {"Values": ["Tabular"], "Type": "Class"},
        "Task": {"Values": ["Classification"], "Type": "Class"},
        "Device": {"Values": ["GPU"], "Type": "Tag"},
        "Scenario": {"Values": ["Business", "Nature"], "Type": "Tag"},
        "Description": {"Values": "", "Type": "Description"},
        "Name": {"Values": "learnware_2", "Type": "Name"},
    },
    {
        "Data": {"Values": ["Tabular"], "Type": "Class"},
        "Task": {"Values": ["Classification"], "Type": "Class"},
        "Device": {"Values": ["GPU"], "Type": "Tag"},
        "Scenario": {"Values": ["Business"], "Type": "Tag"},
        "Description": {"Values": "", "Type": "Description"},
        "Name": {"Values": "learnware_3", "Type": "Name"},
    },
]

user_semantic = {
    "Data": {"Values": ["Tabular"], "Type": "Class"},
    "Task": {"Values": ["Classification"], "Type": "Class"},
    "Device": {"Values": ["GPU"], "Type": "Tag"},
    "Scenario": {"Values": ["Business"], "Type": "Tag"},
    "Description": {"Values": "", "Type": "Description"},
    "Name": {"Values": "", "Type": "Name"},
}


class PFSDatasetWorkflow:
    def _init_pfs_dataset(self):
        """Regenerate the PFS dataset and retrain the base models for each algorithm."""
        pfs = Dataloader()
        pfs.regenerate_data()
        algo_list = ["ridge", "lgb"]
        for algo in algo_list:
            pfs.set_algo(algo)
            pfs.retrain_models()

    def _init_learnware_market(self):
        """Initialize the learnware market and upload all prepared learnware zip files."""
        database_ops.clear_learnware_table()
        learnware.init()
        easy_market = EasyMarket()
        print("Total Item:", len(easy_market))

        zip_path_list = []
        curr_root = os.path.dirname(os.path.abspath(__file__))
        curr_root = os.path.join(curr_root, "learnware_pool")
        for zip_path in os.listdir(curr_root):
            # os.listdir returns bare file names, so join them with the pool directory
            zip_path_list.append(os.path.join(curr_root, zip_path))

        for idx, zip_path in enumerate(zip_path_list):
            semantic_spec = semantic_specs[idx % 3]
            semantic_spec["Name"]["Values"] = "learnware_%d" % (idx)
            semantic_spec["Description"]["Values"] = "test_learnware_number_%d" % (idx)
            easy_market.add_learnware(zip_path, semantic_spec)

        print("Total Item:", len(easy_market))
        curr_inds = easy_market._get_ids()
        print("Available ids:", curr_inds)

    def prepare_learnware(self, regenerate_flag=False):
        """Package each trained model with its RKME specification into a learnware zip file."""
        if regenerate_flag:
            self._init_pfs_dataset()

        pfs = Dataloader()
        idx_list = pfs.get_idx_list()
        algo_list = ["ridge", "lgb"]
        curr_root = os.path.dirname(os.path.abspath(__file__))
        curr_root = os.path.join(curr_root, "learnware_pool")
        os.makedirs(curr_root, exist_ok=True)

        for idx in tqdm(idx_list):
            train_x, train_y, test_x, test_y = pfs.get_idx_data(idx)
            spec = specification.utils.generate_rkme_spec(X=train_x, gamma=0.1, cuda_idx=0)

            for algo in algo_list:
                pfs.set_algo(algo)
                dir_path = os.path.join(curr_root, f"{algo}_{idx}")
                os.makedirs(dir_path, exist_ok=True)

                # Collect the four files required by a learnware package:
                # the RKME specification, the model file, __init__.py, and learnware.yaml
                spec_path = os.path.join(dir_path, "rkme.json")
                spec.save(spec_path)

                model_path = pfs.get_model_path(idx)
                model_file = os.path.join(dir_path, "model.out")
                copyfile(model_path, model_file)

                init_file = os.path.join(dir_path, "__init__.py")
                copyfile("example_init.py", init_file)

                yaml_file = os.path.join(dir_path, "learnware.yaml")
                copyfile("example.yaml", yaml_file)

                # Zip the package directory (flat, uncompressed) and remove the temporary folder
                zip_file = dir_path + ".zip"
                with zipfile.ZipFile(zip_file, "w") as zip_obj:
                    for foldername, subfolders, filenames in os.walk(dir_path):
                        for filename in filenames:
                            file_path = os.path.join(foldername, filename)
                            zip_info = zipfile.ZipInfo(filename)
                            zip_info.compress_type = zipfile.ZIP_STORED
                            with open(file_path, "rb") as file:
                                zip_obj.writestr(zip_info, file.read())

                rmtree(dir_path)

    def test(self, regenerate_flag=False):
        """Prepare the learnwares, populate the market, and search it with each user's test data."""
        self.prepare_learnware(regenerate_flag)
        self._init_learnware_market()
        easy_market = EasyMarket()
        print("Total Item:", len(easy_market))

        pfs = Dataloader()
        idx_list = pfs.get_idx_list()
        for idx in idx_list:
            train_x, train_y, test_x, test_y = pfs.get_idx_data(idx)
            user_spec = specification.utils.generate_rkme_spec(X=test_x, gamma=0.1, cuda_idx=0)
            user_info = BaseUserInfo(
                id=f"user_{idx}", semantic_spec=user_semantic, stat_info={"RKMEStatSpecification": user_spec}
            )
            sorted_score_list, single_learnware_list, mixture_learnware_list = easy_market.search_learnware(user_info)

            print(f"search result of user_{idx}:")
            # Note: the loop variable is named single_learnware to avoid shadowing the learnware module
            for score, single_learnware in zip(sorted_score_list, single_learnware_list):
                pred_y = single_learnware.predict(test_x)
                loss = pfs.score(test_y, pred_y)[0]
                print(f"score: {score}, learnware_id: {single_learnware.id}, loss: {loss}")

            mixture_id = " ".join([mixture_learnware.id for mixture_learnware in mixture_learnware_list])
            print(f"mixture_learnware: {mixture_id}\n")
            # TODO: model reuse score


if __name__ == "__main__":
    fire.Fire(PFSDatasetWorkflow)
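# Usage sketch: fire.Fire exposes the public methods of PFSDatasetWorkflow as CLI
# subcommands. The file name "pfs_workflow.py" below is an assumption for illustration,
# not something specified in this script.
#
#   python pfs_workflow.py prepare_learnware --regenerate_flag=True
#   python pfs_workflow.py test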