diff --git a/examples/example_pfs/example.yaml b/examples/example_pfs/example.yaml
new file mode 100644
index 0000000..6ca01c9
--- /dev/null
+++ b/examples/example_pfs/example.yaml
@@ -0,0 +1,8 @@
+model:
+  class_name: Model
+  kwargs: {}
+stat_specifications:
+  - module_path: learnware.specification
+    class_name: RKMEStatSpecification
+    file_name: rkme.json
+    kwargs: {}
\ No newline at end of file
diff --git a/examples/example_pfs/example_init.py b/examples/example_pfs/example_init.py
new file mode 100644
index 0000000..d875d96
--- /dev/null
+++ b/examples/example_pfs/example_init.py
@@ -0,0 +1,26 @@
+import os
+import joblib
+import numpy as np
+from learnware.model import BaseModel
+
+
+class Model(BaseModel):
+    """Learnware wrapper around the estimator stored in model.out."""
+
+    def __init__(self):
+        # model.out is loaded from the directory that contains this file.
+        # NOTE: joblib.load unpickles the file; only load trusted archives.
+        dir_path = os.path.dirname(os.path.abspath(__file__))
+        self.model = joblib.load(os.path.join(dir_path, "model.out"))
+
+    def fit(self, X: np.ndarray, y: np.ndarray):
+        """No-op placeholder."""
+        pass
+
+    def predict(self, X: np.ndarray) -> np.ndarray:
+        """Return the wrapped model's predictions for X."""
+        return self.model.predict(X)
+
+    def finetune(self, X: np.ndarray, y: np.ndarray):
+        """No-op placeholder."""
+        pass
diff --git a/examples/example_pfs/main.py b/examples/example_pfs/main.py
index 3524774..c935430 100644
--- a/examples/example_pfs/main.py
+++ b/examples/example_pfs/main.py
@@ -1,12 +1,182 @@
+import os
+import fire
+import zipfile
+from tqdm import tqdm
+from shutil import copyfile, rmtree
+
 import learnware
+from learnware.market import EasyMarket, BaseUserInfo
+from learnware.market import database_ops
+from learnware.learnware import Learnware
+import learnware.specification as specification
 from pfs import Dataloader
 
 
+semantic_specs = [
+    {
+        "Data": {"Values": ["Tabular"], "Type": "Class"},
+        "Task": {"Values": ["Classification"], "Type": "Class",},
+        "Device": {"Values": ["GPU"], "Type": "Tag"},
+        "Scenario": {"Values": ["Nature"], "Type": "Tag"},
+        "Description": {"Values": "", "Type": "Description"},
+        "Name": {"Values": "learnware_1", "Type": "Name"},
+    },
+    {
+        "Data": {"Values": ["Tabular"], "Type": "Class"},
+        "Task": {"Values": ["Classification"], "Type": "Class",},
+        "Device": {"Values": ["GPU"], "Type": "Tag"},
+        "Scenario": {"Values": ["Business", "Nature"], "Type": "Tag"},
+        "Description": {"Values": "", "Type": "Description"},
+        "Name": {"Values": "learnware_2", "Type": "Name"},
+    },
+    {
+        "Data": {"Values": ["Tabular"], "Type": "Class"},
+        "Task": {"Values": ["Classification"], "Type": "Class",},
+        "Device": {"Values": ["GPU"], "Type": "Tag"},
+        "Scenario": {"Values": ["Business"], "Type": "Tag"},
+        "Description": {"Values": "", "Type": "Description"},
+        "Name": {"Values": "learnware_3", "Type": "Name"},
+    },
+]
+
+user_senmantic = {
+    "Data": {"Values": ["Tabular"], "Type": "Class"},
+    "Task": {"Values": ["Classification"], "Type": "Class",},
+    "Device": {"Values": ["GPU"], "Type": "Tag"},
+    "Scenario": {"Values": ["Business"], "Type": "Tag"},
+    "Description": {"Values": "", "Type": "Description"},
+    "Name": {"Values": "", "Type": "Name"},
+}
+
+
+class PFSDatasetWorkflow:
+    """Command-line workflow for the PFS learnware example (exposed through ``fire``)."""
+
+    def _init_pfs_dataset(self):
+        """Regenerate the PFS data and retrain the ridge and lgb models."""
+        pfs = Dataloader()
+        pfs.regenerate_data()
+
+        algo_list = ["ridge", "lgb"]
+        for algo in algo_list:
+            pfs.set_algo(algo)
+            pfs.retrain_models()
+
+    def _init_learnware_market(self):
+        """Clear the learnware table and add every entry of learnware_pool to the market."""
+        database_ops.clear_learnware_table()
+        learnware.init()
+
+        easy_market = EasyMarket()
+        print("Total Item:", len(easy_market))
+
+        curr_root = os.path.dirname(os.path.abspath(__file__))
+        curr_root = os.path.join(curr_root, "learnware_pool")
+        # os.listdir() returns bare names, so join them onto the pool directory;
+        # sorting keeps the index-to-archive assignment reproducible.
+        zip_path_list = [os.path.join(curr_root, name) for name in sorted(os.listdir(curr_root))]
+
+        for idx, zip_path in enumerate(zip_path_list):
+            # Copy the template so the shared module-level dicts are never mutated.
+            semantic_spec = {key: dict(entry) for key, entry in semantic_specs[idx % 3].items()}
+            semantic_spec["Name"]["Values"] = "learnware_%d" % (idx)
+            semantic_spec["Description"]["Values"] = "test_learnware_number_%d" % (idx)
+            easy_market.add_learnware(zip_path, semantic_spec)
+
+        print("Total Item:", len(easy_market))
+        curr_inds = easy_market._get_ids()
+        print("Available ids:", curr_inds)
+
+    def prepare_learnware(self, regenerate_flag=False):
+        """Package every (algorithm, index) model as a zip archive in learnware_pool.
+
+        Parameters
+        ----------
+        regenerate_flag : bool
+            When true, regenerate the dataset and retrain the models first.
+        """
+        if regenerate_flag:
+            self._init_pfs_dataset()
+
+        pfs = Dataloader()
+        idx_list = pfs.get_idx_list()
+        algo_list = ["ridge", "lgb"]
+
+        # Resolve the template files next to this script so the command works from any cwd.
+        example_root = os.path.dirname(os.path.abspath(__file__))
+        curr_root = os.path.join(example_root, "learnware_pool")
+        os.makedirs(curr_root, exist_ok=True)
+
+        for idx in tqdm(idx_list):
+            train_x, train_y, test_x, test_y = pfs.get_idx_data(idx)
+            spec = specification.utils.generate_rkme_spec(X=train_x, gamma=0.1, cuda_idx=0)
+
+            for algo in algo_list:
+                pfs.set_algo(algo)
+                dir_path = os.path.join(curr_root, f"{algo}_{idx}")
+                os.makedirs(dir_path, exist_ok=True)
+
+                spec_path = os.path.join(dir_path, "rkme.json")
+                spec.save(spec_path)
+
+                model_path = pfs.get_model_path(idx)
+                model_file = os.path.join(dir_path, "model.out")
+                copyfile(model_path, model_file)
+
+                init_file = os.path.join(dir_path, "__init__.py")
+                copyfile(os.path.join(example_root, "example_init.py"), init_file)
+
+                yaml_file = os.path.join(dir_path, "learnware.yaml")
+                copyfile(os.path.join(example_root, "example.yaml"), yaml_file)
+
+                zip_file = dir_path + ".zip"
+                with zipfile.ZipFile(zip_file, "w") as zip_obj:
+                    for foldername, subfolders, filenames in os.walk(dir_path):
+                        for filename in filenames:
+                            file_path = os.path.join(foldername, filename)
+                            zip_info = zipfile.ZipInfo(filename)
+                            zip_info.compress_type = zipfile.ZIP_STORED
+                            with open(file_path, "rb") as file:
+                                zip_obj.writestr(zip_info, file.read())
+
+                rmtree(dir_path)
+
+    def test(self, regenerate_flag=False):
+        """Build the market, then search it once per index and print the results.
+
+        Parameters
+        ----------
+        regenerate_flag : bool
+            Forwarded to ``prepare_learnware``.
+        """
+        self.prepare_learnware(regenerate_flag)
+        self._init_learnware_market()
+
+        easy_market = EasyMarket()
+        print("Total Item:", len(easy_market))
+
+        pfs = Dataloader()
+        idx_list = pfs.get_idx_list()
+
+        for idx in idx_list:
+            train_x, train_y, test_x, test_y = pfs.get_idx_data(idx)
+            user_spec = specification.utils.generate_rkme_spec(X=test_x, gamma=0.1, cuda_idx=0)
+
+            user_info = BaseUserInfo(
+                id=f"user_{idx}", semantic_spec=user_senmantic, stat_info={"RKMEStatSpecification": user_spec}
+            )
+            sorted_score_list, single_learnware_list, mixture_learnware_list = easy_market.search_learnware(user_info)
+
+            print(f"search result of user{idx}:")
+            for score, item in zip(sorted_score_list, single_learnware_list):
+                pred_y = item.predict(test_x)
+                loss = pfs.score(test_y, pred_y)[0]
+                print(f"score: {score}, learnware_id: {item.id}, loss: {loss}")
+
+            mixture_id = " ".join([item.id for item in mixture_learnware_list])
+            print(f"mixture_learnware: {mixture_id}\n")
+            # TODO: model reuse score
+
 
 if __name__ == "__main__":
-    pfs = Dataloader()
-    # pfs.regenerate_data()
-    algo_list = ["ridge", "lgb"]
-    for algo in algo_list:
-        pfs.set_algo(algo)
-        pfs.retrain_models()
\ No newline at end of file
+    fire.Fire(PFSDatasetWorkflow)
\ No newline at end of file
diff --git a/examples/example_pfs/pfs/__init__.py b/examples/example_pfs/pfs/__init__.py
index 5ebfb0a..993c853 100644
--- a/examples/example_pfs/pfs/__init__.py
+++ b/examples/example_pfs/pfs/__init__.py
@@ -33,6 +33,11 @@ class Dataloader:
         test_xs, test_ys, _, _ = load_pfs_data(fpath)
         return train_xs, train_ys, test_xs, test_ys
 
+    def get_model_path(self, idx):
+        """Return the saved model path of the current algorithm for the idx-th kept shop."""
+        shop_ids = [i for i in range(60) if i not in {0, 1, 40, 8, 11, 23, 36}]
+        return os.path.join(model_dir, "{}_Shop{:0>2d}.out".format(self.algo, shop_ids[idx]))
+
     def retrain_models(self):
         algo = self.algo
         errs = get_errors(algo=algo)
diff --git a/learnware/specification/utils.py b/learnware/specification/utils.py
index 54966fe..6eb29b3 100644
--- a/learnware/specification/utils.py
+++ b/learnware/specification/utils.py
@@ -44,6 +44,7 @@ def generate_rkme_spec(
     RKMEStatSpecification
         A RKMEStatSpecification object
     """
+    X = np.ascontiguousarray(X).astype(np.float32)
     rkme_spec = RKMEStatSpecification(gamma=gamma, cuda_idx=cuda_idx)
     rkme_spec.generate_stat_spec_from_data(X, K, step_size, steps, nonnegative_beta, reduce)
     return rkme_spec