Browse Source

[MNT] Add the example of pfs

tags/v0.3.2
Gene 2 years ago
parent
commit
dcbdd8d76f
5 changed files with 190 additions and 6 deletions
  1. +8
    -0
      examples/example_pfs/example.yaml
  2. +19
    -0
      examples/example_pfs/example_init.py
  3. +157
    -6
      examples/example_pfs/main.py
  4. +5
    -0
      examples/example_pfs/pfs/__init__.py
  5. +1
    -0
      learnware/specification/utils.py

+ 8
- 0
examples/example_pfs/example.yaml View File

@@ -0,0 +1,8 @@
# Learnware package description. main.py copies this file into every packaged
# learnware folder under the name learnware.yaml.
# NOTE(review): nesting indentation is not visible here; class_name/kwargs
# presumably belong to the parent key that precedes them - confirm against the repo.
model:
# Name of the model class; example_init.py (copied in as __init__.py) defines class Model.
class_name: Model
kwargs: {}
stat_specifications:
# main.py stores the generated RKME specification as rkme.json next to the model file.
- module_path: learnware.specification
class_name: RKMEStatSpecification
file_name: rkme.json
kwargs: {}

+ 19
- 0
examples/example_pfs/example_init.py View File

@@ -0,0 +1,19 @@
import os
import joblib
import numpy as np
from learnware.model import BaseModel


class Model(BaseModel):
    """Learnware model wrapper around an estimator stored in ``model.out``.

    The estimator file is expected to sit in the same directory as this module.
    """

    def __init__(self):
        # Resolve the model file relative to this module rather than the
        # current working directory.
        module_file = os.path.abspath(__file__)
        model_file = os.path.join(os.path.dirname(module_file), "model.out")
        self.model = joblib.load(model_file)

    def fit(self, X: np.ndarray, y: np.ndarray):
        """Do nothing; the wrapped estimator is loaded already trained."""

    def predict(self, X: np.ndarray) -> np.ndarray:
        """Return the wrapped estimator's predictions for ``X``."""
        predictions = self.model.predict(X)
        return predictions

    def finetune(self, X: np.ndarray, y: np.ndarray):
        """Do nothing; fine-tuning is not implemented for this example."""

+ 157
- 6
examples/example_pfs/main.py View File

@@ -1,12 +1,163 @@
import os
import fire
import zipfile
from tqdm import tqdm
from shutil import copyfile, rmtree

import learnware
from learnware.market import EasyMarket, BaseUserInfo
from learnware.market import database_ops
from learnware.learnware import Learnware
import learnware.specification as specification
from pfs import Dataloader


# Semantic specifications assigned to the uploaded learnwares. The three entries
# differ only in their name and scenario tags; every entry owns its own nested
# dicts and lists, so one entry can be modified without touching the others.
semantic_specs = [
    {
        "Data": {"Values": ["Tabular"], "Type": "Class"},
        "Task": {"Values": ["Classification"], "Type": "Class"},
        "Device": {"Values": ["GPU"], "Type": "Tag"},
        "Scenario": {"Values": scenario_tags, "Type": "Tag"},
        "Description": {"Values": "", "Type": "Description"},
        "Name": {"Values": spec_name, "Type": "Name"},
    }
    for spec_name, scenario_tags in (
        ("learnware_1", ["Nature"]),
        ("learnware_2", ["Business", "Nature"]),
        ("learnware_3", ["Business"]),
    )
]

# Semantic specification used for the user's search requests: one
# {"Values": ..., "Type": ...} entry per semantic field.
user_senmantic = {
    field: {"Values": values, "Type": kind}
    for field, values, kind in (
        ("Data", ["Tabular"], "Class"),
        ("Task", ["Classification"], "Class"),
        ("Device", ["GPU"], "Tag"),
        ("Scenario", ["Business"], "Tag"),
        ("Description", "", "Description"),
        ("Name", "", "Name"),
    )
}


class PFSDatasetWorkflow:
    """Workflow for the PFS learnware example.

    The class is handed to ``fire.Fire``; ``prepare_learnware`` and ``test``
    are the public entry points, the underscore-prefixed methods are helpers.
    """

    def _init_pfs_dataset(self):
        """Regenerate the PFS data and retrain the ``ridge`` and ``lgb`` models."""
        pfs = Dataloader()
        pfs.regenerate_data()

        algo_list = ["ridge", "lgb"]
        for algo in algo_list:
            pfs.set_algo(algo)
            pfs.retrain_models()

    def _init_learnware_market(self):
        """Clear the learnware table and upload every entry of ``learnware_pool``."""
        from copy import deepcopy

        database_ops.clear_learnware_table()
        learnware.init()

        easy_market = EasyMarket()
        print("Total Item:", len(easy_market))

        curr_root = os.path.dirname(os.path.abspath(__file__))
        curr_root = os.path.join(curr_root, "learnware_pool")
        # os.listdir returns bare entry names, so join them with the pool
        # directory; otherwise the paths only resolve when the current working
        # directory happens to be the pool itself. Sorting makes the
        # learnware -> semantic spec assignment reproducible.
        zip_path_list = [os.path.join(curr_root, name) for name in sorted(os.listdir(curr_root))]

        for idx, zip_path in enumerate(zip_path_list):
            # Work on a copy: the templates are shared module-level dicts and
            # every template is reused for several learnwares.
            semantic_spec = deepcopy(semantic_specs[idx % len(semantic_specs)])
            semantic_spec["Name"]["Values"] = "learnware_%d" % (idx)
            semantic_spec["Description"]["Values"] = "test_learnware_number_%d" % (idx)
            easy_market.add_learnware(zip_path, semantic_spec)

        print("Total Item:", len(easy_market))
        curr_inds = easy_market._get_ids()
        print("Available ids:", curr_inds)

    def _zip_folder(self, dir_path, zip_file):
        """Store every file found below ``dir_path`` in ``zip_file``.

        Entries are written uncompressed and under their bare file name, i.e.
        the directory structure is flattened inside the archive.
        """
        with zipfile.ZipFile(zip_file, "w") as zip_obj:
            for foldername, _, filenames in os.walk(dir_path):
                for filename in filenames:
                    file_path = os.path.join(foldername, filename)
                    zip_info = zipfile.ZipInfo(filename)
                    zip_info.compress_type = zipfile.ZIP_STORED
                    with open(file_path, "rb") as file:
                        zip_obj.writestr(zip_info, file.read())

    def prepare_learnware(self, regenerate_flag=False):
        """Package one learnware zip per (algorithm, dataset index) pair.

        Each zip in ``learnware_pool`` contains the RKME specification
        (``rkme.json``), the trained model (``model.out``), ``__init__.py``
        and ``learnware.yaml``.

        Parameters
        ----------
        regenerate_flag : bool
            When true, regenerate the data and retrain the models first.
        """
        if regenerate_flag:
            self._init_pfs_dataset()

        pfs = Dataloader()
        idx_list = pfs.get_idx_list()
        algo_list = ["ridge", "lgb"]

        # Resolve the pool and the template files relative to this script so
        # the command does not depend on the current working directory.
        example_root = os.path.dirname(os.path.abspath(__file__))
        curr_root = os.path.join(example_root, "learnware_pool")
        os.makedirs(curr_root, exist_ok=True)
        init_template = os.path.join(example_root, "example_init.py")
        yaml_template = os.path.join(example_root, "example.yaml")

        for idx in tqdm(idx_list):
            train_x, _, _, _ = pfs.get_idx_data(idx)
            # The specification is built from the training features only and
            # is shared by all algorithms of this index.
            spec = specification.utils.generate_rkme_spec(X=train_x, gamma=0.1, cuda_idx=0)

            for algo in algo_list:
                pfs.set_algo(algo)
                dir_path = os.path.join(curr_root, f"{algo}_{idx}")
                os.makedirs(dir_path, exist_ok=True)

                spec.save(os.path.join(dir_path, "rkme.json"))
                copyfile(pfs.get_model_path(idx), os.path.join(dir_path, "model.out"))
                copyfile(init_template, os.path.join(dir_path, "__init__.py"))
                copyfile(yaml_template, os.path.join(dir_path, "learnware.yaml"))

                self._zip_folder(dir_path, dir_path + ".zip")
                # Only the archive is kept; the staging folder is removed.
                rmtree(dir_path)

    def test(self, regenerate_flag=False):
        """Build the learnware pool, fill the market and run one search per dataset index.

        For every index a user specification is generated from the test
        features, the market is searched, and each returned single learnware
        is evaluated on the test split.

        Parameters
        ----------
        regenerate_flag : bool
            Forwarded to ``prepare_learnware``.
        """
        self.prepare_learnware(regenerate_flag)
        self._init_learnware_market()

        easy_market = EasyMarket()
        print("Total Item:", len(easy_market))
        pfs = Dataloader()
        idx_list = pfs.get_idx_list()
        for idx in idx_list:
            _, _, test_x, test_y = pfs.get_idx_data(idx)
            user_spec = specification.utils.generate_rkme_spec(X=test_x, gamma=0.1, cuda_idx=0)
            user_info = BaseUserInfo(
                id=f"user_{idx}", semantic_spec=user_senmantic, stat_info={"RKMEStatSpecification": user_spec}
            )
            sorted_score_list, single_learnware_list, mixture_learnware_list = easy_market.search_learnware(user_info)

            print(f"search result of user{idx}:")
            # The loop variable is deliberately not called "learnware", which
            # would shadow the imported learnware module.
            for score, candidate in zip(sorted_score_list, single_learnware_list):
                pred_y = candidate.predict(test_x)
                # The first element of the score result is reported as the loss.
                loss = pfs.score(test_y, pred_y)[0]
                print(f"score: {score}, learnware_id: {candidate.id}, loss: {loss}")
            mixture_id = " ".join(item.id for item in mixture_learnware_list)
            print(f"mixture_learnware: {mixture_id}\n")
            # TODO: model reuse score


if __name__ == "__main__":
    # The models of both algorithms are retrained on every invocation, before
    # the command-line call is dispatched to the workflow class.
    pfs = Dataloader()
    # pfs.regenerate_data()
    algo_list = ["ridge", "lgb"]
    for algo_name in algo_list:
        pfs.set_algo(algo_name)
        pfs.retrain_models()

    # Expose the workflow methods as command-line commands.
    fire.Fire(PFSDatasetWorkflow)

+ 5
- 0
examples/example_pfs/pfs/__init__.py View File

@@ -33,6 +33,11 @@ class Dataloader:
test_xs, test_ys, _, _ = load_pfs_data(fpath)
return train_xs, train_ys, test_xs, test_ys

def get_model_path(self, idx):
    """Return the path of the stored model for dataset index ``idx``.

    ``idx`` indexes the list of shop ids 0..59 with the ids 0, 1, 8, 11, 23,
    36 and 40 left out; the file name combines the current algorithm and the
    zero-padded, two-digit shop id.
    """
    skipped_shops = {0, 1, 40, 8, 11, 23, 36}
    kept_shops = [shop for shop in range(60) if shop not in skipped_shops]
    file_name = "{}_Shop{:0>2d}.out".format(self.algo, kept_shops[idx])
    return os.path.join(model_dir, file_name)
def retrain_models(self):
algo = self.algo
errs = get_errors(algo=algo)


+ 1
- 0
learnware/specification/utils.py View File

@@ -44,6 +44,7 @@ def generate_rkme_spec(
RKMEStatSpecification
A RKMEStatSpecification object
"""
X = np.ascontiguousarray(X).astype(np.float32)
rkme_spec = RKMEStatSpecification(gamma=gamma, cuda_idx=cuda_idx)
rkme_spec.generate_stat_spec_from_data(X, K, step_size, steps, nonnegative_beta, reduce)
return rkme_spec


Loading…
Cancel
Save