|
|
|
@@ -1,12 +1,163 @@ |
|
|
|
import os |
|
|
|
import fire |
|
|
|
import zipfile |
|
|
|
from tqdm import tqdm |
|
|
|
from shutil import copyfile, rmtree |
|
|
|
|
|
|
|
import learnware |
|
|
|
from learnware.market import EasyMarket, BaseUserInfo |
|
|
|
from learnware.market import database_ops |
|
|
|
from learnware.learnware import Learnware |
|
|
|
import learnware.specification as specification |
|
|
|
from pfs import Dataloader |
|
|
|
|
|
|
|
|
|
|
|
# Semantic-specification templates attached to uploaded learnwares.
# All three share the same Data/Task/Device fields and an empty description;
# they differ only in their placeholder name and their "Scenario" tags.
# Every entry is built from fresh dict/list objects, so no two templates
# share nested state.
semantic_specs = [
    {
        "Data": {"Values": ["Tabular"], "Type": "Class"},
        "Task": {"Values": ["Classification"], "Type": "Class"},
        "Device": {"Values": ["GPU"], "Type": "Tag"},
        "Scenario": {"Values": scenario_tags, "Type": "Tag"},
        "Description": {"Values": "", "Type": "Description"},
        "Name": {"Values": spec_name, "Type": "Name"},
    }
    for spec_name, scenario_tags in (
        ("learnware_1", ["Nature"]),
        ("learnware_2", ["Business", "Nature"]),
        ("learnware_3", ["Business"]),
    )
]
|
|
|
|
|
|
|
# Semantic specification used for every simulated user query in
# ``PFSDatasetWorkflow.test``; name and description are left empty.
# (The identifier keeps its original spelling because other code refers to it.)
user_senmantic = {
    field: {"Values": values, "Type": kind}
    for field, values, kind in (
        ("Data", ["Tabular"], "Class"),
        ("Task", ["Classification"], "Class"),
        ("Device", ["GPU"], "Tag"),
        ("Scenario", ["Business"], "Tag"),
        ("Description", "", "Description"),
        ("Name", "", "Name"),
    )
}
|
|
|
|
|
|
|
|
|
|
|
class PFSDatasetWorkflow:
    """Workflow for the PFS learnware example, exposed on the command line via ``fire``.

    Public commands:

    * ``prepare_learnware`` -- package one learnware zip per (algorithm, PFS index).
    * ``test`` -- package the learnwares, load them into a fresh market and
      run one search per PFS index.
    """

    # Algorithms for which a model is trained and packaged for each PFS index.
    _ALGO_LIST = ("ridge", "lgb")

    @staticmethod
    def _pool_dir():
        """Return the ``learnware_pool`` directory located next to this script."""
        return os.path.join(os.path.dirname(os.path.abspath(__file__)), "learnware_pool")

    @staticmethod
    def _template_path(file_name):
        """Locate a template file shipped with this example.

        The copy next to this script is preferred so the workflow does not
        depend on the current working directory; if no such file exists the
        bare ``file_name`` is returned, which keeps the previous
        working-directory-relative behaviour.
        """
        candidate = os.path.join(os.path.dirname(os.path.abspath(__file__)), file_name)
        return candidate if os.path.exists(candidate) else file_name

    @staticmethod
    def _list_pool_archives(pool_dir):
        """Return the full paths of all ``.zip`` entries in ``pool_dir``, sorted by name.

        ``os.listdir`` yields bare entry names, so each one is joined with
        ``pool_dir``. Non-zip entries (for example a staging directory left
        behind by an interrupted packaging run) are skipped, and sorting makes
        the index assigned to each archive reproducible.
        """
        return [
            os.path.join(pool_dir, entry)
            for entry in sorted(os.listdir(pool_dir))
            if entry.endswith(".zip")
        ]

    @staticmethod
    def _spec_for_index(template, idx):
        """Return an independent copy of ``template`` named after ``idx``.

        The template is deep-copied so that the shared module-level templates
        are never modified and no two learnwares end up holding the same dict.
        """
        import copy  # local import: only this helper needs it

        spec = copy.deepcopy(template)
        spec["Name"]["Values"] = "learnware_%d" % (idx)
        spec["Description"]["Values"] = "test_learnware_number_%d" % (idx)
        return spec

    @staticmethod
    def _zip_flat(dir_path, zip_file):
        """Write every file found under ``dir_path`` into ``zip_file``.

        Files are stored uncompressed and under their base name only, i.e. the
        archive has no directory structure.
        """
        with zipfile.ZipFile(zip_file, "w") as zip_obj:
            for foldername, _subfolders, filenames in os.walk(dir_path):
                for filename in filenames:
                    file_path = os.path.join(foldername, filename)
                    zip_info = zipfile.ZipInfo(filename)
                    zip_info.compress_type = zipfile.ZIP_STORED
                    with open(file_path, "rb") as file:
                        zip_obj.writestr(zip_info, file.read())

    def _init_pfs_dataset(self):
        """Regenerate the PFS data and retrain the models of every algorithm."""
        pfs = Dataloader()
        pfs.regenerate_data()

        for algo in self._ALGO_LIST:
            pfs.set_algo(algo)
            pfs.retrain_models()

    def _init_learnware_market(self):
        """initialize learnware market

        Clears the learnware table, initialises the ``learnware`` package and
        adds every zip archive of the learnware pool to a new ``EasyMarket``.
        """
        database_ops.clear_learnware_table()
        learnware.init()

        easy_market = EasyMarket()
        print("Total Item:", len(easy_market))

        zip_path_list = self._list_pool_archives(self._pool_dir())

        for idx, zip_path in enumerate(zip_path_list):
            # Cycle through the available templates; each learnware receives
            # its own copy carrying an index-based name and description.
            template = semantic_specs[idx % len(semantic_specs)]
            easy_market.add_learnware(zip_path, self._spec_for_index(template, idx))

        print("Total Item:", len(easy_market))
        curr_inds = easy_market._get_ids()
        print("Available ids:", curr_inds)

    def prepare_learnware(self, regenerate_flag=False):
        """Package one learnware zip per (algorithm, PFS index) into the pool directory.

        Args:
            regenerate_flag: when true, regenerate the PFS data and retrain
                all models before packaging.

        Each archive contains ``rkme.json`` (specification generated from the
        training features of that index), ``model.out``, ``__init__.py`` and
        ``learnware.yaml``. The temporary staging directory is removed once
        the archive has been written.
        """
        if regenerate_flag:
            self._init_pfs_dataset()

        pfs = Dataloader()
        idx_list = pfs.get_idx_list()

        curr_root = self._pool_dir()
        os.makedirs(curr_root, exist_ok=True)

        init_template = self._template_path("example_init.py")
        yaml_template = self._template_path("example.yaml")

        for idx in tqdm(idx_list):
            train_x, _, _, _ = pfs.get_idx_data(idx)
            # The specification depends only on the data of this index, so it
            # is generated once and saved into every algorithm's package.
            spec = specification.utils.generate_rkme_spec(X=train_x, gamma=0.1, cuda_idx=0)

            for algo in self._ALGO_LIST:
                pfs.set_algo(algo)
                dir_path = os.path.join(curr_root, f"{algo}_{idx}")
                os.makedirs(dir_path, exist_ok=True)

                spec.save(os.path.join(dir_path, "rkme.json"))
                # get_model_path is queried after set_algo(algo), as before.
                copyfile(pfs.get_model_path(idx), os.path.join(dir_path, "model.out"))
                copyfile(init_template, os.path.join(dir_path, "__init__.py"))
                copyfile(yaml_template, os.path.join(dir_path, "learnware.yaml"))

                self._zip_flat(dir_path, dir_path + ".zip")
                rmtree(dir_path)

    def test(self, regenerate_flag=False):
        """Run the whole example: package learnwares, fill the market, search per index.

        Args:
            regenerate_flag: forwarded to ``prepare_learnware``.

        For every PFS index a user specification is generated from that
        index's test features and submitted to the market; the score, id and
        loss of each returned single learnware and the ids of the returned
        mixture are printed.
        """
        self.prepare_learnware(regenerate_flag)
        self._init_learnware_market()

        easy_market = EasyMarket()
        print("Total Item:", len(easy_market))

        pfs = Dataloader()
        idx_list = pfs.get_idx_list()

        for idx in idx_list:
            _, _, test_x, test_y = pfs.get_idx_data(idx)
            user_spec = specification.utils.generate_rkme_spec(X=test_x, gamma=0.1, cuda_idx=0)

            user_info = BaseUserInfo(
                id=f"user_{idx}", semantic_spec=user_senmantic, stat_info={"RKMEStatSpecification": user_spec}
            )
            sorted_score_list, single_learnware_list, mixture_learnware_list = easy_market.search_learnware(user_info)

            print(f"search result of user{idx}:")
            # The loop variable is deliberately not called ``learnware`` so it
            # does not shadow the imported ``learnware`` module.
            for score, single_item in zip(sorted_score_list, single_learnware_list):
                pred_y = single_item.predict(test_x)
                loss = pfs.score(test_y, pred_y)[0]
                print(f"score: {score}, learnware_id: {single_item.id}, loss: {loss}")

            mixture_id = " ".join(item.id for item in mixture_learnware_list)
            print(f"mixture_learnware: {mixture_id}\n")
            # TODO: model reuse score
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Retrain the models of both algorithms before handing control to the
    # command-line interface. Data regeneration (``regenerate_data()``) is
    # not performed here.
    dataloader = Dataloader()
    for algo_name in ("ridge", "lgb"):
        dataloader.set_algo(algo_name)
        dataloader.retrain_models()

    # Expose the workflow's methods as command-line commands.
    fire.Fire(PFSDatasetWorkflow)