Browse Source

Merge branch 'dev' of git.nju.edu.cn:learnware/learnware-market into dev

tags/v0.3.2
chenzx 3 years ago
parent
commit
7402d4252b
7 changed files with 226 additions and 145 deletions
  1. +5
    -33
      examples/example_image/main.py
  2. +10
    -36
      examples/example_m5/main.py
  3. +86
    -0
      examples/example_m5/upload.py
  4. +7
    -35
      examples/example_pfs/main.py
  5. +89
    -0
      examples/example_pfs/upload.py
  6. +4
    -29
      examples/workflow_by_code/main.py
  7. +25
    -12
      learnware/market/easy.py

+ 5
- 33
examples/example_image/main.py View File

@@ -38,45 +38,17 @@ os.makedirs(model_save_root, exist_ok=True)
semantic_specs = [
{
"Data": {"Values": ["Tabular"], "Type": "Class"},
"Task": {
"Values": ["Classification"],
"Type": "Class",
},
"Device": {"Values": ["GPU"], "Type": "Tag"},
"Scenario": {"Values": ["Nature"], "Type": "Tag"},
"Description": {"Values": "", "Type": "String"},
"Name": {"Values": "learnware_1", "Type": "String"},
},
{
"Data": {"Values": ["Tabular"], "Type": "Class"},
"Task": {
"Values": ["Classification"],
"Type": "Class",
},
"Device": {"Values": ["GPU"], "Type": "Tag"},
"Scenario": {"Values": ["Business", "Nature"], "Type": "Tag"},
"Description": {"Values": "", "Type": "String"},
"Name": {"Values": "learnware_2", "Type": "String"},
},
{
"Data": {"Values": ["Tabular"], "Type": "Class"},
"Task": {
"Values": ["Classification"],
"Type": "Class",
},
"Task": {"Values": ["Classification"], "Type": "Class"},
"Device": {"Values": ["GPU"], "Type": "Tag"},
"Scenario": {"Values": ["Business"], "Type": "Tag"},
"Description": {"Values": "", "Type": "String"},
"Name": {"Values": "learnware_3", "Type": "String"},
},
"Name": {"Values": "learnware_1", "Type": "String"},
}
]

user_senmantic = {
"Data": {"Values": ["Tabular"], "Type": "Class"},
"Task": {
"Values": ["Classification"],
"Type": "Class",
},
"Task": {"Values": ["Classification"], "Type": "Class"},
"Device": {"Values": ["GPU"], "Type": "Tag"},
"Scenario": {"Values": ["Business"], "Type": "Tag"},
"Description": {"Values": "", "Type": "String"},
@@ -144,7 +116,7 @@ def prepare_market():
new_learnware_path = prepare_learnware(
data_path, model_path, init_file_path, yaml_file_path, tmp_dir, "%s_%d" % (dataset, i)
)
semantic_spec = semantic_specs[i % 3]
semantic_spec = semantic_specs[0]
semantic_spec["Name"]["Values"] = "learnware_%d" % (i)
semantic_spec["Description"]["Values"] = "test_learnware_number_%d" % (i)
image_market.add_learnware(new_learnware_path, semantic_spec)


+ 10
- 36
examples/example_m5/main.py View File

@@ -15,45 +15,17 @@ from m5 import DataLoader
semantic_specs = [
{
"Data": {"Values": ["Tabular"], "Type": "Class"},
"Task": {
"Values": ["Classification"],
"Type": "Class",
},
"Device": {"Values": ["GPU"], "Type": "Tag"},
"Scenario": {"Values": ["Nature"], "Type": "Tag"},
"Description": {"Values": "", "Type": "String"},
"Name": {"Values": "learnware_1", "Type": "String"},
},
{
"Data": {"Values": ["Tabular"], "Type": "Class"},
"Task": {
"Values": ["Classification"],
"Type": "Class",
},
"Device": {"Values": ["GPU"], "Type": "Tag"},
"Scenario": {"Values": ["Business", "Nature"], "Type": "Tag"},
"Description": {"Values": "", "Type": "String"},
"Name": {"Values": "learnware_2", "Type": "String"},
},
{
"Data": {"Values": ["Tabular"], "Type": "Class"},
"Task": {
"Values": ["Classification"],
"Type": "Class",
},
"Task": {"Values": ["Classification"], "Type": "Class"},
"Device": {"Values": ["GPU"], "Type": "Tag"},
"Scenario": {"Values": ["Business"], "Type": "Tag"},
"Description": {"Values": "", "Type": "String"},
"Name": {"Values": "learnware_3", "Type": "String"},
},
"Name": {"Values": "learnware_1", "Type": "String"},
}
]

user_senmantic = {
"Data": {"Values": ["Tabular"], "Type": "Class"},
"Task": {
"Values": ["Classification"],
"Type": "Class",
},
"Task": {"Values": ["Classification"], "Type": "Class"},
"Device": {"Values": ["GPU"], "Type": "Tag"},
"Scenario": {"Values": ["Business"], "Type": "Tag"},
"Description": {"Values": "", "Type": "String"},
@@ -86,7 +58,7 @@ class M5DatasetWorkflow:
zip_path_list.append(os.path.join(curr_root, zip_path))

for idx, zip_path in enumerate(zip_path_list):
semantic_spec = semantic_specs[idx % 3]
semantic_spec = semantic_specs[0]
semantic_spec["Name"]["Values"] = "learnware_%d" % (idx)
semantic_spec["Description"]["Values"] = "test_learnware_number_%d" % (idx)
easy_market.add_learnware(zip_path, semantic_spec)
@@ -101,7 +73,7 @@ class M5DatasetWorkflow:

m5 = DataLoader()
idx_list = m5.get_idx_list()
algo_list = ["ridge", "lgb"]
algo_list = ["lgb"] # algo_list = ["ridge", "lgb"]

curr_root = os.path.dirname(os.path.abspath(__file__))
curr_root = os.path.join(curr_root, "learnware_pool")
@@ -161,6 +133,9 @@ class M5DatasetWorkflow:
sorted_score_list, single_learnware_list, mixture_learnware_list = easy_market.search_learnware(user_info)

print(f"search result of user{idx}:")
print(
f"single model num: {len(sorted_score_list)}, max_score: {sorted_score_list[0]}, min_score: {sorted_score_list[-1]}"
)
for score, learnware in zip(sorted_score_list, single_learnware_list):
pred_y = learnware.predict(test_x)
loss = m5.score(test_y, pred_y)
@@ -169,11 +144,10 @@ class M5DatasetWorkflow:
mixture_id = " ".join([learnware.id for learnware in mixture_learnware_list])
print(f"mixture_learnware: {mixture_id}\n")

# TODO: model reuse score
reuse_baseline = JobSelectorReuser(learnware_list=mixture_learnware_list)
reuse_predict = reuse_baseline.predict(user_data=test_x)
reuse_score = m5.score(test_y, reuse_predict)
print(f"mixture reuse score: {reuse_score}\n")
print(f"mixture reuse loss: {reuse_score}\n")


if __name__ == "__main__":


+ 86
- 0
examples/example_m5/upload.py View File

@@ -0,0 +1,86 @@
import hashlib
import requests
import os
import random
import json
import time
from tqdm import tqdm

# Account and server configuration for the upload script.
#
# Each value defaults to what was committed with this example, and can be overridden
# without editing the file through the LEARNWARE_EMAIL, LEARNWARE_PASSWORD and
# LEARNWARE_SERVER environment variables.
#
# NOTE(review): the default password is stored in plain text in the repository, is only
# MD5-hashed before being sent, and the default server URL is plain HTTP. Prefer supplying
# the credentials through the environment and consider rotating the committed password.
email = os.environ.get("LEARNWARE_EMAIL", "tanzh@lamda.nju.edu.cn")
password = hashlib.md5(os.environ.get("LEARNWARE_PASSWORD", "Qwerty123").encode()).hexdigest()
_server = os.environ.get("LEARNWARE_SERVER", "http://210.28.134.201:8089").rstrip("/")
login_url = _server + "/auth/login"
submit_url = _server + "/user/add_learnware"

# Candidate values for each field of a semantic specification.
all_data_type = ["Table", "Image", "Video", "Text", "Audio"]
all_task_type = [
    "Classification",
    "Regression",
    "Clustering",
    "Feature Extraction",
    "Generation",
    "Segmentation",
    "Object Detection",
]
all_device_type = ["CPU", "GPU"]
all_scenario = [
    "Business",
    "Financial",
    "Health",
    "Politics",
    "Computer",
    "Internet",
    "Traffic",
    "Nature",
    "Fashion",
    "Industry",
    "Agriculture",
    "Education",
    "Entertainment",
    "Architecture",
]

# ################################################### #
# The section above does not need to be modified.     #
# ################################################### #


def main():
    """Log in to the market server and upload every entry of ``./learnware_pool``.

    A single ``requests.Session`` is used for the login request and for all uploads.
    Each file is posted to ``submit_url`` together with a JSON-encoded semantic
    specification whose name is ``M5_Shop<NN>_<timestamp>``.

    Raises:
        requests.HTTPError: If the login request returns an HTTP error status.
        RuntimeError: If the server answers an upload with a non-zero ``code``.
    """
    session = requests.Session()
    res = session.post(login_url, json={"email": email, "password": password})
    # Fail fast on an HTTP-level login failure instead of discovering it on the first upload.
    res.raise_for_status()

    # Change this to the path of the learnware folder if it is not ./learnware_pool.
    pool_dir = os.path.join(os.path.abspath("."), "learnware_pool")

    for learnware in os.listdir(pool_dir):
        # Adjust the semantic specification below to describe the learnware being uploaded.
        # The shop number is the second "_"-separated token of the name before the first ".".
        shop_id = int(learnware.split(".")[0].split("_")[1])
        timestamp = time.strftime("%Y%m%d%H%M%S", time.localtime())
        name = "M5_Shop%02d_%s" % (shop_id, timestamp)
        semantic_specification = {
            "Data": {"Values": ["Table"], "Type": "Class"},
            "Task": {"Values": ["Regression"], "Type": "Class"},
            "Device": {"Values": ["CPU"], "Type": "Tag"},
            "Scenario": {"Values": ["Business"], "Type": "Tag"},
            "Description": {"Values": "A sales-forecasting model from Walmart store", "Type": "String"},
            "Name": {"Values": name, "Type": "String"},
        }

        # Open the archive in a context manager so the handle is closed after each upload.
        with open(os.path.join(pool_dir, learnware), "rb") as learnware_file:
            res = session.post(
                submit_url,
                data={"semantic_specification": json.dumps(semantic_specification)},
                files={"learnware_file": learnware_file},
            )

        # Explicit check instead of ``assert`` so it still runs under ``python -O``.
        if json.loads(res.text)["code"] != 0:
            raise RuntimeError(f"Upload error: {learnware}")


if __name__ == "__main__":
    main()

+ 7
- 35
examples/example_pfs/main.py View File

@@ -15,45 +15,17 @@ from pfs import Dataloader
semantic_specs = [
{
"Data": {"Values": ["Tabular"], "Type": "Class"},
"Task": {
"Values": ["Classification"],
"Type": "Class",
},
"Device": {"Values": ["GPU"], "Type": "Tag"},
"Scenario": {"Values": ["Nature"], "Type": "Tag"},
"Description": {"Values": "", "Type": "String"},
"Name": {"Values": "learnware_1", "Type": "String"},
},
{
"Data": {"Values": ["Tabular"], "Type": "Class"},
"Task": {
"Values": ["Classification"],
"Type": "Class",
},
"Device": {"Values": ["GPU"], "Type": "Tag"},
"Scenario": {"Values": ["Business", "Nature"], "Type": "Tag"},
"Description": {"Values": "", "Type": "String"},
"Name": {"Values": "learnware_2", "Type": "String"},
},
{
"Data": {"Values": ["Tabular"], "Type": "Class"},
"Task": {
"Values": ["Classification"],
"Type": "Class",
},
"Task": {"Values": ["Classification"], "Type": "Class"},
"Device": {"Values": ["GPU"], "Type": "Tag"},
"Scenario": {"Values": ["Business"], "Type": "Tag"},
"Description": {"Values": "", "Type": "String"},
"Name": {"Values": "learnware_3", "Type": "String"},
},
"Name": {"Values": "learnware_1", "Type": "String"},
}
]

user_senmantic = {
"Data": {"Values": ["Tabular"], "Type": "Class"},
"Task": {
"Values": ["Classification"],
"Type": "Class",
},
"Task": {"Values": ["Classification"], "Type": "Class"},
"Device": {"Values": ["GPU"], "Type": "Tag"},
"Scenario": {"Values": ["Business"], "Type": "Tag"},
"Description": {"Values": "", "Type": "String"},
@@ -86,7 +58,7 @@ class PFSDatasetWorkflow:
zip_path_list.append(os.path.join(curr_root, zip_path))

for idx, zip_path in enumerate(zip_path_list):
semantic_spec = semantic_specs[idx % 3]
semantic_spec = semantic_specs[0]
semantic_spec["Name"]["Values"] = "learnware_%d" % (idx)
semantic_spec["Description"]["Values"] = "test_learnware_number_%d" % (idx)
easy_market.add_learnware(zip_path, semantic_spec)
@@ -142,8 +114,8 @@ class PFSDatasetWorkflow:
rmtree(dir_path)

def test(self, regenerate_flag=False):
# self.prepare_learnware(regenerate_flag)
# self._init_learnware_market()
self.prepare_learnware(regenerate_flag)
self._init_learnware_market()

easy_market = EasyMarket()
print("Total Item:", len(easy_market))


+ 89
- 0
examples/example_pfs/upload.py View File

@@ -0,0 +1,89 @@
import hashlib
import requests
import os
import random
import json
import time
from tqdm import tqdm

# Account and server configuration for the upload script.
#
# Each value defaults to what was committed with this example, and can be overridden
# without editing the file through the LEARNWARE_EMAIL, LEARNWARE_PASSWORD and
# LEARNWARE_SERVER environment variables.
#
# NOTE(review): the default password is stored in plain text in the repository, is only
# MD5-hashed before being sent, and the default server URL is plain HTTP. Prefer supplying
# the credentials through the environment and consider rotating the committed password.
email = os.environ.get("LEARNWARE_EMAIL", "tanzh@lamda.nju.edu.cn")
password = hashlib.md5(os.environ.get("LEARNWARE_PASSWORD", "Qwerty123").encode()).hexdigest()
_server = os.environ.get("LEARNWARE_SERVER", "http://210.28.134.201:8089").rstrip("/")
login_url = _server + "/auth/login"
submit_url = _server + "/user/add_learnware"

# Candidate values for each field of a semantic specification.
all_data_type = ["Table", "Image", "Video", "Text", "Audio"]
all_task_type = [
    "Classification",
    "Regression",
    "Clustering",
    "Feature Extraction",
    "Generation",
    "Segmentation",
    "Object Detection",
]
all_device_type = ["CPU", "GPU"]
all_scenario = [
    "Business",
    "Financial",
    "Health",
    "Politics",
    "Computer",
    "Internet",
    "Traffic",
    "Nature",
    "Fashion",
    "Industry",
    "Agriculture",
    "Education",
    "Entertainment",
    "Architecture",
]

# ################################################### #
# The section above does not need to be modified.     #
# ################################################### #


def main():
    """Log in to the market server and upload every entry of ``./learnware_pool``.

    A single ``requests.Session`` is used for the login request and for all uploads.
    Each file is posted to ``submit_url`` together with a JSON-encoded semantic
    specification whose name is ``PFS_Shop<NN>_<timestamp>``.

    Raises:
        requests.HTTPError: If the login request returns an HTTP error status.
        RuntimeError: If the server answers an upload with a non-zero ``code``.
    """
    session = requests.Session()
    res = session.post(login_url, json={"email": email, "password": password})
    # Fail fast on an HTTP-level login failure instead of discovering it on the first upload.
    res.raise_for_status()

    # Change this to the path of the learnware folder if it is not ./learnware_pool.
    pool_dir = os.path.join(os.path.abspath("."), "learnware_pool")

    for learnware in os.listdir(pool_dir):
        # Adjust the semantic specification below to describe the learnware being uploaded.
        # The shop number is the second "_"-separated token of the name before the first ".".
        shop_id = int(learnware.split(".")[0].split("_")[1])
        timestamp = time.strftime("%Y%m%d%H%M%S", time.localtime())
        name = "PFS_Shop%02d_%s" % (shop_id, timestamp)
        semantic_specification = {
            "Data": {"Values": ["Table"], "Type": "Class"},
            "Task": {"Values": ["Regression"], "Type": "Class"},
            "Device": {"Values": ["CPU"], "Type": "Tag"},
            "Scenario": {"Values": ["Business"], "Type": "Tag"},
            "Description": {
                "Values": "A sales-forecasting model from Predict Future Sales Competition on Kaggle",
                "Type": "String",
            },
            "Name": {"Values": name, "Type": "String"},
        }

        # Open the archive in a context manager so the handle is closed after each upload.
        with open(os.path.join(pool_dir, learnware), "rb") as learnware_file:
            res = session.post(
                submit_url,
                data={"semantic_specification": json.dumps(semantic_specification)},
                files={"learnware_file": learnware_file},
            )

        # Explicit check instead of ``assert`` so it still runs under ``python -O``.
        if json.loads(res.text)["code"] != 0:
            raise RuntimeError(f"Upload error: {learnware}")


if __name__ == "__main__":
    main()

+ 4
- 29
examples/workflow_by_code/main.py View File

@@ -18,37 +18,12 @@ curr_root = os.path.dirname(os.path.abspath(__file__))
semantic_specs = [
{
"Data": {"Values": ["Tabular"], "Type": "Class"},
"Task": {
"Values": ["Classification"],
"Type": "Class",
},
"Device": {"Values": ["GPU"], "Type": "Tag"},
"Scenario": {"Values": ["Nature"], "Type": "Tag"},
"Description": {"Values": "", "Type": "String"},
"Name": {"Values": "learnware_1", "Type": "String"},
},
{
"Data": {"Values": ["Tabular"], "Type": "Class"},
"Task": {
"Values": ["Classification"],
"Type": "Class",
},
"Device": {"Values": ["GPU"], "Type": "Tag"},
"Scenario": {"Values": ["Business", "Nature"], "Type": "Tag"},
"Description": {"Values": "", "Type": "String"},
"Name": {"Values": "learnware_2", "Type": "String"},
},
{
"Data": {"Values": ["Tabular"], "Type": "Class"},
"Task": {
"Values": ["Classification"],
"Type": "Class",
},
"Task": {"Values": ["Classification"], "Type": "Class"},
"Device": {"Values": ["GPU"], "Type": "Tag"},
"Scenario": {"Values": ["Business"], "Type": "Tag"},
"Description": {"Values": "", "Type": "String"},
"Name": {"Values": "learnware_3", "Type": "String"},
},
"Name": {"Values": "learnware_1", "Type": "String"},
}
]

user_senmantic = {
@@ -118,7 +93,7 @@ class LearnwareMarketWorkflow:
print("Total Item:", len(easy_market))

for idx, zip_path in enumerate(self.zip_path_list):
semantic_spec = semantic_specs[idx % 3]
semantic_spec = semantic_specs[0]
semantic_spec["Name"]["Values"] = "learnware_%d" % (idx)
semantic_spec["Description"]["Values"] = "test_learnware_number_%d" % (idx)
easy_market.add_learnware(zip_path, semantic_spec)


+ 25
- 12
learnware/market/easy.py View File

@@ -213,7 +213,7 @@ class EasyMarket(BaseMarket):
else:
max_score = (max_dist - min_dist) / (max_dist - dist_epsilon)

if max_dist < dist_epsilon or max_score > 1:
if min_dist < dist_epsilon:
dist_epsilon = min_dist
elif max_score < min_score:
dist_epsilon = max_dist - (max_dist - min_dist) / min_score
@@ -333,7 +333,7 @@ class EasyMarket(BaseMarket):
learnware_list: List[Learnware],
user_rkme: RKMEStatSpecification,
max_search_num: int,
weight_cutoff: float = 0.9,
weight_cutoff: float = 0.95,
) -> Tuple[List[float], List[Learnware]]:
"""Select learnwares based on a total mixture ratio, then recalculate their mixture weights

@@ -362,19 +362,25 @@ class EasyMarket(BaseMarket):
max_search_num = learnware_num

weight, _ = self._calculate_rkme_spec_mixture_weight(learnware_list, user_rkme)
sort_by_weight_idx_list = sorted(range(learnware_num), key=lambda k: weight[k])
sort_by_weight_idx_list = sorted(range(learnware_num), key=lambda k: weight[k], reverse=True)

weight_sum = 0
mixture_list = []
for idx in sort_by_weight_idx_list:
weight_sum += sort_by_weight_idx_list[idx]
weight_sum += weight[idx]
if weight_sum <= weight_cutoff:
mixture_list.append(learnware_list[idx])
else:
break

if len(mixture_list) > max_search_num:
mixture_list = mixture_list[:max_search_num]
if len(mixture_list) <= 1:
mixture_list = [learnware_list[sort_by_weight_idx_list[0]]]
mixture_weight = [1]
else:
if len(mixture_list) > max_search_num:
mixture_list = mixture_list[:max_search_num]
mixture_weight, _ = self._calculate_rkme_spec_mixture_weight(mixture_list, user_rkme)

mixture_weight, _ = self._calculate_rkme_spec_mixture_weight(mixture_list, user_rkme)
return mixture_weight, mixture_list

def _filter_by_rkme_spec_single(
@@ -438,7 +444,7 @@ class EasyMarket(BaseMarket):

return filtered_learnware_list

def _search_by_rkme_spec_mixture(
def _search_by_rkme_spec_mixture_greedy(
self,
learnware_list: List[Learnware],
user_rkme: RKMEStatSpecification,
@@ -578,7 +584,7 @@ class EasyMarket(BaseMarket):
return match_learnwares

def search_learnware(
self, user_info: BaseUserInfo, max_search_num=5
self, user_info: BaseUserInfo, max_search_num: int = 5, search_method: str = "greedy"
) -> Tuple[List[float], List[Learnware], List[Learnware]]:
"""Search learnwares based on user_info

@@ -612,9 +618,16 @@ class EasyMarket(BaseMarket):
sorted_score_list, single_learnware_list = self._filter_by_rkme_spec_single(
sorted_score_list, single_learnware_list
)
weight_list, mixture_learnware_list = self._search_by_rkme_spec_mixture(
learnware_list, user_rkme, max_search_num
)
if search_method == "auto":
weight_list, mixture_learnware_list = self._search_by_rkme_spec_mixture_auto(
learnware_list, user_rkme, max_search_num
)
elif search_method == "greedy":
weight_list, mixture_learnware_list = self._search_by_rkme_spec_mixture_greedy(
learnware_list, user_rkme, max_search_num
)
else:
logger.warning("f{search_method} not supported!")
return sorted_score_list, single_learnware_list, mixture_learnware_list

def delete_learnware(self, id: str) -> bool:


Loading…
Cancel
Save