
Merge pull request #9 from Learnware-LAMDA/bixd/dev

Fix bugs in the container when installing via pip
tags/v0.3.2
bxdd GitHub 2 years ago
commit 5d2dcc76dc
17 changed files with 96 additions and 50 deletions
  1. examples/dataset_image_workflow/get_data.py (+5, -2)
  2. examples/dataset_m5_workflow/m5/utils.py (+3, -3)
  3. examples/dataset_m5_workflow/upload.py (+9, -2)
  4. examples/dataset_pfs_workflow/pfs/pfs_cross_transfer.py (+2, -2)
  5. examples/dataset_pfs_workflow/upload.py (+9, -2)
  6. examples/workflow_by_code/main.py (+4, -1)
  7. learnware/client/container.py (+0, -1)
  8. learnware/client/learnware_client.py (+19, -5)
  9. learnware/client/package_utils.py (+2, -5)
  10. learnware/client/utils.py (+5, -8)
  11. learnware/config.py (+16, -4)
  12. learnware/learnware/__init__.py (+4, -1)
  13. learnware/learnware/reuse.py (+9, -9)
  14. learnware/specification/base.py (+1, -1)
  15. learnware/specification/rkme.py (+4, -2)
  16. tests/test_learnware_client/test_reuse.py (+0, -1)
  17. tests/test_workflow/test_workflow.py (+4, -1)

examples/dataset_image_workflow/get_data.py (+5, -2)

@@ -192,7 +192,7 @@ def get_zca_matrix(X, reg_coef=0.1):
 def layernorm_data(X):
     X_processed = X - torch.mean(X, [1, 2, 3], keepdim=True)
-    X_processed = X_processed / torch.sqrt(torch.sum(X_processed ** 2, [1, 2, 3], keepdim=True))
+    X_processed = X_processed / torch.sqrt(torch.sum(X_processed**2, [1, 2, 3], keepdim=True))
     return X_processed

@@ -240,7 +240,10 @@ def augment(images, dc_aug_param, device):
     def scalefun(i):
         h = int((np.random.uniform(1 - scale, 1 + scale)) * shape[2])
         w = int((np.random.uniform(1 - scale, 1 + scale)) * shape[2])
-        tmp = F.interpolate(images[i : i + 1], [h, w],)[0]
+        tmp = F.interpolate(
+            images[i : i + 1],
+            [h, w],
+        )[0]
         mhw = max(h, w, shape[2], shape[3])
         im_ = torch.zeros(shape[1], mhw, mhw, dtype=torch.float, device=device)
         r = int((mhw - h) / 2)


examples/dataset_m5_workflow/m5/utils.py (+3, -3)

@@ -70,7 +70,7 @@ def measure_aux_algo(idx, test_sample, model):
 # Simple "Memory profilers" to see memory usage
 def get_memory_usage():
-    return np.round(psutil.Process(os.getpid()).memory_info()[0] / 2.0 ** 30, 2)
+    return np.round(psutil.Process(os.getpid()).memory_info()[0] / 2.0**30, 2)

 def sizeof_fmt(num, suffix="B"):

@@ -84,7 +84,7 @@ def sizeof_fmt(num, suffix="B"):
 # Memory Reducer
 def reduce_mem_usage(df, float16_flag=True, verbose=True):
     numerics = ["int16", "int32", "int64", "float16", "float32", "float64"]
-    start_mem = df.memory_usage().sum() / 1024 ** 2
+    start_mem = df.memory_usage().sum() / 1024**2
     for col in df.columns:
         col_type = df[col].dtypes
         if col_type in numerics:

@@ -106,7 +106,7 @@ def reduce_mem_usage(df, float16_flag=True, verbose=True):
                 df[col] = df[col].astype(np.float32)
             else:
                 df[col] = df[col].astype(np.float64)
-    end_mem = df.memory_usage().sum() / 1024 ** 2
+    end_mem = df.memory_usage().sum() / 1024**2
     if verbose:
         print(
             "Mem. usage decreased to {:5.2f} Mb ({:.1f}% reduction)".format(


examples/dataset_m5_workflow/upload.py (+9, -2)

@@ -69,8 +69,15 @@ def main():
         }
         res = session.post(
             submit_url,
-            data={"semantic_specification": json.dumps(semantic_specification),},
-            files={"learnware_file": open(os.path.join(os.path.abspath("."), "learnware_pool", learnware), "rb",)},
+            data={
+                "semantic_specification": json.dumps(semantic_specification),
+            },
+            files={
+                "learnware_file": open(
+                    os.path.join(os.path.abspath("."), "learnware_pool", learnware),
+                    "rb",
+                )
+            },
         )
         assert json.loads(res.text)["code"] == 0, "Upload error"




examples/dataset_pfs_workflow/pfs/pfs_cross_transfer.py (+2, -2)

@@ -67,7 +67,7 @@ def get_split_errs(algo):
     for tmp in range(len(proportion_list)):
         model = lgb.LGBMModel(
             boosting_type="gbdt",
-            num_leaves=2 ** 7 - 1,
+            num_leaves=2**7 - 1,
             learning_rate=0.01,
             objective="rmse",
             metric="rmse",

@@ -119,7 +119,7 @@ def get_errors(algo):
     if algo == "lgb":
         model = lgb.LGBMModel(
             boosting_type="gbdt",
-            num_leaves=2 ** 7 - 1,
+            num_leaves=2**7 - 1,
             learning_rate=0.01,
             objective="rmse",
             metric="rmse",


examples/dataset_pfs_workflow/upload.py (+9, -2)

@@ -72,8 +72,15 @@ def main():
         }
         res = session.post(
             submit_url,
-            data={"semantic_specification": json.dumps(semantic_specification),},
-            files={"learnware_file": open(os.path.join(os.path.abspath("."), "learnware_pool", learnware), "rb",)},
+            data={
+                "semantic_specification": json.dumps(semantic_specification),
+            },
+            files={
+                "learnware_file": open(
+                    os.path.join(os.path.abspath("."), "learnware_pool", learnware),
+                    "rb",
+                )
+            },
         )
         assert json.loads(res.text)["code"] == 0, "Upload error"




examples/workflow_by_code/main.py (+4, -1)

@@ -19,7 +19,10 @@ curr_root = os.path.dirname(os.path.abspath(__file__))
 user_semantic = {
     "Data": {"Values": ["Table"], "Type": "Class"},
-    "Task": {"Values": ["Classification"], "Type": "Class",},
+    "Task": {
+        "Values": ["Classification"],
+        "Type": "Class",
+    },
     "Library": {"Values": ["Scikit-learn"], "Type": "Class"},
     "Scenario": {"Values": ["Education"], "Type": "Tag"},
     "Description": {"Values": "", "Type": "String"},


learnware/client/container.py (+0, -1)

@@ -18,7 +18,6 @@ logger = get_module_logger(module_name="client_container")
 class ModelEnvContainer(BaseModel):
     def __init__(self, model_config: dict, learnware_zippath: str):
-
         self.model_script = os.path.join(C.package_path, "client", "scripts", "run_model.py")
         self.model_config = model_config
         self.conda_env = f"learnware_{shortuuid.uuid()}"


learnware/client/learnware_client.py (+19, -5)

@@ -104,8 +104,13 @@ class LearnwareClient:
         for chunk in file_chunks(learnware_file):
             response = requests.post(
                 url_upload,
-                files={"chunk_file": chunk,},
-                data={"file_hash": file_hash, "chunk_begin": begin,},
+                files={
+                    "chunk_file": chunk,
+                },
+                data={
+                    "file_hash": file_hash,
+                    "chunk_begin": begin,
+                },
                 headers=self.headers,
             )

@@ -123,7 +128,10 @@ class LearnwareClient:
         response = requests.post(
             url_add,
-            json={"file_hash": file_hash, "semantic_specification": json.dumps(semantic_specification),},
+            json={
+                "file_hash": file_hash,
+                "semantic_specification": json.dumps(semantic_specification),
+            },
             headers=self.headers,
         )

@@ -137,7 +145,14 @@ class LearnwareClient:
     def download_learnware(self, learnware_id, save_path):
         url = f"{self.host}/engine/download_learnware"
-        response = requests.get(url, params={"learnware_id": learnware_id,}, headers=self.headers, stream=True,)
+        response = requests.get(
+            url,
+            params={
+                "learnware_id": learnware_id,
+            },
+            headers=self.headers,
+            stream=True,
+        )
         if response.status_code != 200:
             raise Exception("download failed: " + json.dumps(response.json()))

@@ -269,7 +284,6 @@ class LearnwareClient:
     def create_semantic_specification(
         self, name, description, data_type, task_type, library_type, senarioes, input_description, output_description
     ):
-
         semantic_specification = dict()
         semantic_specification["Input"] = input_description
         semantic_specification["Output"] = output_description


learnware/client/package_utils.py (+2, -5)

@@ -24,8 +24,7 @@ def try_to_run(args, timeout=5, retry=5):
 def parse_pip_requirement(line: str):
-    """Parse pip requirement line to package name
-    """
+    """Parse pip requirement line to package name"""

     line = line.strip()

@@ -47,8 +46,7 @@ def parse_pip_requirement(line: str):
 def read_pip_packages_from_requirements(requirements_file: str) -> List[str]:
-    """Read requiremnts.txt and parse it to list
-    """
+    """Read requiremnts.txt and parse it to list"""

     packages = []
     lines = []

@@ -174,7 +172,6 @@ def filter_nonexist_conda_packages_file(yaml_file: str, output_yaml_file: str):
 def filter_nonexist_pip_packages_file(requirements_file: str, output_file: str):
-
     packages, lines = read_pip_packages_from_requirements(requirements_file)

     exist_packages, nonexist_packages = filter_nonexist_pip_packages(packages)


learnware/client/utils.py (+5, -8)

@@ -10,14 +10,11 @@ logger = get_module_logger(module_name="client_utils")
 def system_execute(args, timeout=None):
-
-    com_process = subprocess.run(
-        args, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE, timeout=timeout
-    )
+    com_process = subprocess.run(args, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE, timeout=timeout)
     try:
         com_process.check_returncode()
     except subprocess.CalledProcessError as err:
-        print(com_process.stderr)
+        print("System Execute Error:", str(com_process.stderr))
         raise err

@@ -27,14 +24,14 @@ def remove_enviroment(conda_env):
 def install_environment(zip_path, conda_env):
     """Install environment of a learnware
     Parameters
     ----------
     zip_path : str
         Path of the learnware zip file
     conda_env : str
         a new conda environment will be created with the given name;
     Raises
     ------
     Exception

@@ -59,7 +56,7 @@ def install_environment(zip_path, conda_env):
             z_file.extract(member="requirements.txt", path=tempdir)
             requirements_path: str = os.path.join(tempdir, "requirements.txt")
             requirements_path_filter: str = os.path.join(tempdir, "requirements_filter.txt")
-            logger.info(f"checking the avaliabe pip packages for {yaml_path}")
+            logger.info(f"checking the avaliabe pip packages for {conda_env}")
             filter_nonexist_pip_packages_file(
                 requirements_file=requirements_path, output_file=requirements_path_filter
             )
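The system_execute change above is the part of this commit that surfaces pip/conda errors while a learnware container environment is being installed. For convenience, here is a self-contained version of the patched helper as it appears in the diff; the demo call under __main__ is illustrative only, not from the repository.

import subprocess


def system_execute(args, timeout=None):
    # Run the command, discard stdout, capture stderr, and re-raise with a
    # readable error message if the command exits non-zero.
    com_process = subprocess.run(args, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE, timeout=timeout)
    try:
        com_process.check_returncode()
    except subprocess.CalledProcessError as err:
        print("System Execute Error:", str(com_process.stderr))
        raise err


if __name__ == "__main__":
    # Illustrative call; the real callers pass conda/pip commands.
    system_execute(["python", "-c", "print('ok')"])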


learnware/config.py (+16, -4)

@@ -72,7 +72,10 @@ os.makedirs(DATABASE_PATH, exist_ok=True)
 os.makedirs(STDOUT_PATH, exist_ok=True)

 semantic_config = {
-    "Data": {"Values": ["Table", "Image", "Video", "Text", "Audio"], "Type": "Class",},  # Choose only one class
+    "Data": {
+        "Values": ["Table", "Image", "Video", "Text", "Audio"],
+        "Type": "Class",
+    },  # Choose only one class
     "Task": {
         "Values": [
             "Classification",

@@ -113,8 +116,14 @@ semantic_config = {
         ],
         "Type": "Tag",  # Choose one or more tags
     },
-    "Description": {"Values": None, "Type": "String",},
-    "Name": {"Values": None, "Type": "String",},
+    "Description": {
+        "Values": None,
+        "Type": "String",
+    },
+    "Name": {
+        "Values": None,
+        "Type": "String",
+    },
 }

@@ -128,7 +137,10 @@ _DEFAULT_CONFIG = {
     "learnware_pool_path": LEARNWARE_POOL_PATH,
     "learnware_zip_pool_path": LEARNWARE_ZIP_POOL_PATH,
     "learnware_folder_pool_path": LEARNWARE_FOLDER_POOL_PATH,
-    "learnware_folder_config": {"yaml_file": "learnware.yaml", "module_file": "__init__.py",},
+    "learnware_folder_config": {
+        "yaml_file": "learnware.yaml",
+        "module_file": "__init__.py",
+    },
     "database_url": f"sqlite:///{DATABASE_PATH}",
     "max_reduced_set_size": 1310720,
     "backend_host": "http://www.lamda.nju.edu.cn/learnware/api",


learnware/learnware/__init__.py (+4, -1)

@@ -31,7 +31,10 @@ def get_learnware_from_dirpath(id: str, semantic_spec: dict, learnware_dirpath:
         The contructed learnware object, return None if build failed
     """
     learnware_config = {
-        "model": {"class_name": "Model", "kwargs": {},},
+        "model": {
+            "class_name": "Model",
+            "kwargs": {},
+        },
         "stat_specifications": [
             {
                 "module_path": "learnware.specification",


learnware/learnware/reuse.py (+9, -9)

@@ -302,7 +302,7 @@ class AveragingReuser(BaseReuser):
             pred_y = pred_y.detach().cpu().numpy()
             if not isinstance(pred_y, np.ndarray):
                 raise TypeError(f"Model output must be np.ndarray or torch.Tensor")
             if len(pred_y.shape) == 1:
                 pred_y = pred_y.reshape(-1, 1)
             else:

@@ -312,7 +312,7 @@ class AveragingReuser(BaseReuser):
             elif self.mode == "vote_by_prob":
                 pred_y = softmax(pred_y, axis=-1)
             preds.append(pred_y)
         if self.mode == "vote_by_prob":
             return np.mean(preds, axis=0)
         else:

@@ -325,9 +325,9 @@ class AveragingReuser(BaseReuser):
 class EnsemblePruningReuser(BaseReuser):
     """
-    Baseline Multiple Learnware Reuser uing Marign Distribution guided multi-objective evolutionary Ensemble Pruning (MDEP) Method.
-    References: [1] Yu-Chang Wu, Yi-Xiao He, Chao Qian, and Zhi-Hua Zhou. Multi-objective Evolutionary Ensemble Pruning Guided by Margin Distribution. In: Proceedings of the 17th International Conference on Parallel Problem Solving from Nature (PPSN'22), Dortmund, Germany, 2022.
+    Baseline Multiple Learnware Reuser uing Marign Distribution guided multi-objective evolutionary Ensemble Pruning (MDEP) Method.
+    References: [1] Yu-Chang Wu, Yi-Xiao He, Chao Qian, and Zhi-Hua Zhou. Multi-objective Evolutionary Ensemble Pruning Guided by Margin Distribution. In: Proceedings of the 17th International Conference on Parallel Problem Solving from Nature (PPSN'22), Dortmund, Germany, 2022.
     """

     def __init__(self, learnware_list: List[Learnware], mode: str):

@@ -359,7 +359,7 @@ class EnsemblePruningReuser(BaseReuser):
             - The ground truth of validation set.
             - The dimension is (number of instances, 1).
         maxgen : int
-            - The maximum number of iteration rounds.
+            - The maximum number of iteration rounds.

         Returns
         -------

@@ -443,7 +443,7 @@ class EnsemblePruningReuser(BaseReuser):
             - The ground truth of validation set.
             - The dimension is (number of instances, 1).
         maxgen : int
-            - The maximum number of iteration rounds.
+            - The maximum number of iteration rounds.

         Returns
         -------

@@ -557,7 +557,7 @@ class EnsemblePruningReuser(BaseReuser):
             - The ground truth of validation set.
             - The dimension is (number of instances, 1).
         maxgen : int
-            - The maximum number of iteration rounds.
+            - The maximum number of iteration rounds.

         Returns
         -------

@@ -645,7 +645,7 @@ class EnsemblePruningReuser(BaseReuser):
     def _get_predict(self, X: np.ndarray, selected_idxes: List[int]):
         """Concatenate the output of learnwares corresponding to selected_idxes
         Parameters
         ----------
         X : np.ndarray


learnware/specification/base.py (+1, -1)

@@ -74,7 +74,7 @@ class Specification:
     def update_stat_spec(self, *args, **kwargs):
         """Update the statistical specification by the way of 'name'='value'
-        or use class name as default name
+        or use class name as default name
         """
         for _v in args:
             self.stat_spec[_v.__class__.__name__] = _v


learnware/specification/rkme.py (+4, -2)

@@ -428,7 +428,9 @@ class RKMEStatSpecification(BaseStatSpecification):
         rkme_to_save["beta"] = rkme_to_save["beta"].tolist()
         rkme_to_save["device"] = "gpu" if rkme_to_save["cuda_idx"] != -1 else "cpu"
         json.dump(
-            rkme_to_save, codecs.open(save_path, "w", encoding="utf-8"), separators=(",", ":"),
+            rkme_to_save,
+            codecs.open(save_path, "w", encoding="utf-8"),
+            separators=(",", ":"),
         )

     def load(self, filepath: str) -> bool:

@@ -521,7 +523,7 @@ def torch_rbf_kernel(x1, x2, gamma) -> torch.Tensor:
     """
     x1 = x1.double()
     x2 = x2.double()
-    X12norm = torch.sum(x1 ** 2, 1, keepdim=True) - 2 * x1 @ x2.T + torch.sum(x2 ** 2, 1, keepdim=True).T
+    X12norm = torch.sum(x1**2, 1, keepdim=True) - 2 * x1 @ x2.T + torch.sum(x2**2, 1, keepdim=True).T
    return torch.exp(-X12norm * gamma)
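For reference, the X12norm expression in torch_rbf_kernel expands the squared Euclidean distance ||a - b||^2 = ||a||^2 - 2 a·b + ||b||^2, so the function returns k(a, b) = exp(-gamma * ||a - b||^2) for every pair of rows. A small self-contained check of the reformatted line against torch.cdist follows; the check itself is illustrative, not part of the repository.

import torch


def torch_rbf_kernel(x1, x2, gamma):
    # Pairwise RBF kernel via the expanded squared-distance formula.
    x1, x2 = x1.double(), x2.double()
    X12norm = torch.sum(x1**2, 1, keepdim=True) - 2 * x1 @ x2.T + torch.sum(x2**2, 1, keepdim=True).T
    return torch.exp(-X12norm * gamma)


a, b = torch.randn(4, 3), torch.randn(5, 3)
reference = torch.exp(-0.5 * torch.cdist(a.double(), b.double()) ** 2)  # gamma = 0.5
assert torch.allclose(torch_rbf_kernel(a, b, gamma=0.5), reference, atol=1e-6)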






tests/test_learnware_client/test_reuse.py (+0, -1)

@@ -33,7 -33,6 @@ if __name__ == "__main__":
         learnware_list.append(learnware)

     with LearnwaresContainer(learnware_list, zip_paths) as env_container:
-
         learnware_list = env_container.get_learnware_list_with_container()
         reuser = AveragingReuser(learnware_list, mode="vote")
         input_array = np.random.randint(0, 3, size=(20, 9))


tests/test_workflow/test_workflow.py (+4, -1)

@@ -19,7 +19,10 @@ curr_root = os.path.dirname(os.path.abspath(__file__))
 user_semantic = {
     "Data": {"Values": ["Tabular"], "Type": "Class"},
-    "Task": {"Values": ["Classification"], "Type": "Class",},
+    "Task": {
+        "Values": ["Classification"],
+        "Type": "Class",
+    },
     "Library": {"Values": ["Scikit-learn"], "Type": "Class"},
     "Scenario": {"Values": ["Education"], "Type": "Tag"},
     "Description": {"Values": "", "Type": "String"},

