diff --git a/examples/dataset_image_workflow/get_data.py b/examples/dataset_image_workflow/get_data.py
index e0ee9bc..c3af534 100644
--- a/examples/dataset_image_workflow/get_data.py
+++ b/examples/dataset_image_workflow/get_data.py
@@ -192,7 +192,7 @@ def get_zca_matrix(X, reg_coef=0.1):
 
 
 def layernorm_data(X):
     X_processed = X - torch.mean(X, [1, 2, 3], keepdim=True)
-    X_processed = X_processed / torch.sqrt(torch.sum(X_processed ** 2, [1, 2, 3], keepdim=True))
+    X_processed = X_processed / torch.sqrt(torch.sum(X_processed**2, [1, 2, 3], keepdim=True))
     return X_processed
@@ -240,7 +240,10 @@ def augment(images, dc_aug_param, device):
         def scalefun(i):
             h = int((np.random.uniform(1 - scale, 1 + scale)) * shape[2])
             w = int((np.random.uniform(1 - scale, 1 + scale)) * shape[2])
-            tmp = F.interpolate(images[i : i + 1], [h, w],)[0]
+            tmp = F.interpolate(
+                images[i : i + 1],
+                [h, w],
+            )[0]
             mhw = max(h, w, shape[2], shape[3])
             im_ = torch.zeros(shape[1], mhw, mhw, dtype=torch.float, device=device)
             r = int((mhw - h) / 2)
diff --git a/examples/dataset_m5_workflow/m5/utils.py b/examples/dataset_m5_workflow/m5/utils.py
index c21a297..721eee2 100644
--- a/examples/dataset_m5_workflow/m5/utils.py
+++ b/examples/dataset_m5_workflow/m5/utils.py
@@ -70,7 +70,7 @@ def measure_aux_algo(idx, test_sample, model):
 
 # Simple "Memory profilers" to see memory usage
 def get_memory_usage():
-    return np.round(psutil.Process(os.getpid()).memory_info()[0] / 2.0 ** 30, 2)
+    return np.round(psutil.Process(os.getpid()).memory_info()[0] / 2.0**30, 2)
 
 
 def sizeof_fmt(num, suffix="B"):
@@ -84,7 +84,7 @@ def sizeof_fmt(num, suffix="B"):
 # Memory Reducer
 def reduce_mem_usage(df, float16_flag=True, verbose=True):
     numerics = ["int16", "int32", "int64", "float16", "float32", "float64"]
-    start_mem = df.memory_usage().sum() / 1024 ** 2
+    start_mem = df.memory_usage().sum() / 1024**2
     for col in df.columns:
         col_type = df[col].dtypes
         if col_type in numerics:
@@ -106,7 +106,7 @@ def reduce_mem_usage(df, float16_flag=True, verbose=True):
                     df[col] = df[col].astype(np.float32)
                 else:
                     df[col] = df[col].astype(np.float64)
-    end_mem = df.memory_usage().sum() / 1024 ** 2
+    end_mem = df.memory_usage().sum() / 1024**2
     if verbose:
         print(
             "Mem. usage decreased to {:5.2f} Mb ({:.1f}% reduction)".format(
diff --git a/examples/dataset_m5_workflow/upload.py b/examples/dataset_m5_workflow/upload.py
index 4e26c36..0c9e209 100644
--- a/examples/dataset_m5_workflow/upload.py
+++ b/examples/dataset_m5_workflow/upload.py
@@ -69,8 +69,15 @@ def main():
         }
         res = session.post(
             submit_url,
-            data={"semantic_specification": json.dumps(semantic_specification),},
-            files={"learnware_file": open(os.path.join(os.path.abspath("."), "learnware_pool", learnware), "rb",)},
+            data={
+                "semantic_specification": json.dumps(semantic_specification),
+            },
+            files={
+                "learnware_file": open(
+                    os.path.join(os.path.abspath("."), "learnware_pool", learnware),
+                    "rb",
+                )
+            },
         )
 
         assert json.loads(res.text)["code"] == 0, "Upload error"
diff --git a/examples/dataset_pfs_workflow/pfs/pfs_cross_transfer.py b/examples/dataset_pfs_workflow/pfs/pfs_cross_transfer.py
index a106fb7..93a3fa3 100644
--- a/examples/dataset_pfs_workflow/pfs/pfs_cross_transfer.py
+++ b/examples/dataset_pfs_workflow/pfs/pfs_cross_transfer.py
@@ -67,7 +67,7 @@ def get_split_errs(algo):
     for tmp in range(len(proportion_list)):
         model = lgb.LGBMModel(
             boosting_type="gbdt",
-            num_leaves=2 ** 7 - 1,
+            num_leaves=2**7 - 1,
             learning_rate=0.01,
             objective="rmse",
             metric="rmse",
@@ -119,7 +119,7 @@ def get_errors(algo):
     if algo == "lgb":
         model = lgb.LGBMModel(
             boosting_type="gbdt",
-            num_leaves=2 ** 7 - 1,
+            num_leaves=2**7 - 1,
             learning_rate=0.01,
             objective="rmse",
             metric="rmse",
diff --git a/examples/dataset_pfs_workflow/upload.py b/examples/dataset_pfs_workflow/upload.py
index d09bc46..9719230 100644
--- a/examples/dataset_pfs_workflow/upload.py
+++ b/examples/dataset_pfs_workflow/upload.py
@@ -72,8 +72,15 @@ def main():
         }
         res = session.post(
             submit_url,
-            data={"semantic_specification": json.dumps(semantic_specification),},
-            files={"learnware_file": open(os.path.join(os.path.abspath("."), "learnware_pool", learnware), "rb",)},
+            data={
+                "semantic_specification": json.dumps(semantic_specification),
+            },
+            files={
+                "learnware_file": open(
+                    os.path.join(os.path.abspath("."), "learnware_pool", learnware),
+                    "rb",
+                )
+            },
         )
 
         assert json.loads(res.text)["code"] == 0, "Upload error"
diff --git a/examples/workflow_by_code/main.py b/examples/workflow_by_code/main.py
index 9eafb6e..3534aae 100644
--- a/examples/workflow_by_code/main.py
+++ b/examples/workflow_by_code/main.py
@@ -19,7 +19,10 @@ curr_root = os.path.dirname(os.path.abspath(__file__))
 
 user_semantic = {
     "Data": {"Values": ["Table"], "Type": "Class"},
-    "Task": {"Values": ["Classification"], "Type": "Class",},
+    "Task": {
+        "Values": ["Classification"],
+        "Type": "Class",
+    },
     "Library": {"Values": ["Scikit-learn"], "Type": "Class"},
     "Scenario": {"Values": ["Education"], "Type": "Tag"},
     "Description": {"Values": "", "Type": "String"},
diff --git a/learnware/client/container.py b/learnware/client/container.py
index a999e1d..15236b0 100644
--- a/learnware/client/container.py
+++ b/learnware/client/container.py
@@ -18,7 +18,6 @@ logger = get_module_logger(module_name="client_container")
 
 class ModelEnvContainer(BaseModel):
     def __init__(self, model_config: dict, learnware_zippath: str):
-
         self.model_script = os.path.join(C.package_path, "client", "scripts", "run_model.py")
         self.model_config = model_config
         self.conda_env = f"learnware_{shortuuid.uuid()}"
diff --git a/learnware/client/learnware_client.py b/learnware/client/learnware_client.py
index 8d25144..723267f 100644
--- a/learnware/client/learnware_client.py
+++ b/learnware/client/learnware_client.py
@@ -104,8 +104,13 @@ class LearnwareClient:
         for chunk in file_chunks(learnware_file):
             response = requests.post(
                 url_upload,
-                files={"chunk_file": chunk,},
-                data={"file_hash": file_hash, "chunk_begin": begin,},
+                files={
+                    "chunk_file": chunk,
+                },
+                data={
+                    "file_hash": file_hash,
+                    "chunk_begin": begin,
+                },
                 headers=self.headers,
             )
 
@@ -123,7 +128,10 @@ class LearnwareClient:
 
         response = requests.post(
             url_add,
-            json={"file_hash": file_hash, "semantic_specification": json.dumps(semantic_specification),},
+            json={
+                "file_hash": file_hash,
+                "semantic_specification": json.dumps(semantic_specification),
+            },
             headers=self.headers,
         )
 
@@ -137,7 +145,14 @@ class LearnwareClient:
 
     def download_learnware(self, learnware_id, save_path):
         url = f"{self.host}/engine/download_learnware"
-        response = requests.get(url, params={"learnware_id": learnware_id,}, headers=self.headers, stream=True,)
+        response = requests.get(
+            url,
+            params={
+                "learnware_id": learnware_id,
+            },
+            headers=self.headers,
+            stream=True,
+        )
 
         if response.status_code != 200:
             raise Exception("download failed: " + json.dumps(response.json()))
@@ -269,7 +284,6 @@ class LearnwareClient:
     def create_semantic_specification(
         self, name, description, data_type, task_type, library_type, senarioes, input_description, output_description
     ):
-
        semantic_specification = dict()
        semantic_specification["Input"] = input_description
        semantic_specification["Output"] = output_description
diff --git a/learnware/client/package_utils.py b/learnware/client/package_utils.py
index 0088bb4..06ff689 100644
--- a/learnware/client/package_utils.py
+++ b/learnware/client/package_utils.py
@@ -24,8 +24,7 @@ def try_to_run(args, timeout=5, retry=5):
 
 
 def parse_pip_requirement(line: str):
-    """Parse pip requirement line to package name
-    """
+    """Parse pip requirement line to package name"""
 
     line = line.strip()
 
@@ -47,8 +46,7 @@ def parse_pip_requirement(line: str):
 
 
 def read_pip_packages_from_requirements(requirements_file: str) -> List[str]:
-    """Read requiremnts.txt and parse it to list
-    """
+    """Read requiremnts.txt and parse it to list"""
 
     packages = []
     lines = []
@@ -174,7 +172,6 @@ def filter_nonexist_conda_packages_file(yaml_file: str, output_yaml_file: str):
 
 
 def filter_nonexist_pip_packages_file(requirements_file: str, output_file: str):
-
     packages, lines = read_pip_packages_from_requirements(requirements_file)
     exist_packages, nonexist_packages = filter_nonexist_pip_packages(packages)
 
diff --git a/learnware/client/utils.py b/learnware/client/utils.py
index d192c85..b6d9c8a 100644
--- a/learnware/client/utils.py
+++ b/learnware/client/utils.py
@@ -10,14 +10,11 @@ logger = get_module_logger(module_name="client_utils")
 
 
 def system_execute(args, timeout=None):
-
-    com_process = subprocess.run(
-        args, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE, timeout=timeout
-    )
+    com_process = subprocess.run(args, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE, timeout=timeout)
     try:
         com_process.check_returncode()
     except subprocess.CalledProcessError as err:
-        print(com_process.stderr)
+        print("System Execute Error:", str(com_process.stderr))
         raise err
 
 
@@ -27,14 +24,14 @@ def remove_enviroment(conda_env):
 
 def install_environment(zip_path, conda_env):
     """Install environment of a learnware
-    
+
     Parameters
     ----------
     zip_path : str
         Path of the learnware zip file
     conda_env : str
         a new conda environment will be created with the given name;
-    
+
     Raises
     ------
     Exception
@@ -59,7 +56,7 @@
                 z_file.extract(member="requirements.txt", path=tempdir)
                 requirements_path: str = os.path.join(tempdir, "requirements.txt")
                 requirements_path_filter: str = os.path.join(tempdir, "requirements_filter.txt")
-                logger.info(f"checking the avaliabe pip packages for {yaml_path}")
+                logger.info(f"checking the avaliabe pip packages for {conda_env}")
                 filter_nonexist_pip_packages_file(
                     requirements_file=requirements_path, output_file=requirements_path_filter
                 )
diff --git a/learnware/config.py b/learnware/config.py
index 137f15b..7fdcc91 100644
--- a/learnware/config.py
+++ b/learnware/config.py
@@ -72,7 +72,10 @@ os.makedirs(DATABASE_PATH, exist_ok=True)
 os.makedirs(STDOUT_PATH, exist_ok=True)
 
 semantic_config = {
-    "Data": {"Values": ["Table", "Image", "Video", "Text", "Audio"], "Type": "Class",},  # Choose only one class
+    "Data": {
+        "Values": ["Table", "Image", "Video", "Text", "Audio"],
+        "Type": "Class",
+    },  # Choose only one class
     "Task": {
         "Values": [
             "Classification",
@@ -113,8 +116,14 @@
         ],
         "Type": "Tag",  # Choose one or more tags
     },
-    "Description": {"Values": None, "Type": "String",},
-    "Name": {"Values": None, "Type": "String",},
+    "Description": {
+        "Values": None,
+        "Type": "String",
+    },
+    "Name": {
+        "Values": None,
+        "Type": "String",
+    },
 }
 
 _DEFAULT_CONFIG = {
@@ -128,7 +137,10 @@
     "learnware_pool_path": LEARNWARE_POOL_PATH,
     "learnware_zip_pool_path": LEARNWARE_ZIP_POOL_PATH,
     "learnware_folder_pool_path": LEARNWARE_FOLDER_POOL_PATH,
-    "learnware_folder_config": {"yaml_file": "learnware.yaml", "module_file": "__init__.py",},
+    "learnware_folder_config": {
+        "yaml_file": "learnware.yaml",
+        "module_file": "__init__.py",
+    },
     "database_url": f"sqlite:///{DATABASE_PATH}",
     "max_reduced_set_size": 1310720,
     "backend_host": "http://www.lamda.nju.edu.cn/learnware/api",
diff --git a/learnware/learnware/__init__.py b/learnware/learnware/__init__.py
index 70e35d6..a094521 100644
--- a/learnware/learnware/__init__.py
+++ b/learnware/learnware/__init__.py
@@ -31,7 +31,10 @@ def get_learnware_from_dirpath(id: str, semantic_spec: dict, learnware_dirpath:
         The contructed learnware object, return None if build failed
     """
     learnware_config = {
-        "model": {"class_name": "Model", "kwargs": {},},
+        "model": {
+            "class_name": "Model",
+            "kwargs": {},
+        },
         "stat_specifications": [
             {
                 "module_path": "learnware.specification",
diff --git a/learnware/learnware/reuse.py b/learnware/learnware/reuse.py
index 0b68667..b618002 100644
--- a/learnware/learnware/reuse.py
+++ b/learnware/learnware/reuse.py
@@ -302,7 +302,7 @@ class AveragingReuser(BaseReuser):
                 pred_y = pred_y.detach().cpu().numpy()
             if not isinstance(pred_y, np.ndarray):
                 raise TypeError(f"Model output must be np.ndarray or torch.Tensor")
-            
+
             if len(pred_y.shape) == 1:
                 pred_y = pred_y.reshape(-1, 1)
             else:
@@ -312,7 +312,7 @@
             elif self.mode == "vote_by_prob":
                 pred_y = softmax(pred_y, axis=-1)
             preds.append(pred_y)
-        
+
         if self.mode == "vote_by_prob":
             return np.mean(preds, axis=0)
         else:
@@ -325,9 +325,9 @@
 
 class EnsemblePruningReuser(BaseReuser):
     """
-    Baseline Multiple Learnware Reuser uing Marign Distribution guided multi-objective evolutionary Ensemble Pruning (MDEP) Method.
-    
-    References: [1] Yu-Chang Wu, Yi-Xiao He, Chao Qian, and Zhi-Hua Zhou. Multi-objective Evolutionary Ensemble Pruning Guided by Margin Distribution. In: Proceedings of the 17th International Conference on Parallel Problem Solving from Nature (PPSN'22), Dortmund, Germany, 2022. 
+    Baseline Multiple Learnware Reuser uing Marign Distribution guided multi-objective evolutionary Ensemble Pruning (MDEP) Method.
+
+    References: [1] Yu-Chang Wu, Yi-Xiao He, Chao Qian, and Zhi-Hua Zhou. Multi-objective Evolutionary Ensemble Pruning Guided by Margin Distribution. In: Proceedings of the 17th International Conference on Parallel Problem Solving from Nature (PPSN'22), Dortmund, Germany, 2022.
     """
 
     def __init__(self, learnware_list: List[Learnware], mode: str):
@@ -359,7 +359,7 @@
             - The ground truth of validation set.
             - The dimension is (number of instances, 1).
         maxgen : int
-            - The maximum number of iteration rounds. 
+            - The maximum number of iteration rounds.
 
         Returns
         -------
@@ -443,7 +443,7 @@
             - The ground truth of validation set.
             - The dimension is (number of instances, 1).
         maxgen : int
-            - The maximum number of iteration rounds. 
+            - The maximum number of iteration rounds.
 
         Returns
         -------
@@ -557,7 +557,7 @@
             - The ground truth of validation set.
             - The dimension is (number of instances, 1).
         maxgen : int
-            - The maximum number of iteration rounds. 
+            - The maximum number of iteration rounds.
 
         Returns
        -------
@@ -645,7 +645,7 @@
 
     def _get_predict(self, X: np.ndarray, selected_idxes: List[int]):
         """Concatenate the output of learnwares corresponding to selected_idxes
-        
+
         Parameters
         ----------
         X : np.ndarray
diff --git a/learnware/specification/base.py b/learnware/specification/base.py
index 732ff92..56c1ad9 100644
--- a/learnware/specification/base.py
+++ b/learnware/specification/base.py
@@ -74,7 +74,7 @@ class Specification:
 
     def update_stat_spec(self, *args, **kwargs):
         """Update the statistical specification by the way of 'name'='value'
-        or use class name as default name 
+        or use class name as default name
         """
         for _v in args:
             self.stat_spec[_v.__class__.__name__] = _v
diff --git a/learnware/specification/rkme.py b/learnware/specification/rkme.py
index 6f9471a..68c572f 100644
--- a/learnware/specification/rkme.py
+++ b/learnware/specification/rkme.py
@@ -428,7 +428,9 @@
         rkme_to_save["beta"] = rkme_to_save["beta"].tolist()
         rkme_to_save["device"] = "gpu" if rkme_to_save["cuda_idx"] != -1 else "cpu"
         json.dump(
-            rkme_to_save, codecs.open(save_path, "w", encoding="utf-8"), separators=(",", ":"),
+            rkme_to_save,
+            codecs.open(save_path, "w", encoding="utf-8"),
+            separators=(",", ":"),
         )
 
     def load(self, filepath: str) -> bool:
@@ -521,7 +523,7 @@ def torch_rbf_kernel(x1, x2, gamma) -> torch.Tensor:
     """
     x1 = x1.double()
     x2 = x2.double()
-    X12norm = torch.sum(x1 ** 2, 1, keepdim=True) - 2 * x1 @ x2.T + torch.sum(x2 ** 2, 1, keepdim=True).T
+    X12norm = torch.sum(x1**2, 1, keepdim=True) - 2 * x1 @ x2.T + torch.sum(x2**2, 1, keepdim=True).T
     return torch.exp(-X12norm * gamma)
diff --git a/tests/test_learnware_client/test_reuse.py b/tests/test_learnware_client/test_reuse.py
index 699a138..b1659f2 100644
--- a/tests/test_learnware_client/test_reuse.py
+++ b/tests/test_learnware_client/test_reuse.py
@@ -33,7 +33,6 @@ if __name__ == "__main__":
         learnware_list.append(learnware)
 
     with LearnwaresContainer(learnware_list, zip_paths) as env_container:
-
        learnware_list = env_container.get_learnware_list_with_container()
        reuser = AveragingReuser(learnware_list, mode="vote")
        input_array = np.random.randint(0, 3, size=(20, 9))
diff --git a/tests/test_workflow/test_workflow.py b/tests/test_workflow/test_workflow.py
index 403244c..89e71aa 100644
--- a/tests/test_workflow/test_workflow.py
+++ b/tests/test_workflow/test_workflow.py
@@ -19,7 +19,10 @@ curr_root = os.path.dirname(os.path.abspath(__file__))
 
 user_semantic = {
     "Data": {"Values": ["Tabular"], "Type": "Class"},
-    "Task": {"Values": ["Classification"], "Type": "Class",},
+    "Task": {
+        "Values": ["Classification"],
+        "Type": "Class",
+    },
     "Library": {"Values": ["Scikit-learn"], "Type": "Class"},
     "Scenario": {"Values": ["Education"], "Type": "Tag"},
     "Description": {"Values": "", "Type": "String"},