From 6ab674f26e1dcce079f4df267f3fefdea2c3668b Mon Sep 17 00:00:00 2001 From: bxdd Date: Tue, 10 Oct 2023 18:58:50 +0800 Subject: [PATCH] [MNT] black format --- examples/dataset_image_workflow/get_data.py | 7 +- examples/dataset_m5_workflow/m5/utils.py | 6 +- examples/dataset_m5_workflow/upload.py | 11 +-- .../pfs/pfs_cross_transfer.py | 4 +- examples/dataset_pfs_workflow/upload.py | 11 +-- examples/workflow_by_code/main.py | 5 +- learnware/client/container.py | 78 ++++++++++--------- learnware/client/scripts/run_model.py | 46 +++++------ learnware/client/utils.py | 17 ++-- learnware/learnware/reuse.py | 16 ++-- tests/test_learnware_client/test_reuse.py | 8 +- tests/test_learnware_upload/test_upload.py | 4 +- tests/test_workflow/test_workflow.py | 5 +- 13 files changed, 100 insertions(+), 118 deletions(-) diff --git a/examples/dataset_image_workflow/get_data.py b/examples/dataset_image_workflow/get_data.py index c3af534..e0ee9bc 100644 --- a/examples/dataset_image_workflow/get_data.py +++ b/examples/dataset_image_workflow/get_data.py @@ -192,7 +192,7 @@ def get_zca_matrix(X, reg_coef=0.1): def layernorm_data(X): X_processed = X - torch.mean(X, [1, 2, 3], keepdim=True) - X_processed = X_processed / torch.sqrt(torch.sum(X_processed**2, [1, 2, 3], keepdim=True)) + X_processed = X_processed / torch.sqrt(torch.sum(X_processed ** 2, [1, 2, 3], keepdim=True)) return X_processed @@ -240,10 +240,7 @@ def augment(images, dc_aug_param, device): def scalefun(i): h = int((np.random.uniform(1 - scale, 1 + scale)) * shape[2]) w = int((np.random.uniform(1 - scale, 1 + scale)) * shape[2]) - tmp = F.interpolate( - images[i : i + 1], - [h, w], - )[0] + tmp = F.interpolate(images[i : i + 1], [h, w],)[0] mhw = max(h, w, shape[2], shape[3]) im_ = torch.zeros(shape[1], mhw, mhw, dtype=torch.float, device=device) r = int((mhw - h) / 2) diff --git a/examples/dataset_m5_workflow/m5/utils.py b/examples/dataset_m5_workflow/m5/utils.py index 721eee2..c21a297 100644 --- a/examples/dataset_m5_workflow/m5/utils.py +++ b/examples/dataset_m5_workflow/m5/utils.py @@ -70,7 +70,7 @@ def measure_aux_algo(idx, test_sample, model): # Simple "Memory profilers" to see memory usage def get_memory_usage(): - return np.round(psutil.Process(os.getpid()).memory_info()[0] / 2.0**30, 2) + return np.round(psutil.Process(os.getpid()).memory_info()[0] / 2.0 ** 30, 2) def sizeof_fmt(num, suffix="B"): @@ -84,7 +84,7 @@ def sizeof_fmt(num, suffix="B"): # Memory Reducer def reduce_mem_usage(df, float16_flag=True, verbose=True): numerics = ["int16", "int32", "int64", "float16", "float32", "float64"] - start_mem = df.memory_usage().sum() / 1024**2 + start_mem = df.memory_usage().sum() / 1024 ** 2 for col in df.columns: col_type = df[col].dtypes if col_type in numerics: @@ -106,7 +106,7 @@ def reduce_mem_usage(df, float16_flag=True, verbose=True): df[col] = df[col].astype(np.float32) else: df[col] = df[col].astype(np.float64) - end_mem = df.memory_usage().sum() / 1024**2 + end_mem = df.memory_usage().sum() / 1024 ** 2 if verbose: print( "Mem. usage decreased to {:5.2f} Mb ({:.1f}% reduction)".format( diff --git a/examples/dataset_m5_workflow/upload.py b/examples/dataset_m5_workflow/upload.py index 0c9e209..4e26c36 100644 --- a/examples/dataset_m5_workflow/upload.py +++ b/examples/dataset_m5_workflow/upload.py @@ -69,15 +69,8 @@ def main(): } res = session.post( submit_url, - data={ - "semantic_specification": json.dumps(semantic_specification), - }, - files={ - "learnware_file": open( - os.path.join(os.path.abspath("."), "learnware_pool", learnware), - "rb", - ) - }, + data={"semantic_specification": json.dumps(semantic_specification),}, + files={"learnware_file": open(os.path.join(os.path.abspath("."), "learnware_pool", learnware), "rb",)}, ) assert json.loads(res.text)["code"] == 0, "Upload error" diff --git a/examples/dataset_pfs_workflow/pfs/pfs_cross_transfer.py b/examples/dataset_pfs_workflow/pfs/pfs_cross_transfer.py index 93a3fa3..a106fb7 100644 --- a/examples/dataset_pfs_workflow/pfs/pfs_cross_transfer.py +++ b/examples/dataset_pfs_workflow/pfs/pfs_cross_transfer.py @@ -67,7 +67,7 @@ def get_split_errs(algo): for tmp in range(len(proportion_list)): model = lgb.LGBMModel( boosting_type="gbdt", - num_leaves=2**7 - 1, + num_leaves=2 ** 7 - 1, learning_rate=0.01, objective="rmse", metric="rmse", @@ -119,7 +119,7 @@ def get_errors(algo): if algo == "lgb": model = lgb.LGBMModel( boosting_type="gbdt", - num_leaves=2**7 - 1, + num_leaves=2 ** 7 - 1, learning_rate=0.01, objective="rmse", metric="rmse", diff --git a/examples/dataset_pfs_workflow/upload.py b/examples/dataset_pfs_workflow/upload.py index 9719230..d09bc46 100644 --- a/examples/dataset_pfs_workflow/upload.py +++ b/examples/dataset_pfs_workflow/upload.py @@ -72,15 +72,8 @@ def main(): } res = session.post( submit_url, - data={ - "semantic_specification": json.dumps(semantic_specification), - }, - files={ - "learnware_file": open( - os.path.join(os.path.abspath("."), "learnware_pool", learnware), - "rb", - ) - }, + data={"semantic_specification": json.dumps(semantic_specification),}, + files={"learnware_file": open(os.path.join(os.path.abspath("."), "learnware_pool", learnware), "rb",)}, ) assert json.loads(res.text)["code"] == 0, "Upload error" diff --git a/examples/workflow_by_code/main.py b/examples/workflow_by_code/main.py index 3534aae..9eafb6e 100644 --- a/examples/workflow_by_code/main.py +++ b/examples/workflow_by_code/main.py @@ -19,10 +19,7 @@ curr_root = os.path.dirname(os.path.abspath(__file__)) user_semantic = { "Data": {"Values": ["Table"], "Type": "Class"}, - "Task": { - "Values": ["Classification"], - "Type": "Class", - }, + "Task": {"Values": ["Classification"], "Type": "Class",}, "Library": {"Values": ["Scikit-learn"], "Type": "Class"}, "Scenario": {"Values": ["Education"], "Type": "Tag"}, "Description": {"Values": "", "Type": "String"}, diff --git a/learnware/client/container.py b/learnware/client/container.py index d72af50..ff6f778 100644 --- a/learnware/client/container.py +++ b/learnware/client/container.py @@ -12,67 +12,69 @@ from ..logger import get_module_logger logger = get_module_logger(module_name="client_container") + class ModelEnvContainer(BaseModel): - def __init__(self, model_config: dict, learnware_zippath: str): """The initialization method for base model """ - - self.model_script = os.path.join(C.package_path, 'learnware', 'client', 'run_model.py') + + self.model_script = os.path.join(C.package_path, "learnware", "client", "run_model.py") self.model_config = model_config self.conda_env = f"learnware_{shortuuid.uuid()}" self.learnware_zippath = learnware_zippath install_environment(learnware_zippath, self.conda_env) - + with tempfile.TemporaryDirectory(prefix="learnware_") as tempdir: - output_path = os.path.join(tempdir, 'output.pkl') - model_path = os.path.join(tempdir, 'model.pkl') - - with open(model_path, 'wb') as model_fp: + output_path = os.path.join(tempdir, "output.pkl") + model_path = os.path.join(tempdir, "model.pkl") + + with open(model_path, "wb") as model_fp: pickle.dump(model_config, model_fp) - - system_execute(f"conda run --no-capture-output python3 {self.model_script} --model-path {model_path} --output-path {output_path}") - with open(output_path, 'rb') as output_fp: + system_execute( + f"conda run --no-capture-output python3 {self.model_script} --model-path {model_path} --output-path {output_path}" + ) + + with open(output_path, "rb") as output_fp: output_results = pickle.load(output_fp) - - if output_results['status'] != 'success': - raise output_results['error_info'] - - input_shape = output_results['metadata']['input_shape'] - output_shape = output_results['metadata']['output_shape'] - + + if output_results["status"] != "success": + raise output_results["error_info"] + + input_shape = output_results["metadata"]["input_shape"] + output_shape = output_results["metadata"]["output_shape"] + super(ModelEnvContainer, self).__init__(input_shape, output_shape) - def run_model_with_script(self, method, **kargs): with tempfile.TemporaryDirectory(prefix="learnware_") as tempdir: - input_path = os.path.join(tempdir, 'input.pkl') - output_path = os.path.join(tempdir, 'output.pkl') - model_path = os.path.join(tempdir, 'model.pkl') - - with open(model_path, 'wb') as model_fp: + input_path = os.path.join(tempdir, "input.pkl") + output_path = os.path.join(tempdir, "output.pkl") + model_path = os.path.join(tempdir, "model.pkl") + + with open(model_path, "wb") as model_fp: pickle.dump(self.model_config, model_fp) - - with open(input_path, 'wb') as input_fp: - pickle.dump({'method': method, 'kargs': kargs}, input_fp) - system_execute(f"conda run --no-capture-output python3 {self.model_script} --model-path {model_path} --input-path {input_path} --output-path {output_path}") - - with open(output_path, 'rb') as output_fp: + with open(input_path, "wb") as input_fp: + pickle.dump({"method": method, "kargs": kargs}, input_fp) + + system_execute( + f"conda run --no-capture-output python3 {self.model_script} --model-path {model_path} --input-path {input_path} --output-path {output_path}" + ) + + with open(output_path, "rb") as output_fp: output_results = pickle.load(output_fp) - - if output_results['status'] != 'success': - raise output_results['error_info'] - + + if output_results["status"] != "success": + raise output_results["error_info"] + return output_results[output_results] - + def fit(self, X, y): self.run_model_with_script("fit", X=X, y=y) - + def predict(self, X): return self.run_model_with_script("predict", X=X) - + def finetune(self, X, y): self.run_model_with_script("finetune", X=X, y=y) - diff --git a/learnware/client/scripts/run_model.py b/learnware/client/scripts/run_model.py index 97f944e..a284184 100644 --- a/learnware/client/scripts/run_model.py +++ b/learnware/client/scripts/run_model.py @@ -3,52 +3,48 @@ import pickle import argparse from learnware.utils import get_module_by_module_path + def run_model(model_path, input_path, output_path): - output_results = { - 'status': 'success' - } - + output_results = {"status": "success"} + try: - with open(model_path, 'rb') as model_file: + with open(model_path, "rb") as model_file: model_config = pickle.load(file=model_file) model_module = get_module_by_module_path(model_config["module_path"]) cls = getattr(model_module, model_config["class_name"]) setattr(sys.modules["__main__"], model_config["class_name"], cls) model = cls(**model_config.get("kwargs", {})) - - - output_results['metadata'] = { - 'input_shape': model.input_shape, - 'output_shape': model.output_shape, + + output_results["metadata"] = { + "input_shape": model.input_shape, + "output_shape": model.output_shape, } - + if input_path is not None: - with open(input_path, 'rb') as input_file: + with open(input_path, "rb") as input_file: input_args = pickle.load(input_file) - output_array = getattr(model, input_args.get('method', 'predict'))(**input_args.get('kargs', {})) - output_results[input_args.get('method', 'predict')] = output_array - + output_array = getattr(model, input_args.get("method", "predict"))(**input_args.get("kargs", {})) + output_results[input_args.get("method", "predict")] = output_array + except Exception as e: - output_results['status'] = 'fail' - output_results['error_info'] = e - - with open(output_path, 'rb') as output_file: + output_results["status"] = "fail" + output_results["error_info"] = e + + with open(output_path, "rb") as output_file: pickle.dump(output_results, output_file) - -if __name__ == '__main__': + +if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--model-path", type=str, required=True, help="path of model config") parser.add_argument("--input-path", type=str, required=False, help="path of input array") parser.add_argument("--output-path", type=str, required=True, help="path of output array") - + args = parser.parse_args() model_path = args.model_path input_path = args.input_path output_path = args.output_path - + print(model_path, input_path, output_path) - - diff --git a/learnware/client/utils.py b/learnware/client/utils.py index aecf586..6f55b9e 100644 --- a/learnware/client/utils.py +++ b/learnware/client/utils.py @@ -11,11 +11,13 @@ from .package_utils import filter_nonexist_conda_packages_file, filter_nonexist_ logger = get_module_logger(module_name="client_utils") + def system_execute(command): retcd: int = os.system(command=command) if retcd != 0: raise RuntimeError(f"Command {command} failed with return code {retcd}") - + + def install_environment(zip_path, conda_env): """Install environment of a learnware @@ -41,18 +43,21 @@ def install_environment(zip_path, conda_env): filter_nonexist_conda_packages_file(yaml_file=yaml_path, output_yaml_file=yaml_path_filter) # create environment system_execute(command=f"conda env update --name {conda_env} --file {yaml_path_filter}") - + elif "requirements.txt" in z_file.namelist(): z_file.extract(member="requirements.txt", path=tempdir) requirements_path: str = os.path.join(tempdir, "requirements.txt") requirements_path_filter: str = os.path.join(tempdir, "requirements_filter.txt") - filter_nonexist_pip_packages_file(requirements_file=requirements_path, output_file=requirements_path_filter) + filter_nonexist_pip_packages_file( + requirements_file=requirements_path, output_file=requirements_path_filter + ) system_execute(command=f"conda create --name {conda_env}") - system_execute( + system_execute( command=f"conda run --no-capture-output python3 -m pip install -r {requirements_path_filter}" ) else: raise Exception("Environment.yaml or requirements.txt not found in the learnware zip file.") - + + def remove_enviroment(conda_env): - system_execute(command=f"conda env remove -n {conda_env}") \ No newline at end of file + system_execute(command=f"conda env remove -n {conda_env}") diff --git a/learnware/learnware/reuse.py b/learnware/learnware/reuse.py index 8218135..e6559d7 100644 --- a/learnware/learnware/reuse.py +++ b/learnware/learnware/reuse.py @@ -481,7 +481,9 @@ class EnsemblePruningReuser(BaseReuser): v_true_count = (select == v_true.reshape(-1, 1)).sum(axis=1) error_v = (result[:, 0] != v_true.reshape(-1)).sum() margin = result[:, 1] - result[:, 3] - margin[result[:, 0] != v_true.reshape(-1)] = (v_true_count - result[:, 1])[result[:, 0] != v_true.reshape(-1)] + margin[result[:, 0] != v_true.reshape(-1)] = (v_true_count - result[:, 1])[ + result[:, 0] != v_true.reshape(-1) + ] margin = margin / Vars.sum() mean_margin = np.mean(margin) @@ -640,9 +642,9 @@ class EnsemblePruningReuser(BaseReuser): v_predict[v_predict == -1.0] = 0 v_true[v_true == -1.0] = 0 - + return res["Vars"][bst_pop] - + def fit(self, val_X: np.ndarray, val_y: np.ndarray, maxgen: int = 500): """Ensemble pruning based on the validation set @@ -662,7 +664,7 @@ class EnsemblePruningReuser(BaseReuser): v_predict.append(pred_y) v_predict = np.concatenate(v_predict, axis=1) v_true = val_y.reshape(-1, 1) - + # Run ensemble pruning algorithm if self.mode == "regression": res = self._MEDP_regression(v_predict, v_true, maxgen) @@ -670,9 +672,9 @@ class EnsemblePruningReuser(BaseReuser): res = self._MEDP_multiclass(v_predict, v_true, maxgen) elif self.mode == "binary": res = self._MEDP_binaryclass(v_predict, v_true, maxgen) - + self.selected_idxes = np.where(res == 1)[0].tolist() - + def predict(self, user_data: np.ndarray) -> np.ndarray: """Prediction for user data using the final pruned ensemble @@ -695,4 +697,4 @@ class EnsemblePruningReuser(BaseReuser): return np.concatenate(preds, axis=1).mean(axis=1) elif option == "binary" or option == "multiclass": preds = np.concatenate(preds, axis=1) - return np.apply_along_axis(lambda x: np.bincount(x).argmax(), axis=1, arr=preds) \ No newline at end of file + return np.apply_along_axis(lambda x: np.bincount(x).argmax(), axis=1, arr=preds) diff --git a/tests/test_learnware_client/test_reuse.py b/tests/test_learnware_client/test_reuse.py index 64b3831..1d381c9 100644 --- a/tests/test_learnware_client/test_reuse.py +++ b/tests/test_learnware_client/test_reuse.py @@ -12,7 +12,7 @@ if __name__ == "__main__": semantic_specification["Scenario"] = {"Type": "Tag", "Values": "Financial"} semantic_specification["Name"] = {"Type": "String", "Values": "test"} semantic_specification["Description"] = {"Type": "String", "Values": "test"} - - zip_path = '/home/bixd/workspace/learnware/Learnware/tests/test_workflow/learnware_pool/svm_0.zip' - - learnware = get_learnware_from_dirpath('test_id', semantic_specification, zip_path) \ No newline at end of file + + zip_path = "/home/bixd/workspace/learnware/Learnware/tests/test_workflow/learnware_pool/svm_0.zip" + + learnware = get_learnware_from_dirpath("test_id", semantic_specification, zip_path) diff --git a/tests/test_learnware_upload/test_upload.py b/tests/test_learnware_upload/test_upload.py index 22626d2..1e27203 100644 --- a/tests/test_learnware_upload/test_upload.py +++ b/tests/test_learnware_upload/test_upload.py @@ -12,8 +12,8 @@ if __name__ == "__main__": semantic_specification["Scenario"] = {"Type": "Tag", "Values": "Financial"} semantic_specification["Name"] = {"Type": "String", "Values": "test"} semantic_specification["Description"] = {"Type": "String", "Values": "test"} - + zip_path = "test.zip" client = LearnwareClient() client.install_environment(zip_path) - client.test_learnware(zip_path, semantic_specification) \ No newline at end of file + client.test_learnware(zip_path, semantic_specification) diff --git a/tests/test_workflow/test_workflow.py b/tests/test_workflow/test_workflow.py index 85f5c93..432a8b1 100644 --- a/tests/test_workflow/test_workflow.py +++ b/tests/test_workflow/test_workflow.py @@ -19,10 +19,7 @@ curr_root = os.path.dirname(os.path.abspath(__file__)) user_semantic = { "Data": {"Values": ["Tabular"], "Type": "Class"}, - "Task": { - "Values": ["Classification"], - "Type": "Class", - }, + "Task": {"Values": ["Classification"], "Type": "Class",}, "Library": {"Values": ["Scikit-learn"], "Type": "Class"}, "Scenario": {"Values": ["Education"], "Type": "Tag"}, "Description": {"Values": "", "Type": "String"},