[MNT] black format

2 years ago · 6ab674f26e
--- a/examples/dataset_image_workflow/get_data.py
+++ b/examples/dataset_image_workflow/get_data.py
@@ -192,7 +192,7 @@ def get_zca_matrix(X, reg_coef=0.1):

 def layernorm_data(X):
    """Center each sample and scale it to unit L2 norm.

    The mean and the sum of squares are reduced over dimensions 1, 2 and 3
    with ``keepdim=True``, so every index along dimension 0 is normalized
    independently and the output has the same shape as ``X``.

    Parameters
    ----------
    X : torch.Tensor
        Tensor with at least four dimensions (dimensions 1-3 are reduced).

    Returns
    -------
    torch.Tensor
        ``X`` with per-sample zero mean and unit L2 norm.
    """
    X_processed = X - torch.mean(X, [1, 2, 3], keepdim=True)
    # Divide by the per-sample L2 norm exactly once.
    # NOTE(review): a constant sample has zero norm and yields NaN here -- confirm callers never pass one.
    X_processed = X_processed / torch.sqrt(torch.sum(X_processed**2, [1, 2, 3], keepdim=True))

    return X_processed

@@ -240,10 +240,7 @@ def augment(images, dc_aug_param, device):
        def scalefun(i):
            h = int((np.random.uniform(1 - scale, 1 + scale)) * shape[2])
            w = int((np.random.uniform(1 - scale, 1 + scale)) * shape[2])
            tmp = F.interpolate(
                images[i : i + 1],
                [h, w],
            )[0]
            tmp = F.interpolate(images[i : i + 1], [h, w],)[0]
            mhw = max(h, w, shape[2], shape[3])
            im_ = torch.zeros(shape[1], mhw, mhw, dtype=torch.float, device=device)
            r = int((mhw - h) / 2)
--- a/examples/dataset_m5_workflow/m5/utils.py
+++ b/examples/dataset_m5_workflow/m5/utils.py
@@ -70,7 +70,7 @@ def measure_aux_algo(idx, test_sample, model):

 # Simple "Memory profilers" to see memory usage
 def get_memory_usage():
    """Return the current process's memory usage in GiB, rounded to two decimals.

    Reads field 0 of ``psutil.Process(os.getpid()).memory_info()`` (a byte
    count; presumably the resident set size -- verify against psutil) and
    divides it by ``2**30``.
    """
    mem_bytes = psutil.Process(os.getpid()).memory_info()[0]
    return np.round(mem_bytes / 2.0**30, 2)


 def sizeof_fmt(num, suffix="B"):
@@ -84,7 +84,7 @@ def sizeof_fmt(num, suffix="B"):
 # Memory Reducer
 def reduce_mem_usage(df, float16_flag=True, verbose=True):
    numerics = ["int16", "int32", "int64", "float16", "float32", "float64"]
    start_mem = df.memory_usage().sum() / 1024**2
    start_mem = df.memory_usage().sum() / 1024 ** 2
    for col in df.columns:
        col_type = df[col].dtypes
        if col_type in numerics:
@@ -106,7 +106,7 @@ def reduce_mem_usage(df, float16_flag=True, verbose=True):
                    df[col] = df[col].astype(np.float32)
                else:
                    df[col] = df[col].astype(np.float64)
    end_mem = df.memory_usage().sum() / 1024**2
    end_mem = df.memory_usage().sum() / 1024 ** 2
    if verbose:
        print(
            "Mem. usage decreased to {:5.2f} Mb ({:.1f}% reduction)".format(
--- a/examples/dataset_m5_workflow/upload.py
+++ b/examples/dataset_m5_workflow/upload.py
@@ -69,15 +69,8 @@ def main():
        }
        res = session.post(
            submit_url,
            data={
                "semantic_specification": json.dumps(semantic_specification),
            },
            files={
                "learnware_file": open(
                    os.path.join(os.path.abspath("."), "learnware_pool", learnware),
                    "rb",
                )
            },
            data={"semantic_specification": json.dumps(semantic_specification),},
            files={"learnware_file": open(os.path.join(os.path.abspath("."), "learnware_pool", learnware), "rb",)},
        )
        assert json.loads(res.text)["code"] == 0, "Upload error"

--- a/examples/dataset_pfs_workflow/pfs/pfs_cross_transfer.py
+++ b/examples/dataset_pfs_workflow/pfs/pfs_cross_transfer.py
@@ -67,7 +67,7 @@ def get_split_errs(algo):
            for tmp in range(len(proportion_list)):
                model = lgb.LGBMModel(
                    boosting_type="gbdt",
                    num_leaves=2**7 - 1,
                    num_leaves=2 ** 7 - 1,
                    learning_rate=0.01,
                    objective="rmse",
                    metric="rmse",
@@ -119,7 +119,7 @@ def get_errors(algo):
        if algo == "lgb":
            model = lgb.LGBMModel(
                boosting_type="gbdt",
                num_leaves=2**7 - 1,
                num_leaves=2 ** 7 - 1,
                learning_rate=0.01,
                objective="rmse",
                metric="rmse",
--- a/examples/dataset_pfs_workflow/upload.py
+++ b/examples/dataset_pfs_workflow/upload.py
@@ -72,15 +72,8 @@ def main():
        }
        res = session.post(
            submit_url,
            data={
                "semantic_specification": json.dumps(semantic_specification),
            },
            files={
                "learnware_file": open(
                    os.path.join(os.path.abspath("."), "learnware_pool", learnware),
                    "rb",
                )
            },
            data={"semantic_specification": json.dumps(semantic_specification),},
            files={"learnware_file": open(os.path.join(os.path.abspath("."), "learnware_pool", learnware), "rb",)},
        )
        assert json.loads(res.text)["code"] == 0, "Upload error"

--- a/examples/workflow_by_code/main.py
+++ b/examples/workflow_by_code/main.py
@@ -19,10 +19,7 @@ curr_root = os.path.dirname(os.path.abspath(__file__))

 user_semantic = {
    "Data": {"Values": ["Table"], "Type": "Class"},
    "Task": {
        "Values": ["Classification"],
        "Type": "Class",
    },
    "Task": {"Values": ["Classification"], "Type": "Class",},
    "Library": {"Values": ["Scikit-learn"], "Type": "Class"},
    "Scenario": {"Values": ["Education"], "Type": "Tag"},
    "Description": {"Values": "", "Type": "String"},
--- a/learnware/client/container.py
+++ b/learnware/client/container.py
@@ -12,67 +12,69 @@ from ..logger import get_module_logger

 logger = get_module_logger(module_name="client_container")


 class ModelEnvContainer(BaseModel):
    """Model wrapper that executes every model call in a separate process.

    Each call pickles the model configuration (and, for method calls, the
    method name and keyword arguments) into a temporary directory, runs
    ``self.model_script`` through ``conda run`` and reads the pickled result
    dictionary the script writes back.
    """

    def __init__(self, model_config: dict, learnware_zippath: str):
        """Install the learnware environment and query the model's shapes.

        Parameters
        ----------
        model_config : dict
            Model configuration; pickled to ``model.pkl`` for the model script.
        learnware_zippath : str
            Path of the learnware zip file handed to ``install_environment``.

        Raises
        ------
        Exception
            The object stored under ``"error_info"`` in the script's result
            when its ``"status"`` is not ``"success"``.
        """
        self.model_script = os.path.join(C.package_path, "learnware", "client", "run_model.py")
        self.model_config = model_config
        self.conda_env = f"learnware_{shortuuid.uuid()}"
        self.learnware_zippath = learnware_zippath
        install_environment(learnware_zippath, self.conda_env)

        with tempfile.TemporaryDirectory(prefix="learnware_") as tempdir:
            output_path = os.path.join(tempdir, "output.pkl")
            model_path = os.path.join(tempdir, "model.pkl")

            with open(model_path, "wb") as model_fp:
                pickle.dump(model_config, model_fp)

            # No --input-path: the script is only asked for the model metadata.
            # NOTE(review): the command does not reference self.conda_env -- confirm
            # whether "conda run" is meant to target the environment installed above.
            system_execute(
                f"conda run --no-capture-output python3 {self.model_script} --model-path {model_path} --output-path {output_path}"
            )

            # Load the result before the temporary directory is removed.
            with open(output_path, "rb") as output_fp:
                output_results = pickle.load(output_fp)

        if output_results["status"] != "success":
            raise output_results["error_info"]

        input_shape = output_results["metadata"]["input_shape"]
        output_shape = output_results["metadata"]["output_shape"]

        super(ModelEnvContainer, self).__init__(input_shape, output_shape)

    def run_model_with_script(self, method, **kargs):
        """Run ``method`` of the model through the model script.

        Parameters
        ----------
        method : str
            Name of the model method to call; it is also the key under which
            the script stores the call's result.
        **kargs
            Keyword arguments forwarded to that method.

        Returns
        -------
        object
            The value stored under ``method`` in the script's result dictionary.

        Raises
        ------
        Exception
            The object stored under ``"error_info"`` in the script's result
            when its ``"status"`` is not ``"success"``.
        """
        with tempfile.TemporaryDirectory(prefix="learnware_") as tempdir:
            input_path = os.path.join(tempdir, "input.pkl")
            output_path = os.path.join(tempdir, "output.pkl")
            model_path = os.path.join(tempdir, "model.pkl")

            with open(model_path, "wb") as model_fp:
                pickle.dump(self.model_config, model_fp)

            with open(input_path, "wb") as input_fp:
                pickle.dump({"method": method, "kargs": kargs}, input_fp)

            # NOTE(review): as in __init__, self.conda_env is not passed to "conda run".
            system_execute(
                f"conda run --no-capture-output python3 {self.model_script} --model-path {model_path} --input-path {input_path} --output-path {output_path}"
            )

            with open(output_path, "rb") as output_fp:
                output_results = pickle.load(output_fp)

        if output_results["status"] != "success":
            raise output_results["error_info"]

        # The result is keyed by the method name; indexing the dictionary with
        # itself (the previous code) always raised TypeError.
        return output_results[method]

    def fit(self, X, y):
        """Call the model's ``fit`` through the model script; returns ``None``."""
        self.run_model_with_script("fit", X=X, y=y)

    def predict(self, X):
        """Call the model's ``predict`` through the model script and return its result."""
        return self.run_model_with_script("predict", X=X)

    def finetune(self, X, y):
        """Call the model's ``finetune`` through the model script; returns ``None``."""
        self.run_model_with_script("finetune", X=X, y=y)
    
--- a/learnware/client/scripts/run_model.py
+++ b/learnware/client/scripts/run_model.py
@@ -3,52 +3,48 @@ import pickle
 import argparse
 from learnware.utils import get_module_by_module_path


 def run_model(model_path, input_path, output_path):
    """Load a model from its pickled config, optionally call one method, and pickle the outcome.

    Parameters
    ----------
    model_path : str
        Path of a pickled dict with ``"module_path"``, ``"class_name"`` and
        optionally ``"kwargs"`` (constructor keyword arguments).
    input_path : str or None
        Path of a pickled dict with optional ``"method"`` (default
        ``"predict"``) and ``"kargs"`` entries. When ``None`` no method is
        called and only the metadata is reported.
    output_path : str
        Path the result dictionary is pickled to.

    The result dictionary always has a ``"status"`` entry. On success it also
    has ``"metadata"`` (input/output shapes) and, when a method was called,
    that method's return value stored under the method name. Any exception is
    caught, stored under ``"error_info"`` and reported with status ``"fail"``.
    """
    output_results = {"status": "success"}

    try:
        with open(model_path, "rb") as model_file:
            model_config = pickle.load(file=model_file)

        model_module = get_module_by_module_path(model_config["module_path"])
        cls = getattr(model_module, model_config["class_name"])
        # Expose the class on __main__ under its own name as well.
        setattr(sys.modules["__main__"], model_config["class_name"], cls)
        model = cls(**model_config.get("kwargs", {}))

        output_results["metadata"] = {
            "input_shape": model.input_shape,
            "output_shape": model.output_shape,
        }

        if input_path is not None:
            with open(input_path, "rb") as input_file:
                input_args = pickle.load(input_file)
            method = input_args.get("method", "predict")
            output_results[method] = getattr(model, method)(**input_args.get("kargs", {}))

    except Exception as e:
        # Failures are reported through the output file so the caller can re-raise them.
        output_results["status"] = "fail"
        output_results["error_info"] = e

    # Must be opened for binary *writing*: "rb" made pickle.dump impossible
    # (and failed outright when the output file did not exist yet).
    with open(output_path, "wb") as output_file:
        pickle.dump(output_results, output_file)
    

 if __name__ == "__main__":
    # Command-line entry point: parse the three file paths and run the model once.
    parser = argparse.ArgumentParser()
    parser.add_argument("--model-path", type=str, required=True, help="path of model config")
    parser.add_argument("--input-path", type=str, required=False, help="path of input array")
    parser.add_argument("--output-path", type=str, required=True, help="path of output array")

    args = parser.parse_args()

    model_path = args.model_path
    input_path = args.input_path
    output_path = args.output_path

    print(model_path, input_path, output_path)

    # Previously the arguments were parsed and printed but the model was never
    # run, so no output file was produced.
    run_model(model_path, input_path, output_path)
   
    
--- a/learnware/client/utils.py
+++ b/learnware/client/utils.py
@@ -11,11 +11,13 @@ from .package_utils import filter_nonexist_conda_packages_file, filter_nonexist_

 logger = get_module_logger(module_name="client_utils")


 def system_execute(command):
    """Run ``command`` with ``os.system`` and fail loudly on a non-zero result.

    Raises
    ------
    RuntimeError
        When ``os.system`` returns anything other than 0; the message contains
        the command and the returned value.
    """
    status = os.system(command)
    if status == 0:
        return
    raise RuntimeError(f"Command {command} failed with return code {status}")
    


 def install_environment(zip_path, conda_env):
    """Install environment of a learnware
    
@@ -41,18 +43,21 @@ def install_environment(zip_path, conda_env):
                filter_nonexist_conda_packages_file(yaml_file=yaml_path, output_yaml_file=yaml_path_filter)
                # create environment
                system_execute(command=f"conda env update --name {conda_env} --file {yaml_path_filter}")
                

            elif "requirements.txt" in z_file.namelist():
                z_file.extract(member="requirements.txt", path=tempdir)
                requirements_path: str = os.path.join(tempdir, "requirements.txt")
                requirements_path_filter: str = os.path.join(tempdir, "requirements_filter.txt")
                filter_nonexist_pip_packages_file(requirements_file=requirements_path, output_file=requirements_path_filter)
                filter_nonexist_pip_packages_file(
                    requirements_file=requirements_path, output_file=requirements_path_filter
                )
                system_execute(command=f"conda create --name {conda_env}")
                system_execute(                                                                                                                                                              
                system_execute(
                    command=f"conda run --no-capture-output python3 -m pip install -r {requirements_path_filter}"
                )
            else:
                raise Exception("Environment.yaml or requirements.txt not found in the learnware zip file.")
            


 def remove_enviroment(conda_env):
    """Remove the conda environment named ``conda_env`` via ``conda env remove``.

    ``system_execute`` raises ``RuntimeError`` when the command fails.
    """
    # Issue the removal exactly once; a second identical call would target an
    # environment that is already gone.
    system_execute(command=f"conda env remove -n {conda_env}")
--- a/learnware/learnware/reuse.py
+++ b/learnware/learnware/reuse.py
@@ -481,7 +481,9 @@ class EnsemblePruningReuser(BaseReuser):
            v_true_count = (select == v_true.reshape(-1, 1)).sum(axis=1)
            error_v = (result[:, 0] != v_true.reshape(-1)).sum()
            margin = result[:, 1] - result[:, 3]
            margin[result[:, 0] != v_true.reshape(-1)] = (v_true_count - result[:, 1])[result[:, 0] != v_true.reshape(-1)]
            margin[result[:, 0] != v_true.reshape(-1)] = (v_true_count - result[:, 1])[
                result[:, 0] != v_true.reshape(-1)
            ]

            margin = margin / Vars.sum()
            mean_margin = np.mean(margin)
@@ -640,9 +642,9 @@ class EnsemblePruningReuser(BaseReuser):

        v_predict[v_predict == -1.0] = 0
        v_true[v_true == -1.0] = 0
        

        return res["Vars"][bst_pop]
    

    def fit(self, val_X: np.ndarray, val_y: np.ndarray, maxgen: int = 500):
        """Ensemble pruning based on the validation set

@@ -662,7 +664,7 @@ class EnsemblePruningReuser(BaseReuser):
            v_predict.append(pred_y)
        v_predict = np.concatenate(v_predict, axis=1)
        v_true = val_y.reshape(-1, 1)
        

        # Run ensemble pruning algorithm
        if self.mode == "regression":
            res = self._MEDP_regression(v_predict, v_true, maxgen)
@@ -670,9 +672,9 @@ class EnsemblePruningReuser(BaseReuser):
            res = self._MEDP_multiclass(v_predict, v_true, maxgen)
        elif self.mode == "binary":
            res = self._MEDP_binaryclass(v_predict, v_true, maxgen)
            

        self.selected_idxes = np.where(res == 1)[0].tolist()
    

    def predict(self, user_data: np.ndarray) -> np.ndarray:
        """Prediction for user data using the final pruned ensemble

@@ -695,4 +697,4 @@ class EnsemblePruningReuser(BaseReuser):
            return np.concatenate(preds, axis=1).mean(axis=1)
        elif option == "binary" or option == "multiclass":
            preds = np.concatenate(preds, axis=1)
            return np.apply_along_axis(lambda x: np.bincount(x).argmax(), axis=1, arr=preds)
            return np.apply_along_axis(lambda x: np.bincount(x).argmax(), axis=1, arr=preds)
--- a/tests/test_learnware_client/test_reuse.py
+++ b/tests/test_learnware_client/test_reuse.py
@@ -12,7 +12,7 @@ if __name__ == "__main__":
    semantic_specification["Scenario"] = {"Type": "Tag", "Values": "Financial"}
    semantic_specification["Name"] = {"Type": "String", "Values": "test"}
    semantic_specification["Description"] = {"Type": "String", "Values": "test"}
    
    zip_path = '/home/bixd/workspace/learnware/Learnware/tests/test_workflow/learnware_pool/svm_0.zip'
    
    learnware = get_learnware_from_dirpath('test_id', semantic_specification, zip_path)

    zip_path = "/home/bixd/workspace/learnware/Learnware/tests/test_workflow/learnware_pool/svm_0.zip"

    learnware = get_learnware_from_dirpath("test_id", semantic_specification, zip_path)
--- a/tests/test_learnware_upload/test_upload.py
+++ b/tests/test_learnware_upload/test_upload.py
@@ -12,8 +12,8 @@ if __name__ == "__main__":
    semantic_specification["Scenario"] = {"Type": "Tag", "Values": "Financial"}
    semantic_specification["Name"] = {"Type": "String", "Values": "test"}
    semantic_specification["Description"] = {"Type": "String", "Values": "test"}
    

    zip_path = "test.zip"
    client = LearnwareClient()
    client.install_environment(zip_path)
    client.test_learnware(zip_path, semantic_specification)
    client.test_learnware(zip_path, semantic_specification)
--- a/tests/test_workflow/test_workflow.py
+++ b/tests/test_workflow/test_workflow.py
@@ -19,10 +19,7 @@ curr_root = os.path.dirname(os.path.abspath(__file__))

 user_semantic = {
    "Data": {"Values": ["Tabular"], "Type": "Class"},
    "Task": {
        "Values": ["Classification"],
        "Type": "Class",
    },
    "Task": {"Values": ["Classification"], "Type": "Class",},
    "Library": {"Values": ["Scikit-learn"], "Type": "Class"},
    "Scenario": {"Values": ["Education"], "Type": "Tag"},
    "Description": {"Values": "", "Type": "String"},