From edf6b76bf5bbdc7229af1997ddc28441d7b53489 Mon Sep 17 00:00:00 2001 From: Gene Date: Sat, 11 Nov 2023 23:02:27 +0800 Subject: [PATCH] [MNT] modify details and format code --- .../workflows/install_learnware_with_pip.yaml | 5 ----- .../workflows/install_learnware_with_source.yaml | 5 ----- examples/dataset_m5_workflow/main.py | 4 +--- examples/dataset_pfs_workflow/main.py | 4 +--- examples/dataset_text_workflow2/get_data.py | 10 ++++++---- examples/dataset_text_workflow2/main.py | 4 ++-- examples/dataset_text_workflow2/utils.py | 16 ++++++++-------- learnware/market/easy/checker.py | 2 +- learnware/specification/regular/table/rkme.py | 5 ++--- setup.py | 9 --------- .../test_check_learnware.py | 1 - 11 files changed, 21 insertions(+), 44 deletions(-) diff --git a/.github/workflows/install_learnware_with_pip.yaml b/.github/workflows/install_learnware_with_pip.yaml index e5e9ed2..350ed1d 100644 --- a/.github/workflows/install_learnware_with_pip.yaml +++ b/.github/workflows/install_learnware_with_pip.yaml @@ -39,11 +39,6 @@ jobs: conda run -n learnware python -m pip install --upgrade pip conda run -n learnware python -m pip install pytest - - name: Install faiss for MacOS - if: ${{ matrix.os == 'macos-11' || matrix.os == 'macos-latest' }} - run: | - conda run -n learnware conda install -c pytorch faiss - - name: Install learnware run: | conda run -n learnware python -m pip install learnware diff --git a/.github/workflows/install_learnware_with_source.yaml b/.github/workflows/install_learnware_with_source.yaml index 1702f6b..02cba9e 100644 --- a/.github/workflows/install_learnware_with_source.yaml +++ b/.github/workflows/install_learnware_with_source.yaml @@ -44,11 +44,6 @@ jobs: # stop the build if there are Python syntax errors or undefined names conda run -n learnware python -m flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics - - name: Install faiss for MacOS - if: ${{ matrix.os == 'macos-11' || matrix.os == 'macos-latest' }} - run: | - conda run -n learnware conda install -c pytorch faiss - - name: Install learnware run: | conda run -n learnware python -m pip install . diff --git a/examples/dataset_m5_workflow/main.py b/examples/dataset_m5_workflow/main.py index 2e126e0..bc8a369 100644 --- a/examples/dataset_m5_workflow/main.py +++ b/examples/dataset_m5_workflow/main.py @@ -8,7 +8,6 @@ from shutil import copyfile, rmtree import learnware from learnware.market import instantiate_learnware_market, BaseUserInfo -# from learnware.market import database_ops from learnware.reuse import JobSelectorReuser, AveragingReuser from learnware.specification import generate_rkme_spec from m5 import DataLoader @@ -51,6 +50,7 @@ user_semantic = { "Output": output_description, } + class M5DatasetWorkflow: def _init_m5_dataset(self): m5 = DataLoader() @@ -82,8 +82,6 @@ class M5DatasetWorkflow: easy_market.add_learnware(zip_path, semantic_spec) print("Total Item:", len(easy_market)) - # curr_inds = easy_market._get_ids() - # print("Available ids:", curr_inds) def prepare_learnware(self, regenerate_flag=False): if regenerate_flag: diff --git a/examples/dataset_pfs_workflow/main.py b/examples/dataset_pfs_workflow/main.py index c50119d..e0a8fac 100644 --- a/examples/dataset_pfs_workflow/main.py +++ b/examples/dataset_pfs_workflow/main.py @@ -55,7 +55,7 @@ class PFSDatasetWorkflow: pfs = Dataloader() pfs.regenerate_data() - algo_list = ["ridge"] # "ridge", "lgb" + algo_list = ["ridge"] # "ridge", "lgb" for algo in algo_list: pfs.set_algo(algo) pfs.retrain_models() @@ -79,8 +79,6 @@ class PFSDatasetWorkflow: easy_market.add_learnware(zip_path, semantic_spec) print("Total Item:", len(easy_market)) - # curr_inds = easy_market._get_ids() - # print("Available ids:", curr_inds) def prepare_learnware(self, regenerate_flag=False): if regenerate_flag: diff --git a/examples/dataset_text_workflow2/get_data.py b/examples/dataset_text_workflow2/get_data.py index 216e193..a55e8d7 100644 --- a/examples/dataset_text_workflow2/get_data.py +++ b/examples/dataset_text_workflow2/get_data.py @@ -8,7 +8,9 @@ def get_data(data_root="./data"): dtest = pd.read_csv(os.path.join(data_root, "test.csv")) # returned X(DataFrame), y(Series) - return (dtrain[['discourse_text', 'discourse_type']], - dtrain["discourse_effectiveness"], - dtest[['discourse_text', 'discourse_type']], - dtest["discourse_effectiveness"]) + return ( + dtrain[["discourse_text", "discourse_type"]], + dtrain["discourse_effectiveness"], + dtest[["discourse_text", "discourse_type"]], + dtest["discourse_effectiveness"], + ) diff --git a/examples/dataset_text_workflow2/main.py b/examples/dataset_text_workflow2/main.py index 69765a6..5b3ac96 100644 --- a/examples/dataset_text_workflow2/main.py +++ b/examples/dataset_text_workflow2/main.py @@ -78,10 +78,10 @@ def prepare_model(): modelv_save_path = os.path.join(model_save_root, "uploader_v_%d.pth" % (i)) modell_save_path = os.path.join(model_save_root, "uploader_l_%d.pth" % (i)) - with open(modelv_save_path, 'wb') as f: + with open(modelv_save_path, "wb") as f: pickle.dump(vectorizer, f) - with open(modell_save_path, 'wb') as f: + with open(modell_save_path, "wb") as f: pickle.dump(lgbm, f) logger.info("Model saved to '%s' and '%s'" % (modelv_save_path, modell_save_path)) diff --git a/examples/dataset_text_workflow2/utils.py b/examples/dataset_text_workflow2/utils.py index 4726407..247f706 100644 --- a/examples/dataset_text_workflow2/utils.py +++ b/examples/dataset_text_workflow2/utils.py @@ -39,11 +39,11 @@ def generate_uploader(data_x: pd.Series, data_y: pd.Series, n_uploaders=50, data return os.makedirs(data_save_root, exist_ok=True) - types = data_x['discourse_type'].unique() + types = data_x["discourse_type"].unique() for i in range(n_uploaders): - indices = data_x['discourse_type'] == types[i] - selected_X = data_x[indices]['discourse_text'].to_list() + indices = data_x["discourse_type"] == types[i] + selected_X = data_x[indices]["discourse_text"].to_list() selected_y = data_y[indices].to_list() X_save_dir = os.path.join(data_save_root, "uploader_%d_X.pkl" % (i)) @@ -61,11 +61,11 @@ def generate_user(data_x, data_y, n_users=50, data_save_root=None): return os.makedirs(data_save_root, exist_ok=True) - types = data_x['discourse_type'].unique() + types = data_x["discourse_type"].unique() for i in range(n_users): - indices = data_x['discourse_type'] == types[i] - selected_X = data_x[indices]['discourse_text'].to_list() + indices = data_x["discourse_type"] == types[i] + selected_X = data_x[indices]["discourse_text"].to_list() selected_y = data_y[indices].to_list() X_save_dir = os.path.join(data_save_root, "user_%d_X.pkl" % (i)) @@ -80,10 +80,10 @@ def generate_user(data_x, data_y, n_users=50, data_save_root=None): # Train Uploaders' models def train(X, y, out_classes): - vectorizer = TfidfVectorizer(stop_words='english') + vectorizer = TfidfVectorizer(stop_words="english") X_tfidf = vectorizer.fit_transform(X) - lgbm = LGBMClassifier(boosting_type='dart', n_estimators=500, num_leaves=21) + lgbm = LGBMClassifier(boosting_type="dart", n_estimators=500, num_leaves=21) lgbm.fit(X_tfidf, y) return vectorizer, lgbm diff --git a/learnware/market/easy/checker.py b/learnware/market/easy/checker.py index d0d50cf..6419b98 100644 --- a/learnware/market/easy/checker.py +++ b/learnware/market/easy/checker.py @@ -116,7 +116,7 @@ class EasyStatChecker(BaseChecker): elif spec_type == "RKMEImageSpecification": inputs = np.random.randint(0, 255, size=(10, *input_shape)) else: - raise ValueError(f"not supported spec type for spec_type = {spec_type}") + raise ValueError(f"not supported spec type for spec_type = {spec_type}") # Check output try: diff --git a/learnware/specification/regular/table/rkme.py b/learnware/specification/regular/table/rkme.py index b217e82..a37d509 100644 --- a/learnware/specification/regular/table/rkme.py +++ b/learnware/specification/regular/table/rkme.py @@ -1,14 +1,12 @@ from __future__ import annotations import os - import copy import torch import json import codecs import random import numpy as np -# from cvxopt import solvers, matrix from qpsolvers import solve_qp, Problem, solve_problem from collections import Counter from typing import Tuple, Any, List, Union, Dict @@ -20,6 +18,7 @@ from ....logger import get_module_logger logger = get_module_logger("rkme") + class RKMETableSpecification(RegularStatsSpecification): """Reduced Kernel Mean Embedding (RKME) Specification""" @@ -137,7 +136,7 @@ class RKMETableSpecification(RegularStatsSpecification): Size of the construced reduced set. """ X = X.astype("float32") - kmeans = MiniBatchKMeans(n_clusters=K, max_iter=100, verbose=False, n_init='auto') + kmeans = MiniBatchKMeans(n_clusters=K, max_iter=100, verbose=False, n_init="auto") kmeans.fit(X) center = torch.from_numpy(kmeans.cluster_centers_).double() self.z = center diff --git a/setup.py b/setup.py index 6d2777e..2c0df29 100644 --- a/setup.py +++ b/setup.py @@ -54,12 +54,6 @@ REQUIRED = [ "numpy>=1.20.0", "pandas>=0.25.1", "scipy>=1.0.0", -<<<<<<< HEAD - "matplotlib>=3.1.3", - "torch>=1.11.0", -======= - "cvxopt>=1.3.0", ->>>>>>> 93df27b2a16a169ecfb93e3a2e149b6c1ea56902 "tqdm>=4.65.0", "scikit-learn>=0.22", "joblib>=1.2.0", @@ -76,9 +70,6 @@ REQUIRED = [ "qpsolvers[clarabel]>=4.0.1", ] -# if get_platform() != MACOS: -# REQUIRED.append("faiss-cpu>=1.7.1") - here = os.path.abspath(os.path.dirname(__file__)) with open(os.path.join(here, "README.md"), encoding="utf-8") as f: long_description = f.read() diff --git a/tests/test_learnware_client/test_check_learnware.py b/tests/test_learnware_client/test_check_learnware.py index 36f0a81..0e6fca6 100644 --- a/tests/test_learnware_client/test_check_learnware.py +++ b/tests/test_learnware_client/test_check_learnware.py @@ -32,4 +32,3 @@ class TestCheckLearnware(unittest.TestCase): if __name__ == "__main__": unittest.main() -