From edf6b76bf5bbdc7229af1997ddc28441d7b53489 Mon Sep 17 00:00:00 2001
From: Gene <liujd@lamda.nju.edu.cn>
Date: Sat, 11 Nov 2023 23:02:27 +0800
Subject: [PATCH] [MNT] Drop faiss CI steps and merge markers; format code

---
 .../workflows/install_learnware_with_pip.yaml    |  5 -----
 .../workflows/install_learnware_with_source.yaml |  5 -----
 examples/dataset_m5_workflow/main.py             |  4 +---
 examples/dataset_pfs_workflow/main.py            |  4 +---
 examples/dataset_text_workflow2/get_data.py      | 10 ++++++----
 examples/dataset_text_workflow2/main.py          |  4 ++--
 examples/dataset_text_workflow2/utils.py         | 16 ++++++++--------
 learnware/market/easy/checker.py                 |  2 +-
 learnware/specification/regular/table/rkme.py    |  5 ++---
 setup.py                                         |  9 ---------
 .../test_check_learnware.py                      |  1 -
 11 files changed, 21 insertions(+), 44 deletions(-)

diff --git a/.github/workflows/install_learnware_with_pip.yaml b/.github/workflows/install_learnware_with_pip.yaml
index e5e9ed2..350ed1d 100644
--- a/.github/workflows/install_learnware_with_pip.yaml
+++ b/.github/workflows/install_learnware_with_pip.yaml
@@ -39,11 +39,6 @@ jobs:
         conda run -n learnware python -m pip install --upgrade pip
         conda run -n learnware python -m pip install pytest
 
-    - name: Install faiss for MacOS
-      if: ${{ matrix.os == 'macos-11' || matrix.os == 'macos-latest' }}
-      run: |
-        conda run -n learnware conda install -c pytorch faiss
-
     - name: Install learnware
       run: |
         conda run -n learnware python -m pip install learnware
diff --git a/.github/workflows/install_learnware_with_source.yaml b/.github/workflows/install_learnware_with_source.yaml
index 1702f6b..02cba9e 100644
--- a/.github/workflows/install_learnware_with_source.yaml
+++ b/.github/workflows/install_learnware_with_source.yaml
@@ -44,11 +44,6 @@ jobs:
         # stop the build if there are Python syntax errors or undefined names
         conda run -n learnware python -m flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
 
-    - name: Install faiss for MacOS
-      if: ${{ matrix.os == 'macos-11' || matrix.os == 'macos-latest' }}
-      run: |
-        conda run -n learnware conda install -c pytorch faiss
-
     - name: Install learnware
       run: |
         conda run -n learnware python -m pip install .
diff --git a/examples/dataset_m5_workflow/main.py b/examples/dataset_m5_workflow/main.py
index 2e126e0..bc8a369 100644
--- a/examples/dataset_m5_workflow/main.py
+++ b/examples/dataset_m5_workflow/main.py
@@ -8,7 +8,6 @@ from shutil import copyfile, rmtree
 
 import learnware
 from learnware.market import instantiate_learnware_market, BaseUserInfo
-# from learnware.market import database_ops
 from learnware.reuse import JobSelectorReuser, AveragingReuser
 from learnware.specification import generate_rkme_spec
 from m5 import DataLoader
@@ -51,6 +50,7 @@ user_semantic = {
     "Output": output_description,
 }
 
+
 class M5DatasetWorkflow:
     def _init_m5_dataset(self):
         m5 = DataLoader()
@@ -82,8 +82,6 @@ class M5DatasetWorkflow:
             easy_market.add_learnware(zip_path, semantic_spec)
 
         print("Total Item:", len(easy_market))
-        # curr_inds = easy_market._get_ids()
-        # print("Available ids:", curr_inds)
 
     def prepare_learnware(self, regenerate_flag=False):
         if regenerate_flag:
diff --git a/examples/dataset_pfs_workflow/main.py b/examples/dataset_pfs_workflow/main.py
index c50119d..e0a8fac 100644
--- a/examples/dataset_pfs_workflow/main.py
+++ b/examples/dataset_pfs_workflow/main.py
@@ -55,7 +55,7 @@ class PFSDatasetWorkflow:
         pfs = Dataloader()
         pfs.regenerate_data()
 
-        algo_list = ["ridge"] # "ridge", "lgb"
+        algo_list = ["ridge"]  # "ridge", "lgb"
         for algo in algo_list:
             pfs.set_algo(algo)
             pfs.retrain_models()
@@ -79,8 +79,6 @@ class PFSDatasetWorkflow:
             easy_market.add_learnware(zip_path, semantic_spec)
 
         print("Total Item:", len(easy_market))
-        # curr_inds = easy_market._get_ids()
-        # print("Available ids:", curr_inds)
 
     def prepare_learnware(self, regenerate_flag=False):
         if regenerate_flag:
diff --git a/examples/dataset_text_workflow2/get_data.py b/examples/dataset_text_workflow2/get_data.py
index 216e193..a55e8d7 100644
--- a/examples/dataset_text_workflow2/get_data.py
+++ b/examples/dataset_text_workflow2/get_data.py
@@ -8,7 +8,9 @@ def get_data(data_root="./data"):
     dtest = pd.read_csv(os.path.join(data_root, "test.csv"))
 
     # returned X(DataFrame), y(Series)
-    return (dtrain[['discourse_text', 'discourse_type']],
-            dtrain["discourse_effectiveness"],
-            dtest[['discourse_text', 'discourse_type']],
-            dtest["discourse_effectiveness"])
+    return (
+        dtrain[["discourse_text", "discourse_type"]],
+        dtrain["discourse_effectiveness"],
+        dtest[["discourse_text", "discourse_type"]],
+        dtest["discourse_effectiveness"],
+    )
diff --git a/examples/dataset_text_workflow2/main.py b/examples/dataset_text_workflow2/main.py
index 69765a6..5b3ac96 100644
--- a/examples/dataset_text_workflow2/main.py
+++ b/examples/dataset_text_workflow2/main.py
@@ -78,10 +78,10 @@ def prepare_model():
         modelv_save_path = os.path.join(model_save_root, "uploader_v_%d.pth" % (i))
         modell_save_path = os.path.join(model_save_root, "uploader_l_%d.pth" % (i))
 
-        with open(modelv_save_path, 'wb') as f:
+        with open(modelv_save_path, "wb") as f:
             pickle.dump(vectorizer, f)
 
-        with open(modell_save_path, 'wb') as f:
+        with open(modell_save_path, "wb") as f:
             pickle.dump(lgbm, f)
 
         logger.info("Model saved to '%s' and '%s'" % (modelv_save_path, modell_save_path))
diff --git a/examples/dataset_text_workflow2/utils.py b/examples/dataset_text_workflow2/utils.py
index 4726407..247f706 100644
--- a/examples/dataset_text_workflow2/utils.py
+++ b/examples/dataset_text_workflow2/utils.py
@@ -39,11 +39,11 @@ def generate_uploader(data_x: pd.Series, data_y: pd.Series, n_uploaders=50, data
         return
     os.makedirs(data_save_root, exist_ok=True)
 
-    types = data_x['discourse_type'].unique()
+    types = data_x["discourse_type"].unique()
 
     for i in range(n_uploaders):
-        indices = data_x['discourse_type'] == types[i]
-        selected_X = data_x[indices]['discourse_text'].to_list()
+        indices = data_x["discourse_type"] == types[i]
+        selected_X = data_x[indices]["discourse_text"].to_list()
         selected_y = data_y[indices].to_list()
 
         X_save_dir = os.path.join(data_save_root, "uploader_%d_X.pkl" % (i))
@@ -61,11 +61,11 @@ def generate_user(data_x, data_y, n_users=50, data_save_root=None):
         return
     os.makedirs(data_save_root, exist_ok=True)
 
-    types = data_x['discourse_type'].unique()
+    types = data_x["discourse_type"].unique()
 
     for i in range(n_users):
-        indices = data_x['discourse_type'] == types[i]
-        selected_X = data_x[indices]['discourse_text'].to_list()
+        indices = data_x["discourse_type"] == types[i]
+        selected_X = data_x[indices]["discourse_text"].to_list()
         selected_y = data_y[indices].to_list()
 
         X_save_dir = os.path.join(data_save_root, "user_%d_X.pkl" % (i))
@@ -80,10 +80,10 @@ def generate_user(data_x, data_y, n_users=50, data_save_root=None):
 
 # Train Uploaders' models
 def train(X, y, out_classes):
-    vectorizer = TfidfVectorizer(stop_words='english')
+    vectorizer = TfidfVectorizer(stop_words="english")
     X_tfidf = vectorizer.fit_transform(X)
 
-    lgbm = LGBMClassifier(boosting_type='dart', n_estimators=500, num_leaves=21)
+    lgbm = LGBMClassifier(boosting_type="dart", n_estimators=500, num_leaves=21)
     lgbm.fit(X_tfidf, y)
 
     return vectorizer, lgbm
diff --git a/learnware/market/easy/checker.py b/learnware/market/easy/checker.py
index d0d50cf..6419b98 100644
--- a/learnware/market/easy/checker.py
+++ b/learnware/market/easy/checker.py
@@ -116,7 +116,7 @@ class EasyStatChecker(BaseChecker):
             elif spec_type == "RKMEImageSpecification":
                 inputs = np.random.randint(0, 255, size=(10, *input_shape))
             else:
-                raise ValueError(f"not supported spec type for spec_type = {spec_type}")  
+                raise ValueError(f"not supported spec type for spec_type = {spec_type}")
 
             # Check output
             try:
diff --git a/learnware/specification/regular/table/rkme.py b/learnware/specification/regular/table/rkme.py
index b217e82..a37d509 100644
--- a/learnware/specification/regular/table/rkme.py
+++ b/learnware/specification/regular/table/rkme.py
@@ -1,14 +1,12 @@
 from __future__ import annotations
 
 import os
-
 import copy
 import torch
 import json
 import codecs
 import random
 import numpy as np
-# from cvxopt import solvers, matrix
 from qpsolvers import solve_qp, Problem, solve_problem
 from collections import Counter
 from typing import Tuple, Any, List, Union, Dict
@@ -20,6 +18,7 @@ from ....logger import get_module_logger
 
 logger = get_module_logger("rkme")
 
+
 class RKMETableSpecification(RegularStatsSpecification):
     """Reduced Kernel Mean Embedding (RKME) Specification"""
 
@@ -137,7 +136,7 @@ class RKMETableSpecification(RegularStatsSpecification):
             Size of the construced reduced set.
         """
         X = X.astype("float32")
-        kmeans = MiniBatchKMeans(n_clusters=K, max_iter=100, verbose=False, n_init='auto')
+        kmeans = MiniBatchKMeans(n_clusters=K, max_iter=100, verbose=False, n_init="auto")
         kmeans.fit(X)
         center = torch.from_numpy(kmeans.cluster_centers_).double()
         self.z = center
diff --git a/setup.py b/setup.py
index 6d2777e..2c0df29 100644
--- a/setup.py
+++ b/setup.py
@@ -54,12 +54,6 @@ REQUIRED = [
     "numpy>=1.20.0",
     "pandas>=0.25.1",
     "scipy>=1.0.0",
-<<<<<<< HEAD
-    "matplotlib>=3.1.3",
-    "torch>=1.11.0",
-=======
-    "cvxopt>=1.3.0",
->>>>>>> 93df27b2a16a169ecfb93e3a2e149b6c1ea56902
     "tqdm>=4.65.0",
     "scikit-learn>=0.22",
     "joblib>=1.2.0",
@@ -76,9 +70,6 @@ REQUIRED = [
     "qpsolvers[clarabel]>=4.0.1",
 ]
 
-# if get_platform() != MACOS:
-#     REQUIRED.append("faiss-cpu>=1.7.1")
-
 here = os.path.abspath(os.path.dirname(__file__))
 with open(os.path.join(here, "README.md"), encoding="utf-8") as f:
     long_description = f.read()
diff --git a/tests/test_learnware_client/test_check_learnware.py b/tests/test_learnware_client/test_check_learnware.py
index 36f0a81..0e6fca6 100644
--- a/tests/test_learnware_client/test_check_learnware.py
+++ b/tests/test_learnware_client/test_check_learnware.py
@@ -32,4 +32,3 @@ class TestCheckLearnware(unittest.TestCase):
 
 if __name__ == "__main__":
     unittest.main()
-