From 8a7713369a0456379cf920b7e32a8b65f9623e60 Mon Sep 17 00:00:00 2001
From: GeneLiuXe <356340460@qq.com>
Date: Tue, 18 Jul 2023 13:34:10 +0800
Subject: [PATCH 1/3] [MNT] Modify details about choosing cuda

---
 learnware/specification/rkme.py  | 3 +--
 learnware/specification/utils.py | 3 ++-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/learnware/specification/rkme.py b/learnware/specification/rkme.py
index f3bddc5..019bf89 100644
--- a/learnware/specification/rkme.py
+++ b/learnware/specification/rkme.py
@@ -365,7 +365,7 @@ class RKMEStatSpecification(BaseStatSpecification):
         if torch.is_tensor(rkme_to_save["beta"]):
             rkme_to_save["beta"] = rkme_to_save["beta"].detach().cpu().numpy()
         rkme_to_save["beta"] = rkme_to_save["beta"].tolist()
-        rkme_to_save["device"] = "gpu" if rkme_to_save["cuda_idx"] != -1 else "cpu"
+        rkme_to_save["device"] = "gpu" if torch.cuda.is_available() else "cpu"
         json.dump(
             rkme_to_save,
             codecs.open(save_path, "w", encoding="utf-8"),
@@ -433,7 +433,6 @@ def choose_device(cuda_idx=-1):
     """
     if cuda_idx != -1:
         device = torch.device(f"cuda:{cuda_idx}" if torch.cuda.is_available() else "cpu")
-        # device = torch.device(f"cuda:{cuda_idx}")
     else:
         device = torch.device("cpu")
     return device
diff --git a/learnware/specification/utils.py b/learnware/specification/utils.py
index 4614557..c322bae 100644
--- a/learnware/specification/utils.py
+++ b/learnware/specification/utils.py
@@ -13,7 +13,7 @@ def generate_rkme_spec(
     steps: int = 3,
     nonnegative_beta: bool = True,
     reduce: bool = True,
-    cuda_idx: int = -1,
+    cuda_idx: int = None,
 ) -> RKMEStatSpecification:
     """
             Interface for users to generate Reduced Kernel Mean Embedding (RKME) specification.
@@ -39,6 +39,7 @@ def generate_rkme_spec(
             Whether shrink original data to a smaller set, by default True
     cuda_idx : int
             A flag indicating whether use CUDA during RKME computation. -1 indicates CUDA not used.
+            None indicates that cuda is automatically selected.
 
     Returns
     -------

From fb72f6b01ea7efcb304e903ccf617ebc94340d88 Mon Sep 17 00:00:00 2001
From: Gene <liujd@lamda.nju.edu.cn>
Date: Tue, 18 Jul 2023 15:27:13 +0800
Subject: [PATCH 2/3] [MNT] Enhance the robustness of generating rkme spec

---
 learnware/specification/rkme.py  | 17 ++++--
 learnware/specification/utils.py | 97 +++++++++++++++++++++++---------
 2 files changed, 80 insertions(+), 34 deletions(-)

diff --git a/learnware/specification/rkme.py b/learnware/specification/rkme.py
index 019bf89..2271b03 100644
--- a/learnware/specification/rkme.py
+++ b/learnware/specification/rkme.py
@@ -104,12 +104,17 @@ class RKMEStatSpecification(BaseStatSpecification):
         Z_shape = tuple([K] + list(X_shape)[1:])
         X = X.reshape(self.num_points, -1)
 
-        # fill np.nan
-        X_nan = np.isnan(X)
-        if X_nan.max() == 1:
+        # Check data values
+        X[np.isinf(X) | np.isneginf(X) | np.isposinf(X) | np.isneginf(X)] = np.nan
+        if np.any(np.isnan(X)):
             for col in range(X.shape[1]):
-                col_mean = np.nanmean(X[:, col])
-                X[:, col] = np.where(X_nan[:, col], col_mean, X[:, col])
+                is_nan = np.isnan(X[:, col])
+                if np.any(is_nan):
+                    if np.all(is_nan):
+                        raise ValueError(f"All values in column {col} are exceptional, e.g., NaN and Inf.")
+                    # Fill np.nan with np.nanmean
+                    col_mean = np.nanmean(X[:, col])
+                    X[:, col] = np.where(is_nan, col_mean, X[:, col])
 
         if not reduce:
             self.z = X.reshape(X_shape)
@@ -365,7 +370,7 @@ class RKMEStatSpecification(BaseStatSpecification):
         if torch.is_tensor(rkme_to_save["beta"]):
             rkme_to_save["beta"] = rkme_to_save["beta"].detach().cpu().numpy()
         rkme_to_save["beta"] = rkme_to_save["beta"].tolist()
-        rkme_to_save["device"] = "gpu" if torch.cuda.is_available() else "cpu"
+        rkme_to_save["device"] = "gpu" if rkme_to_save["cuda_idx"] != -1 else "cpu"
         json.dump(
             rkme_to_save,
             codecs.open(save_path, "w", encoding="utf-8"),
diff --git a/learnware/specification/utils.py b/learnware/specification/utils.py
index c322bae..1fbd1fd 100644
--- a/learnware/specification/utils.py
+++ b/learnware/specification/utils.py
@@ -1,14 +1,40 @@
+import torch
 import numpy as np
+import pandas as pd
+from typing import Union
 
 from .base import BaseStatSpecification
 from .rkme import RKMEStatSpecification
 from ..config import C
 
 
+def convert_to_numpy(data: Union[np.ndarray, pd.DataFrame, torch.Tensor]):
+    """Convert data to np.ndarray
+
+    Parameters
+    ----------
+    data : np.ndarray, pd.DataFrame, or torch.Tensor
+        The input data that needs to be converted to a NumPy array.
+
+    Returns
+    -------
+    np.ndarray
+        The data converted to a NumPy array.
+    """
+    if isinstance(data, np.ndarray):
+        return data
+    elif isinstance(data, pd.DataFrame):
+        return data.to_numpy()
+    elif isinstance(data, torch.Tensor):
+        return data.detach().cpu().numpy()
+    else:
+        raise TypeError("Unsupported data format. Please provide a NumPy array, a Pandas DataFrame, or a PyTorch Tensor.")
+
+
 def generate_rkme_spec(
-    X: np.ndarray,
+    X: Union[np.ndarray, pd.DataFrame, torch.Tensor],
     gamma: float = 0.1,
-    K: int = 100,
+    reduced_set_size: int = 100,
     step_size: float = 0.1,
     steps: int = 3,
     nonnegative_beta: bool = True,
@@ -16,60 +42,75 @@ def generate_rkme_spec(
     cuda_idx: int = None,
 ) -> RKMEStatSpecification:
     """
-            Interface for users to generate Reduced Kernel Mean Embedding (RKME) specification.
-            Return a RKMEStatSpecification object, use .save() method to save as json file.
-
+    Interface for users to generate a Reduced Kernel Mean Embedding (RKME) specification.
+    Returns an RKMEStatSpecification object; use its .save() method to save it as a JSON file.
 
     Parameters
     ----------
-    X : np.ndarray
-            Raw data in np.ndarray format.
-            Size of array: (n*d)
+    X : np.ndarray, pd.DataFrame, or torch.Tensor
+        Raw data in np.ndarray, pd.DataFrame, or torch.Tensor format.
+        The shape of X:
+            First dimension represents the number of samples (data points).
+            The remaining dimensions represent the dimensions (features) of each sample.
+            For example, if X has shape (100, 3), it means there are 100 samples, and each sample has 3 features.
     gamma : float
-    Bandwidth in gaussian kernel, by default 0.1.
-    K : int
-            Size of the construced reduced set.
+        Bandwidth in gaussian kernel, by default 0.1.
+    reduced_set_size : int
+        Size of the constructed reduced set.
     step_size : float
-            Step size for gradient descent in the iterative optimization.
+        Step size for gradient descent in the iterative optimization.
     steps : int
-            Total rounds in the iterative optimization.
+        Total rounds in the iterative optimization.
     nonnegative_beta : bool, optional
-            True if weights for the reduced set are intended to be kept non-negative, by default False.
+        True if weights for the reduced set are intended to be kept non-negative, by default True.
     reduce : bool, optional
-            Whether shrink original data to a smaller set, by default True
+        Whether to shrink the original data to a smaller set, by default True.
     cuda_idx : int
-            A flag indicating whether use CUDA during RKME computation. -1 indicates CUDA not used.
-            None indicates that cuda is automatically selected.
+        Index of the CUDA device to use during RKME computation. -1 indicates that CUDA is not used.
+        None (the default) selects automatically: device 0 if CUDA is available, otherwise the CPU.
 
     Returns
     -------
     RKMEStatSpecification
-            A RKMEStatSpecification object
+        A RKMEStatSpecification object
     """
+    # Convert data type
+    X = convert_to_numpy(X)
     X = np.ascontiguousarray(X).astype(np.float32)
+    
+    # Check reduced_set_size
     max_reduced_set_size = C.max_reduced_set_size
-    if K * X[0].size > max_reduced_set_size:
-        K = max(1, max_reduced_set_size // X[0].size)
+    if reduced_set_size * X[0].size > max_reduced_set_size:
+        reduced_set_size = max(1, max_reduced_set_size // X[0].size)
+    
+    # Check cuda_idx
+    if not torch.cuda.is_available() or cuda_idx == -1:
+        cuda_idx = -1
+    else:
+        num_cuda_devices = torch.cuda.device_count()
+        if cuda_idx is None or not (cuda_idx >= 0 and cuda_idx < num_cuda_devices):
+            cuda_idx = 0
+    
+    # Generate rkme spec
     rkme_spec = RKMEStatSpecification(gamma=gamma, cuda_idx=cuda_idx)
-    rkme_spec.generate_stat_spec_from_data(X, K, step_size, steps, nonnegative_beta, reduce)
+    rkme_spec.generate_stat_spec_from_data(X, reduced_set_size, step_size, steps, nonnegative_beta, reduce)
     return rkme_spec
 
 
 def generate_stat_spec(X: np.ndarray) -> BaseStatSpecification:
     """
-            Interface for users to generate statistical specification.
-            Return a StatSpecification object, use .save() method to save as npy file.
-
+    Interface for users to generate a statistical specification.
+    Returns a StatSpecification object; use its .save() method to save it as an npy file.
 
     Parameters
     ----------
     X : np.ndarray
-            Raw data in np.ndarray format.
-            Size of array: (n*d)
+        Raw data in np.ndarray format.
+        Size of array: (n*d)
 
     Returns
     -------
     StatSpecification
-            A StatSpecification object
+        A StatSpecification object
     """
-    return None
+    return None
\ No newline at end of file

From 910a5c3ae6c8bb0397a6cffbc1ce8e4592803647 Mon Sep 17 00:00:00 2001
From: Gene <liujd@lamda.nju.edu.cn>
Date: Tue, 18 Jul 2023 15:28:50 +0800
Subject: [PATCH 3/3] [MNT] Modify version

---
 learnware/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/learnware/__init__.py b/learnware/__init__.py
index 234e6d2..67880e8 100644
--- a/learnware/__init__.py
+++ b/learnware/__init__.py
@@ -1,4 +1,4 @@
-__version__ = "0.1.0.99"
+__version__ = "0.1.0.dev"
 
 import os
 from .logger import get_module_logger