From 8a7713369a0456379cf920b7e32a8b65f9623e60 Mon Sep 17 00:00:00 2001 From: GeneLiuXe <356340460@qq.com> Date: Tue, 18 Jul 2023 13:34:10 +0800 Subject: [PATCH 1/3] [MNT] Modify details about choosing cuda --- learnware/specification/rkme.py | 3 +-- learnware/specification/utils.py | 3 ++- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/learnware/specification/rkme.py b/learnware/specification/rkme.py index f3bddc5..019bf89 100644 --- a/learnware/specification/rkme.py +++ b/learnware/specification/rkme.py @@ -365,7 +365,7 @@ class RKMEStatSpecification(BaseStatSpecification): if torch.is_tensor(rkme_to_save["beta"]): rkme_to_save["beta"] = rkme_to_save["beta"].detach().cpu().numpy() rkme_to_save["beta"] = rkme_to_save["beta"].tolist() - rkme_to_save["device"] = "gpu" if rkme_to_save["cuda_idx"] != -1 else "cpu" + rkme_to_save["device"] = "gpu" if torch.cuda.is_available() else "cpu" json.dump( rkme_to_save, codecs.open(save_path, "w", encoding="utf-8"), @@ -433,7 +433,6 @@ def choose_device(cuda_idx=-1): """ if cuda_idx != -1: device = torch.device(f"cuda:{cuda_idx}" if torch.cuda.is_available() else "cpu") - # device = torch.device(f"cuda:{cuda_idx}") else: device = torch.device("cpu") return device diff --git a/learnware/specification/utils.py b/learnware/specification/utils.py index 4614557..c322bae 100644 --- a/learnware/specification/utils.py +++ b/learnware/specification/utils.py @@ -13,7 +13,7 @@ def generate_rkme_spec( steps: int = 3, nonnegative_beta: bool = True, reduce: bool = True, - cuda_idx: int = -1, + cuda_idx: int = None, ) -> RKMEStatSpecification: """ Interface for users to generate Reduced Kernel Mean Embedding (RKME) specification. @@ -39,6 +39,7 @@ def generate_rkme_spec( Whether shrink original data to a smaller set, by default True cuda_idx : int A flag indicating whether use CUDA during RKME computation. -1 indicates CUDA not used. + None indicates that cuda is automatically selected. 
Returns ------- From fb72f6b01ea7efcb304e903ccf617ebc94340d88 Mon Sep 17 00:00:00 2001 From: Gene Date: Tue, 18 Jul 2023 15:27:13 +0800 Subject: [PATCH 2/3] [MNT] Enhance the robustness of generating rkme spec --- learnware/specification/rkme.py | 17 ++++-- learnware/specification/utils.py | 97 +++++++++++++++++++++++--------- 2 files changed, 80 insertions(+), 34 deletions(-) diff --git a/learnware/specification/rkme.py b/learnware/specification/rkme.py index 019bf89..2271b03 100644 --- a/learnware/specification/rkme.py +++ b/learnware/specification/rkme.py @@ -104,12 +104,17 @@ class RKMEStatSpecification(BaseStatSpecification): Z_shape = tuple([K] + list(X_shape)[1:]) X = X.reshape(self.num_points, -1) - # fill np.nan - X_nan = np.isnan(X) - if X_nan.max() == 1: + # Check data values + X[np.isinf(X) | np.isneginf(X) | np.isposinf(X) | np.isneginf(X)] = np.nan + if np.any(np.isnan(X)): for col in range(X.shape[1]): - col_mean = np.nanmean(X[:, col]) - X[:, col] = np.where(X_nan[:, col], col_mean, X[:, col]) + is_nan = np.isnan(X[:, col]) + if np.any(is_nan): + if np.all(is_nan): + raise ValueError(f"All values in column {col} are exceptional, e.g., NaN and Inf.") + # Fill np.nan with np.nanmean + col_mean = np.nanmean(X[:, col]) + X[:, col] = np.where(is_nan, col_mean, X[:, col]) if not reduce: self.z = X.reshape(X_shape) @@ -365,7 +370,7 @@ class RKMEStatSpecification(BaseStatSpecification): if torch.is_tensor(rkme_to_save["beta"]): rkme_to_save["beta"] = rkme_to_save["beta"].detach().cpu().numpy() rkme_to_save["beta"] = rkme_to_save["beta"].tolist() - rkme_to_save["device"] = "gpu" if torch.cuda.is_available() else "cpu" + rkme_to_save["device"] = "gpu" if rkme_to_save["cuda_idx"] != -1 else "cpu" json.dump( rkme_to_save, codecs.open(save_path, "w", encoding="utf-8"), diff --git a/learnware/specification/utils.py b/learnware/specification/utils.py index c322bae..1fbd1fd 100644 --- a/learnware/specification/utils.py +++ b/learnware/specification/utils.py @@ 
-1,14 +1,40 @@ +import torch import numpy as np +import pandas as pd +from typing import Union from .base import BaseStatSpecification from .rkme import RKMEStatSpecification from ..config import C +def convert_to_numpy(data: Union[np.ndarray, pd.DataFrame, torch.Tensor]): + """Convert data to np.ndarray + + Parameters + ---------- + data : np.ndarray, pd.DataFrame, or torch.Tensor + The input data that needs to be converted to a NumPy array. + + Returns + ------- + np.ndarray + The data converted to a NumPy array. + """ + if isinstance(data, np.ndarray): + return data + elif isinstance(data, pd.DataFrame): + return data.to_numpy() + elif isinstance(data, torch.Tensor): + return data.detach().cpu().numpy() + else: + raise TypeError("Unsupported data format. Please provide a NumPy array, a Pandas DataFrame, or a PyTorch Tensor.") + + def generate_rkme_spec( - X: np.ndarray, + X: Union[np.ndarray, pd.DataFrame, torch.Tensor], gamma: float = 0.1, - K: int = 100, + reduced_set_size: int = 100, step_size: float = 0.1, steps: int = 3, nonnegative_beta: bool = True, @@ -16,60 +42,75 @@ def generate_rkme_spec( cuda_idx: int = None, ) -> RKMEStatSpecification: """ - Interface for users to generate Reduced Kernel Mean Embedding (RKME) specification. - Return a RKMEStatSpecification object, use .save() method to save as json file. - + Interface for users to generate Reduced Kernel Mean Embedding (RKME) specification. + Return a RKMEStatSpecification object, use .save() method to save as json file. Parameters ---------- - X : np.ndarray - Raw data in np.ndarray format. - Size of array: (n*d) + X : np.ndarray, pd.DataFrame, or torch.Tensor + Raw data in np.ndarray, pd.DataFrame, or torch.Tensor format. + The shape of X: + First dimension represents the number of samples (data points). + The remaining dimensions represent the dimensions (features) of each sample. + For example, if X has shape (100, 3), it means there are 100 samples, and each sample has 3 features. 
gamma : float - Bandwidth in gaussian kernel, by default 0.1. - K : int - Size of the construced reduced set. + Bandwidth in Gaussian kernel, by default 0.1. + reduced_set_size : int + Size of the constructed reduced set. step_size : float - Step size for gradient descent in the iterative optimization. + Step size for gradient descent in the iterative optimization. steps : int - Total rounds in the iterative optimization. + Total rounds in the iterative optimization. nonnegative_beta : bool, optional - True if weights for the reduced set are intended to be kept non-negative, by default False. + True if weights for the reduced set are intended to be kept non-negative, by default True. reduce : bool, optional - Whether shrink original data to a smaller set, by default True + Whether to shrink the original data to a smaller set, by default True. cuda_idx : int - A flag indicating whether use CUDA during RKME computation. -1 indicates CUDA not used. - None indicates that cuda is automatically selected. + A flag indicating whether to use CUDA during RKME computation. -1 indicates CUDA not used. + None indicates that CUDA is automatically selected.
Returns ------- RKMEStatSpecification - A RKMEStatSpecification object + A RKMEStatSpecification object """ + # Convert data type + X = convert_to_numpy(X) X = np.ascontiguousarray(X).astype(np.float32) + + # Check reduced_set_size max_reduced_set_size = C.max_reduced_set_size - if K * X[0].size > max_reduced_set_size: - K = max(1, max_reduced_set_size // X[0].size) + if reduced_set_size * X[0].size > max_reduced_set_size: + reduced_set_size = max(1, max_reduced_set_size // X[0].size) + + # Check cuda_idx + if not torch.cuda.is_available() or cuda_idx == -1: + cuda_idx = -1 + else: + num_cuda_devices = torch.cuda.device_count() + if cuda_idx is None or not (cuda_idx >= 0 and cuda_idx < num_cuda_devices): + cuda_idx = 0 + + # Generate rkme spec rkme_spec = RKMEStatSpecification(gamma=gamma, cuda_idx=cuda_idx) - rkme_spec.generate_stat_spec_from_data(X, K, step_size, steps, nonnegative_beta, reduce) + rkme_spec.generate_stat_spec_from_data(X, reduced_set_size, step_size, steps, nonnegative_beta, reduce) return rkme_spec def generate_stat_spec(X: np.ndarray) -> BaseStatSpecification: """ - Interface for users to generate statistical specification. - Return a StatSpecification object, use .save() method to save as npy file. - + Interface for users to generate statistical specification. + Return a StatSpecification object, use .save() method to save as npy file. Parameters ---------- X : np.ndarray - Raw data in np.ndarray format. - Size of array: (n*d) + Raw data in np.ndarray format. 
+ Size of array: (n*d) Returns ------- StatSpecification - A StatSpecification object + A StatSpecification object """ - return None + return None \ No newline at end of file From 910a5c3ae6c8bb0397a6cffbc1ce8e4592803647 Mon Sep 17 00:00:00 2001 From: Gene Date: Tue, 18 Jul 2023 15:28:50 +0800 Subject: [PATCH 3/3] [MNT] Modify version --- learnware/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/learnware/__init__.py b/learnware/__init__.py index 234e6d2..67880e8 100644 --- a/learnware/__init__.py +++ b/learnware/__init__.py @@ -1,4 +1,4 @@ -__version__ = "0.1.0.99" +__version__ = "0.1.0.dev" import os from .logger import get_module_logger