- from __future__ import annotations
-
- import codecs
- import copy
- import json
- import os
- import random
- from collections import Counter
- from typing import Any
-
- import numpy as np
- import torch
- from cvxopt import solvers, matrix
-
- try:
- import faiss
-
- ver = faiss.__version__
- # Compare as a numeric tuple; plain string comparison misorders versions like "1.10.0"
- _FAISS_INSTALLED = tuple(int(p) for p in ver.split(".")[:3]) >= (1, 7, 1)
- except ImportError:
- _FAISS_INSTALLED = False
-
- from ..base import RegularStatsSpecification
- from ....logger import get_module_logger
-
- logger = get_module_logger("rkme")
-
- if not _FAISS_INSTALLED:
- logger.warning(
- "faiss >= 1.7.1 is required but was not detected. Please run 'conda install -c pytorch faiss-cpu' first."
- )
-
-
- class RKMETableSpecification(RegularStatsSpecification):
- """Reduced Kernel Mean Embedding (RKME) Specification"""
-
- def __init__(self, gamma: float = 0.1, cuda_idx: int = -1):
- """Initializing RKME parameters.
-
- Parameters
- ----------
- gamma : float
- Bandwidth in gaussian kernel, by default 0.1.
- cuda_idx : int
- Index of the CUDA device used during RKME computation; -1 indicates CUDA is not used.
- """
- self.z = None
- self.beta = None
- self.gamma = gamma
- self.num_points = 0
- self.cuda_idx = cuda_idx
- torch.cuda.empty_cache()
- self.device = choose_device(cuda_idx=cuda_idx)
- setup_seed(0)
- super(RKMETableSpecification, self).__init__(type=self.__class__.__name__)
-
- def get_beta(self) -> np.ndarray:
- """Move beta (the RKME weights) back to memory accessible to the CPU.
-
- Returns
- -------
- np.ndarray
- A copy of beta in CPU memory.
- """
- return self.beta.detach().cpu().numpy()
-
- def get_z(self) -> np.ndarray:
- """Move z (the RKME reduced-set points) back to memory accessible to the CPU.
-
- Returns
- -------
- np.ndarray
- A copy of z in CPU memory.
- """
- return self.z.detach().cpu().numpy()
-
- def generate_stat_spec_from_data(
- self,
- X: np.ndarray,
- K: int = 100,
- step_size: float = 0.1,
- steps: int = 3,
- nonnegative_beta: bool = True,
- reduce: bool = True,
- ):
- """Construct reduced set from raw dataset using iterative optimization.
-
- Parameters
- ----------
- X : np.ndarray
- Raw data in np.ndarray format.
- K : int
- Size of the constructed reduced set.
- step_size : float
- Step size for gradient descent in the iterative optimization.
- steps : int
- Total rounds in the iterative optimization.
- nonnegative_beta : bool, optional
- True if weights for the reduced set are intended to be kept non-negative, by default True.
- reduce : bool, optional
- Whether to shrink the original data to a smaller set, by default True.
- """
- alpha = None
- self.num_points = X.shape[0]
- X_shape = X.shape
- Z_shape = tuple([K] + list(X_shape)[1:])
- X = X.reshape(self.num_points, -1)
-
- # Check data values
- X[np.isinf(X)] = np.nan  # np.isinf covers both positive and negative infinity
- if np.any(np.isnan(X)):
- for col in range(X.shape[1]):
- is_nan = np.isnan(X[:, col])
- if np.any(is_nan):
- if np.all(is_nan):
- raise ValueError(f"All values in column {col} are invalid (NaN or Inf).")
- # Fill np.nan with np.nanmean
- col_mean = np.nanmean(X[:, col])
- X[:, col] = np.where(is_nan, col_mean, X[:, col])
-
- if not reduce:
- self.z = X.reshape(X_shape)
- self.beta = 1 / self.num_points * np.ones(self.num_points)
- self.z = torch.from_numpy(self.z).double().to(self.device)
- self.beta = torch.from_numpy(self.beta).double().to(self.device)
- return
-
- # Initialize Z by clustering, utilizing faiss to speed up the process.
- self._init_z_by_faiss(X, K)
- self._update_beta(X, nonnegative_beta)
-
- # Alternately optimize Z and beta
- for i in range(steps):
- self._update_z(alpha, X, step_size)
- self._update_beta(X, nonnegative_beta)
-
- # Reshape to original dimensions
- self.z = self.z.reshape(Z_shape)
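-
- # A minimal usage sketch (illustrative only; the data and sizes are made up):
- #
- #     spec = RKMETableSpecification(gamma=0.1, cuda_idx=-1)
- #     X = np.random.rand(1000, 10)
- #     spec.generate_stat_spec_from_data(X, K=50)
- #     spec.get_z().shape        # (50, 10), the reduced-set points
- #     spec.get_beta().sum()     # ~1.0, since beta is solved under a sum-to-one QP constraint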
-
- def _init_z_by_faiss(self, X: np.ndarray, K: int):
- """Initialize Z by faiss clustering.
-
- Parameters
- ----------
- X : np.ndarray
- Raw data in np.ndarray format.
- K : int
- Size of the constructed reduced set.
- """
- X = X.astype("float32")
- numDim = X.shape[1]
- kmeans = faiss.Kmeans(numDim, K, niter=100, verbose=False)
- kmeans.train(X)
- center = torch.from_numpy(kmeans.centroids).double()
- self.z = center
-
- def _update_beta(self, X: Any, nonnegative_beta: bool = True):
- """Fix Z and update beta using its closed-form solution.
-
- Parameters
- ----------
- X : np.ndarray or torch.Tensor
- Raw data in np.ndarray format or torch.Tensor format.
- nonnegative_beta : bool, optional
- True if weights for the reduced set are intended to be kept non-negative, by default True.
- """
- Z = self.z
- if not torch.is_tensor(Z):
- Z = torch.from_numpy(Z)
- Z = Z.to(self.device).double()
-
- if not torch.is_tensor(X):
- X = torch.from_numpy(X)
- X = X.to(self.device).double()
- K = torch_rbf_kernel(Z, Z, gamma=self.gamma).to(self.device)
- C = torch_rbf_kernel(Z, X, gamma=self.gamma).to(self.device)
- C = torch.sum(C, dim=1) / X.shape[0]
-
- if nonnegative_beta:
- beta = solve_qp(K, C).to(self.device)
- else:
- beta = torch.linalg.inv(K + torch.eye(K.shape[0]).to(self.device) * 1e-5) @ C
-
- self.beta = beta
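-
- # Note: beta is the RKHS least-squares fit of the empirical mean embedding, i.e. it minimizes
- # || sum_i beta_i k(z_i, .) - (1/N) sum_j k(x_j, .) ||^2, whose first-order condition is the
- # linear system K beta = C. With the non-negativity (and sum-to-one) constraint this becomes
- # the QP handled by solve_qp; without it, the ridge-regularized inverse (K + 1e-5 I)^{-1} C is used.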
-
- @torch.no_grad()
- def _update_z_vectorize(self, alpha: float, X: Any, step_size: float, batch_size=8):
- """Fix beta and update Z using gradient descent.
- Unlike method _update_z, this method updates z by batches.
-
- Parameters
- ----------
- alpha : float
- Normalization factor. If None, uniform weights 1 / N over the raw data are used.
- X : np.ndarray or torch.Tensor
- Raw data in np.ndarray format or torch.Tensor format.
- step_size : float
- Step size for gradient descent.
- batch_size : int
- To avoid exceeding GPU memory, at most batch_size reduced-set points are processed at a time.
- """
- gamma = self.gamma
- Z = self.z
- beta = self.beta
-
- if not torch.is_tensor(Z):
- Z = torch.from_numpy(Z)
- Z = Z.to(self.device).double()
-
- if not torch.is_tensor(beta):
- beta = torch.from_numpy(beta)
- beta = beta.to(self.device).double()
-
- if not torch.is_tensor(X):
- X = torch.from_numpy(X)
- X = X.to(self.device).double()
-
- grad_Z = torch.zeros_like(Z)
- for i in range(0, Z.shape[0], batch_size):
- Z_ = Z[i : i + batch_size]
- term_1 = torch.bmm(
- torch.unsqueeze((torch.unsqueeze(beta, dim=0) * torch_rbf_kernel(Z_, Z, gamma)), dim=1),
- torch.unsqueeze(Z_, dim=1) - torch.unsqueeze(Z, dim=0),
- )
- if alpha is not None:
- term_2 = -2 * torch.bmm(
- torch.unsqueeze(alpha * torch_rbf_kernel(Z_, X, gamma), dim=1),
- torch.unsqueeze(Z_, dim=1) - torch.unsqueeze(X, dim=0),
- )
- else:
- term_2 = -2 * torch.bmm(
- torch.unsqueeze(torch_rbf_kernel(Z_, X, gamma) / self.num_points, dim=1),
- torch.unsqueeze(Z_, dim=1) - torch.unsqueeze(X, dim=0),
- )
- grad_Z[i : i + batch_size] = (
- -2 * gamma * torch.unsqueeze(beta[i : i + batch_size], dim=1) * torch.squeeze(term_1 + term_2)
- )
-
- Z = Z - step_size * grad_Z
- self.z = Z
-
- def _update_z(self, alpha: float, X: Any, step_size: float):
- """Fix beta and update Z using gradient descent.
-
- Parameters
- ----------
- alpha : float
- Normalization factor. If None, uniform weights 1 / N over the raw data are used.
- X : np.ndarray or torch.Tensor
- Raw data in np.ndarray format or torch.Tensor format.
- step_size : float
- Step size for gradient descent.
- """
- gamma = self.gamma
- Z = self.z
- beta = self.beta
-
- if not torch.is_tensor(Z):
- Z = torch.from_numpy(Z)
- Z = Z.to(self.device).double()
-
- if not torch.is_tensor(beta):
- beta = torch.from_numpy(beta)
- beta = beta.to(self.device).double()
-
- if not torch.is_tensor(X):
- X = torch.from_numpy(X)
- X = X.to(self.device).double()
-
- grad_Z = torch.zeros_like(Z)
-
- for i in range(Z.shape[0]):
- z_i = Z[i, :].reshape(1, -1)
- term_1 = (beta * torch_rbf_kernel(z_i, Z, gamma)) @ (z_i - Z)
- if alpha is not None:
- term_2 = -2 * (alpha * torch_rbf_kernel(z_i, X, gamma)) @ (z_i - X)
- else:
- term_2 = -2 * (torch_rbf_kernel(z_i, X, gamma) / self.num_points) @ (z_i - X)
- grad_Z[i, :] = -2 * gamma * beta[i] * (term_1 + term_2)
-
- Z = Z - step_size * grad_Z
- self.z = Z
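-
- # Note: grad_Z follows the gradient of the squared MMD between the reduced set and the raw
- # data with respect to each z_i, using the Gaussian-kernel identity
- # d k(z, x) / dz = -2 * gamma * k(z, x) * (z - x); term_1 comes from the Z-Z kernel block
- # and term_2 from the Z-X block.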
-
- def _inner_prod_with_X(self, X: Any) -> float:
- """Compute the inner product between the RKME specification and X.
-
- Parameters
- ----------
- X : np.ndarray or torch.Tensor
- Raw data in np.ndarray format or torch.Tensor format.
-
- Returns
- -------
- np.ndarray
- The inner product values between the RKME specification and each sample in X.
- """
- beta = self.beta.reshape(1, -1).double().to(self.device)
- Z = self.z.double().to(self.device)
- if not torch.is_tensor(X):
- X = torch.from_numpy(X)
- X = X.to(self.device).double()
-
- v = torch_rbf_kernel(Z, X, self.gamma) * beta.T
- v = torch.sum(v, dim=0)
- return v.detach().cpu().numpy()
-
- def _sampling_candidates(self, N: int) -> torch.Tensor:
- """Generate a large set of candidates as preparation for herding.
-
- Parameters
- ----------
- N : int
- The number of herding candidates.
-
- Returns
- -------
- torch.Tensor
- The herding candidates.
- """
- beta = self.beta
- beta[beta < 0] = 0  # negative weights cannot be used for multinomial sampling
- beta = beta / torch.sum(beta)
- sample_assign = torch.multinomial(beta, N, replacement=True)
-
- sample_list = []
- for i, n in Counter(np.array(sample_assign.cpu())).items():
- for _ in range(n):
- sample_list.append(
- torch.normal(mean=self.z[i].reshape(self.z[i].shape[0], -1), std=0.25).reshape(1, -1)
- )
- if len(sample_list) > 1:
- return torch.cat(sample_list, dim=0)
- elif len(sample_list) == 1:
- return sample_list[0]
- else:
- logger.warning("Not enough candidates for herding!")
- return None
-
- def inner_prod(self, Phi2: RKMETableSpecification) -> float:
- """Compute the inner product between two RKME specifications.
-
- Parameters
- ----------
- Phi2 : RKMETableSpecification
- The other RKME specification.
-
- Returns
- -------
- float
- The inner product between two RKME specifications.
- """
- beta_1 = self.beta.reshape(1, -1).double().to(self.device)
- beta_2 = Phi2.beta.reshape(1, -1).double().to(self.device)
- Z1 = self.z.double().reshape(self.z.shape[0], -1).to(self.device)
- Z2 = Phi2.z.double().reshape(Phi2.z.shape[0], -1).to(self.device)
- v = torch.sum(torch_rbf_kernel(Z1, Z2, self.gamma) * (beta_1.T @ beta_2))
-
- return float(v)
-
- def dist(self, Phi2: RKMETableSpecification, omit_term1: bool = False) -> float:
- """Compute the maximum mean discrepancy (MMD) between two RKME specifications.
-
- Parameters
- ----------
- Phi2 : RKMETableSpecification
- The other RKME specification.
- omit_term1 : bool, optional
- True if the inner product of self with itself can be omitted, by default False.
-
- Returns
- -------
- float
- The squared MMD between the two RKME specifications.
- """
- if omit_term1:
- term1 = 0
- else:
- term1 = self.inner_prod(self)
- term2 = self.inner_prod(Phi2)
- term3 = Phi2.inner_prod(Phi2)
-
- return float(term1 - 2 * term2 + term3)
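-
- # A minimal sketch of comparing two specifications (illustrative only):
- #
- #     spec_a.generate_stat_spec_from_data(X_a, K=50)
- #     spec_b.generate_stat_spec_from_data(X_b, K=50)
- #     mmd = spec_a.dist(spec_b)   # smaller values indicate more similar distributions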
-
- def herding(self, T: int) -> torch.Tensor:
- """Iteratively sample examples from an unknown distribution with the help of its RKME specification.
-
- Parameters
- ----------
- T : int
- Total iteration number for sampling.
-
- Returns
- -------
- torch.Tensor
- A collection of examples which approximate the unknown distribution.
- """
- # Flatten z
- Z_shape = self.z.shape
- self.z = self.z.reshape(self.z.shape[0], -1)
-
- Nstart = 100 * T
- Xstart = self._sampling_candidates(Nstart).to(self.device)
- D = self.z[0].shape[0]
- S = torch.zeros((T, D)).to(self.device)
- fsX = torch.from_numpy(self._inner_prod_with_X(Xstart)).to(self.device)
- fsS = torch.zeros(Nstart).to(self.device)
- for i in range(T):
- if i > 0:
- fsS = torch.sum(torch_rbf_kernel(S[:i, :], Xstart, self.gamma), dim=0)
- fs = (i + 1) * fsX - fsS
- idx = torch.argmax(fs)
- S[i, :] = Xstart[idx, :]
-
- # Reshape to original dimensions
- self.z = self.z.reshape(Z_shape)
- S_shape = tuple([S.shape[0]] + list(Z_shape)[1:])
- S = S.reshape(S_shape)
-
- return S
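-
- # A minimal herding sketch (illustrative only): draw 50 pseudo-samples whose empirical
- # distribution approximates the one summarized by this specification:
- #
- #     samples = spec.herding(50)                    # torch.Tensor, same per-sample shape as z
- #     samples_np = samples.detach().cpu().numpy()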
-
- def save(self, filepath: str):
- """Save the computed RKME specification to a specified path in JSON format.
-
- Parameters
- ----------
- filepath : str
- The specified saving path.
- """
- save_path = filepath
- rkme_to_save = copy.deepcopy(self.__dict__)
- if torch.is_tensor(rkme_to_save["z"]):
- rkme_to_save["z"] = rkme_to_save["z"].detach().cpu().numpy()
- rkme_to_save["z"] = rkme_to_save["z"].tolist()
- if torch.is_tensor(rkme_to_save["beta"]):
- rkme_to_save["beta"] = rkme_to_save["beta"].detach().cpu().numpy()
- rkme_to_save["beta"] = rkme_to_save["beta"].tolist()
- rkme_to_save["device"] = "gpu" if rkme_to_save["cuda_idx"] != -1 else "cpu"
- with codecs.open(save_path, "w", encoding="utf-8") as fout:
- json.dump(rkme_to_save, fout, separators=(",", ":"))
-
- def load(self, filepath: str) -> bool:
- """Load an RKME specification file in JSON format from the specified path.
-
- Parameters
- ----------
- filepath : str
- The specified loading path.
-
- Returns
- -------
- bool
- True if the RKME is loaded successfully.
- """
- # Load JSON file:
- load_path = filepath
- if os.path.exists(load_path):
- with codecs.open(load_path, "r", encoding="utf-8") as fin:
- obj_text = fin.read()
- rkme_load = json.loads(obj_text)
- rkme_load["device"] = choose_device(rkme_load["cuda_idx"])
- rkme_load["z"] = torch.from_numpy(np.array(rkme_load["z"]))
- rkme_load["beta"] = torch.from_numpy(np.array(rkme_load["beta"]))
-
- for d in self.__dir__():
- if d in rkme_load.keys():
- setattr(self, d, rkme_load[d])
- return True
- else:
- return False
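-
- # A minimal save/load round trip (illustrative only; the file path is made up):
- #
- #     spec.save("rkme.json")
- #     spec_loaded = RKMETableSpecification()
- #     assert spec_loaded.load("rkme.json")
- #     assert np.allclose(spec.get_beta(), spec_loaded.get_beta())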
-
-
- class RKMEStatSpecification(RKMETableSpecification):
- """Alias of RKMETableSpecification, kept for backward compatibility.
- TODO: modify all learnware in the database and remove this alias.
- """
-
- pass
-
-
- def setup_seed(seed):
- """Fix random seeds to address reproducibility issues.
-
- Parameters
- ----------
- seed : int
- Random seed for torch, torch.cuda, numpy, random and cudnn libraries.
- """
- torch.manual_seed(seed)
- torch.cuda.manual_seed_all(seed)
- np.random.seed(seed)
- random.seed(seed)
- torch.backends.cudnn.deterministic = True
-
-
- def choose_device(cuda_idx=-1):
- """Let users choose the computational device, CPU or GPU.
-
- Parameters
- ----------
- cuda_idx : int, optional
- GPU index, by default -1 which stands for using CPU instead.
-
- Returns
- -------
- torch.device
- A torch.device object
- """
- cuda_idx = int(cuda_idx)
- if cuda_idx == -1 or not torch.cuda.is_available():
- device = torch.device("cpu")
- else:
- device_count = torch.cuda.device_count()
- if cuda_idx >= 0 and cuda_idx < device_count:
- device = torch.device(f"cuda:{cuda_idx}")
- else:
- device = torch.device("cuda:0")
- return device
-
-
- def torch_rbf_kernel(x1, x2, gamma) -> torch.Tensor:
- """Compute the RBF (Gaussian) kernel matrix with PyTorch for speed.
-
- Parameters
- ----------
- x1 : torch.Tensor
- First batch of vectors, with shape (n1, d).
- x2 : torch.Tensor
- Second batch of vectors, with shape (n2, d).
- gamma : float
- Bandwidth in the Gaussian kernel.
-
- Returns
- -------
- torch.Tensor
- The (n1, n2) kernel matrix with entries exp(-gamma * ||x1_i - x2_j||^2).
- """
- x1 = x1.double()
- x2 = x2.double()
- X12norm = torch.sum(x1**2, 1, keepdim=True) - 2 * x1 @ x2.T + torch.sum(x2**2, 1, keepdim=True).T
- return torch.exp(-X12norm * gamma)
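-
- # Sanity check (illustrative only): the result matches exp(-gamma * ||x1_i - x2_j||^2) entrywise:
- #
- #     a, b = torch.randn(4, 3), torch.randn(5, 3)
- #     K = torch_rbf_kernel(a, b, gamma=0.1)   # shape (4, 5), values in (0, 1]
- #     torch.allclose(K[0, 0], torch.exp(-0.1 * torch.sum((a[0] - b[0]) ** 2).double()))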
-
-
- def solve_qp(K: np.ndarray, C: np.ndarray):
- """Solver for the following quadratic programming (QP) problem:
-
- min_x   1/2 x^T K x - C^T x
- s.t.    1^T x = 1
- x >= 0
-
- Parameters
- ----------
- K : np.ndarray
- Parameter in the quadratic term.
- C : np.ndarray
- Parameter in the linear term.
-
- Returns
- -------
- torch.Tensor
- Solution to the QP problem.
- """
- n = K.shape[0]
- P = matrix(K.cpu().numpy())
- q = matrix(-C.cpu().numpy())
- G = matrix(-np.eye(n))
- h = matrix(np.zeros((n, 1)))
- A = matrix(np.ones((1, n)))
- b = matrix(np.ones((1, 1)))
-
- solvers.options["show_progress"] = False
- sol = solvers.qp(P, q, G, h, A, b)  # enforce sum(x) == 1
- # sol = solvers.qp(P, q, G, h)  # drop A, b to remove the sum-to-one constraint
- w = np.array(sol["x"])
- w = torch.from_numpy(w).reshape(-1)
-
- return w
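-
- # A minimal QP sketch (illustrative only): with K = I and C already on the simplex, the
- # constrained minimizer is just C:
- #
- #     K = torch.eye(3).double()
- #     C = torch.tensor([0.2, 0.3, 0.5]).double()
- #     w = solve_qp(K, C)   # ~[0.2, 0.3, 0.5]; w >= 0 and w.sum() == 1 up to solver tolerance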