|
- # -*- coding: utf-8 -*-
- """Utility functions for supporting time-series based outlier detection.
- """
-
- import numpy as np
- from sklearn.utils import check_array
-
-
- # def get_sub_sequences(X, window_size, step=1):
- # """Chop a univariate time series into sub sequences.
-
- # Parameters
- # ----------
- # X : numpy array of shape (n_samples,)
- # The input samples.
-
- # window_size : int
- # The moving window size.
-
- # step_size : int, optional (default=1)
- # The displacement for moving window.
-
- # Returns
- # -------
- # X_sub : numpy array of shape (valid_len, window_size)
- # The numpy matrix with each row stands for a subsequence.
- # """
- # X = check_array(X).astype(np.float)
- # n_samples = len(X)
-
- # # get the valid length
- # valid_len = get_sub_sequences_length(n_samples, window_size, step)
-
- # X_sub = np.zeros([valid_len, window_size])
- # # y_sub = np.zeros([valid_len, 1])
-
- # # exclude the edge
- # steps = list(range(0, n_samples, step))
- # steps = steps[:valid_len]
-
- # for idx, i in enumerate(steps):
- # X_sub[idx,] = X[i: i + window_size].ravel()
-
- # return X_sub
-
def get_sub_matrices(X, window_size, step=1, return_numpy=True, flatten=True,
                     flatten_order='F'):
    """Chop a multivariate time series into sub sequences (matrices).

    Parameters
    ----------
    X : numpy array of shape (n_samples, n_sequences)
        The input samples. Each row is one time step and each column is one
        sequence (variable).

    window_size : int
        The moving window size. Must be at least 1.

    step : int, optional (default=1)
        The displacement for moving window. Must be at least 1.

    return_numpy : bool, optional (default=True)
        If True, ``flatten`` decides whether the windows are returned as a
        2d or a 3d numpy array. If False, the unflattened 3d numpy array is
        returned regardless of ``flatten``.

    flatten : bool, optional (default=True)
        If True (and ``return_numpy`` is True), flatten every window so the
        returned array is 2d.

    flatten_order : str, optional (default='F')
        Decide the order of the flatten for multivariate sequences.
        'C' means to flatten each window in row-major (C-style) order.
        Any other value is treated as 'F', i.e. each window is flattened in
        column-major (Fortran-style) order.

    Returns
    -------
    X_sub : numpy array
        Shape (valid_len, window_size * n_sequences) when flattened, each
        row being one flattened submatrix; otherwise shape
        (valid_len, window_size, n_sequences).

    X_left_inds : numpy array of shape (valid_len,)
        Index of the first sample (inclusive) of every window.

    X_right_inds : numpy array of shape (valid_len,)
        Index one past the last sample (exclusive) of every window.

    Raises
    ------
    ValueError
        If ``window_size`` or ``step`` is smaller than 1.
    """
    if window_size < 1 or step < 1:
        raise ValueError(
            "window_size and step must be positive integers, got "
            "window_size={0} and step={1}.".format(window_size, step))

    # ``np.float`` was only an alias of the builtin ``float`` and has been
    # removed from NumPy; the builtin yields the same float64 dtype.
    X = check_array(X).astype(float)
    n_samples, n_sequences = X.shape

    # A window longer than the series yields no window at all; clamp so the
    # result is a well-shaped empty array rather than an obscure failure.
    valid_len = max(get_sub_sequences_length(n_samples, window_size, step), 0)

    X_left_inds = np.arange(valid_len) * step
    X_right_inds = X_left_inds + window_size

    # Row i of ``row_inds`` lists the sample indices covered by window i, so
    # a single fancy-indexing call gathers all windows into a 3d array of
    # shape (valid_len, window_size, n_sequences).
    row_inds = X_left_inds[:, np.newaxis] + np.arange(window_size)
    X_sub = X[row_inds]

    if not (return_numpy and flatten):
        return X_sub, X_left_inds, X_right_inds

    if flatten_order != 'C':
        # Column-major flattening of a window equals row-major flattening of
        # its transpose.
        X_sub = X_sub.transpose(0, 2, 1)

    X_flat = X_sub.reshape(valid_len, window_size * n_sequences)
    return X_flat, X_left_inds, X_right_inds
-
-
def get_sub_sequences_length(n_samples, window_size, step):
    """Pseudo chop a univariate time series into sub sequences. Return valid
    length only.

    Parameters
    ----------
    n_samples : int
        The number of samples in the time series.

    window_size : int
        The moving window size.

    step : int
        The displacement for moving window.

    Returns
    -------
    valid_len : int
        The number of subsequences. It is 0 when the window is longer than
        the series.

    """
    # Windows start at 0, step, 2 * step, ... and the last one must still
    # end inside the series, hence floor((n_samples - window_size) / step) + 1.
    valid_len = int((n_samples - window_size) // step) + 1

    # A window longer than the series would otherwise give a negative count.
    return max(valid_len, 0)
-
-
if __name__ == "__main__":
    # Small demo input: one row per time step, one column per sequence.
    X_train = np.asarray(
        [[3., 5], [5., 9], [7., 2], [42., 20], [8., 12], [10., 12], [12., 12],
         [18., 16], [20., 7], [18., 10], [23., 12], [22., 15]])

    window_size = 3

    # Slide a window of three time steps over the series, moving two steps
    # at a time, and flatten every window in row-major order.
    X_sub_3, X_left_inds, X_right_inds = get_sub_matrices(X_train, window_size,
                                                          step=2,
                                                          flatten_order='C')
|