wangwei
/
tods

 
			
							# -*- coding: utf-8 -*-
"""Utility functions for supporting time-series based outlier detection.
"""

import numpy as np
from sklearn.utils import check_array


# def get_sub_sequences(X, window_size, step=1):
#     """Chop a univariate time series into sub sequences.

#     Parameters
#     ----------
#     X : numpy array of shape (n_samples,)
#         The input samples.

#     window_size : int
#         The moving window size.

#     step_size : int, optional (default=1)
#         The displacement for moving window.

#     Returns
#     -------
#     X_sub : numpy array of shape (valid_len, window_size)
#         The numpy matrix with each row stands for a subsequence.
#     """
#     X = check_array(X).astype(np.float)
#     n_samples = len(X)

#     # get the valid length
#     valid_len = get_sub_sequences_length(n_samples, window_size, step)

#     X_sub = np.zeros([valid_len, window_size])
#     # y_sub = np.zeros([valid_len, 1])

#     # exclude the edge
#     steps = list(range(0, n_samples, step))
#     steps = steps[:valid_len]

#     for idx, i in enumerate(steps):
#         X_sub[idx,] = X[i: i + window_size].ravel()

#     return X_sub

def get_sub_matrices(X, window_size, step=1, return_numpy=True, flatten=True,
                     flatten_order='F'):
    """Chop a multivariate time series into sub sequences (matrices).

    A window of ``window_size`` consecutive rows is moved over ``X`` with a
    displacement of ``step`` rows. Only windows that fit completely inside
    ``X`` are produced.

    Parameters
    ----------
    X : numpy array of shape (n_samples, n_sequences)
        The input samples. Must be 2d; a univariate series has to be passed
        as a single column.

    window_size : int
        The moving window size (number of rows per window).

    step : int, optional (default=1)
        The displacement for moving window.

    return_numpy : bool, optional (default=True)
        Kept for backward compatibility. The windows are always returned as
        a numpy array; when False, ``flatten`` is ignored and the 3d array
        is returned.

    flatten : bool, optional (default=True)
        If True (and ``return_numpy`` is True), flatten every window so the
        returned array is 2d.

    flatten_order : str, optional (default='F')
        Decide the order of the flatten for multivariate sequences.
        'C' flattens each window in row-major (C-style) order.
        Any other value is handled as 'F', i.e. column-major
        (Fortran-style) order.

    Returns
    -------
    X_sub : numpy array
        Shape (valid_len, window_size * n_sequences) when flattened, with
        each row being one flattened window; otherwise shape
        (valid_len, window_size, n_sequences).

    X_left_inds : numpy array of shape (valid_len,)
        Row index in ``X`` where each window starts (inclusive).

    X_right_inds : numpy array of shape (valid_len,)
        Row index in ``X`` where each window ends (exclusive).

    Raises
    ------
    ValueError
        If ``window_size`` or ``step`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError(
            "window_size must be a positive integer, got {}.".format(
                window_size))
    if step < 1:
        raise ValueError(
            "step must be a positive integer, got {}.".format(step))

    # np.float was removed in NumPy 1.24; np.float64 is the dtype it aliased.
    X = check_array(X).astype(np.float64)
    n_samples, n_sequences = X.shape[0], X.shape[1]

    # Number of complete windows. Clamp at zero so that a window longer than
    # the series gives an empty result instead of truncated windows.
    valid_len = max(get_sub_sequences_length(n_samples, window_size, step), 0)

    X_left_inds = np.arange(valid_len) * step
    X_right_inds = X_left_inds + window_size

    # Row indices of every window, shape (valid_len, window_size). Indexing
    # X with it gathers all windows at once, also when valid_len is zero.
    row_inds = X_left_inds[:, np.newaxis] + np.arange(window_size)
    X_sub = X[row_inds]

    if return_numpy and flatten:
        flat_width = window_size * n_sequences
        if flatten_order == 'C':
            # Row-major: the rows of a window are laid out one after another.
            X_flat = X_sub.reshape(valid_len, flat_width)
        else:
            # Column-major: swap the two window axes first so the columns of
            # a window are laid out one after another.
            X_flat = X_sub.transpose(0, 2, 1).reshape(valid_len, flat_width)
        return X_flat, X_left_inds, X_right_inds

    return X_sub, X_left_inds, X_right_inds


def get_sub_sequences_length(n_samples, window_size, step=1):
    """Pseudo chop a time series into sub sequences. Return valid length only.

    Counts how many windows of ``window_size`` samples fit completely into a
    series of ``n_samples`` samples when the window is moved ``step`` samples
    at a time, without building the windows.

    Parameters
    ----------
    n_samples : int
        The number of samples in the series.

    window_size : int
        The moving window size.

    step : int, optional (default=1)
        The displacement for moving window.

    Returns
    -------
    valid_len : int
        The number of subsequences. Zero when the window is longer than the
        series.

    Raises
    ------
    ValueError
        If ``step`` is smaller than 1.
    """
    if step < 1:
        raise ValueError(
            "step must be a positive integer, got {}.".format(step))

    # No complete window fits; a count can never be negative.
    if window_size > n_samples:
        return 0

    # Floor division keeps the arithmetic exact for integers.
    return int((n_samples - window_size) // step) + 1


if __name__ == "__main__":
    # Small manual demo: window two sample series and print the result.
    univariate_series = np.asarray(
        [3., 4., 8., 16, 18, 13., 22., 36., 59., 128, 62, 67, 78,
         100]).reshape(-1, 1)

    multivariate_series = np.asarray(
        [[3., 5], [5., 9], [7., 2], [42., 20], [8., 12], [10., 12], [12., 12],
         [18., 16], [20., 7], [18., 10], [23., 12], [22., 15]])

    window_size = 3

    for label, X_train in (("univariate", univariate_series),
                           ("multivariate", multivariate_series)):
        X_sub, X_left_inds, X_right_inds = get_sub_matrices(
            X_train, window_size, step=2, flatten_order='C')
        print("{}: input shape {}, windows shape {}".format(
            label, X_train.shape, X_sub.shape))
        print(X_sub)
        print("left indices :", X_left_inds)
        print("right indices:", X_right_inds)