# -*- coding: utf-8 -*-
"""Autoregressive LSTM model for univariate time series outlier detection.

An LSTM is trained to predict each observation from its predecessor.  The
relative prediction error is aggregated over sliding windows of
``min_attack_time`` points and combined with the fraction of over-threshold
errors (the "danger coefficient") to score collective outliers.
"""

import numpy as np
from sklearn.utils import check_array
from sklearn.utils.validation import check_is_fitted
from scipy.special import erf
from sklearn.preprocessing import MinMaxScaler

from detection_algorithm.core.CollectiveBase import CollectiveBaseDetector
# from tod.utility import get_sub_matrices

from keras.layers import Dense, LSTM
from keras.models import Sequential


class LSTMOutlierDetector(CollectiveBaseDetector):
    """LSTM-based collective outlier detector for univariate series.

    Parameters
    ----------
    contamination : float, optional (default=0.1)
        Expected proportion of outliers; forwarded to the base detector,
        which derives ``threshold_`` from it.
    train_contamination : float, optional (default=0.0)
        Estimated proportion of anomalies in the *training* data.  When
        (effectively) zero, the maximum training relative error is used as
        the error threshold; otherwise the matching percentile is used.
    min_attack_time : int, optional (default=5)
        Sliding-window length; also used as the base detector's window size.
    danger_coefficient_weight : float, optional (default=0.5)
        Weight of the danger coefficient vs. the averaged relative error in
        the final anomaly score.
    loss, optimizer, epochs, batch_size, dropout_rate, feature_dim,
    hidden_dim, n_hidden_layer, activation :
        Keras model hyper-parameters, passed through to ``Sequential``.
    diff_group_method : {'average', 'max', 'min'}, optional
        How per-window statistics are aggregated back to per-point scores.
    """

    def __init__(self, contamination=0.1,
                 train_contamination=0.0,
                 min_attack_time=5,
                 danger_coefficient_weight=0.5,
                 loss='mean_squared_error',
                 optimizer='adam',
                 epochs=10,
                 batch_size=8,
                 dropout_rate=0.0,
                 feature_dim=1,
                 hidden_dim=8,
                 n_hidden_layer=0,
                 activation=None,
                 diff_group_method='average'):
        super(LSTMOutlierDetector, self).__init__(contamination=contamination,
                                                  window_size=min_attack_time,
                                                  step_size=1)

        self.train_contamination = train_contamination
        self.min_attack_time = min_attack_time
        self.danger_coefficient_weight = danger_coefficient_weight
        self.relative_error_threshold = None  # set in fit()

        self.loss = loss
        self.optimizer = optimizer
        self.epochs = epochs
        self.batch_size = batch_size
        self.dropout_rate = dropout_rate
        self.feature_dim = feature_dim
        self.hidden_dim = hidden_dim
        self.n_hidden_layer = n_hidden_layer
        self.diff_group_method = diff_group_method

        # Build the autoregressive LSTM.  Every LSTM layer that feeds
        # another LSTM must emit the full sequence (return_sequences=True);
        # the original code omitted this, so any n_hidden_layer > 0 raised
        # a rank error in Keras.
        self.model_ = Sequential()
        self.model_.add(LSTM(units=hidden_dim,
                             input_shape=(feature_dim, 1),
                             dropout=dropout_rate,
                             activation=activation,
                             return_sequences=n_hidden_layer > 0))
        for layer_idx in range(n_hidden_layer):
            is_last_lstm = layer_idx == n_hidden_layer - 1
            self.model_.add(LSTM(units=hidden_dim,
                                 dropout=dropout_rate,
                                 activation=activation,
                                 return_sequences=not is_last_lstm))
        # Linear read-out predicting the next observation.
        self.model_.add(Dense(units=feature_dim, activation=None))
        self.model_.compile(loss=self.loss, optimizer=self.optimizer)

    def fit(self, X: np.array, y=None) -> object:
        """Fit detector. y is ignored in unsupervised methods.

        Parameters
        ----------
        X : numpy array of shape (n_samples, n_features)
            The input samples.
        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        self : object
            Fitted estimator.
        """
        # np.float was removed in NumPy 1.24; the builtin float is the
        # exact type the deprecated alias referred to.
        X = check_array(X).astype(float)
        self._set_n_classes(None)
        X_buf, y_buf = self._get_sub_matrices(X)

        # Fit the LSTM model on (x_t -> x_{t+1}) pairs.
        self.model_.fit(X_buf, y_buf,
                        epochs=self.epochs, batch_size=self.batch_size)

        relative_error = self._relative_error(X)

        # With clean training data, the largest observed error defines the
        # threshold; otherwise trim the assumed contaminated fraction.
        if self.train_contamination < 1e-6:
            self.relative_error_threshold = max(relative_error)
        else:
            self.relative_error_threshold = np.percentile(
                relative_error, 100 * (1 - self.train_contamination))

        (self.decision_scores_,
         self.left_inds_,
         self.right_inds_) = self.decision_function(X)
        self._process_decision_scores()
        return self

    def _get_sub_matrices(self, X: np.array):
        """Return (inputs, targets) for one-step-ahead prediction.

        Inputs are X[:-1] expanded to shape (n-1, n_features, 1) for the
        LSTM; targets are the next observations X[1:].
        """
        return np.expand_dims(X[:-1], axis=2), X[1:]

    def _relative_error(self, X: np.array):
        """Per-step relative prediction error ||y_hat - y|| / ||y + eps||."""
        X = check_array(X).astype(float)  # np.float removed in NumPy 1.24
        X_buf, y_buf = self._get_sub_matrices(X)
        y_predict = self.model_.predict(X_buf)
        # 1e-6 guards against division by zero for all-zero targets.
        relative_error = (np.linalg.norm(y_predict - y_buf, axis=1) /
                          np.linalg.norm(y_buf + 1e-6, axis=1)).ravel()
        return relative_error

    def decision_function(self, X: np.array):
        """Predict raw anomaly scores of X using the fitted detector.

        The anomaly score of an input sample is computed based on the
        fitted detector. For consistency, outliers are assigned with
        higher anomaly scores.

        Parameters
        ----------
        X : numpy array of shape (n_samples, n_features)
            The input samples. Sparse matrices are accepted only
            if they are supported by the base estimator.

        Returns
        -------
        anomaly_scores : numpy array of shape (n_samples,)
            The anomaly score of the input samples.
        """
        check_is_fitted(self, ['model_'])
        relative_error = self._relative_error(X)
        # 1 where the error exceeds the training threshold, else 0.
        error_num_buf = (relative_error
                         > self.relative_error_threshold).astype(int)

        if self.diff_group_method not in ('max', 'min', 'average'):
            raise ValueError(self.diff_group_method, "is not a valid method")

        n_points = len(relative_error)
        n_windows = n_points - self.min_attack_time + 1
        # Left inds start "too far right", right inds "too far left", so the
        # first covering window always claims each position.
        relative_error_left_inds = np.ones((n_points,)) * n_points
        relative_error_right_inds = np.zeros((n_points,))

        if self.diff_group_method == 'average':
            danger_coefficient = np.zeros(relative_error.shape)
            averaged_relative_error = np.zeros(relative_error.shape)
            calculated_times = np.zeros(relative_error.shape)

            for i in range(n_windows):
                window_dc = (error_num_buf[i:i + self.min_attack_time].sum()
                             / self.min_attack_time)
                window_are = (relative_error[i:i + self.min_attack_time].sum()
                              / self.min_attack_time)
                for j in range(self.min_attack_time):
                    averaged_relative_error[i + j] += window_are
                    danger_coefficient[i + j] += window_dc
                    calculated_times[i + j] += 1
                    # Track the union of all windows covering position i+j.
                    # BUG FIX: the original fell back to the *left* index
                    # array when updating the right boundary, corrupting the
                    # returned right indices.
                    relative_error_left_inds[i + j] = min(
                        relative_error_left_inds[i + j], i)
                    relative_error_right_inds[i + j] = max(
                        relative_error_right_inds[i + j],
                        i + self.min_attack_time)

            danger_coefficient /= calculated_times
            averaged_relative_error /= calculated_times

        else:  # 'max' or 'min'
            danger_coefficient = np.zeros(relative_error.shape)
            averaged_relative_error = np.zeros(relative_error.shape)
            if self.diff_group_method == 'min':
                # Start at +inf so any real window value replaces it.
                danger_coefficient += float('inf')
                averaged_relative_error += float('inf')

            for i in range(n_windows):
                window_dc = (error_num_buf[i:i + self.min_attack_time].sum()
                             / self.min_attack_time)
                window_are = (relative_error[i:i + self.min_attack_time].sum()
                              / self.min_attack_time)

                if self.diff_group_method == 'max':
                    for j in range(self.min_attack_time):
                        if (window_are > averaged_relative_error[i + j]
                                or window_dc > danger_coefficient[i + j]):
                            relative_error_left_inds[i + j] = i
                            relative_error_right_inds[i + j] = \
                                i + self.min_attack_time
                        if window_are > averaged_relative_error[i + j]:
                            averaged_relative_error[i + j] = window_are
                        if window_dc > danger_coefficient[i + j]:
                            danger_coefficient[i + j] = window_dc
                else:
                    for j in range(self.min_attack_time):
                        if (window_are < averaged_relative_error[i + j]
                                or window_dc < danger_coefficient[i + j]):
                            relative_error_left_inds[i + j] = i
                            relative_error_right_inds[i + j] = \
                                i + self.min_attack_time
                        if window_are < averaged_relative_error[i + j]:
                            averaged_relative_error[i + j] = window_are
                        if window_dc < danger_coefficient[i + j]:
                            danger_coefficient[i + j] = window_dc

        # Final score: convex combination of the two per-point statistics.
        pred_score = (danger_coefficient * self.danger_coefficient_weight
                      + averaged_relative_error
                      * (1 - self.danger_coefficient_weight))

        return pred_score, relative_error_left_inds, relative_error_right_inds


if __name__ == "__main__":
    X_train = np.asarray(
        [3., 4., 8., 16, 18, 13., 22., 36., 59., 128, 62, 67, 78,
         100]).reshape(-1, 1)

    X_test = np.asarray(
        [3., 4., 8.6, 13.4, 22.5, 17, 19.2, 36.1, 127, -23,
         59.2]).reshape(-1, 1)

    # print(X_train.shape, X_test.shape)

    clf = LSTMOutlierDetector(contamination=0.1)
    clf.fit(X_train)
    # pred_scores = clf.decision_function(X_test)
    pred_labels, left_inds, right_inds = clf.predict(X_test)

    print(pred_labels.shape, left_inds.shape, right_inds.shape)
    print(clf.threshold_)
    # print(np.percentile(pred_scores, 100 * 0.9))

    # print('pred_scores: ', pred_scores)
    print('pred_labels: ', pred_labels)