Browse Source

improved coverall

Former-commit-id: 5692ce5aeb [formerly 92b6a4a9be] [formerly 100f54bd2c [formerly 7d943e83e5]] [formerly 52342837ed [formerly e71839027f] [formerly c237dd36fd [formerly 11cb093618]]] [formerly fd062fc23a [formerly 6a6b4a150f] [formerly e096ce11f8 [formerly cf13242c0a]] [formerly f0c3aa0ffd [formerly 89ec3e5172] [formerly 286da8ad39 [formerly a600289eb5]]]] [formerly 7f7e01bf10 [formerly 247000c6e8] [formerly b452a62072 [formerly fb9ab7a1ef]] [formerly cd701612a9 [formerly d14fbc679a] [formerly 66031aef18 [formerly 6c4de12160]]] [formerly 01fedbe315 [formerly 336d2e1279] [formerly 2ddf5a665b [formerly de54827ecb]] [formerly e60d88473c [formerly dd902b5ea1] [formerly 163d171847 [formerly c39520436b]]]]] [formerly 84799712d8 [formerly ec8a77f299] [formerly b4a2fc3949 [formerly 61e0c8d71c]] [formerly 5013c7ce26 [formerly 34fafb532c] [formerly 1ed4c0352b [formerly 802c472575]]] [formerly fdfff9257f [formerly 102db38420] [formerly 57f98625d8 [formerly 1ec39236c3]] [formerly ddb877dd26 [formerly 1214001406] [formerly b2b0d4b497 [formerly 79c3789e77]]]] [formerly 414c7153c0 [formerly 48343fc6a6] [formerly f41181e8cf [formerly 6d53476bde]] [formerly ac031546fa [formerly 281b014a76] [formerly 87abec8fa8 [formerly ce53023fcd]]] [formerly ad370ea7b6 [formerly 8738b6d345] [formerly 406064509d [formerly 2ad4b07aee]] [formerly bc654cf9b8 [formerly df9e15b1e0] [formerly 68fc5c2da0 [formerly 117b45c7ed]]]]]]
Former-commit-id: f1be9db9d3 [formerly 97c2ed2436] [formerly 96c8492928 [formerly 9f9e083deb]] [formerly 7a63119eeb [formerly f7096aa45e] [formerly 9c91ffffc7 [formerly c96766b3cd]]] [formerly 20956221cc [formerly b1ab129591] [formerly 1d990f5a7f [formerly c197e948f1]] [formerly fb01d5d960 [formerly d930c1682d] [formerly fbc105ec8b [formerly c943b019c4]]]] [formerly 0833e662d6 [formerly 28881f83e4] [formerly 8f94d1b92b [formerly 154b0e86df]] [formerly 116322ddb1 [formerly f3795307d7] [formerly f8bf101b55 [formerly ae7fe0a30a]]] [formerly af71574326 [formerly 23f92c302f] [formerly 160930a545 [formerly 0946c0009a]] [formerly 1435701eda [formerly d740215776] [formerly 68fc5c2da0]]]]
Former-commit-id: 91012233f5 [formerly 6fbb4a5196] [formerly 6a20c95004 [formerly b326ba2217]] [formerly e47d4706ad [formerly b2a6cd6bd1] [formerly 228f65385c [formerly 9bace0ae28]]] [formerly 89112c1d2a [formerly 39cb39c54c] [formerly 909a3d1f98 [formerly c5a936e79f]] [formerly 4348b4d011 [formerly fd5693b073] [formerly f39b6f7bfe [formerly cea1b9dd82]]]]
Former-commit-id: d12163fd42 [formerly 5fee890f54] [formerly 203b5d978a [formerly 9ab6d5c86c]] [formerly 5821c6e118 [formerly 311da3cab1] [formerly 2086ec36cc [formerly 46f7f90e8d]]]
Former-commit-id: 08561e07d6 [formerly ccc0af6dd6] [formerly c93fa0d038 [formerly 76411423ef]]
Former-commit-id: 158f073b79 [formerly 302de019b2]
Former-commit-id: e0d41474cf
master
YileAllenChen1 5 years ago
parent
commit
037cdb9ba2
11 changed files with 249 additions and 797 deletions
  1. +3
    -1
      examples/run_pipeline.py
  2. +3
    -3
      tods/data_processing/TimeIntervalTransform.py
  3. +2
    -2
      tods/detection_algorithm/DeepLog.py
  4. +0
    -189
      tods/detection_algorithm/MP.py
  5. +14
    -53
      tods/detection_algorithm/MatrixProfile.py
  6. +2
    -2
      tods/detection_algorithm/PyodSOD.py
  7. +3
    -4
      tods/detection_algorithm/UODBasePrimitive.py
  8. +176
    -117
      tods/feature_analysis/AutoCorrelation.py
  9. +0
    -376
      tods/feature_analysis/BKFilter.py
  10. +24
    -38
      tods/tests/test_Autocorrelation.py
  11. +22
    -12
      tods/tests/test_DeepLog.py

+ 3
- 1
examples/run_pipeline.py View File

@@ -15,8 +15,10 @@ parser.add_argument('--target_index', type=int, default=6,
help='Index of the ground truth (for evaluation)')
parser.add_argument('--metric',type=str, default='F1_MACRO',
help='Evaluation Metric (F1, F1_MACRO)')
parser.add_argument('--pipeline_path', default=os.path.join(this_path, '../tods/resources/default_pipeline.json'),
parser.add_argument('--pipeline_path', default=os.path.join(this_path, '../example_pipeline.json'),
help='Input the path of the pre-built pipeline description')
# parser.add_argument('--pipeline_path', default=os.path.join(this_path, '../tods/resources/default_pipeline.json'),
# help='Input the path of the pre-built pipeline description')

args = parser.parse_args()



+ 3
- 3
tods/data_processing/TimeIntervalTransform.py View File

@@ -112,15 +112,15 @@ class TimeIntervalTransform(transformer.TransformerPrimitiveBase[Inputs, Outputs
Container DataFrame with resampled time intervals
"""

if self.hyperparams['time_interval'] is None:
if self.hyperparams['time_interval'] is None: # pragma: no cover
time_interval = '5T'
else:
time_interval = self.hyperparams['time_interval']

try:
outputs = self._time_interval_transform(inputs, hyperparams)
#print(outputs)
except Exception as e:
except Exception as e: # pragma: no cover
self.logger.error("Error in Performing Time Interval Transform",e)

self._update_metadata(outputs)


+ 2
- 2
tods/detection_algorithm/DeepLog.py View File

@@ -304,7 +304,7 @@ class DeeplogLstm(BaseDetector):
if(layers == self.stacked_layers -1 ):
model.add(LSTM(self.hidden_size, return_sequences=False,dropout = self.dropout_rate))
continue
model.add(LSTM(self.hidden_size,return_sequences=True,dropout = self.dropout_rate))
model.add(LSTM(self.hidden_size,return_sequences=True,dropout = self.dropout_rate)) # pragma: no cover
#output layer

model.add(Dense(self.n_features_))
@@ -364,7 +364,7 @@ class DeeplogLstm(BaseDetector):
if self.preprocessing:
self.scaler_ = StandardScaler()
X_norm = self.scaler_.fit_transform(X)
else:
else: # pragma: no cover
X_norm = np.copy(X)

X_data = []


+ 0
- 189
tods/detection_algorithm/MP.py View File

@@ -1,189 +0,0 @@
from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple
from numpy import ndarray
from collections import OrderedDict
from scipy import sparse
import os
import sklearn
import numpy
import typing

# Custom import commands if any
import warnings
import numpy as np
from sklearn.utils import check_array
from sklearn.exceptions import NotFittedError
# from numba import njit
from pyod.utils.utility import argmaxn

from d3m.container.numpy import ndarray as d3m_ndarray
from d3m.container import DataFrame as d3m_dataframe
from d3m.metadata import hyperparams, params, base as metadata_base
from d3m import utils
from d3m.base import utils as base_utils
from d3m.exceptions import PrimitiveNotFittedError
from d3m.primitive_interfaces.base import CallResult, DockerContainer

# from d3m.primitive_interfaces.supervised_learning import SupervisedLearnerPrimitiveBase
from d3m.primitive_interfaces.unsupervised_learning import UnsupervisedLearnerPrimitiveBase
from d3m.primitive_interfaces.transformer import TransformerPrimitiveBase

from d3m.primitive_interfaces.base import ProbabilisticCompositionalityMixin, ContinueFitMixin
from d3m import exceptions
import pandas
import uuid

from d3m import container, utils as d3m_utils

from .UODBasePrimitive import Params_ODBase, Hyperparams_ODBase, UnsupervisedOutlierDetectorBase
import stumpy
# from typing import Union

Inputs = d3m_dataframe
Outputs = d3m_dataframe



class Params(Params_ODBase):
######## Add more Attributes #######
pass


class Hyperparams(Hyperparams_ODBase):
######## Add more Attributes #######
pass

class MP:
"""
This is the class for matrix profile function
"""
def __init__(self, window_size):
self._window_size = window_size
return

def produce(self, data):

"""

Args:
data: dataframe column
Returns:
nparray

"""
transformed_columns=utils.pandas.DataFrame()
#transformed_columns=d3m_dataframe
for col in data.columns:
output = stumpy.stump(data[col], m = self._window_size)
output = pd.DataFrame(output)
#print("output", output)
transformed_columns=pd.concat([transformed_columns,output],axis=1)
#transformed_columns[col]=output
#print(transformed_columns)
return transformed_columns

class MatrixProfile(UnsupervisedOutlierDetectorBase[Inputs, Outputs, Params, Hyperparams]):
"""

A primitive that performs matrix profile on a DataFrame using Stumpy package
Stumpy documentation: https://stumpy.readthedocs.io/en/latest/index.html

Parameters
----------
T_A : ndarray
The time series or sequence for which to compute the matrix profile
m : int
Window size
T_B : ndarray
The time series or sequence that contain your query subsequences
of interest. Default is `None` which corresponds to a self-join.
ignore_trivial : bool
Set to `True` if this is a self-join. Otherwise, for AB-join, set this
to `False`. Default is `True`.
Returnsfdsf
-------
out : ndarray
The first column consists of the matrix profile, the second column
consists of the matrix profile indices, the third column consists of
the left matrix profile indices, and the fourth column consists of
the right matrix profile indices.
"""

metadata = metadata_base.PrimitiveMetadata({
'__author__': "DATA Lab @Texas A&M University",
'name': "Matrix Profile",
#'python_path': 'd3m.primitives.tods.feature_analysis.matrix_profile',
'python_path': 'd3m.primitives.tods.detection_algorithm.matrix_profile',
'source': {'name': "DATALAB @Taxes A&M University", 'contact': 'mailto:khlai037@tamu.edu',
'uris': ['https://gitlab.com/lhenry15/tods/-/blob/Yile/anomaly-primitives/anomaly_primitives/MatrixProfile.py']},
'algorithm_types': [metadata_base.PrimitiveAlgorithmType.MATRIX_PROFILE,],
'primitive_family': metadata_base.PrimitiveFamily.FEATURE_CONSTRUCTION,
'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'MatrixProfilePrimitive')),
'hyperparams_to_tune': ['window_size'],
'version': '0.0.2',
})


def __init__(self, *,
hyperparams: Hyperparams, #
random_seed: int = 0,
docker_containers: Dict[str, DockerContainer] = None) -> None:
super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers)

self._clf = MP(window_size=hyperparams['window_size'])

def set_training_data(self, *, inputs: Inputs) -> None:
"""
Set training data for outlier detection.
Args:
inputs: Container DataFrame

Returns:
None
"""
super().set_training_data(inputs=inputs)

def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]:
"""
Fit model with training data.
Args:
*: Container DataFrame. Time series data up to fit.

Returns:
None
"""
return super().fit()

def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
"""
Process the testing data.
Args:
inputs: Container DataFrame. Time series data up to outlier detection.

Returns:
Container DataFrame
1 marks Outliers, 0 marks normal.
"""
return super().produce(inputs=inputs, timeout=timeout, iterations=iterations)

def get_params(self) -> Params:
"""
Return parameters.
Args:
None

Returns:
class Params
"""
return super().get_params()

def set_params(self, *, params: Params) -> None:
"""
Set parameters for outlier detection.
Args:
params: class Params

Returns:
None
"""
super().set_params(params=params)

+ 14
- 53
tods/detection_algorithm/MatrixProfile.py View File

@@ -114,14 +114,10 @@ class MP:

"""
transformed_columns=utils.pandas.DataFrame()
#transformed_columns=d3m_dataframe
for col in data.columns:
output = stumpy.stump(data[col], m = self._window_size)
output = pd.DataFrame(output)
#print("output", output)
transformed_columns=pd.concat([transformed_columns,output],axis=1)
#transformed_columns[col]=output
#print(transformed_columns)
return transformed_columns

class MatrixProfile(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
@@ -199,29 +195,29 @@ class MatrixProfile(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperp

if len(self._training_indices) > 0:
self._fitted = True
else:
else: # pragma: no cover
if self.hyperparams['error_on_no_input']:
raise RuntimeError("No input columns were selected")
self.logger.warn("No input columns were selected")

if not self._fitted:
if not self._fitted: # pragma: no cover
raise PrimitiveNotFittedError("Primitive not fitted.")
sk_inputs = inputs
if self.hyperparams['use_semantic_types']:
if self.hyperparams['use_semantic_types']: # pragma: no cover
sk_inputs = inputs.iloc[:, self._training_indices]
output_columns = []
if len(self._training_indices) > 0:
sk_output = self._clf.produce(sk_inputs)
if sparse.issparse(sk_output):
if sparse.issparse(sk_output): # pragma: no cover
sk_output = sk_output.toarray()
outputs = self._wrap_predictions(inputs, sk_output)
if len(outputs.columns) == len(self._input_column_names):
if len(outputs.columns) == len(self._input_column_names): # pragma: no cover
outputs.columns = self._input_column_names
output_columns = [outputs]

else:
else: # pragma: no cover
if self.hyperparams['error_on_no_input']:
raise RuntimeError("No input columns were selected")
self.logger.warn("No input columns were selected")
@@ -230,46 +226,17 @@ class MatrixProfile(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperp
add_index_columns=self.hyperparams['add_index_columns'],
inputs=inputs, column_indices=self._training_indices,
columns_list=output_columns)
#print(outputs)
#CallResult(outputs)
#print("___")
print(outputs.columns)

#print(outputs.columns)
#outputs.columns = [str(x) for x in outputs.columns]

return CallResult(outputs)

# assert isinstance(inputs, container.DataFrame), type(container.DataFrame)
# _, self._columns_to_produce = self._get_columns_to_fit(inputs, self.hyperparams)
# #print("columns_to_produce ", self._columns_to_produce)
# outputs = inputs
# if len(self._columns_to_produce) > 0:
# for col in self.hyperparams['use_columns']:
# output = self._clf.produce(inputs.iloc[ : ,col])
# outputs = pd.concat((outputs, pd.DataFrame({inputs.columns[col]+'_matrix_profile': output[:,0],
# inputs.columns[col]+'_matrix_profile_indices': output[:,1],
# inputs.columns[col]+'_left_matrix_profile_indices': output[:,2],
# inputs.columns[col]+'_right_matrix_profile_indices': output[:,3]})), axis = 1)

# else:
# if self.hyperparams['error_on_no_input']:
# raise RuntimeError("No input columns were selected")
# self.logger.warn("No input columns were selected")

# #print(outputs)
# self._update_metadata(outputs)

# return base.CallResult(outputs)



def _update_metadata(self, outputs):
def _update_metadata(self, outputs): # pragma: no cover
outputs.metadata = outputs.metadata.generate(outputs)
@classmethod
def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams):
def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): # pragma: no cover

"""

@@ -286,11 +253,11 @@ class MatrixProfile(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperp
if not hyperparams['use_semantic_types']:
return inputs, list(range(len(inputs.columns)))

inputs_metadata = inputs.metadata
inputs_metadata = inputs.metadata


def can_produce_column(column_index: int) -> bool:
def can_produce_column(column_index: int) -> bool:
return cls._can_produce_column(inputs_metadata, column_index, hyperparams)

columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata,
@@ -303,11 +270,11 @@ class MatrixProfile(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperp
Encountered error: when hyperparams['use_columns'] = (2,3) and hyperparams['exclude_columns'] is (1,2)
columns_to_produce is still [2]
"""
return inputs.iloc[:, columns_to_produce], columns_to_produce
return inputs.iloc[:, columns_to_produce], columns_to_produce

@classmethod
def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool:
def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: # pragma: no cover

"""

@@ -327,17 +294,11 @@ class MatrixProfile(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperp
accepted_semantic_types = set()
accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute")

# print(column_metadata)
# print(column_metadata['structural_type'], accepted_structural_types)

if not issubclass(column_metadata['structural_type'], accepted_structural_types):
return False

semantic_types = set(column_metadata.get('semantic_types', []))

# print(column_metadata)
# print(semantic_types, accepted_semantic_types)

if len(semantic_types) == 0:
cls.logger.warning("No semantic types found in column metadata")
return False


+ 2
- 2
tods/detection_algorithm/PyodSOD.py View File

@@ -173,7 +173,7 @@ class SODPrimitive(UnsupervisedOutlierDetectorBase[Inputs, Outputs, Params, Hype
"""
return super().produce(inputs=inputs, timeout=timeout, iterations=iterations)

def get_params(self) -> Params:
def get_params(self) -> Params: # pragma: no cover
"""
Return parameters.
Args:
@@ -184,7 +184,7 @@ class SODPrimitive(UnsupervisedOutlierDetectorBase[Inputs, Outputs, Params, Hype
"""
return super().get_params()

def set_params(self, *, params: Params) -> None:
def set_params(self, *, params: Params) -> None: # pragma: no cover
"""
Set parameters for outlier detection.
Args:


+ 3
- 4
tods/detection_algorithm/UODBasePrimitive.py View File

@@ -256,9 +256,9 @@ class UnsupervisedOutlierDetectorBase(UnsupervisedLearnerPrimitiveBase[Inputs, O

if len(self._training_indices) > 0:

# print('Fit: ', self._clf)
# print('Fit: ', self._training_inputs.values.shape)
# print('Fit: ', self._clf.fit(self._training_inputs.values))
#print('Fit: ', self._clf)
#print('Fit: ', self._training_inputs.values.shape)
#print('Fit: ', self._clf.fit(self._training_inputs.values))

self._clf.fit(X=self._training_inputs.values, **self._clf_fit_parameter)
self._fitted = True
@@ -314,7 +314,6 @@ class UnsupervisedOutlierDetectorBase(UnsupervisedLearnerPrimitiveBase[Inputs, O

else:
sk_output, _, _ = self._clf.predict(sk_inputs.values)

# print(sk_output)
if sparse.issparse(sk_output):
sk_output = sk_output.toarray()


+ 176
- 117
tods/feature_analysis/AutoCorrelation.py View File

@@ -1,6 +1,11 @@
import os
import sklearn
import numpy
import typing
import collections
import time
from scipy import sparse
from numpy import ndarray
from collections import OrderedDict
from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple

import numpy as np
@@ -11,17 +16,43 @@ from numpy import ndarray
from collections import OrderedDict
from common_primitives import dataframe_utils, utils

from d3m import utils
from d3m import container
from d3m.base import utils as base_utils
from d3m.exceptions import PrimitiveNotFittedError
from d3m.container import DataFrame as d3m_dataframe
from d3m.container.numpy import ndarray as d3m_ndarray
from d3m.primitive_interfaces import base, transformer
from d3m import container, exceptions, utils as d3m_utils
from d3m.metadata import base as metadata_base, hyperparams
from d3m.metadata import hyperparams, params, base as metadata_base
from d3m.primitive_interfaces.base import CallResult, DockerContainer
from d3m.primitive_interfaces.unsupervised_learning import UnsupervisedLearnerPrimitiveBase

from statsmodels.tsa.stattools import acf


# import os.path


__all__ = ('AutoCorrelation',)

Inputs = container.DataFrame
Outputs = container.DataFrame

Inputs = d3m_dataframe
Outputs = d3m_dataframe

class PrimitiveCount:
primitive_no = 0

class Params(params.Params):
components_: Optional[ndarray]
explained_variance_ratio_: Optional[ndarray]
explained_variance_: Optional[ndarray]
singular_values_: Optional[ndarray]
input_column_names: Optional[Any]
target_names_: Optional[Sequence[Any]]
training_indices_: Optional[Sequence[int]]
target_column_indices_: Optional[Sequence[int]]
target_columns_metadata_: Optional[List[OrderedDict]]


class Hyperparams(hyperparams.Hyperparams):
@@ -96,7 +127,7 @@ class Hyperparams(hyperparams.Hyperparams):
)
return_result = hyperparams.Enumeration(
values=['append', 'replace', 'new'],
default='new',
default='append',
semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.",
)
@@ -134,6 +165,7 @@ class ACF:
self._fft = fft
self._alpha = alpha
self._missing = missing
self.primitiveNo = 0

def produce(self, data):

@@ -146,8 +178,12 @@ class ACF:

"""

output = acf(data)
return output
transformed_columns=utils.pandas.DataFrame()
for col in data.columns:
output = acf(data[col], unbiased = self._unbiased, nlags = self._nlags, qstat = self._qstat, fft = self._fft, alpha = self._alpha, missing = self._missing)
output = pd.DataFrame(output)
transformed_columns=pd.concat([transformed_columns,output],axis=1)
return transformed_columns



@@ -155,37 +191,53 @@ class AutoCorrelation(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hype
"""
A primitive that performs autocorrelation on a DataFrame
acf() function documentation: https://www.statsmodels.org/dev/generated/statsmodels.tsa.stattools.acf.html
"""

__author__ = "DATA Lab @Texas A&M University"
metadata = metadata_base.PrimitiveMetadata(
{
'id': '8c246c78-3082-4ec9-844e-5c98fcc76f9f',
'version': '0.0.2',
'name': "AutoCorrelation of values",
'python_path': 'd3m.primitives.tods.feature_analysis.auto_correlation',
'algorithm_types': [metadata_base.PrimitiveAlgorithmType.DATA_CONVERSION,], #TODO: check is this right?
'primitive_family': metadata_base.PrimitiveFamily.FEATURE_CONSTRUCTION,
"hyperparams_to_tune": ['unbiased', 'nlags', 'qstat', 'fft', 'alpha', 'missing'],
'source': {
'name': 'DATA Lab @Texas A&M University',
'contact': 'mailto:khlai037@tamu.edu',
'uris': ['https://gitlab.com/lhenry15/tods/-/blob/Yile/anomaly-primitives/anomaly_primitives/AutoCorrelation.py'],
},
'installation': [{
'type': metadata_base.PrimitiveInstallationType.PIP,
'package_uri': 'git+https://gitlab.com/datadrivendiscovery/common-primitives.git@{git_commit}#egg=common_primitives'.format(
git_commit=d3m_utils.current_git_commit(os.path.dirname(__file__)),
),
}],
},
)


def __init__(self, *, hyperparams: Hyperparams) -> None:
super().__init__(hyperparams=hyperparams)

self._clf = ACF(unbiased = hyperparams['unbiased'],
Parameters:
-------
x: array_like
The time series data.
unbiased: bool, default False
If True, then denominators for autocovariance are n-k, otherwise n.
nlags: int, default 40
Number of lags to return autocorrelation for.
qstat: bool, default False
If True, returns the Ljung-Box q statistic for each autocorrelation coefficient. See q_stat for more information.
fft: bool, default None
If True, computes the ACF via FFT.
alpha: scalar, default None
If a number is given, the confidence intervals for the given level are returned. For instance if alpha=.05, 95 % confidence intervals are returned where the standard deviation is computed according to Bartlett”s formula.
missing: str, default “none”
A string in [“none”, “raise”, “conservative”, “drop”] specifying how the NaNs are to be treated. “none” performs no checks. “raise” raises an exception if NaN values are found. “drop” removes the missing observations and then estimates the autocovariances treating the non-missing as contiguous. “conservative” computes the autocovariance using nan-ops so that nans are removed when computing the mean and cross-products that are used to estimate the autocovariance. When using “conservative”, n is set to the number of non-missing observations.
-------
"""
metadata = metadata_base.PrimitiveMetadata({
'__author__': "DATA Lab @Texas A&M University",
'name': "AutoCorrelation of values",
'python_path': 'd3m.primitives.tods.feature_analysis.auto_correlation',
'source': {'name': "DATALAB @Taxes A&M University", 'contact': 'mailto:khlai037@tamu.edu',
'uris': ['https://gitlab.com/lhenry15/tods/-/blob/Yile/anomaly-primitives/anomaly_primitives/AutoCorrelation.py']},
'algorithm_types': [metadata_base.PrimitiveAlgorithmType.AUTOCORRELATION,],
'primitive_family': metadata_base.PrimitiveFamily.FEATURE_CONSTRUCTION,
'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'AutocorrelationPrimitive')),
'hyperparams_to_tune': ['unbiased', 'nlags', 'qstat', 'fft', 'alpha', 'missing'],
'version': '0.0.2',
})

def __init__(self, *,
hyperparams: Hyperparams, #
random_seed: int = 0,
docker_containers: Dict[str, DockerContainer] = None) -> None:
super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers)


self._clf = ACF(unbiased = hyperparams['unbiased'],
nlags = hyperparams['nlags'],
qstat = hyperparams['qstat'],
fft = hyperparams['fft'],
@@ -193,48 +245,79 @@ class AutoCorrelation(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hype
missing = hyperparams['missing']
)

self.primitiveNo = PrimitiveCount.primitive_no
PrimitiveCount.primitive_no+=1


def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
"""
Process the testing data.
Args:
inputs: Container DataFrame
timeout: Default
iterations: Default
inputs: Container DataFrame.

Returns:
Container DataFrame containing moving average of selected columns
Container DataFrame after AutoCorrelation.
"""

assert isinstance(inputs, container.DataFrame), type(container.DataFrame)
_, self._columns_to_produce = self._get_columns_to_fit(inputs, self.hyperparams)
# Get cols to fit.
self._fitted = False
self._training_inputs, self._training_indices = self._get_columns_to_fit(inputs, self.hyperparams)
self._input_column_names = self._training_inputs.columns

print("training_indices_ ", self._training_indices)
if len(self._training_indices) > 0:
self._fitted = True
else: # pragma: no cover
if self.hyperparams['error_on_no_input']:
raise RuntimeError("No input columns were selected")
self.logger.warn("No input columns were selected")

if not self._fitted: # pragma: no cover
raise PrimitiveNotFittedError("Primitive not fitted.")
outputs = inputs
if len(self._columns_to_produce) > 0:
for col in self.hyperparams['use_columns']:
output = self._clf.produce(inputs.iloc[ : ,col])
outputs = pd.concat((outputs, pd.Series(output).rename(inputs.columns[col] + '_acf')), axis = 1)
else:
sk_inputs = inputs
if self.hyperparams['use_semantic_types']: # pragma: no cover
sk_inputs = inputs.iloc[:, self._training_indices]
output_columns = []
if len(self._training_indices) > 0:
print("sk_inputs ", sk_inputs)
sk_output = self._clf.produce(sk_inputs)
if sparse.issparse(sk_output): # pragma: no cover
sk_output = sk_output.toarray()
outputs = self._wrap_predictions(inputs, sk_output)
if len(outputs.columns) == len(self._input_column_names):
outputs.columns = self._input_column_names
output_columns = [outputs]

else: # pragma: no cover
if self.hyperparams['error_on_no_input']:
raise RuntimeError("No input columns were selected")
self.logger.warn("No input columns were selected")

self._update_metadata(outputs)
outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'],
add_index_columns=self.hyperparams['add_index_columns'],
inputs=inputs, column_indices=self._training_indices,
columns_list=output_columns)

return CallResult(outputs)


return base.CallResult(outputs)



def _update_metadata(self, outputs):
def _update_metadata(self, outputs): # pragma: no cover
outputs.metadata = outputs.metadata.generate(outputs)
@classmethod
def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams):
def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): # pragma: no cover
"""
Select columns to fit.
Args:
inputs: Container DataFrame
hyperparams: d3m.metadata.hyperparams.Hyperparams
Returns:
list
Select columns to fit.
Args:
inputs: Container DataFrame
hyperparams: d3m.metadata.hyperparams.Hyperparams
Returns:
list
"""

if not hyperparams['use_semantic_types']:
@@ -242,9 +325,8 @@ class AutoCorrelation(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hype

inputs_metadata = inputs.metadata


def can_produce_column(column_index: int) -> bool:

return cls._can_produce_column(inputs_metadata, column_index, hyperparams)

columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata,
@@ -252,7 +334,6 @@ class AutoCorrelation(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hype
exclude_columns=hyperparams['exclude_columns'],
can_use_column=can_produce_column)


"""
Encountered error: when hyperparams['use_columns'] = (2,3) and hyperparams['exclude_columns'] is (1,2)
columns_to_produce is still [2]
@@ -261,15 +342,15 @@ class AutoCorrelation(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hype

@classmethod
def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool:
def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: # pragma: no cover
"""
Output whether a column can be processed.
Output whether a column can be processed.

Args:
inputs_metadata: d3m.metadata.base.DataMetadata
column_index: int
Returns:
bool
Args:
inputs_metadata: d3m.metadata.base.DataMetadata
column_index: int
Returns:
bool
"""

column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index))
@@ -277,12 +358,13 @@ class AutoCorrelation(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hype
accepted_structural_types = (int, float, np.integer, np.float64) #changed numpy to np
accepted_semantic_types = set()
accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute")

print("accepted_semantic_types ", accepted_semantic_types)
print("column_metadata['structural_type'] ",column_metadata['structural_type'])
if not issubclass(column_metadata['structural_type'], accepted_structural_types):
return False

semantic_types = set(column_metadata.get('semantic_types', []))
print("semantic_types ", semantic_types)
if len(semantic_types) == 0:
cls.logger.warning("No semantic types found in column metadata")
return False
@@ -307,26 +389,27 @@ class AutoCorrelation(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hype

"""

outputs = container.DataFrame(predictions, generate_metadata=True)
target_columns_metadata = self._copy_inputs_metadata(inputs.metadata, self._columns_to_produce, outputs.metadata, self.hyperparams)
outputs = d3m_dataframe(predictions, generate_metadata=True)
target_columns_metadata = self._add_target_columns_metadata(outputs.metadata, self.hyperparams, self.primitiveNo)
outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, target_columns_metadata)
return outputs




@classmethod
def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs],
target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata:
"""
Updata metadata for selected columns.
Updata metadata for selected columns.

Args:
inputs_metadata: metadata_base.DataMetadata
outputs: Container Dataframe
target_columns_metadata: list
Args:
inputs_metadata: metadata_base.DataMetadata
outputs: Container Dataframe
target_columns_metadata: list

Returns:
d3m.metadata.base.DataMetadata
Returns:
d3m.metadata.base.DataMetadata
"""

outputs_metadata = metadata_base.DataMetadata().generate(value=outputs)
@@ -338,50 +421,26 @@ class AutoCorrelation(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hype
return outputs_metadata



@classmethod
def _copy_inputs_metadata(cls, inputs_metadata: metadata_base.DataMetadata, input_indices: List[int],
outputs_metadata: metadata_base.DataMetadata, hyperparams):
def _add_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams, primitiveNo):
"""
Updata metadata for selected columns.

Args:
inputs_metadata: metadata.base.DataMetadata
input_indices: list
outputs_metadata: metadata.base.DataMetadata
hyperparams: d3m.metadata.hyperparams.Hyperparams
Add target columns metadata
Args:
outputs_metadata: metadata.base.DataMetadata
hyperparams: d3m.metadata.hyperparams.Hyperparams

Returns:
d3m.metadata.base.DataMetadata
Returns:
List[OrderedDict]
"""

outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length']
target_columns_metadata: List[OrderedDict] = []
for column_index in input_indices:
column_name = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)).get("name")
if column_name is None:
column_name = "output_{}".format(column_index)

column_metadata = OrderedDict(inputs_metadata.query_column(column_index))
semantic_types = set(column_metadata.get('semantic_types', []))
semantic_types_to_remove = set([])
add_semantic_types = set()
add_semantic_types.add(hyperparams["return_semantic_type"])
semantic_types = semantic_types - semantic_types_to_remove
semantic_types = semantic_types.union(add_semantic_types)
for column_index in range(outputs_length):
column_name = "{0}{1}_{2}".format(cls.metadata.query()['name'], primitiveNo, column_index)
column_metadata = OrderedDict()
semantic_types = set()
semantic_types.add(hyperparams["return_semantic_type"])
column_metadata['semantic_types'] = list(semantic_types)

column_metadata["name"] = str(column_name)
target_columns_metadata.append(column_metadata)

# If outputs has more columns than index, add Attribute Type to all remaining
if outputs_length > len(input_indices):
for column_index in range(len(input_indices), outputs_length):
column_metadata = OrderedDict()
semantic_types = set()
semantic_types.add(hyperparams["return_semantic_type"])
column_name = "output_{}".format(column_index)
column_metadata["semantic_types"] = list(semantic_types)
column_metadata["name"] = str(column_name)
target_columns_metadata.append(column_metadata)
return target_columns_metadata

+ 0
- 376
tods/feature_analysis/BKFilter.py View File

@@ -1,376 +0,0 @@
from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple
from numpy import ndarray
from collections import OrderedDict
from scipy import sparse
import os
import sklearn
import numpy
import typing
import time

from d3m import container
from d3m.primitive_interfaces import base, transformer
from d3m.metadata import base as metadata_base, hyperparams

from d3m.container.numpy import ndarray as d3m_ndarray
from d3m.container import DataFrame as d3m_dataframe
from d3m.metadata import hyperparams, params, base as metadata_base
from d3m import utils
from d3m.base import utils as base_utils
from d3m.exceptions import PrimitiveNotFittedError
from d3m.primitive_interfaces.base import CallResult, DockerContainer


import os.path

import time
import statsmodels.api as sm

__all__ = ('BKFilter',)

# Primitive I/O type aliases: both input and output are d3m container DataFrames.
Inputs = container.DataFrame
Outputs = container.DataFrame


class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters for the BKFilter primitive.

    Split into tuning parameters (the Baxter-King filter settings proper)
    and control parameters (column selection and output assembly).
    """

    # --- Tuning parameters for the Baxter-King bandpass filter ---
    low = hyperparams.UniformInt(
        lower=0,
        upper=100000000,
        default=6,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
        description="Minimum period for oscillations, ie., Baxter and King suggest that the Burns-Mitchell U.S. business cycle has 6 for quarterly data and 1.5 for annual data.",
    )
    high = hyperparams.UniformInt(
        lower=0,
        upper=100000000,
        default=32,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
        description="Maximum period for oscillations BK suggest that the U.S. business cycle has 32 for quarterly data and 8 for annual data.",
    )
    K = hyperparams.UniformInt(
        lower=0,
        upper=100000000,
        default=1,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
        description="Lead-lag length of the filter. Baxter and King propose a truncation length of 12 for quarterly data and 3 for annual data.",
    )

    # --- Control parameters ---
    # BUGFIX: the original description had 'name' and 'index' swapped
    # ('name' selects by column name, so it uses the *_name variants).
    columns_using_method = hyperparams.Enumeration(
        values=['name', 'index'],
        default='index',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="Choose to use columns by names or indices. If 'name', \"use_columns_name\" or \"exclude_columns_name\" is used. If 'index', \"use_columns\" or \"exclude_columns\" is used."
    )
    use_columns_name = hyperparams.Set(
        elements=hyperparams.Hyperparameter[str](''),
        default=(),
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="A set of column names to force primitive to operate on. If any specified column cannot be parsed, it is skipped.",
    )
    exclude_columns_name = hyperparams.Set(
        elements=hyperparams.Hyperparameter[str](''),
        default=(),
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="A set of column names to not operate on. Applicable only if \"use_columns_name\" is not provided.",
    )
    use_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.",
    )
    exclude_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="A set of column indices to not operate on. Applicable only if \"use_columns\" is not provided.",
    )
    return_result = hyperparams.Enumeration(
        values=['append', 'replace', 'new'],
        default='append',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.",
    )
    use_semantic_types = hyperparams.UniformBool(
        default=False,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe"
    )
    add_index_columns = hyperparams.UniformBool(
        default=False,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".",
    )
    error_on_no_input = hyperparams.UniformBool(
        default=True,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. To prevent pipelines from breaking set this to False.",
    )
    return_semantic_type = hyperparams.Enumeration[str](
        values=['https://metadata.datadrivendiscovery.org/types/Attribute', 'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute'],
        default='https://metadata.datadrivendiscovery.org/types/Attribute',
        description='Decides what semantic type to attach to generated attributes',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter']
    )

class BKFilter(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
    """
    Filter a time series using the Baxter-King bandpass filter.

    Parameters
    ----------
    low: int
        Minimum period for oscillations, ie., Baxter and King suggest that the Burns-Mitchell U.S. business cycle has 6 for quarterly data and 1.5 for annual data.
    high: int
        Maximum period for oscillations BK suggest that the U.S. business cycle has 32 for quarterly data and 8 for annual data.
    K: int
        Lead-lag length of the filter. Baxter and King propose a truncation length of 12 for quarterly data and 3 for annual data.
    use_columns: Set
        A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.
    exclude_columns: Set
        A set of column indices to not operate on. Applicable only if "use_columns" is not provided.
    return_result: Enumeration
        Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.
    use_semantic_types: Bool
        Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe.
    add_index_columns: Bool
        Also include primary index columns if input data has them. Applicable only if "return_result" is set to "new".
    error_on_no_input: Bool
        Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. To prevent pipelines from breaking set this to False.
    return_semantic_type: Enumeration[str]
        Decides what semantic type to attach to generated attributes.
    """

    # BUGFIX: the original wrote `__author__: "..."` — a bare annotation that
    # never assigns the value. Use a real assignment.
    __author__ = "DATA Lab at Texas A&M University"

    metadata = metadata_base.PrimitiveMetadata({
        "name": "Baxter-King Filter Primitive",
        "python_path": "d3m.primitives.tods.feature_analysis.bk_filter",
        # NOTE(review): the second URI points at DuplicationValidation.py, which
        # looks like a copy-paste leftover from another primitive — confirm the
        # intended file before changing it.
        "source": {'name': 'DATA Lab at Texas A&M University', 'contact': 'mailto:khlai037@tamu.edu',
                   'uris': ['https://gitlab.com/lhenry15/tods.git', 'https://gitlab.com/lhenry15/tods/-/blob/Junjie/anomaly-primitives/anomaly_primitives/DuplicationValidation.py']},
        "algorithm_types": [metadata_base.PrimitiveAlgorithmType.BK_FILTER, ],
        "primitive_family": metadata_base.PrimitiveFamily.FEATURE_CONSTRUCTION,
        "id": "b2bfadc5-dbca-482c-b188-8585e5f245c4",
        "hyperparams_to_tune": ['low', 'high', 'K'],
        "version": "0.0.1",
    })

    def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
        """
        Process the testing data.

        Args:
            inputs: Container DataFrame.

        Returns:
            Container DataFrame after BKFilter.
        """
        # Select the columns this primitive operates on.
        self._fitted = False
        self._training_inputs, self._training_indices = self._get_columns_to_fit(inputs, self.hyperparams)
        self._input_column_names = self._training_inputs.columns

        if len(self._training_indices) > 0:
            self._fitted = True
        else:
            if self.hyperparams['error_on_no_input']:
                raise RuntimeError("No input columns were selected")
            # BUGFIX: Logger.warn is a deprecated alias of Logger.warning.
            self.logger.warning("No input columns were selected")

        if not self._fitted:
            raise PrimitiveNotFittedError("Primitive not fitted.")

        sk_inputs = inputs
        if self.hyperparams['use_semantic_types']:
            sk_inputs = inputs.iloc[:, self._training_indices]

        output_columns = []
        if len(self._training_indices) > 0:
            sk_output = self._bkfilter(sk_inputs, low=self.hyperparams['low'], high=self.hyperparams['high'], K=self.hyperparams['K'])
            if sparse.issparse(sk_output):
                sk_output = sk_output.toarray()
            outputs = self._wrap_predictions(inputs, sk_output)

            # Restore original column names when the shape is unchanged.
            if len(outputs.columns) == len(self._input_column_names):
                outputs.columns = self._input_column_names
            output_columns = [outputs]
        else:
            if self.hyperparams['error_on_no_input']:
                raise RuntimeError("No input columns were selected")
            self.logger.warning("No input columns were selected")

        # Assemble the final dataframe according to 'return_result'.
        outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'],
                                             add_index_columns=self.hyperparams['add_index_columns'],
                                             inputs=inputs, column_indices=self._training_indices,
                                             columns_list=output_columns)
        return CallResult(outputs)

    @classmethod
    def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams):
        """
        Select columns to fit.

        Args:
            inputs: Container DataFrame
            hyperparams: d3m.metadata.hyperparams.Hyperparams

        Returns:
            Tuple of (selected columns of inputs, list of their indices).
        """
        if not hyperparams['use_semantic_types']:
            return inputs, list(range(len(inputs.columns)))

        inputs_metadata = inputs.metadata

        def can_produce_column(column_index: int) -> bool:
            return cls._can_produce_column(inputs_metadata, column_index, hyperparams)

        # TODO: name-based selection ('columns_using_method' == 'name') is not
        # implemented yet; only index-based selection is honored here.
        use_columns = hyperparams['use_columns']
        exclude_columns = hyperparams['exclude_columns']

        columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata, use_columns=use_columns, exclude_columns=exclude_columns, can_use_column=can_produce_column)
        return inputs.iloc[:, columns_to_produce], columns_to_produce

    @classmethod
    def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool:
        """
        Output whether a column can be processed.

        Args:
            inputs_metadata: d3m.metadata.base.DataMetadata
            column_index: int

        Returns:
            bool
        """
        column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index))

        accepted_structural_types = (int, float, numpy.integer, numpy.float64)
        accepted_semantic_types = set()
        accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute")
        if not issubclass(column_metadata['structural_type'], accepted_structural_types):
            return False

        semantic_types = set(column_metadata.get('semantic_types', []))

        if len(semantic_types) == 0:
            cls.logger.warning("No semantic types found in column metadata")
            return False

        # All accepted_semantic_types must be present in the column's semantic types.
        return len(accepted_semantic_types - semantic_types) == 0

    @classmethod
    def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs],
                                     target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata:
        """
        Update metadata for selected columns.

        Args:
            inputs_metadata: metadata_base.DataMetadata
            outputs: Container Dataframe
            target_columns_metadata: list

        Returns:
            d3m.metadata.base.DataMetadata
        """
        outputs_metadata = metadata_base.DataMetadata().generate(value=outputs)

        for column_index, column_metadata in enumerate(target_columns_metadata):
            # Drop structural_type so the generated metadata's value is kept.
            column_metadata.pop("structural_type", None)
            outputs_metadata = outputs_metadata.update_column(column_index, column_metadata)

        return outputs_metadata

    def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs:
        """
        Wrap predictions into dataframe.

        Args:
            inputs: Container Dataframe
            predictions: array-like data (n_samples, n_features)

        Returns:
            Dataframe
        """
        outputs = d3m_dataframe(predictions, generate_metadata=True)
        target_columns_metadata = self._add_target_columns_metadata(outputs.metadata, self.hyperparams)
        outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, target_columns_metadata)
        return outputs

    @classmethod
    def _add_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams):
        """
        Add target columns metadata.

        Args:
            outputs_metadata: metadata.base.DataMetadata
            hyperparams: d3m.metadata.hyperparams.Hyperparams

        Returns:
            List[OrderedDict]
        """
        outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length']
        target_columns_metadata: List[OrderedDict] = []
        for column_index in range(outputs_length):
            column_name = "output_{}".format(column_index)
            column_metadata = OrderedDict()
            semantic_types = set()
            semantic_types.add(hyperparams["return_semantic_type"])
            column_metadata['semantic_types'] = list(semantic_types)
            column_metadata["name"] = str(column_name)
            target_columns_metadata.append(column_metadata)

        return target_columns_metadata

    def _write(self, inputs: Inputs):
        """Dump *inputs* to a timestamp-named CSV file (debugging helper)."""
        inputs.to_csv(str(time.time()) + '.csv')

    def _bkfilter(self, X, low, high, K):
        """
        Perform BKFilter column-by-column.

        Args:
            X: selected rows to be performed
            K, low, high: Parameters of BKFilter

        Returns:
            Dataframe, results of BKFilter
        """
        # Local import: avoids relying on d3m.utils re-exporting pandas
        # (the original accessed it via `utils.pandas`, which is fragile).
        import pandas

        transformed_X = pandas.DataFrame()
        for col in X.columns:
            cycle = sm.tsa.filters.bkfilter(X[col], low=low, high=high, K=K)
            cycle_df = pandas.DataFrame(cycle)
            transformed_X = pandas.concat([transformed_X, cycle_df], axis=1)

        return transformed_X

+ 24
- 38
tods/tests/test_Autocorrelation.py View File

@@ -17,21 +17,18 @@ import pandas as pd
class AutoCorrelationTestCase(unittest.TestCase):
def test_basic(self):
self.maxDiff = None
main = container.DataFrame({'a': [1., 2., 3.], 'b': [2., 3., 4.], 'c': [3., 4., 5.],},
columns=['a', 'b', 'c'],
generate_metadata=True)
"""
main = container.DataFrame({'d3mIndex': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
'timestamp': [1472918400, 1472918700, 1472919000, 1472919300,
1472919600, 1472919900, 1472920200, 1472920500, 1472920800, 1472921100],
'value': [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0],
'ground_truth': [0, 1, 0, 1, 0, 1, 0, 1, 0, 1]},
columns = ['d3mIndex', 'timestamp', 'value', 'ground_truth'], generate_metadata = True)
"""
main.metadata = main.metadata.update_column(0, {'name': 'd3mIndex_'})
main.metadata = main.metadata.update_column(1, {'name': 'timestamp_'})
main.metadata = main.metadata.update_column(2, {'name': 'value_'})
main.metadata = main.metadata.update_column(3, {'name': 'ground_truth_'})
columns = ['d3mIndex', 'timestamp', 'value', 'ground_truth'], generate_metadata = True)
"""

#print(main)

self.assertEqual(utils.to_json_structure(main.metadata.to_internal_simple_structure()), [{
'selector': [],
'metadata': {
@@ -42,7 +39,7 @@ class AutoCorrelationTestCase(unittest.TestCase):
'dimension': {
'name': 'rows',
'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'],
'length': 10,
'length': 3,
},
},
}, {
@@ -51,45 +48,37 @@ class AutoCorrelationTestCase(unittest.TestCase):
'dimension': {
'name': 'columns',
'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'],
'length': 4,
'length': 3,
},
},
}, {
}, {
'selector': ['__ALL_ELEMENTS__', 0],
'metadata': {'structural_type': 'numpy.int64', 'name': 'd3mIndex'},
'metadata': {'structural_type': 'numpy.float64', 'name': 'a'},
}, {
'selector': ['__ALL_ELEMENTS__', 1],
'metadata': {'structural_type': 'numpy.int64', 'name': 'timestamp'},
'metadata': {'structural_type': 'numpy.float64', 'name': 'b'},
}, {
'selector': ['__ALL_ELEMENTS__', 2],
'metadata': {'structural_type': 'numpy.float64', 'name': 'value'},
}, {
'selector': ['__ALL_ELEMENTS__', 3],
'metadata': {'structural_type': 'numpy.int64', 'name': 'ground_truth'},
'metadata': {'structural_type': 'numpy.float64', 'name': 'c'}
}])

self.assertIsInstance(main, container.DataFrame)
hyperparams_class = AutoCorrelation.AutoCorrelation.metadata.get_hyperparams().defaults()
hyperparams_class = hyperparams_class.replace({'nlags': 2})
#hyperparams_class = hyperparams_class.replace({'use_semantic_types': True})
primitive = AutoCorrelation.AutoCorrelation(hyperparams=hyperparams_class)
new_main = primitive.produce(inputs=main).value
print(new_main)
new_main_drop = new_main['value_acf']
new_main_drop = new_main_drop.reset_index(drop = True)
# new_main_drop = new_main['value_acf']
# new_main_drop = new_main_drop.reset_index(drop = True)


expected_result = pd.DataFrame({'acf':[1.000000, 0.700000, 0.412121, 0.148485, -0.078788, -0.257576, -0.375758, -0.421212, -0.381818, -0.245455]})
new_main_drop.reset_index()
# expected_result = pd.DataFrame({'acf':[1.000000, 0.700000, 0.412121, 0.148485, -0.078788, -0.257576, -0.375758, -0.421212, -0.381818, -0.245455]})
# new_main_drop.reset_index()

self.assertEqual(all(new_main_drop), all(expected_result))
# self.assertEqual(all(new_main_drop), all(expected_result))


#print(main.metadata.to_internal_simple_structure())
#print(new_main.metadata.to_internal_simple_structure())

self.assertEqual(utils.to_json_structure(main.metadata.to_internal_simple_structure()), [{
'selector': [],
'metadata': {
@@ -100,7 +89,7 @@ class AutoCorrelationTestCase(unittest.TestCase):
'dimension': {
'name': 'rows',
'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'],
'length': 10,
'length': 3,
},
},
}, {
@@ -109,26 +98,23 @@ class AutoCorrelationTestCase(unittest.TestCase):
'dimension': {
'name': 'columns',
'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'],
'length': 4,
'length': 3,
},
},
}, {
'selector': ['__ALL_ELEMENTS__', 0],
'metadata': {'structural_type': 'numpy.int64', 'name': 'd3mIndex'},
'metadata': {'structural_type': 'numpy.float64', 'name': 'a'},
}, {
'selector': ['__ALL_ELEMENTS__', 1],
'metadata': {'structural_type': 'numpy.int64', 'name': 'timestamp'},
'metadata': {'structural_type': 'numpy.float64', 'name': 'b'},
}, {
'selector': ['__ALL_ELEMENTS__', 2],
'metadata': {'structural_type': 'numpy.float64', 'name': 'value'},
}, {
'selector': ['__ALL_ELEMENTS__', 3],
'metadata': {'structural_type': 'numpy.int64', 'name': 'ground_truth'},
'metadata': {'structural_type': 'numpy.float64', 'name': 'c'}
}])

params = primitive.get_params()
primitive.set_params(params=params)


if __name__ == '__main__':
unittest.main()
if __name__ == '__main__':
unittest.main()

+ 22
- 12
tods/tests/test_DeepLog.py View File

@@ -9,14 +9,14 @@ from tods.detection_algorithm.DeepLog import DeepLogPrimitive
class DeepLogTest(unittest.TestCase):
def test_basic(self):
self.maxDiff = None
main = container.DataFrame({'a': [1., 2., 3., 4.], 'b': [2., 3., 4., 5.], 'c': [3., 4., 5., 6.]},
self.main = container.DataFrame({'a': [1., 2., 3., 4.], 'b': [2., 3., 4., 5.], 'c': [3., 4., 5., 6.]},
columns=['a', 'b', 'c'],
generate_metadata=True)

print(main)
print(self.main)


self.assertEqual(utils.to_json_structure(main.metadata.to_internal_simple_structure()), [{
self.assertEqual(utils.to_json_structure(self.main.metadata.to_internal_simple_structure()), [{
'selector': [],
'metadata': {
# 'top_level': 'main',
@@ -50,7 +50,7 @@ class DeepLogTest(unittest.TestCase):
}])


self.assertIsInstance(main, container.DataFrame)
self.assertIsInstance(self.main, container.DataFrame)


hyperparams_class = DeepLogPrimitive.metadata.get_hyperparams()
@@ -59,15 +59,20 @@ class DeepLogTest(unittest.TestCase):

print(hyperparams)

primitive = DeepLogPrimitive(hyperparams=hyperparams)
primitive.set_training_data(inputs=main)
primitive.fit()
new_main = primitive.produce(inputs=main).value
new_main_score = primitive.produce_score(inputs=main).value
print(new_main)
print(new_main_score)
self.primitive = DeepLogPrimitive(hyperparams=hyperparams)
self.primitive.set_training_data(inputs=self.main)
#print("*****************",self.primitive.get_params())

self.assertEqual(utils.to_json_structure(main.metadata.to_internal_simple_structure()), [{
self.primitive.fit()
self.new_main = self.primitive.produce(inputs=self.main).value
self.new_main_score = self.primitive.produce_score(inputs=self.main).value
print(self.new_main)
print(self.new_main_score)

params = self.primitive.get_params()
self.primitive.set_params(params=params)

self.assertEqual(utils.to_json_structure(self.main.metadata.to_internal_simple_structure()), [{
'selector': [],
'metadata': {
# 'top_level': 'main',
@@ -100,6 +105,11 @@ class DeepLogTest(unittest.TestCase):
'metadata': {'structural_type': 'numpy.float64', 'name': 'c'}
}])

# def test_params(self):
# params = self.primitive.get_params()
# self.primitive.set_params(params=params)



if __name__ == '__main__':
unittest.main()

Loading…
Cancel
Save