Browse Source

update matrix profile

Former-commit-id: 48a3ef02e6 [formerly 13159f8c55] [formerly 0b23e2bda9 [formerly 05749bb5a2]] [formerly 33e05e3ff5 [formerly 851600f299] [formerly 54a88adc36 [formerly 340aebcbf0]]] [formerly 070e4be2c1 [formerly f95708a14c] [formerly ad5c18f108 [formerly 1fa9e103f6]] [formerly 56e3b7bb84 [formerly 1fdcbec72d] [formerly 67fd80f97b [formerly d81285967c]]]] [formerly 8cb16e25b0 [formerly 2bf1c8def7] [formerly 7b0dfbe3ca [formerly a157e85b74]] [formerly fbc7881e68 [formerly 6d71ca798d] [formerly 0e835c8852 [formerly d7ab4df73d]]] [formerly 388e163365 [formerly 8727f79cd7] [formerly f1fe06ebbc [formerly 8390796d0e]] [formerly 22c71ba893 [formerly 1cd288e610] [formerly dea4be6356 [formerly c1084184e6]]]]] [formerly a4bf41dcd8 [formerly 9c119b31d5] [formerly 1ce2d949f9 [formerly 0dc19863b5]] [formerly ccb3a9a3f4 [formerly 021ba1089b] [formerly 6f291f4f64 [formerly 29454c501e]]] [formerly 43d855d13f [formerly 7caff10d36] [formerly a106a560ff [formerly 5e2e057298]] [formerly 27101ce078 [formerly 68c0f64327] [formerly 1a7a7f02c5 [formerly 00d827014a]]]] [formerly 7c6e60d764 [formerly 1119aedcf3] [formerly 3c93c054a0 [formerly 93886e24ca]] [formerly 2e87ada227 [formerly 1050450062] [formerly 901c0120dc [formerly 5a56d38620]]] [formerly ddba674d82 [formerly ca09237b29] [formerly af18015e45 [formerly 3156d9226e]] [formerly 5c0fcc0a79 [formerly a4b3cb6d8e] [formerly 1a840a0a8e [formerly 88bf3940ed]]]]]]
Former-commit-id: 9d524d218b [formerly 1b9abb84f2] [formerly 458dab5bea [formerly 5c95ca1898]] [formerly e34145d9fe [formerly b8153a1fc0] [formerly cd1a4b9034 [formerly d9b67164b7]]] [formerly ffd79cfec6 [formerly 97a1926e12] [formerly 54b7795c36 [formerly 9c6c13005f]] [formerly 960c619026 [formerly 28910b543b] [formerly c89319a0e8 [formerly 8c6e607f2b]]]] [formerly be85e70642 [formerly fafe688986] [formerly 1cfc27a4cc [formerly b7b7e5aafb]] [formerly a24dc1fb54 [formerly d11cd9e41f] [formerly b6fe3d15a4 [formerly fd71056602]]] [formerly 77ae3098df [formerly d46a15b27e] [formerly 6a4c0bb8a8 [formerly 915ec8070c]] [formerly 3f8aef8df3 [formerly e8080094bf] [formerly 1a840a0a8e]]]]
Former-commit-id: cc86c728f5 [formerly 28f1b98e62] [formerly 88255f952e [formerly e257de41c5]] [formerly 41f0cf18e4 [formerly abf4c65b36] [formerly f63363b304 [formerly 9763cdb865]]] [formerly 8b33e8a138 [formerly a39dc00ec5] [formerly 6cdb40092d [formerly d5eb09f9ea]] [formerly 3338b856a2 [formerly 1fa11eba42] [formerly be29380030 [formerly 390808cafe]]]]
Former-commit-id: e000d3f1fd [formerly 36dd067abf] [formerly 46431a8eb9 [formerly d05737caf1]] [formerly 0621078d85 [formerly 0eaa4d929a] [formerly bfb9178c39 [formerly fdea3eeb46]]]
Former-commit-id: 6d72f5b1ac [formerly deaf89d33a] [formerly 1efae821e5 [formerly 533fc4f182]]
Former-commit-id: 6d7a45103c [formerly 470f0e9644]
Former-commit-id: c0b044cb5f
master
YileAllenChen1 5 years ago
parent
commit
7329d8638b
3 changed files with 2 additions and 390 deletions
  1. +1
    -1
      tods/detection_algorithm/MatrixProfile.py
  2. +0
    -381
      tods/detection_algorithm/MatrixProfile2.py
  3. +1
    -8
      tods/tests/detection_algorithm/test_MatrixProfile.py

+ 1
- 1
tods/detection_algorithm/MatrixProfile.py View File

@@ -131,7 +131,7 @@ class MP:
def predict(self, data):
    """Return ``self.produce(data)``; this method only delegates to ``produce``."""
    return self.produce(data)
class MatrixProfile(UnsupervisedOutlierDetectorBase[Inputs, Outputs, Params, Hyperparams]):
class MatrixProfilePrimitive(UnsupervisedOutlierDetectorBase[Inputs, Outputs, Params, Hyperparams]):
"""

A primitive that performs matrix profile on a DataFrame using Stumpy package


+ 0
- 381
tods/detection_algorithm/MatrixProfile2.py View File

@@ -1,381 +0,0 @@
import os
import sklearn
import numpy
import typing
import time
from scipy import sparse
from numpy import ndarray
from collections import OrderedDict
from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple

import numpy as np
import pandas as pd
import logging, uuid
from scipy import sparse
from numpy import ndarray
from collections import OrderedDict
from common_primitives import dataframe_utils, utils

from d3m import utils
from d3m import container
from d3m.base import utils as base_utils
from d3m.exceptions import PrimitiveNotFittedError
from d3m.container import DataFrame as d3m_dataframe
from d3m.container.numpy import ndarray as d3m_ndarray
from d3m.primitive_interfaces import base, transformer
from d3m.metadata import base as metadata_base, hyperparams
from d3m.metadata import hyperparams, params, base as metadata_base
from d3m.primitive_interfaces.base import CallResult, DockerContainer

import stumpy

__all__ = ('MatrixProfile',)

Inputs = container.DataFrame
Outputs = container.DataFrame

class PrimitiveCount:
    """Class-level counter shared by all primitives created in this module.

    ``MatrixProfile.__init__`` reads the current value as the instance's
    number and then increments it.
    """

    primitive_no: int = 0


class Hyperparams(hyperparams.Hyperparams):
    # Hyperparameters of the matrix profile primitive: the window size plus
    # the column-selection / output-handling controls.

    # Window size; MatrixProfile.__init__ forwards it to MP(window_size=...),
    # which passes it to stumpy.stump as ``m``.
    # NOTE(review): a lower bound of 0 allows window sizes that stumpy may
    # reject -- confirm the smallest valid window and tighten the bound.
    window_size = hyperparams.UniformInt(
        lower = 0,
        upper = 100,  # TODO: define the correct upper bound
        default=50,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="window size to calculate"
    )
    # Keep previous
    # NOTE(review): not read anywhere in the visible code of this module.
    dataframe_resource = hyperparams.Hyperparameter[typing.Union[str, None]](
        default=None,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="Resource ID of a DataFrame to extract if there are multiple tabular resources inside a Dataset and none is a dataset entry point.",
    )
    # Column indices to operate on; defaults to column index 2 only.
    # Only consulted by _get_columns_to_fit when use_semantic_types is true.
    use_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(2,),
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.",
    )
    # Column indices to skip; defaults to columns 0, 1 and 3.
    # Only consulted by _get_columns_to_fit when use_semantic_types is true.
    exclude_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(0,1,3,),
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="A set of column indices to not operate on. Applicable only if \"use_columns\" is not provided.",
    )
    # How produced columns are combined with the input; forwarded to
    # base_utils.combine_columns in MatrixProfile.produce.
    return_result = hyperparams.Enumeration(
        values=['append', 'replace', 'new'],
        default='new',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.",
    )
    # When false (the default), _get_columns_to_fit selects every input column.
    use_semantic_types = hyperparams.UniformBool(
        default=False,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe"
    )
    # Forwarded to base_utils.combine_columns in MatrixProfile.produce.
    add_index_columns = hyperparams.UniformBool(
        default=False,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".",
    )
    # When true, MatrixProfile.produce raises RuntimeError if no column is selected.
    error_on_no_input = hyperparams.UniformBool(
        default=True,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. To prevent pipelines from breaking set this to False.",
    )
    # Semantic type attached to every generated output column
    # (see MatrixProfile._add_target_columns_metadata).
    return_semantic_type = hyperparams.Enumeration[str](
        values=['https://metadata.datadrivendiscovery.org/types/Attribute',
                'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute'],
        default='https://metadata.datadrivendiscovery.org/types/Attribute',
        description='Decides what semantic type to attach to generated attributes',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter']
    )


class MP:
    """
    Compute the matrix profile of every column of a DataFrame with stumpy.

    Args:
        window_size: subsequence length handed to ``stumpy.stump`` as ``m``.
    """

    def __init__(self, window_size):
        self._window_size = window_size

    def produce(self, data):
        """
        Run ``stumpy.stump`` on each column of ``data``.

        Args:
            data: DataFrame; every column is passed to ``stumpy.stump`` on its own.
        Returns:
            DataFrame with the per-column ``stumpy.stump`` outputs placed side
            by side (``axis=1``) in input column order. An empty DataFrame is
            returned when ``data`` has no columns.
        """
        # Collect the per-column results first and concatenate once; calling
        # pd.concat inside the loop re-copies the accumulated frame each time.
        profiles = [
            pd.DataFrame(stumpy.stump(data[col], m=self._window_size))
            for col in data.columns
        ]
        if not profiles:
            # pd.concat raises ValueError on an empty list, so return the
            # empty frame explicitly for input without columns.
            return pd.DataFrame()
        return pd.concat(profiles, axis=1)

class MatrixProfile(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
    """
    A primitive that performs matrix profile on a DataFrame using Stumpy package
    Stumpy documentation: https://stumpy.readthedocs.io/en/latest/index.html

    Parameters
    ----------
    T_A : ndarray
        The time series or sequence for which to compute the matrix profile
    m : int
        Window size
    T_B : ndarray
        The time series or sequence that contain your query subsequences
        of interest. Default is `None` which corresponds to a self-join.
    ignore_trivial : bool
        Set to `True` if this is a self-join. Otherwise, for AB-join, set this
        to `False`. Default is `True`.
    Returns
    -------
    out : ndarray
        The first column consists of the matrix profile, the second column
        consists of the matrix profile indices, the third column consists of
        the left matrix profile indices, and the fourth column consists of
        the right matrix profile indices.
    """

    metadata = metadata_base.PrimitiveMetadata({
        '__author__': "DATA Lab @Texas A&M University",
        'name': "Matrix Profile",
        'python_path': 'd3m.primitives.tods.detection_algorithm.matrix_profile',
        'source': {'name': "DATALAB @Texas A&M University", 'contact': 'mailto:khlai037@tamu.edu',
                   'uris': ['https://gitlab.com/lhenry15/tods/-/blob/Yile/anomaly-primitives/anomaly_primitives/MatrixProfile.py']},
        'algorithm_types': [metadata_base.PrimitiveAlgorithmType.MATRIX_PROFILE,],
        'primitive_family': metadata_base.PrimitiveFamily.FEATURE_CONSTRUCTION,
        'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'MatrixProfilePrimitive')),
        'hyperparams_to_tune': ['window_size'],
        'version': '0.0.2',
    })

    def __init__(self, *, hyperparams: Hyperparams) -> None:
        """
        Create the primitive and its ``MP`` helper.

        Args:
            hyperparams: Hyperparams instance; ``window_size`` is forwarded to ``MP``.
        """
        super().__init__(hyperparams=hyperparams)
        self._clf = MP(window_size=hyperparams['window_size'])
        # Number used in generated output column names; taken from the
        # module-wide counter, which is then advanced for the next instance.
        self.primitiveNo = PrimitiveCount.primitive_no
        PrimitiveCount.primitive_no += 1

    def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
        """
        Compute the matrix profile of the selected input columns.

        Column selection is redone on every call and stored on the instance
        (``_training_inputs``, ``_training_indices``, ``_input_column_names``,
        ``_fitted``).

        Args:
            inputs: Container DataFrame
            timeout: Default
            iterations: Default

        Returns:
            Container DataFrame containing Matrix Profile of selected columns

        Raises:
            RuntimeError: no column was selected and ``error_on_no_input`` is true.
            PrimitiveNotFittedError: no column was selected and
                ``error_on_no_input`` is false (a warning is logged first).
        """
        # Get cols to fit.
        self._fitted = False
        self._training_inputs, self._training_indices = self._get_columns_to_fit(inputs, self.hyperparams)
        self._input_column_names = self._training_inputs.columns
        self._fitted = len(self._training_indices) > 0

        if not self._fitted:  # pragma: no cover
            if self.hyperparams['error_on_no_input']:
                raise RuntimeError("No input columns were selected")
            self.logger.warning("No input columns were selected")
            raise PrimitiveNotFittedError("Primitive not fitted.")

        sk_inputs = inputs
        if self.hyperparams['use_semantic_types']:  # pragma: no cover
            sk_inputs = inputs.iloc[:, self._training_indices]

        # At least one column is selected here, so the profile is always computed.
        sk_output = self._clf.produce(sk_inputs)
        if sparse.issparse(sk_output):  # pragma: no cover
            sk_output = sk_output.toarray()
        outputs = self._wrap_predictions(inputs, sk_output)
        # Reuse the input column names only when the column counts line up.
        if len(outputs.columns) == len(self._input_column_names):  # pragma: no cover
            outputs.columns = self._input_column_names

        outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'],
                                             add_index_columns=self.hyperparams['add_index_columns'],
                                             inputs=inputs, column_indices=self._training_indices,
                                             columns_list=[outputs])

        return CallResult(outputs)

    def _update_metadata(self, outputs):  # pragma: no cover
        """Regenerate ``outputs.metadata`` from the current contents of ``outputs``."""
        outputs.metadata = outputs.metadata.generate(outputs)

    @classmethod
    def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams):  # pragma: no cover
        """
        Select columns to fit.

        Args:
            inputs: Container DataFrame
            hyperparams: d3m.metadata.hyperparams.Hyperparams

        Returns:
            Tuple of (DataFrame restricted to the selected columns, list of
            selected column indices). When ``use_semantic_types`` is false the
            whole input and all of its column indices are returned.
        """
        if not hyperparams['use_semantic_types']:
            return inputs, list(range(len(inputs.columns)))

        inputs_metadata = inputs.metadata

        def can_produce_column(column_index: int) -> bool:
            return cls._can_produce_column(inputs_metadata, column_index, hyperparams)

        columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(
            inputs_metadata,
            use_columns=hyperparams['use_columns'],
            exclude_columns=hyperparams['exclude_columns'],
            can_use_column=can_produce_column)

        # Issue noted by the original author: with use_columns = (2,3) and
        # exclude_columns = (1,2), columns_to_produce is still [2].
        return inputs.iloc[:, columns_to_produce], columns_to_produce

    @classmethod
    def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool:  # pragma: no cover
        """
        Output whether a column can be processed.

        A column qualifies when its structural type is a subclass of one of
        the accepted numeric types and its semantic types include ``Attribute``.

        Args:
            inputs_metadata: d3m.metadata.base.DataMetadata
            column_index: int
            hyperparams: unused here; kept for the shared helper signature

        Returns:
            bool
        """
        column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index))

        accepted_structural_types = (int, float, np.integer, np.float64)
        accepted_semantic_types = {"https://metadata.datadrivendiscovery.org/types/Attribute"}

        if not issubclass(column_metadata['structural_type'], accepted_structural_types):
            return False

        semantic_types = set(column_metadata.get('semantic_types', []))
        if not semantic_types:
            cls.logger.warning("No semantic types found in column metadata")
            return False

        # Every accepted semantic type must be present on the column.
        return accepted_semantic_types.issubset(semantic_types)

    def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs:
        """
        Wrap predictions into dataframe

        Args:
            inputs: Container Dataframe
            predictions: array-like data (n_samples, n_features)

        Returns:
            Dataframe
        """
        outputs = d3m_dataframe(predictions, generate_metadata=True)
        target_columns_metadata = self._add_target_columns_metadata(outputs.metadata, self.hyperparams, self.primitiveNo)
        outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, target_columns_metadata)
        return outputs

    @classmethod
    def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs],
                                     target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata:
        """
        Update metadata for selected columns.

        Args:
            inputs_metadata: metadata_base.DataMetadata (not read by this method)
            outputs: Container Dataframe
            target_columns_metadata: list

        Returns:
            d3m.metadata.base.DataMetadata
        """
        outputs_metadata = metadata_base.DataMetadata().generate(value=outputs)

        for column_index, column_metadata in enumerate(target_columns_metadata):
            # Drop any supplied structural type before updating the column.
            column_metadata.pop("structural_type", None)
            outputs_metadata = outputs_metadata.update_column(column_index, column_metadata)

        return outputs_metadata

    @classmethod
    def _add_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams, primitiveNo):
        """
        Add target columns metadata

        Args:
            outputs_metadata: metadata.base.DataMetadata
            hyperparams: d3m.metadata.hyperparams.Hyperparams
            primitiveNo: instance number embedded in the generated column names

        Returns:
            List[OrderedDict]
        """
        outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length']
        # The primitive name is the same for every column; query it once.
        primitive_name = cls.metadata.query()['name']
        target_columns_metadata: List[OrderedDict] = []
        for column_index in range(outputs_length):
            column_metadata = OrderedDict()
            column_metadata['semantic_types'] = [hyperparams["return_semantic_type"]]
            column_metadata["name"] = "{0}{1}_{2}".format(primitive_name, primitiveNo, column_index)
            target_columns_metadata.append(column_metadata)
        return target_columns_metadata

+ 1
- 8
tods/tests/detection_algorithm/test_MatrixProfile.py View File

@@ -58,17 +58,10 @@ class MatrixProfileTest(unittest.TestCase):
hyperparams_class = MatrixProfilePrimitive.metadata.get_hyperparams()
hyperparams = hyperparams_class.defaults()
hyperparams = hyperparams.replace({'window_size': 3})
<<<<<<< Updated upstream:tods/tests/detection_algorithm/test_MatrixProfile.py

primitive = MatrixProfilePrimitive(hyperparams=hyperparams)
#primitive.set_training_data(inputs=main)
#primitive.fit()
=======
#print(type(main))
primitive = MatrixProfile(hyperparams=hyperparams)
primitive = MatrixProfilePrimitive(hyperparams=hyperparams)
primitive.set_training_data(inputs=main)
primitive.fit()
>>>>>>> Stashed changes:tods/tests/test_MatrixProfile.py
new_main = primitive.produce(inputs=main).value
print(new_main)



Loading…
Cancel
Save