
TODO: primitive test for DAGMM

Branch: master
lhenry15 committed 4 years ago
Commit: 0581773751
21 changed files with 62 additions and 57 deletions

 1. +4  -4   primitive_tests/data_processing/CategoricalToBinary_pipeline.py
 2. +4  -4   primitive_tests/data_processing/ColumnFilter_pipeline.py
 3. +3  -3   primitive_tests/data_processing/ContinuityValidation_pipline.py
 4. +3  -3   primitive_tests/data_processing/DuplicationValidation_pipeline.py
 5. +3  -3   primitive_tests/data_processing/TimeIntervalTransform_pipeline.py
 6. +3  -2   primitive_tests/test.sh
 7. +1  -0   primitive_tests/tested_file.txt
 8. +5  -7   setup.py
 9. +1  -1   tods/detection_algorithm/DAGMM.py
10. +1  -2   tods/detection_algorithm/Ensemble.py
11. +13 -7   tods/detection_algorithm/core/CollectiveCommonTest.py
12. +12 -7   tods/detection_algorithm/core/UODCommonTest.py
13. +1  -1   tods/detection_algorithm/core/dagmm/dagmm.py
14. +1  -1   tods/feature_analysis/SKTruncatedSVD.py
15. +1  -4   tods/feature_analysis/TRMF.py
16. +1  -0   tods/resources/.entry_points.ini
17. +1  -4   tods/timeseries_processing/HoltSmoothing.py
18. +1  -1   tods/timeseries_processing/HoltWintersExponentialSmoothing.py
19. +1  -1   tods/timeseries_processing/MovingAverageTransformer.py
20. +1  -1   tods/timeseries_processing/SKQuantileTransformer.py
21. +1  -1   tods/timeseries_processing/SimpleExponentialSmoothing.py

primitive_tests/data_processing/CategoricalToBinary_pipeline.py (+4 -4)

@@ -10,20 +10,20 @@ pipeline_description.add_input(name='inputs')
# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
-step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
+step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: Column Parser
primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser')
step_1 = PrimitiveStep(primitive=primitive_1)
-step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
+step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
-step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
+step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
@@ -34,7 +34,7 @@ step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_p
step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(3,))
step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
-step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
+step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)
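The one-keyword change repeated across these pipeline files renames add_argument's data_reference= keyword to data=, apparently tracking the tamu_d3m==2021.11.24 pin introduced in setup.py below. A minimal sketch of the new calling convention, assuming a d3m-compatible environment with the TODS primitives installed:

from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep

# One-step pipeline: attach the raw dataset input to dataset_to_dataframe.
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

step_0 = PrimitiveStep(primitive=index.get_primitive(
    'd3m.primitives.tods.data_processing.dataset_to_dataframe'))
# New keyword: data= (older d3m releases spelled this data_reference=).
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                    data='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)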



primitive_tests/data_processing/ColumnFilter_pipeline.py (+4 -4)

@@ -12,19 +12,19 @@ pipeline_description.add_input(name='inputs')
# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
-step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
+step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

#Step 1: column_parser
step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser'))
-step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
+step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
-step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
+step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
@@ -32,7 +32,7 @@ pipeline_description.add_step(step_2)

# Step 3: column_filter
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_filter'))
-step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
+step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)



primitive_tests/data_processing/ContinuityValidation_pipline.py (+3 -3)

@@ -8,19 +8,19 @@ pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe'))
-step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
+step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: column_parser
step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser'))
-step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
+step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 3: ContinuityValidation
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.continuity_validation'))
-step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
+step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name = 'continuity_option', argument_type=ArgumentType.VALUE, data = 'imputation')
step_2.add_hyperparameter(name = 'interval', argument_type=ArgumentType.VALUE, data = 0.3)


primitive_tests/data_processing/DuplicationValidation_pipeline.py (+3 -3)

@@ -9,19 +9,19 @@ pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe'))
-step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
+step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: column_parser
step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser'))
-step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
+step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: DuplicationValidation
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.duplication_validation'))
-step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
+step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name = 'keep_option', argument_type=ArgumentType.VALUE, data = 'average') # Or: 'first'
pipeline_description.add_step(step_2)


primitive_tests/data_processing/TimeIntervalTransform_pipeline.py (+3 -3)

@@ -10,20 +10,20 @@ pipeline_description.add_input(name='inputs')
# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
-step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
+step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: column_parser
step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser'))
-step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
+step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: time_interval_transform
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.time_interval_transform'))
step_2.add_hyperparameter(name="time_interval", argument_type=ArgumentType.VALUE, data = 'T')
-step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
+step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)



primitive_tests/test.sh (+3 -2)

@@ -1,8 +1,9 @@
#!/bin/bash

modules="data_processing timeseries_processing feature_analysis detection_algorithm reinforcement"
#modules="data_processing timeseries_processing feature_analysis detection_algorithms reinforcement"
#modules="data_processing timeseries_processing"
#modules="detection_algorithm"
modules="data_processing"
#test_scripts=$(ls primitive_tests | grep -v -f tested_file.txt)

for module in $modules
do
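The loop body is truncated in this view. For orientation only, a hypothetical Python sketch of the kind of runner test.sh appears to implement, including the skip list from tested_file.txt that the commented test_scripts= line hints at (nothing beyond the paths above is confirmed by the diff):

import pathlib
import subprocess

# Hypothetical runner: execute each pipeline test in the selected modules,
# skipping files already listed in primitive_tests/tested_file.txt.
modules = ["data_processing"]
tested = set(pathlib.Path("primitive_tests/tested_file.txt").read_text().split())

for module in modules:
    for script in sorted(pathlib.Path("primitive_tests", module).glob("*_pipeline.py")):
        if script.name in tested:
            continue  # already recorded as tested
        subprocess.run(["python", str(script)], check=False)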


primitive_tests/tested_file.txt (+1 -0)

@@ -0,0 +1 @@
+CategoricalToBinary_pipeline.py

setup.py (+5 -7)

@@ -35,19 +35,17 @@ setup(
]
},
install_requires=[
-#'tamu_d3m',
-#'tamu_axolotl',
-#'Jinja2',
-'numpy==1.18.2',
+'tamu_d3m==2021.11.24',
+'tamu_axolotl',
+'numpy<=1.21.2',
'combo',
'simplejson==3.12.0',
-#'scikit-learn==0.22.0',
'scikit-learn',
'statsmodels==0.11.1',
'PyWavelets>=1.1.1',
'pillow==7.1.2',
-'tensorflow==2.2', # should be removed later
-'keras', # should be removed later
+'tensorflow==2.4',
+'keras==2.4.0',
'pyod',
'nimfa==1.4.0',
'stumpy==1.4.0',
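The pins move to tamu_d3m==2021.11.24, relax numpy to <=1.21.2, and lock TensorFlow and Keras to a matching 2.4 pair (Keras 2.4 delegates entirely to tf.keras, so it only makes sense alongside a TensorFlow 2.x install). A small sketch for checking an environment against the new pins, assuming Python 3.8+ for importlib.metadata:

from importlib.metadata import PackageNotFoundError, version

# Compare installed versions against the pins from setup.py above.
pins = {"numpy": "<=1.21.2", "tensorflow": "==2.4", "keras": "==2.4.0"}
for pkg, pin in pins.items():
    try:
        print(f"{pkg}: installed {version(pkg)}, pinned {pin}")
    except PackageNotFoundError:
        print(f"{pkg}: not installed (pinned {pin})")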


tods/detection_algorithm/DAGMM.py (+1 -1)

@@ -113,7 +113,7 @@ class DAGMMPrimitive(UnsupervisedOutlierDetectorBase[Inputs, Outputs, Params, Hy
'python_path': 'd3m.primitives.tods.detection_algorithm.dagmm',
'source': {'name': "DATALAB @Taxes A&M University", 'contact': 'mailto:khlai037@tamu.edu',
'uris': ['https://gitlab.com/lhenry15/tods/-/blob/Yile/anomaly-primitives/anomaly_primitives/DAGMM.py']},
-'algorithm_types': [metadata_base.PrimitiveAlgorithmType.DEEPLOG],
+'algorithm_types': [metadata_base.PrimitiveAlgorithmType.TODS_PRIMITIVE],
'primitive_family': metadata_base.PrimitiveFamily.ANOMALY_DETECTION,
'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'DAGMMPrimitive')),
'hyperparams_to_tune': ['comp_hiddens','est_hiddens','est_dropout_ratio','minibatch_size','epoch_size','rand_seed',


tods/detection_algorithm/Ensemble.py (+1 -2)

@@ -8,8 +8,7 @@ import numpy
import typing
import pandas as pd
# Custom import commands if any
-from sklearn.preprocessing.data import Normalizer
-from statsmodels.tsa.api import ExponentialSmoothing, SimpleExpSmoothing, Holt
+from sklearn.preprocessing import Normalizer
import uuid
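sklearn.preprocessing.data was always a private module; importing from it was deprecated in scikit-learn 0.22 and removed in 0.24, so only the public path still works. The same one-line fix recurs in the timeseries_processing files below. A quick sanity check of the public import, assuming scikit-learn is installed:

import numpy as np
from sklearn.preprocessing import Normalizer  # public path; .data is gone in >=0.24

# Normalizer rescales each sample (row) to unit norm, l2 by default.
X = np.array([[4.0, 3.0], [1.0, 2.0]])
print(Normalizer().fit_transform(X))  # each row now has unit l2 norm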




tods/detection_algorithm/core/CollectiveCommonTest.py (+13 -7)

@@ -9,13 +9,13 @@ import sys
import numpy as np
import unittest
# noinspection PyProtectedMember
-from sklearn.utils.testing import assert_allclose
-from sklearn.utils.testing import assert_array_less
-from sklearn.utils.testing import assert_equal
-from sklearn.utils.testing import assert_greater
-from sklearn.utils.testing import assert_greater_equal
-from sklearn.utils.testing import assert_less_equal
-from sklearn.utils.testing import assert_raises
+from numpy.testing import assert_equal
+from numpy.testing import assert_allclose
+from numpy.testing import assert_array_less
+from numpy.testing import assert_raises
+from unittest import TestCase

from sklearn.utils.estimator_checks import check_estimator

@@ -28,6 +28,12 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

from pyod.utils.data import generate_data

+_dummy = TestCase('__init__')
+assert_greater = _dummy.assertGreater
+assert_greater_equal = _dummy.assertGreaterEqual
+assert_less = _dummy.assertLess
+assert_less_equal = _dummy.assertLessEqual


class CollectiveCommonTest:
def __init__(self,
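sklearn.utils.testing was likewise removed in scikit-learn 0.24. The equality and closeness helpers have direct numpy.testing equivalents, but assert_greater and friends do not, hence the shim that borrows bound assert methods from a throwaway TestCase; the same change lands in UODCommonTest.py below. A self-contained sketch of the pattern:

from unittest import TestCase

import numpy as np
from numpy.testing import assert_allclose

# Equality/closeness checks come straight from numpy.testing...
assert_allclose(np.float64(0.1) + np.float64(0.2), 0.3, rtol=1e-9)

# ...while ordered comparisons are borrowed from a dummy TestCase
# instance, exactly as the commit does above.
_dummy = TestCase('__init__')
assert_greater = _dummy.assertGreater
assert_less_equal = _dummy.assertLessEqual

assert_greater(3, 2)         # passes silently
assert_less_equal(2.0, 2.0)  # passes silently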


tods/detection_algorithm/core/UODCommonTest.py (+12 -7)

@@ -9,13 +9,12 @@ import sys
import numpy as np
import unittest
# noinspection PyProtectedMember
-from sklearn.utils.testing import assert_allclose
-from sklearn.utils.testing import assert_array_less
-from sklearn.utils.testing import assert_equal
-from sklearn.utils.testing import assert_greater
-from sklearn.utils.testing import assert_greater_equal
-from sklearn.utils.testing import assert_less_equal
-from sklearn.utils.testing import assert_raises
+from numpy.testing import assert_equal
+from numpy.testing import assert_allclose
+from numpy.testing import assert_array_less
+from numpy.testing import assert_raises
+
+from unittest import TestCase

from sklearn.utils.estimator_checks import check_estimator

@@ -28,6 +27,12 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

from pyod.utils.data import generate_data

+_dummy = TestCase('__init__')
+assert_greater = _dummy.assertGreater
+assert_greater_equal = _dummy.assertGreaterEqual
+assert_less = _dummy.assertLess
+assert_less_equal = _dummy.assertLessEqual


class UODCommonTest:
def __init__(self,


tods/detection_algorithm/core/dagmm/dagmm.py (+1 -1)

@@ -1,7 +1,7 @@
import tensorflow as tf
import numpy as np
from sklearn.preprocessing import StandardScaler
-from sklearn.externals import joblib
+import joblib

from .compression_net import CompressionNet
from .estimation_net import EstimationNet
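sklearn.externals.joblib was deprecated in scikit-learn 0.21 and removed in 0.23; joblib is now imported as its own package. A minimal sketch of persisting a fitted object through the standalone import, assuming joblib is installed:

import joblib  # standalone package; sklearn.externals.joblib is gone in >=0.23
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler().fit([[0.0, 1.0], [2.0, 3.0]])
joblib.dump(scaler, "/tmp/scaler.joblib")   # serialize the fitted estimator
restored = joblib.load("/tmp/scaler.joblib")
print(restored.mean_)                       # [1. 2.]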


tods/feature_analysis/SKTruncatedSVD.py (+1 -1)

@@ -10,7 +10,7 @@ import time
import uuid

# Custom import commands if any
-from sklearn.decomposition.truncated_svd import TruncatedSVD
+from sklearn.decomposition import TruncatedSVD


from d3m.container.numpy import ndarray as d3m_ndarray


tods/feature_analysis/TRMF.py (+1 -4)

@@ -10,8 +10,6 @@ import time
import uuid

# Custom import commands if any
-from sklearn.decomposition.truncated_svd import TruncatedSVD


from d3m.container.numpy import ndarray as d3m_ndarray
from d3m.container import DataFrame as d3m_dataframe
@@ -21,7 +19,6 @@ from d3m.base import utils as base_utils
from d3m.exceptions import PrimitiveNotFittedError
from d3m.primitive_interfaces.base import CallResult, DockerContainer
from d3m.primitive_interfaces import base, transformer
-# from d3m.primitive_interfaces.unsupervised_learning import UnsupervisedLearnerPrimitiveBase
from ..common.TODSBasePrimitives import TODSTransformerPrimitiveBase


@@ -249,7 +246,7 @@ class TRMFPrimitive(TODSTransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
inputs: Container DataFrame.

Returns:
-Container DataFrame after Truncated SVD.
+Container DataFrame after TRMF.
"""
self._clf = trmf(
lags=self.hyperparams['lags'],


tods/resources/.entry_points.ini (+1 -0)

@@ -75,6 +75,7 @@ tods.detection_algorithm.AutoRegODetector = tods.detection_algorithm.AutoRegODet
tods.detection_algorithm.LSTMODetector = tods.detection_algorithm.LSTMODetect:LSTMODetectorPrimitive
tods.detection_algorithm.PCAODetector = tods.detection_algorithm.PCAODetect:PCAODetectorPrimitive
tods.detection_algorithm.KDiscordODetector = tods.detection_algorithm.KDiscordODetect:KDiscordODetectorPrimitive
+tods.detection_algorithm.dagmm = tods.detection_algorithm.DAGMM:DAGMMPrimitive
tods.detection_algorithm.deeplog = tods.detection_algorithm.DeepLog:DeepLogPrimitive
tods.detection_algorithm.telemanom = tods.detection_algorithm.Telemanom:TelemanomPrimitive
tods.detection_algorithm.system_wise_detection = tods.detection_algorithm.SystemWiseDetection:SystemWiseDetectionPrimitive
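This ini file feeds the package's entry points, which is how the d3m index discovers primitives at install time. A hedged sketch of the equivalent setuptools declaration for the line added above (the surrounding setup() arguments are placeholders):

from setuptools import find_packages, setup

# Sketch: each ini line above maps a d3m python_path suffix to module:class
# under the 'd3m.primitives' entry-point group.
setup(
    name="tods-example",      # placeholder package name
    version="0.0.1",
    packages=find_packages(),
    entry_points={
        "d3m.primitives": [
            "tods.detection_algorithm.dagmm = tods.detection_algorithm.DAGMM:DAGMMPrimitive",
        ],
    },
)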


tods/timeseries_processing/HoltSmoothing.py (+1 -4)

@@ -9,7 +9,7 @@ import numpy
import typing
import pandas as pd
# Custom import commands if any
-from sklearn.preprocessing.data import Normalizer
+from sklearn.preprocessing import Normalizer
from statsmodels.tsa.api import ExponentialSmoothing, SimpleExpSmoothing, Holt


@@ -191,9 +191,6 @@ class HoltSmoothingPrimitive(UnsupervisedLearnerPrimitiveBase[Inputs, Outputs, P
except Exception as e:
self.logger.error("Error in Calculating Holt smoothing",e)
self._update_metadata(outputs)
-#print(inputs)
-#print("-------------")
-print(outputs)

return base.CallResult(outputs)
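Dropping the stray print(outputs) is the right call for library code; the primitive already owns a self.logger, so diagnostics belong there. A generic sketch of the pattern (hypothetical standalone names, not the primitive's actual code):

import logging

logger = logging.getLogger(__name__)  # hypothetical module-level logger

def smooth_and_log(outputs):
    # Route debug output through logging so callers control verbosity
    # instead of getting unconditional prints on stdout.
    logger.debug("HoltSmoothing outputs:\n%s", outputs)
    return outputs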



tods/timeseries_processing/HoltWintersExponentialSmoothing.py (+1 -1)

@@ -9,7 +9,7 @@ import uuid
import typing
import pandas as pd
# Custom import commands if any
-from sklearn.preprocessing.data import Normalizer
+from sklearn.preprocessing import Normalizer
from statsmodels.tsa.api import ExponentialSmoothing, SimpleExpSmoothing, Holt




tods/timeseries_processing/MovingAverageTransformer.py (+1 -1)

@@ -9,7 +9,7 @@ import typing
import pandas as pd
import uuid
# Custom import commands if any
-from sklearn.preprocessing.data import Normalizer
+from sklearn.preprocessing import Normalizer


from d3m.container.numpy import ndarray as d3m_ndarray


tods/timeseries_processing/SKQuantileTransformer.py (+1 -1)

@@ -8,7 +8,7 @@ import numpy
import typing

# Custom import commands if any
-from sklearn.preprocessing.data import QuantileTransformer
+from sklearn.preprocessing import QuantileTransformer


from d3m.container.numpy import ndarray as d3m_ndarray


tods/timeseries_processing/SimpleExponentialSmoothing.py (+1 -1)

@@ -8,7 +8,7 @@ import numpy
import typing
import pandas as pd
# Custom import commands if any
-from sklearn.preprocessing.data import Normalizer
+from sklearn.preprocessing import Normalizer
from statsmodels.tsa.api import ExponentialSmoothing, SimpleExpSmoothing, Holt
import uuid


