import os
import typing

import numpy

from d3m import container, utils as d3m_utils
from d3m.metadata import base as metadata_base
from d3m.metadata import hyperparams
from d3m.primitive_interfaces import base, transformer

__all__ = ('TimeStampValidationPrimitive',)

Inputs = container.DataFrame
Outputs = container.DataFrame


class Hyperparams(hyperparams.Hyperparams):
    # This primitive declares no hyperparameters of its own.
    pass


class TimeStampValidationPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
    """
    A primitive to check time series is sorted by time stamp , if not then return sorted time series
    """
    # NOTE(review): the class docstring wording is kept as-is because the
    # framework may publish it as the primitive description - confirm before
    # rewording it.
    #
    # Behaviour summary:
    #   * the column examined is hard-coded to 'timestamp';
    #   * the returned frame always has a fresh 0..n-1 index;
    #   * any failure is logged and the input is returned unchanged
    #     (best-effort validation, never raises from `produce`).

    # Plain string (the previous trailing comma silently made this a 1-tuple).
    __author__ = "DATA Lab at Texas A&M University"

    metadata = metadata_base.PrimitiveMetadata(
        {
            'id': '5f791b09-e16f-42e1-bc53-39de308f5861',
            'version': '0.1.0',
            'name': 'Time Stamp Validation',
            'python_path': 'd3m.primitives.tods.data_processing.timestamp_validation',
            'keywords': ['Time Stamp', 'Sort Order'],
            'source': {
                'name': 'DATA Lab at Texas A&M University',
                'uris': [
                    'https://gitlab.com/lhenry15/tods.git',
                    'https://gitlab.com/lhenry15/tods/-/blob/devesh/tods/data_processing/TimeStampValidation.py',
                ],
                'contact': 'mailto:khlai037@tamu.edu',
            },
            'installation': [
                {
                    'type': metadata_base.PrimitiveInstallationType.PIP,
                    'package_uri': 'git+https://gitlab.com/lhenry15/tods.git@{git_commit}#egg=TODS'.format(
                        git_commit=d3m_utils.current_git_commit(os.path.dirname(__file__)),
                    ),
                },
            ],
            'algorithm_types': [
                metadata_base.PrimitiveAlgorithmType.DATA_PROFILING,
            ],
            'primitive_family': metadata_base.PrimitiveFamily.DATA_VALIDATION,
        }
    )

    def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
        """
        Return the input time series ordered by its 'timestamp' column.

        Args:
            inputs: Container DataFrame
            timeout: Default
            iterations: Default

        Returns:
            Container DataFrame sorted by Time Stamp, with a fresh 0..n-1
            index. If validation fails for any reason (for example the
            'timestamp' column is missing) the error is logged and the
            input is returned unchanged.
        """
        self.logger.info('Time Stamp order validation called')

        # Fallback result in case anything below raises.
        outputs = inputs
        try:
            if self._is_time_stamp_sorted(inputs, 'timestamp'):
                ordered = inputs
            else:
                ordered = inputs.sort_values(by=["timestamp"])

            # Non-inplace reset: on the already-sorted path `ordered` IS the
            # caller's frame, and an inplace reset would mutate their data.
            outputs = ordered.reset_index(drop=True)
            self._update_metadata(outputs)
            self.logger.info('Type of data : %s', type(outputs))
        except Exception as e:
            # Deliberately best-effort: report and fall through with whatever
            # `outputs` currently holds instead of failing the pipeline.
            self.logger.error('Time Stamp order validation error %s :', e)

        # Log the resulting frame. (Previously wrapped in print(), which only
        # ever wrote "None" to stdout because Logger.info returns None.)
        self.logger.info('%s', outputs)
        return base.CallResult(outputs)

    def _is_time_stamp_sorted(self, input: Inputs, column: str = 'timestamp') -> bool:
        """
        Check whether a column is in non-decreasing order.

        Args:
            input: Container Dataframe
            column: Column Name

        Returns:
            Boolean : True if timestamp column is sorted False if not.
            Empty and single-row columns count as sorted.

        Raises:
            KeyError: if ``column`` is not present in ``input``.
        """
        # Positional monotonicity check: independent of the frame's index
        # labels, so it also works on frames whose index is not 0..n-1.
        return bool(input[column].is_monotonic_increasing)

    def _update_metadata(self, outputs):
        """
        Regenerate ``outputs.metadata`` from the frame's current contents.

        Args:
            outputs: Container DataFrame whose ``metadata`` attribute is
                replaced in place.
        """
        outputs.metadata = outputs.metadata.generate(outputs)