From 0d962845b5e6fc55865e265df835f41fcd33f4b9 Mon Sep 17 00:00:00 2001 From: lhenry15 Date: Mon, 9 Nov 2020 23:26:52 -0600 Subject: [PATCH] fix the file name spelling Former-commit-id: 89bd39b2bf949768351d4caf1ee5da9487066be9 [formerly d2d67a7a8fba78459bf2db61504265a3130d0296] [formerly 4f97b7e2a9b8032117cfdf5fb817f1623d5a9cc8 [formerly 0b3ad60f0632707f8ff05c060bde5075ec126c53]] [formerly 90b6af304634415c94d558459cb158fb0a049b38 [formerly eb7a882fb80507e51f77f7fa35de311ca7c060e8] [formerly e90d2eb0855e8f6856087942bbf0dc8e4e91ab15 [formerly 8b75ca84dc533715a7467313cde52a48f217fa13]]] [formerly 75eaf5df27c2569c12d37118c210ea2574a2db3d [formerly a31eab992e589368c07ce945fdb2f03249d27d4f] [formerly a2eca3adffc5bbf8be3b57ea34b3a734b6261445 [formerly d8a265360f49b81ef7ad72bacafcf6b6654170c3]] [formerly c1f31f4fa228b58fb8d2886f8c6e0d11c318b6cf [formerly d3b6d793f0d7ec5d74025138d97bf37ab77f2168] [formerly 2ff18e01531732f95da273962e3fe57ca1c631cf [formerly e3cb047523ad70a77dd00c8daa1fb256472a98bf]]]] [formerly 3d9448676f29aa8bef883d3b9bdd9befb23a67b5 [formerly 01bc098ba5a8313f2da371bf0d91a0b59d1dbf19] [formerly d8d2bfa27e6021f09d2df00192a4e8e46218ec3a [formerly 9d8c10748a903b6d60b17cddf7dc52ce4b770c62]] [formerly 7f514d5caa9d66f874078eb8ca100ebc81337a87 [formerly 323bcdf75ae74db7fd1dfa403c35d0e5995d0388] [formerly 88000d934bff90dccd73c44d962cd61c6f6f0d21 [formerly 85819d7f9633f494f0969ed6cd73a5c3915dfe27]]] [formerly 7f0371611230a64b757bab1504dd9f692ec40fb3 [formerly aba35685faf94358a83c99ace9870a174687973f] [formerly ee5d065d8b07a2e45077967e3347e3f8222360a7 [formerly c2fa01a8d9fc1c67eb6ddb0fdd5cdc94849513ca]] [formerly cf6c97ff0eb88dfcb003b7f2907861c9dafd4966 [formerly 81e0ae5f69a1a36c86a9c576870f3b845741a138] [formerly 80735c237b86c4a86dc0422036260d81d97f70e0 [formerly 9cb940cf10955697bb32bbf14e9a7499e0dfddeb]]]]] [formerly 7c4157b29f34f95c49858fef4fba687f1c764f3c [formerly 0d336cae7c88bbea71afe668a6ca993278241651] [formerly caddc5edaf684322eb5d5940a2fd93c24b57b6ba [formerly 1d16809335a8deabc5f005165a6da65f4bafb309]] [formerly 36de01097f8c2d77ffdb58dda0d85707f435e7b5 [formerly 998a68bd73c4065bbd7d5dd8ec26691c3ada8203] [formerly d60075800251b7f0c40034bf922ba9d5b0936e75 [formerly 0f885146a2e5057d44ed179b731d2bd48984bc8d]]] [formerly 3a74384dfbd034a2f78a25ff716c5b602956c12f [formerly 398236eb3924ec59d4711154698e213fa1b9a6d9] [formerly 0fba0e86acbb827f4d81e3b072e88183667af092 [formerly 833c4353d8bbced1ce571fd8fc19e2575e4b6241]] [formerly 49bf49887efb1fa7ed60c098d4d9b96881ba3b46 [formerly e3dd355c54d716e40d166211c9872fbf876ad202] [formerly 2657c760ffe0d773a5b8ecafb11975984a54e528 [formerly 51ed389d2f6cba20ba27349d658c5debf87fa799]]]] [formerly d056eb315953e7b173a685ce8aba7ff7e4afe657 [formerly 17cfc6b22c4a21ffdd48722344a9a07ef0675b95] [formerly 88870237c3403becdbc3ea1367cbbf5a09b6a059 [formerly f2b4b09bcfb3ac4828dfa17b1be07b139a03420d]] [formerly 9be089bdb983788a33253ce6f812dbb7f2ff08da [formerly bc57e821b2cc389abf7d8aec30fb221db817ced3] [formerly bb9ffc04561c05a444584e5c41038f30e87df993 [formerly 9222d20b46d7ead0df2a6d1841269f9c4860c0f1]]] [formerly 4cec912606a47deb4f1d23b1fccbcf069d500b7d [formerly 90c1b2d875b462a2ff2a1d997d51872d5ea50045] [formerly 9e2b30853b756700b7a128204716c5d60655e315 [formerly 5c3ba414f480af72dccff08f79d668702feb1ad9]] [formerly 5068c85747802fd93320fdcd5ba291cfdcb366db [formerly 887aa721aa4287bbaf4eb6a8949852af11c9a3b8] [formerly 1be79c7bec07fb960289c0c8ca8cc3c1768dcbce [formerly 601a8002a67c5ae7062aa90ce12ca3bb28ec3564]]]]]] Former-commit-id: 806b1917ac5efa4d62efe4db153713a8eb9a4507 [formerly 9e1c9afa8c339392063688f7032c352857545163] [formerly 5a16e9bff88d8ba9590d9c876faf9560e2f74199 [formerly d63e58d7d70753d8b1bc81b478fc6fab59fa5f2c]] [formerly 7ed60881d23bf63720031c4fa1e8af7e0af6fabf [formerly f2c7cf0dbe2f234dba0f4cdeacaf7739571d86da] [formerly ba1c949fdd4fe0e5084469f8fb821ef4bdb8847f [formerly 316939c4e0c9790007573329ab08fcad60616465]]] [formerly 7d39a7d56781c35d2da47b0892702bb8f01e6aa2 [formerly 1ab7bbc52db86d30ea6919bf00c9a7f99bb7ea08] [formerly ed9faf4f77a4e3af7dedf0e594644c1ef31b6cae [formerly 3db7cc05e6e197035737cc0fc741bb24b8b3689e]] [formerly e6055b726da49356af54b7320306c860138c7cc9 [formerly c28e09b7a73e969890d7db921ae446f927a90cbe] [formerly c12c2fc98520fb13e628579e6a130865041a9689 [formerly 8883770c295305d2d7ec34f2e0561379ee9d1ffb]]]] [formerly 14bdc0277d9bfb5ddee844b0da56f5bb272f3585 [formerly 72f77425b61c9d68b484c1dfb80d3b928821cc45] [formerly 17f0eb65ef79547970490f7e084adbe92ce9a2f4 [formerly 4d7d110103aa6a81272fe80f8797a9aef4d13b2e]] [formerly 64eed0dd23d71300dc99b5d32263ac89b954026e [formerly cc716c396b301fee51d194efbcd78d157ac578db] [formerly e97e4445c5c840afa05d35cb7fd854fc3b3d04b7 [formerly 51a71dde983dd904fc290250a88c20539dae5543]]] [formerly dbacc561e26669ce6dee43cffcbb69d9a03b6ea1 [formerly ecfd28dd009f68e277a0c0676669ccf20a98d49b] [formerly bb9dfc8c49d962d36398b0bc076ac1d4393ecda4 [formerly 2336fffa6efb62f2c463f482a2955bec0f065c4e]] [formerly d7ddb8c8a011a84d793100dac8f8beaf31bd142b [formerly d2af0f7c4075c741216108c0d8a959e967ee5def] [formerly 1be79c7bec07fb960289c0c8ca8cc3c1768dcbce]]]] Former-commit-id: 43a4b7e87dbd8cb827b6525fefd4aef80446cb64 [formerly afb8d0479f7e2c89b4ee22603a425cd1a8731b2c] [formerly b3d1bdf6d3ad225bc7ac0d84a7443fecf6dbfc73 [formerly c51df118401db5236349f398df7276cb0111f8c1]] [formerly bf6bd4478c1c94c5eba51be4513214655603f9d1 [formerly 6db7031d8836ff3d0c83a0a2673171b2c3ac0ed4] [formerly 388372c71db7e577c75f4bacf85abebe84db01e7 [formerly 43e8f31ec1858cf967d3084154848cf25555f354]]] [formerly ab2dbfa42b40ea465040e4c3008db76df4e228f3 [formerly 40e825431355ba937f28e4032b89be895d016544] [formerly 01b30b89f7a34feac436d2ebc52da388377bbde0 [formerly f76c98b0ef56d48051fcc78e2aa2ca3c7a93133a]] [formerly e734d9c94f2b5556176e0d79e01cdb6180d67472 [formerly 2bc8d77aa4ddbfa2b128d258ae871c68759967d1] [formerly 90f6961e6b35c6c090c7ae5d66265c20ec2603ae [formerly 08acb5034ad22b41763e6f193efcc0076367bc02]]]] Former-commit-id: 15d94b31b9e09f62543ce8d20f8f38f10ad5b1a2 [formerly 321ac7cc3fb4b2109a49da23c203634e0191de4d] [formerly 572f6aa23ae5d0d90bb8fb0f9e464ef9e5b7f686 [formerly f96bedb57beda0b14e7ce5d0db7f516f75ce2de2]] [formerly c0534c1974ca00e75c0501c8d36530f973cbd6bf [formerly 8b1945233d55d3ad9ceab3288ad8fbf956df433f] [formerly 3d71717ab1ab7d8e45f5128f6895f981b8261bce [formerly 8c029d23f604885b6450c8b5a454677416e59b4f]]] Former-commit-id: 69918bc962457095993c7ee363f61b15424efaab [formerly b03bb371a1945972f30e5c4218e4c7c034798909] [formerly 13a0eb2d4ec1de0d5deb39be8ac584f9b20f17e9 [formerly d8ede2bc43e291dee42ab6e26671fd94436c050c]] Former-commit-id: f2a32a2b42c424b4d834d90efe2b76765fbbc663 [formerly ee29e4877a112c520a5baae384fdc90b30db6961] Former-commit-id: 906cb000fc4941753f039b27de427662a338267a --- tods/data_processing/ContructPredictions.py | 261 -------------------- 1 file changed, 261 deletions(-) delete mode 100644 tods/data_processing/ContructPredictions.py diff --git a/tods/data_processing/ContructPredictions.py b/tods/data_processing/ContructPredictions.py deleted file mode 100644 index ecc89cf..0000000 --- a/tods/data_processing/ContructPredictions.py +++ /dev/null @@ -1,261 +0,0 @@ -import os -import typing - -from d3m import container, utils as d3m_utils -from d3m.metadata import base as metadata_base, hyperparams -from d3m.primitive_interfaces import base, transformer -from d3m.contrib.primitives import compute_scores - -import common_primitives - -__all__ = ('ConstructPredictionsPrimitive',) - -Inputs = container.DataFrame -Outputs = container.DataFrame - - -class Hyperparams(hyperparams.Hyperparams): - use_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to operate on. If metadata reconstruction happens, this is used for reference columns." - " If any specified column is not a primary index or a predicted target, it is skipped.", - ) - exclude_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not operate on. If metadata reconstruction happens, this is used for reference columns. Applicable only if \"use_columns\" is not provided.", - ) - - -class ConstructPredictionsPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): - """ - A primitive which takes as input a DataFrame and outputs a DataFrame in Lincoln Labs predictions - format: first column is a d3mIndex column (and other primary index columns, e.g., for object detection - problem), and then predicted targets, each in its column, followed by optional confidence column(s). - - It supports both input columns annotated with semantic types (``https://metadata.datadrivendiscovery.org/types/PrimaryKey``, - ``https://metadata.datadrivendiscovery.org/types/PrimaryMultiKey``, ``https://metadata.datadrivendiscovery.org/types/PredictedTarget``, - ``https://metadata.datadrivendiscovery.org/types/Confidence``), or trying to reconstruct metadata. - This is why the primitive takes also additional input of a reference DataFrame which should - have metadata to help reconstruct missing metadata. If metadata is missing, the primitive - assumes that all ``inputs`` columns are predicted targets, without confidence column(s). - """ - - metadata = metadata_base.PrimitiveMetadata( - { - 'id': '8d38b340-f83f-4877-baaa-162f8e551736', - 'version': '0.3.0', - 'name': "Construct pipeline predictions output", - 'python_path': 'd3m.primitives.tods.data_processing.construct_predictions', - 'source': { - 'name': common_primitives.__author__, - 'contact': 'mailto:mitar.commonprimitives@tnode.com', - 'uris': [ - 'https://gitlab.com/datadrivendiscovery/common-primitives/blob/master/common_primitives/construct_predictions.py', - 'https://gitlab.com/datadrivendiscovery/common-primitives.git', - ], - }, - 'installation': [{ - 'type': metadata_base.PrimitiveInstallationType.PIP, - 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/common-primitives.git@{git_commit}#egg=common_primitives'.format( - git_commit=d3m_utils.current_git_commit(os.path.dirname(__file__)), - ), - }], - 'algorithm_types': [ - metadata_base.PrimitiveAlgorithmType.DATA_CONVERSION, - ], - 'primitive_family': metadata_base.PrimitiveFamily.DATA_TRANSFORMATION, - }, - ) - - def produce(self, *, inputs: Inputs, reference: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: # type: ignore - index_columns = inputs.metadata.get_index_columns() - target_columns = inputs.metadata.list_columns_with_semantic_types(('https://metadata.datadrivendiscovery.org/types/PredictedTarget',)) - - # Target columns cannot be also index columns. This should not really happen, - # but it could happen with buggy primitives. - target_columns = [target_column for target_column in target_columns if target_column not in index_columns] - - if index_columns and target_columns: - outputs = self._produce_using_semantic_types(inputs, index_columns, target_columns) - else: - outputs = self._produce_reconstruct(inputs, reference, index_columns, target_columns) - - outputs = compute_scores.ComputeScoresPrimitive._encode_columns(outputs) - - # Generally we do not care about column names in DataFrame itself (but use names of columns from metadata), - # but in this case setting column names makes it easier to assure that "to_csv" call produces correct output. - # See: https://gitlab.com/datadrivendiscovery/d3m/issues/147 - column_names = [] - for column_index in range(len(outputs.columns)): - column_names.append(outputs.metadata.query_column(column_index).get('name', outputs.columns[column_index])) - outputs.columns = column_names - - return base.CallResult(outputs) - - def _filter_index_columns(self, inputs_metadata: metadata_base.DataMetadata, index_columns: typing.Sequence[int]) -> typing.Sequence[int]: - if self.hyperparams['use_columns']: - index_columns = [index_column_index for index_column_index in index_columns if index_column_index in self.hyperparams['use_columns']] - if not index_columns: - raise ValueError("No index columns listed in \"use_columns\" hyper-parameter, but index columns are required.") - - else: - index_columns = [index_column_index for index_column_index in index_columns if index_column_index not in self.hyperparams['exclude_columns']] - if not index_columns: - raise ValueError("All index columns listed in \"exclude_columns\" hyper-parameter, but index columns are required.") - - names = [] - for index_column in index_columns: - index_metadata = inputs_metadata.query_column(index_column) - # We do not care about empty strings for names either. - if index_metadata.get('name', None): - names.append(index_metadata['name']) - - if 'd3mIndex' not in names: - raise ValueError("\"d3mIndex\" index column is missing.") - - names_set = set(names) - if len(names) != len(names_set): - duplicate_names = names - for name in names_set: - # Removes just the first occurrence. - duplicate_names.remove(name) - - self.logger.warning("Duplicate names for index columns: %(duplicate_names)s", { - 'duplicate_names': list(set(duplicate_names)), - }) - - return index_columns - - def _get_columns(self, inputs_metadata: metadata_base.DataMetadata, index_columns: typing.Sequence[int], target_columns: typing.Sequence[int]) -> typing.List[int]: - assert index_columns - assert target_columns - - index_columns = self._filter_index_columns(inputs_metadata, index_columns) - - if self.hyperparams['use_columns']: - target_columns = [target_column_index for target_column_index in target_columns if target_column_index in self.hyperparams['use_columns']] - if not target_columns: - raise ValueError("No target columns listed in \"use_columns\" hyper-parameter, but target columns are required.") - - else: - target_columns = [target_column_index for target_column_index in target_columns if target_column_index not in self.hyperparams['exclude_columns']] - if not target_columns: - raise ValueError("All target columns listed in \"exclude_columns\" hyper-parameter, but target columns are required.") - - assert index_columns - assert target_columns - - return list(index_columns) + list(target_columns) - - def _get_confidence_columns(self, inputs_metadata: metadata_base.DataMetadata) -> typing.List[int]: - confidence_columns = inputs_metadata.list_columns_with_semantic_types(('https://metadata.datadrivendiscovery.org/types/Confidence',)) - - if self.hyperparams['use_columns']: - confidence_columns = [confidence_column_index for confidence_column_index in confidence_columns if confidence_column_index in self.hyperparams['use_columns']] - else: - confidence_columns = [confidence_column_index for confidence_column_index in confidence_columns if confidence_column_index not in self.hyperparams['exclude_columns']] - - return confidence_columns - - def _produce_using_semantic_types(self, inputs: Inputs, index_columns: typing.Sequence[int], - target_columns: typing.Sequence[int]) -> Outputs: - confidence_columns = self._get_confidence_columns(inputs.metadata) - - output_columns = self._get_columns(inputs.metadata, index_columns, target_columns) + confidence_columns - - # "get_index_columns" makes sure that "d3mIndex" is always listed first. - # And "select_columns" selects columns in order listed, which then - # always puts "d3mIndex" first. - outputs = inputs.select_columns(output_columns) - - if confidence_columns: - outputs.metadata = self._update_confidence_columns(outputs.metadata, confidence_columns) - - return outputs - - def _update_confidence_columns(self, inputs_metadata: metadata_base.DataMetadata, confidence_columns: typing.Sequence[int]) -> metadata_base.DataMetadata: - output_columns_length = inputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - outputs_metadata = inputs_metadata - - # All confidence columns have to be named "confidence". - for column_index in range(output_columns_length - len(confidence_columns), output_columns_length): - outputs_metadata = outputs_metadata.update((metadata_base.ALL_ELEMENTS, column_index), { - 'name': 'confidence', - }) - - return outputs_metadata - - def _produce_reconstruct(self, inputs: Inputs, reference: Inputs, index_columns: typing.Sequence[int], target_columns: typing.Sequence[int]) -> Outputs: - if not index_columns: - reference_index_columns = reference.metadata.get_index_columns() - - if not reference_index_columns: - raise ValueError("Cannot find an index column in reference data, but index column is required.") - - filtered_index_columns = self._filter_index_columns(reference.metadata, reference_index_columns) - index = reference.select_columns(filtered_index_columns) - else: - filtered_index_columns = self._filter_index_columns(inputs.metadata, index_columns) - index = inputs.select_columns(filtered_index_columns) - - if not target_columns: - if index_columns: - raise ValueError("No target columns in input data, but index column(s) present.") - - # We assume all inputs are targets. - targets = inputs - - # We make sure at least basic metadata is generated correctly, so we regenerate metadata. - targets.metadata = targets.metadata.generate(targets) - - # We set target column names from the reference. We set semantic types. - targets.metadata = self._update_targets_metadata(targets.metadata, self._get_target_names(reference.metadata)) - - else: - targets = inputs.select_columns(target_columns) - - return index.append_columns(targets) - - def multi_produce(self, *, produce_methods: typing.Sequence[str], inputs: Inputs, reference: Inputs, timeout: float = None, iterations: int = None) -> base.MultiCallResult: # type: ignore - return self._multi_produce(produce_methods=produce_methods, timeout=timeout, iterations=iterations, inputs=inputs, reference=reference) - - def fit_multi_produce(self, *, produce_methods: typing.Sequence[str], inputs: Inputs, reference: Inputs, timeout: float = None, iterations: int = None) -> base.MultiCallResult: # type: ignore - return self._fit_multi_produce(produce_methods=produce_methods, timeout=timeout, iterations=iterations, inputs=inputs, reference=reference) - - def _get_target_names(self, metadata: metadata_base.DataMetadata) -> typing.List[typing.Union[str, None]]: - target_names = [] - - for column_index in metadata.list_columns_with_semantic_types(('https://metadata.datadrivendiscovery.org/types/TrueTarget',)): - column_metadata = metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - - target_names.append(column_metadata.get('name', None)) - - return target_names - - def _update_targets_metadata(self, metadata: metadata_base.DataMetadata, target_names: typing.Sequence[typing.Union[str, None]]) -> metadata_base.DataMetadata: - targets_length = metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - if targets_length != len(target_names): - raise ValueError("Not an expected number of target columns to apply names for. Expected {target_names}, provided {targets_length}.".format( - target_names=len(target_names), - targets_length=targets_length, - )) - - for column_index, target_name in enumerate(target_names): - metadata = metadata.add_semantic_type((metadata_base.ALL_ELEMENTS, column_index), 'https://metadata.datadrivendiscovery.org/types/Target') - metadata = metadata.add_semantic_type((metadata_base.ALL_ELEMENTS, column_index), 'https://metadata.datadrivendiscovery.org/types/PredictedTarget') - - # We do not have it, let's skip it and hope for the best. - if target_name is None: - continue - - metadata = metadata.update_column(column_index, { - 'name': target_name, - }) - - return metadata