@@ -10,19 +10,19 @@ pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')
# Step 0: dataset_to_dataframe
step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.dataset_to_dataframe.Common'))
step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe'))
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)
# Step 1: column_parser
step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.column_parser.Common'))
step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser'))
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)
# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common'))
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
@@ -30,7 +30,7 @@ step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALU
pipeline_description.add_step(step_2)
# Step 3: extract_columns_by_semantic_types(targets)
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common'))
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_3.add_output('produce')
step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
@@ -53,7 +53,7 @@ step_5.add_output('produce')
pipeline_description.add_step(step_5)
# Step 6: Predictions
step_6 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.construct_predictions.Common'))
step_6 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.construct_predictions'))
step_6.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.5.produce')
step_6.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_6.add_output('produce')
@@ -1,70 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> axiswise_scaler -> auto_correlation -> construct_predictions
#    extract_columns_by_semantic_types(targets) runs in parallel, but its output is not consumed downstream
# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')
# Step 0: dataset_to_dataframe
step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.dataset_to_dataframe.Common'))
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)
# Step 1: column_parser
step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.column_parser.Common'))
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)
# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
                          data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)
# Step 3: extract_columns_by_semantic_types(targets)
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common'))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_3.add_output('produce')
step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
                          data=['https://metadata.datadrivendiscovery.org/types/TrueTarget'])
pipeline_description.add_step(step_3)
attributes = 'steps.2.produce'
targets = 'steps.3.produce'
# Step 4: processing
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.axiswise_scaler'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
step_4.add_output('produce')
pipeline_description.add_step(step_4)
# Step 5: algorithm
step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.feature_analysis.auto_correlation'))
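# auto_correlation is a TODS feature-analysis primitive; judging by its name it derives
# autocorrelation-based features from the scaled series, and construct_predictions below
# then exposes that output as the prediction column.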
step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.4.produce')
step_5.add_output('produce')
pipeline_description.add_step(step_5)
# Step 6: Predictions
step_6 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.construct_predictions.Common'))
step_6.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.5.produce')
step_6.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_6.add_output('produce')
pipeline_description.add_step(step_6)
# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.6.produce')
# Output to json
data = pipeline_description.to_json()
with open('example_pipeline.json', 'w') as f:
    f.write(data)
print(data)
@@ -1,70 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> axiswise_scaler -> deeplog -> construct_predictions
#    extract_columns_by_semantic_types(targets) runs in parallel, but its output is not consumed downstream
# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')
# Step 0: dataset_to_dataframe
step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.dataset_to_dataframe.Common'))
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)
# Step 1: column_parser
step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.column_parser.Common'))
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)
# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
                          data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)
# Step 3: extract_columns_by_semantic_types(targets)
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common'))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_3.add_output('produce')
step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
                          data=['https://metadata.datadrivendiscovery.org/types/TrueTarget'])
pipeline_description.add_step(step_3)
attributes = 'steps.2.produce'
targets = 'steps.3.produce'
# Step 4: processing
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.axiswise_scaler'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
step_4.add_output('produce')
pipeline_description.add_step(step_4)
# Step 5: algorithm
step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.deeplog'))
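# DeepLog (Du et al., CCS 2017) is an LSTM-based sequence anomaly detector originally
# proposed for system logs; the TODS primitive applies the same model to the scaled series.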
step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.4.produce')
step_5.add_output('produce')
pipeline_description.add_step(step_5)
# Step 6: Predictions
step_6 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.construct_predictions.Common'))
step_6.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.5.produce')
step_6.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_6.add_output('produce')
pipeline_description.add_step(step_6)
# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.6.produce')
# Output to json
data = pipeline_description.to_json()
with open('example_pipeline.json', 'w') as f:
    f.write(data)
print(data)
@@ -1,72 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> pyod_ae -> Ensemble
#    extract_columns_by_semantic_types(targets) runs in parallel, but its output is not consumed downstream
# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')
# Step 0: dataset_to_dataframe
step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.dataset_to_dataframe.Common'))
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)
# Step 1: column_parser
step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.column_parser.Common'))
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)
# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
                          data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)
# Step 3: extract_columns_by_semantic_types(targets)
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common'))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_3.add_output('produce')
step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
                          data=['https://metadata.datadrivendiscovery.org/types/TrueTarget'])
pipeline_description.add_step(step_3)
attributes = 'steps.2.produce'
targets = 'steps.3.produce'
# Step 4: auto encoder
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_ae'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
step_4.add_output('produce_score')
step_4.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=[0, 1, 2])
step_4.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_4.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
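# Assuming the usual TODS/PyOD primitive conventions: 'use_columns' restricts the detector
# to columns 0-2, 'use_semantic_types' makes column selection honor the column metadata, and
# return_result='append' appends the scores to the input frame; the 'produce_score' output
# carries raw anomaly scores rather than thresholded 0/1 labels.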
pipeline_description.add_step(step_4)
# Step 5: ensemble
step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.Ensemble'))
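# The Ensemble primitive presumably merges the point-wise scores from step 4 into a single
# outlier decision per row; the exact combination rule is the primitive's default.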
step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.4.produce_score')
step_5.add_output('produce')
pipeline_description.add_step(step_5)
# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.5.produce')
# Output to YAML
#yaml = pipeline_description.to_yaml()
#with open('pipeline.yml', 'w') as f:
#    f.write(yaml)
#print(yaml)
# Output to json
data = pipeline_description.to_json()
with open('example_pipeline.json', 'w') as f:
    f.write(data)
print(data)
@@ -1,103 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams
# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')
# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)
# Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.data_transformation.column_parser.Common')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)
# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)
# Step 3: extract_columns_by_semantic_types(targets)
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common'))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_3.add_output('produce')
step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
                          data=['https://metadata.datadrivendiscovery.org/types/TrueTarget'])
pipeline_description.add_step(step_3)
attributes = 'steps.2.produce'
targets = 'steps.3.produce'
# Step 4: Power transformation
primitive_4 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.power_transformer')
step_4 = PrimitiveStep(primitive=primitive_4)
step_4.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='new')
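# Assuming the common TODS convention, return_result='new' makes each transformer return a
# fresh dataframe holding only the transformed columns instead of appending them to its input.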
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
step_4.add_output('produce')
pipeline_description.add_step(step_4)
# Step 5: Axiswise scaling
primitive_5 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.axiswise_scaler')
step_5 = PrimitiveStep(primitive=primitive_5)
step_5.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='new')
step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.4.produce')
step_5.add_output('produce')
pipeline_description.add_step(step_5)
# Step 6: Standardization
primitive_6 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler')
step_6 = PrimitiveStep(primitive=primitive_6)
step_6.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='new')
step_6.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.5.produce')
step_6.add_output('produce')
pipeline_description.add_step(step_6)
# Step 7: Quantile transformation
primitive_7 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.quantile_transformer')
step_7 = PrimitiveStep(primitive=primitive_7)
step_7.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='new')
step_7.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.6.produce')
step_7.add_output('produce')
pipeline_description.add_step(step_7)
# Step 8: Isolation Forest
primitive_8 = index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_iforest')
step_8 = PrimitiveStep(primitive=primitive_8)
step_8.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
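# 'contamination' follows the PyOD convention: the assumed fraction of outliers in the data,
# used to set the threshold on the outlier scores (10% here).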
step_8.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.7.produce')
# step_8.add_output('produce_score')
step_8.add_output('produce')
pipeline_description.add_step(step_8)
# Step 9: Predictions
step_9 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.construct_predictions.Common'))
step_9.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.8.produce')
step_9.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_9.add_output('produce')
pipeline_description.add_step(step_9)
# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.9.produce')
# Output to json
data = pipeline_description.to_json()
with open('example_pipeline.json', 'w') as f:
    f.write(data)
print(data)
# Output to YAML
#yaml = pipeline_description.to_yaml()
#with open('pipeline.yml', 'w') as f:
#    f.write(yaml)
#print(yaml)
@@ -1,72 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams
import copy
# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> pyod_loda -> construct_predictions
#    extract_columns_by_semantic_types(targets) runs in parallel, but its output is not consumed downstream
# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')
# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)
# Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.data_transformation.column_parser.Common')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)
# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)
# Step 3: extract_columns_by_semantic_types(targets)
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common'))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_3.add_output('produce')
step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
                          data=['https://metadata.datadrivendiscovery.org/types/TrueTarget'])
pipeline_description.add_step(step_3)
attributes = 'steps.2.produce'
targets = 'steps.3.produce'
# Step 4: LODA detector
primitive_4 = index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_loda')
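# LODA (Lightweight On-line Detector of Anomalies, Pevny 2016) scores points with an
# ensemble of one-dimensional histograms built on sparse random projections.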
step_4 = PrimitiveStep(primitive=primitive_4)
step_4.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1)
step_4.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='new')
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
step_4.add_output('produce')
pipeline_description.add_step(step_4)
# Step 5: Predictions
step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.construct_predictions.Common'))
step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.4.produce')
step_5.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_5.add_output('produce')
pipeline_description.add_step(step_5)
# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.5.produce')
# Output to json
data = pipeline_description.to_json()
with open('example_pipeline.json', 'w') as f:
    f.write(data)
print(data)
@@ -1,70 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> axiswise_scaler -> matrix_profile -> construct_predictions
#    extract_columns_by_semantic_types(targets) runs in parallel, but its output is not consumed downstream
# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')
# Step 0: dataset_to_dataframe
step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.dataset_to_dataframe.Common'))
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)
# Step 1: column_parser
step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.column_parser.Common'))
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)
# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
                          data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)
# Step 3: extract_columns_by_semantic_types(targets)
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common'))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_3.add_output('produce')
step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
                          data=['https://metadata.datadrivendiscovery.org/types/TrueTarget'])
pipeline_description.add_step(step_3)
attributes = 'steps.2.produce'
targets = 'steps.3.produce'
# Step 4: processing
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.axiswise_scaler'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
step_4.add_output('produce')
pipeline_description.add_step(step_4)
# Step 5: algorithm
step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.matrix_profile'))
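# The matrix profile records, for each subsequence, the distance to its nearest-neighbor
# subsequence elsewhere in the series; large values (discords) flag anomalous windows.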
step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.4.produce')
step_5.add_output('produce')
pipeline_description.add_step(step_5)
# Step 6: Predictions
step_6 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.construct_predictions.Common'))
step_6.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.5.produce')
step_6.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_6.add_output('produce')
pipeline_description.add_step(step_6)
# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.6.produce')
# Output to json
data = pipeline_description.to_json()
with open('example_pipeline.json', 'w') as f:
    f.write(data)
print(data)
@@ -1,70 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> axiswise_scaler -> pyod_sod -> construct_predictions
#    extract_columns_by_semantic_types(targets) runs in parallel, but its output is not consumed downstream
# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')
# Step 0: dataset_to_dataframe
step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.dataset_to_dataframe.Common'))
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)
# Step 1: column_parser
step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.column_parser.Common'))
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)
# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
                          data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)
# Step 3: extract_columns_by_semantic_types(targets)
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common'))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_3.add_output('produce')
step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
                          data=['https://metadata.datadrivendiscovery.org/types/TrueTarget'])
pipeline_description.add_step(step_3)
attributes = 'steps.2.produce'
targets = 'steps.3.produce'
# Step 4: processing
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.axiswise_scaler'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
step_4.add_output('produce')
pipeline_description.add_step(step_4)
# Step 5: algorithm
step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_sod'))
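# SOD (Subspace Outlier Detection, Kriegel et al. 2009) scores each point by its deviation
# within an axis-parallel subspace derived from its shared-nearest-neighbor reference set.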
step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.4.produce')
step_5.add_output('produce')
pipeline_description.add_step(step_5)
# Step 6: Predictions
step_6 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.construct_predictions.Common'))
step_6.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.5.produce')
step_6.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_6.add_output('produce')
pipeline_description.add_step(step_6)
# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.6.produce')
# Output to json
data = pipeline_description.to_json()
with open('example_pipeline.json', 'w') as f:
    f.write(data)
print(data)
@@ -1,74 +0,0 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> pyod_ae -> system_wise_detection
#    extract_columns_by_semantic_types(targets) runs in parallel, but its output is not consumed downstream
# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')
# Step 0: dataset_to_dataframe
step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.dataset_to_dataframe.Common'))
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)
# Step 1: column_parser
step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.column_parser.Common'))
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)
# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
                          data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)
# Step 3: extract_columns_by_semantic_types(targets)
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common'))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_3.add_output('produce')
step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
                          data=['https://metadata.datadrivendiscovery.org/types/TrueTarget'])
pipeline_description.add_step(step_3)
attributes = 'steps.2.produce'
targets = 'steps.3.produce'
# Step 4: auto encoder
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_ae'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
step_4.add_output('produce_score')
#step_4.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=[2])
#step_4.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
step_4.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append')
pipeline_description.add_step(step_4)
# Step 5: system-wise detection
step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.system_wise_detection'))
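# An assumption from the name: system_wise_detection aggregates the per-timestamp scores
# from step 4 into one anomaly decision per system/series rather than per point.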
step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.4.produce_score')
step_5.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='new')
step_5.add_output('produce')
pipeline_description.add_step(step_5)
# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.5.produce')
# Output to YAML
#yaml = pipeline_description.to_yaml()
#with open('pipeline.yml', 'w') as f:
#    f.write(yaml)
#print(yaml)
# Output to json
data = pipeline_description.to_json()
with open('example_pipeline.json', 'w') as f:
    f.write(data)
print(data)
@@ -9,13 +9,13 @@ this_path = os.path.dirname(os.path.abspath(__file__))
#table_path = 'datasets/NAB/realTweets/labeled_Twitter_volume_IBM.csv' # The path of the dataset
parser = argparse.ArgumentParser(description='Arguments for running predefined pipeline.')
parser.add_argument('--table_path', type=str, default=os.path.join(this_path, '../datasets/yahoo_sub_5.csv'),
parser.add_argument('--table_path', type=str, default=os.path.join(this_path, '../../datasets/yahoo_sub_5.csv'),
                    help='Input the path of the input data table')
parser.add_argument('--target_index', type=int, default=6,
                    help='Index of the ground truth (for evaluation)')
parser.add_argument('--metric', type=str, default='F1_MACRO',
                    help='Evaluation Metric (F1, F1_MACRO)')
parser.add_argument('--pipeline_path', default=os.path.join(this_path, './example_pipeline.json'),
parser.add_argument('--pipeline_path', default=os.path.join(this_path, './example_pipelines/abod_pipeline.json'),
                    help='Input the path of the pre-built pipeline description')
# parser.add_argument('--pipeline_path', default=os.path.join(this_path, '../tods/resources/default_pipeline.json'),
#                     help='Input the path of the pre-built pipeline description')
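# Example invocation (the script name is hypothetical; flags and defaults as defined above):
#   python run_pipeline.py --table_path ../../datasets/yahoo_sub_5.csv \
#       --pipeline_path ./example_pipelines/abod_pipeline.json --metric F1_MACRO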
@@ -33,6 +33,9 @@ dataset = generate_dataset(df, target_index)
# Load the default pipeline
pipeline = load_pipeline(pipeline_path)
print(dir(pipeline))
print(pipeline.steps)
print(dir(dataset))
# Run the pipeline
pipeline_result = evaluate_pipeline(dataset, pipeline, metric)
@@ -0,0 +1,148 @@
import os
import pickle
import unittest

from d3m import container
from d3m.metadata import base as metadata_base

from tods.common import FixedSplit


class FixedSplitDatasetSplitPrimitiveTestCase(unittest.TestCase):
    def test_produce_train_values(self):
        dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'tests', 'data', 'datasets', 'iris_dataset_1', 'datasetDoc.json'))
        dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path))

        # We set semantic types like runtime would.
        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Target')
        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/TrueTarget')
        dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Attribute')

        hyperparams_class = FixedSplit.FixedSplitDatasetSplitPrimitive.metadata.get_hyperparams()
        hyperparams = hyperparams_class.defaults().replace({
            'primary_index_values': ['9', '11', '13'],
        })
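        # Standard d3m pattern: metadata.get_hyperparams() returns the primitive's Hyperparams
        # class, and defaults().replace({...}) builds an instance with chosen values overridden.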
        # We want to make sure "primary_index_values" is encoded just as a list and not
        # a pickle because runtime populates this primitive as a list from a split file.
        self.assertEqual(hyperparams.values_to_json_structure(), {'primary_index_values': ['9', '11', '13'], 'row_indices': [], 'delete_recursive': False})

        primitive = FixedSplit.FixedSplitDatasetSplitPrimitive(hyperparams=hyperparams)

        primitive.set_training_data(dataset=dataset)
        primitive.fit()

        # To test that pickling works.
        pickle.dumps(primitive)

        results = primitive.produce(inputs=container.List([0], generate_metadata=True)).value

        self.assertEqual(len(results), 1)

        for dataset in results:
            self.assertEqual(len(dataset), 1)

        self.assertEqual(results[0]['learningData'].shape[0], 147)
        self.assertEqual(list(results[0]['learningData'].iloc[:, 0]), [str(i) for i in range(150) if i not in [9, 11, 13]])
    def test_produce_score_values(self):
        dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'tests', 'data', 'datasets', 'iris_dataset_1', 'datasetDoc.json'))
        dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path))

        # We set semantic types like runtime would.
        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Target')
        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/TrueTarget')
        dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Attribute')
        hyperparams_class = FixedSplit.FixedSplitDatasetSplitPrimitive.metadata.get_hyperparams()
        hyperparams = hyperparams_class.defaults().replace({
            'primary_index_values': ['9', '11', '13'],
        })

        # We want to make sure "primary_index_values" is encoded just as a list and not
        # a pickle because runtime populates this primitive as a list from a split file.
        self.assertEqual(hyperparams.values_to_json_structure(), {'primary_index_values': ['9', '11', '13'], 'row_indices': [], 'delete_recursive': False})

        primitive = FixedSplit.FixedSplitDatasetSplitPrimitive(hyperparams=hyperparams)
        primitive.set_training_data(dataset=dataset)
        primitive.fit()

        results = primitive.produce_score_data(inputs=container.List([0], generate_metadata=True)).value

        self.assertEqual(len(results), 1)

        for dataset in results:
            self.assertEqual(len(dataset), 1)

        self.assertEqual(results[0]['learningData'].shape[0], 3)
        self.assertEqual(list(results[0]['learningData'].iloc[:, 0]), [str(i) for i in range(150) if i in [9, 11, 13]])
    def test_produce_train_indices(self):
        dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'tests', 'data', 'datasets', 'iris_dataset_1', 'datasetDoc.json'))
        dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path))

        # We set semantic types like runtime would.
        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Target')
        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/TrueTarget')
        dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Attribute')
        hyperparams_class = FixedSplit.FixedSplitDatasetSplitPrimitive.metadata.get_hyperparams()
        primitive = FixedSplit.FixedSplitDatasetSplitPrimitive(hyperparams=hyperparams_class.defaults().replace({
            'row_indices': [9, 11, 13],
        }))
        primitive.set_training_data(dataset=dataset)
        primitive.fit()

        # To test that pickling works.
        pickle.dumps(primitive)

        results = primitive.produce(inputs=container.List([0], generate_metadata=True)).value

        self.assertEqual(len(results), 1)

        for dataset in results:
            self.assertEqual(len(dataset), 1)

        self.assertEqual(results[0]['learningData'].shape[0], 147)
        self.assertEqual(list(results[0]['learningData'].iloc[:, 0]), [str(i) for i in range(150) if i not in [9, 11, 13]])
    def test_produce_score_indices(self):
        dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'tests', 'data', 'datasets', 'iris_dataset_1', 'datasetDoc.json'))
        dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path))

        # We set semantic types like runtime would.
        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Target')
        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/TrueTarget')
        dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Attribute')
        hyperparams_class = FixedSplit.FixedSplitDatasetSplitPrimitive.metadata.get_hyperparams()
        primitive = FixedSplit.FixedSplitDatasetSplitPrimitive(hyperparams=hyperparams_class.defaults().replace({
            'row_indices': [9, 11, 13],
        }))
        primitive.set_training_data(dataset=dataset)
        primitive.fit()

        results = primitive.produce_score_data(inputs=container.List([0], generate_metadata=True)).value

        self.assertEqual(len(results), 1)

        for dataset in results:
            self.assertEqual(len(dataset), 1)

        self.assertEqual(results[0]['learningData'].shape[0], 3)
        self.assertEqual(list(results[0]['learningData'].iloc[:, 0]), [str(i) for i in range(150) if i in [9, 11, 13]])


if __name__ == '__main__':
    unittest.main()
@@ -0,0 +1,100 @@
import os
import pickle
import unittest

from d3m import container
from d3m.metadata import base as metadata_base

from common_primitives import kfold_split


class KFoldDatasetSplitPrimitiveTestCase(unittest.TestCase):
    def test_produce_train(self):
        dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'tests', 'data', 'datasets', 'database_dataset_1', 'datasetDoc.json'))
        dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path))

        # We set semantic types like runtime would.
        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/Target')
        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/TrueTarget')
        dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/Attribute')

        hyperparams_class = kfold_split.KFoldDatasetSplitPrimitive.metadata.get_hyperparams()
        primitive = kfold_split.KFoldDatasetSplitPrimitive(hyperparams=hyperparams_class.defaults().replace({
            'number_of_folds': 10,
            'shuffle': True,
            'delete_recursive': True,
        }))

        primitive.set_training_data(dataset=dataset)
        primitive.fit()

        # To test that pickling works.
        pickle.dumps(primitive)

        results = primitive.produce(inputs=container.List([0, 1], generate_metadata=True)).value

        self.assertEqual(len(results), 2)

        for dataset in results:
            self.assertEqual(len(dataset), 4)

        self.assertEqual(results[0]['codes'].shape[0], 3)
        self.assertEqual(results[1]['codes'].shape[0], 3)

        self.assertEqual(set(results[0]['codes'].iloc[:, 0]), {'AAA', 'BBB', 'CCC'})
        self.assertEqual(len(results[0]['learningData'].iloc[:, 0]), 40)
        self.assertEqual(set(results[0]['learningData'].iloc[:, 1]), {'AAA', 'BBB', 'CCC'})
        self.assertEqual(set(results[0]['learningData'].iloc[:, 2]), {'aaa', 'bbb', 'ccc', 'ddd', 'eee'})
        self.assertEqual(set(results[0]['learningData'].iloc[:, 3]), {'1990', '2000', '2010'})

        self.assertEqual(set(results[1]['codes'].iloc[:, 0]), {'AAA', 'BBB', 'CCC'})
        self.assertEqual(len(results[1]['learningData'].iloc[:, 0]), 40)
        self.assertEqual(set(results[1]['learningData'].iloc[:, 1]), {'AAA', 'BBB', 'CCC'})
        self.assertEqual(set(results[1]['learningData'].iloc[:, 2]), {'aaa', 'bbb', 'ccc', 'ddd', 'eee'})
        self.assertEqual(set(results[1]['learningData'].iloc[:, 3]), {'1990', '2000', '2010'})

    def test_produce_score(self):
        dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'tests', 'data', 'datasets', 'database_dataset_1', 'datasetDoc.json'))
        dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path))

        # We set semantic types like runtime would.
        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/Target')
        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/TrueTarget')
        dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/Attribute')

        hyperparams_class = kfold_split.KFoldDatasetSplitPrimitive.metadata.get_hyperparams()
        primitive = kfold_split.KFoldDatasetSplitPrimitive(hyperparams=hyperparams_class.defaults().replace({
            'number_of_folds': 10,
            'shuffle': True,
            'delete_recursive': True,
        }))

        primitive.set_training_data(dataset=dataset)
        primitive.fit()

        results = primitive.produce_score_data(inputs=container.List([0, 1], generate_metadata=True)).value

        self.assertEqual(len(results), 2)

        for dataset in results:
            self.assertEqual(len(dataset), 4)

        self.assertEqual(set(results[0]['codes'].iloc[:, 0]), {'AAA', 'BBB'})
        self.assertEqual(set(results[0]['learningData'].iloc[:, 0]), {'5', '11', '28', '31', '38'})
        self.assertEqual(set(results[0]['learningData'].iloc[:, 1]), {'AAA', 'BBB'})
        self.assertEqual(set(results[0]['learningData'].iloc[:, 2]), {'aaa', 'bbb', 'ddd', 'eee'})
        self.assertEqual(set(results[0]['learningData'].iloc[:, 3]), {'1990', '2000'})

        self.assertEqual(set(results[1]['codes'].iloc[:, 0]), {'BBB', 'CCC'})
        self.assertEqual(set(results[1]['learningData'].iloc[:, 0]), {'12', '26', '29', '32', '39'})
        self.assertEqual(set(results[1]['learningData'].iloc[:, 1]), {'BBB', 'CCC'})
        self.assertEqual(set(results[1]['learningData'].iloc[:, 2]), {'bbb', 'ccc', 'ddd', 'eee'})
        self.assertEqual(set(results[1]['learningData'].iloc[:, 3]), {'1990', '2000', '2010'})


if __name__ == '__main__':
    unittest.main()
@@ -0,0 +1,223 @@
import os
import pickle
import unittest

from d3m import container
from d3m.metadata import base as metadata_base

from common_primitives import kfold_split_timeseries


class KFoldTimeSeriesSplitPrimitiveTestCase(unittest.TestCase):
    def test_produce_train_timeseries_1(self):
        dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'tests', 'data', 'datasets', 'timeseries_dataset_1', 'datasetDoc.json'))
        dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path))

        # We set semantic types like runtime would.
        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/Target')
        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/TrueTarget')
        dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/Attribute')

        hyperparams_class = kfold_split_timeseries.KFoldTimeSeriesSplitPrimitive.metadata.get_hyperparams()
        folds = 5
        primitive = kfold_split_timeseries.KFoldTimeSeriesSplitPrimitive(hyperparams=hyperparams_class.defaults().replace({
            'number_of_folds': folds,
            'number_of_window_folds': 1,
        }))
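        # An assumption from the primitive's name: 'number_of_window_folds' sets how many
        # preceding folds form each training window, so 1 here trains on a single fold-sized
        # window before each test fold.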
        primitive.set_training_data(dataset=dataset)
        primitive.fit()

        # To test that pickling works.
        pickle.dumps(primitive)

        results = primitive.produce(inputs=container.List([0, 1], generate_metadata=True)).value

        self.assertEqual(len(results), 2)

        for dataset in results:
            self.assertEqual(len(dataset), 1)

        self.assertEqual(len(results[0]['learningData'].iloc[:, 0]), 8)
        self.assertEqual(set(results[0]['learningData'].iloc[:, 3]), {'2013-11-05', '2013-11-06', '2013-11-07', '2013-11-08', '2013-11-11',
                                                                      '2013-11-12', '2013-11-13', '2013-11-14'})

        self.assertEqual(len(results[1]['learningData'].iloc[:, 0]), 8)
        self.assertEqual(set(results[1]['learningData'].iloc[:, 3]), {'2013-11-13', '2013-11-14', '2013-11-15', '2013-11-18', '2013-11-19',
                                                                      '2013-11-20', '2013-11-21', '2013-11-22'})

    def test_produce_score_timeseries_1(self):
        dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'tests', 'data', 'datasets', 'timeseries_dataset_1', 'datasetDoc.json'))
        dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path))

        # We set semantic types like runtime would.
        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/Target')
        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/TrueTarget')
        dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/Attribute')

        hyperparams_class = kfold_split_timeseries.KFoldTimeSeriesSplitPrimitive.metadata.get_hyperparams()
        folds = 5
        primitive = kfold_split_timeseries.KFoldTimeSeriesSplitPrimitive(hyperparams=hyperparams_class.defaults().replace({
            'number_of_folds': folds,
            'number_of_window_folds': 1,
        }))

        primitive.set_training_data(dataset=dataset)
        primitive.fit()

        results = primitive.produce_score_data(inputs=container.List([0, 1], generate_metadata=True)).value

        self.assertEqual(len(results), 2)

        for dataset in results:
            self.assertEqual(len(dataset), 1)

        self.assertEqual(len(results[0]['learningData'].iloc[:, 0]), 6)
        self.assertEqual(set(results[0]['learningData'].iloc[:, 3]), {'2013-11-15', '2013-11-18', '2013-11-19',
                                                                      '2013-11-20', '2013-11-21', '2013-11-22'})

        self.assertEqual(len(results[1]['learningData'].iloc[:, 0]), 6)
        self.assertEqual(set(results[1]['learningData'].iloc[:, 3]), {'2013-11-25', '2013-11-26', '2013-11-27',
                                                                      '2013-11-29', '2013-12-02', '2013-12-03'})

    def test_produce_train(self):
        dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'tests', 'data', 'datasets', 'database_dataset_1', 'datasetDoc.json'))
        dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path))

        # We set semantic types like runtime would.
        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/Target')
        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/TrueTarget')
        dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/Attribute')

        # We fake that the dataset is time-series.
        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 3), 'https://metadata.datadrivendiscovery.org/types/Time')

        hyperparams_class = kfold_split_timeseries.KFoldTimeSeriesSplitPrimitive.metadata.get_hyperparams()
        folds = 5
        primitive = kfold_split_timeseries.KFoldTimeSeriesSplitPrimitive(hyperparams=hyperparams_class.defaults().replace({
            'number_of_folds': folds,
            'number_of_window_folds': 1,
        }))

        primitive.set_training_data(dataset=dataset)
        primitive.fit()

        # To test that pickling works.
        pickle.dumps(primitive)

        results = primitive.produce(inputs=container.List([0, 1], generate_metadata=True)).value

        self.assertEqual(len(results), 2)

        for dataset in results:
            self.assertEqual(len(dataset), 4)

        self.assertEqual(results[0]['codes'].shape[0], 3)
        self.assertEqual(results[1]['codes'].shape[0], 3)

        self.assertEqual(set(results[0]['codes'].iloc[:, 0]), {'AAA', 'BBB', 'CCC'})
        self.assertEqual(len(results[0]['learningData'].iloc[:, 0]), 9)
        self.assertEqual(set(results[0]['learningData'].iloc[:, 1]), {'AAA', 'BBB', 'CCC'})
        self.assertEqual(set(results[0]['learningData'].iloc[:, 2]), {'bbb', 'ccc', 'ddd'})
        self.assertEqual(set(results[0]['learningData'].iloc[:, 3]), {'1990'})

        self.assertEqual(set(results[1]['codes'].iloc[:, 0]), {'AAA', 'BBB', 'CCC'})
        self.assertEqual(len(results[1]['learningData'].iloc[:, 0]), 9)
        self.assertEqual(set(results[1]['learningData'].iloc[:, 1]), {'AAA', 'BBB', 'CCC'})
        self.assertEqual(set(results[1]['learningData'].iloc[:, 2]), {'aaa', 'bbb', 'ddd', 'eee'})
        self.assertEqual(set(results[1]['learningData'].iloc[:, 3]), {'1990', '2000'})
| def test_produce_score(self): | |||
| dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'tests', 'data', 'datasets', 'database_dataset_1', 'datasetDoc.json')) | |||
| dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) | |||
| # We set semantic types like runtime would. | |||
| dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/Target') | |||
| dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/TrueTarget') | |||
| dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/Attribute') | |||
| # We fake that the dataset is time-series. | |||
| dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 3), 'https://metadata.datadrivendiscovery.org/types/Time') | |||
| hyperparams_class = kfold_split_timeseries.KFoldTimeSeriesSplitPrimitive.metadata.get_hyperparams() | |||
| folds = 5 | |||
| primitive = kfold_split_timeseries.KFoldTimeSeriesSplitPrimitive(hyperparams=hyperparams_class.defaults().replace({ | |||
| 'number_of_folds': folds, | |||
| 'number_of_window_folds': 1, | |||
| })) | |||
| primitive.set_training_data(dataset=dataset) | |||
| primitive.fit() | |||
| results = primitive.produce_score_data(inputs=container.List([0, 1], generate_metadata=True)).value | |||
| self.assertEqual(len(results), 2) | |||
| for dataset in results: | |||
| self.assertEqual(len(dataset), 4) | |||
| self.assertEqual(results[0]['codes'].shape[0], 3) | |||
| self.assertEqual(results[1]['codes'].shape[0], 3) | |||
| self.assertEqual(set(results[0]['codes'].iloc[:, 0]), {'AAA', 'BBB', 'CCC'}) | |||
| self.assertEqual(set(results[0]['learningData'].iloc[:, 0]), {'2', '3', '32', '33', '37', '38', '39'}) | |||
| self.assertEqual(set(results[0]['learningData'].iloc[:, 1]), {'AAA', 'BBB', 'CCC'}) | |||
| self.assertEqual(set(results[0]['learningData'].iloc[:, 2]), {'aaa', 'ddd', 'eee'}) | |||
| self.assertEqual(set(results[0]['learningData'].iloc[:, 3]), {'1990', '2000'}) | |||
| self.assertEqual(set(results[1]['codes'].iloc[:, 0]), {'AAA', 'BBB', 'CCC'}) | |||
| self.assertEqual(set(results[1]['learningData'].iloc[:, 0]), {'22', '23', '24', '31', '40', '41', '42'}) | |||
| self.assertEqual(set(results[1]['learningData'].iloc[:, 1]), {'AAA', 'BBB', 'CCC'}) | |||
| self.assertEqual(set(results[1]['learningData'].iloc[:, 2]), {'ccc', 'ddd', 'eee'}) | |||
| self.assertEqual(set(results[1]['learningData'].iloc[:, 3]), {'2000'}) | |||
| def test_unsorted_datetimes_timeseries_4(self): | |||
| dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'tests', 'data', 'datasets', 'timeseries_dataset_4', 'datasetDoc.json')) | |||
| dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) | |||
| # We set semantic types like runtime would. | |||
| dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/Target') | |||
| dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/TrueTarget') | |||
| dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/Attribute') | |||
| hyperparams_class = kfold_split_timeseries.KFoldTimeSeriesSplitPrimitive.metadata.get_hyperparams() | |||
| folds = 5 | |||
| primitive = kfold_split_timeseries.KFoldTimeSeriesSplitPrimitive(hyperparams=hyperparams_class.defaults().replace({ | |||
| 'number_of_folds': folds, | |||
| 'number_of_window_folds': 1, | |||
| })) | |||
| primitive.set_training_data(dataset=dataset) | |||
| primitive.fit() | |||
| # To test that pickling works. | |||
| pickle.dumps(primitive) | |||
| results = primitive.produce(inputs=container.List([0, 1], generate_metadata=True)).value | |||
| self.assertEqual(len(results), 2) | |||
| for dataset in results: | |||
| self.assertEqual(len(dataset), 1) | |||
| self.assertEqual(len(results[0]['learningData'].iloc[:, 0]), 8) | |||
| self.assertEqual(set(results[0]['learningData'].iloc[:, 3]), {'2013-11-05', '2013-11-06', '2013-11-07', '2013-11-08', '2013-11-11', | |||
| '2013-11-12', '2013-11-13', '2013-11-14'}) | |||
| self.assertEqual(len(results[1]['learningData'].iloc[:, 0]), 8) | |||
| self.assertEqual(set(results[1]['learningData'].iloc[:, 3]), {'2013-11-13', '2013-11-14', '2013-11-15', '2013-11-18', '2013-11-19', | |||
| '2013-11-20', '2013-11-21', '2013-11-22'}) | |||
| if __name__ == '__main__': | |||
| unittest.main() | |||
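The fold boundaries asserted above follow the usual expanding-window scheme for time-series cross-validation: every training timestamp precedes every scoring timestamp, so later folds never leak future data into training. A minimal sketch of that scheme, using sklearn's TimeSeriesSplit purely as a stand-in for the primitive's internals (the 20-day range and fold sizes are assumptions for illustration, not what the primitive necessarily does):

import pandas as pd
from sklearn.model_selection import TimeSeriesSplit

# Illustrative stand-in only: expanding-window splits over 20 business
# days starting at the first date the tests above assert.
dates = pd.bdate_range('2013-11-05', periods=20)
for train_index, score_index in TimeSeriesSplit(n_splits=5).split(dates):
    # Every training timestamp precedes every scoring timestamp.
    assert train_index.max() < score_index.min()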
@@ -0,0 +1,71 @@
import os
import pickle
import unittest

from d3m import container
from d3m.metadata import base as metadata_base

from common_primitives import no_split


class NoSplitDatasetSplitPrimitiveTestCase(unittest.TestCase):
    def test_produce_train(self):
        dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'tests', 'data', 'datasets', 'iris_dataset_1', 'datasetDoc.json'))

        dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path))

        # We set semantic types like runtime would.
        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Target')
        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/TrueTarget')
        dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Attribute')

        hyperparams_class = no_split.NoSplitDatasetSplitPrimitive.metadata.get_hyperparams()

        primitive = no_split.NoSplitDatasetSplitPrimitive(hyperparams=hyperparams_class.defaults())

        primitive.set_training_data(dataset=dataset)
        primitive.fit()

        # To test that pickling works.
        pickle.dumps(primitive)

        results = primitive.produce(inputs=container.List([0], generate_metadata=True)).value

        self.assertEqual(len(results), 1)

        for dataset in results:
            self.assertEqual(len(dataset), 1)

        self.assertEqual(results[0]['learningData'].shape[0], 150)
        self.assertEqual(list(results[0]['learningData'].iloc[:, 0]), [str(i) for i in range(150)])

    def test_produce_score(self):
        dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'tests', 'data', 'datasets', 'iris_dataset_1', 'datasetDoc.json'))

        dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path))

        # We set semantic types like runtime would.
        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Target')
        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/TrueTarget')
        dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Attribute')

        hyperparams_class = no_split.NoSplitDatasetSplitPrimitive.metadata.get_hyperparams()

        primitive = no_split.NoSplitDatasetSplitPrimitive(hyperparams=hyperparams_class.defaults())

        primitive.set_training_data(dataset=dataset)
        primitive.fit()

        results = primitive.produce_score_data(inputs=container.List([0], generate_metadata=True)).value

        self.assertEqual(len(results), 1)

        for dataset in results:
            self.assertEqual(len(dataset), 1)

        self.assertEqual(results[0]['learningData'].shape[0], 150)
        self.assertEqual(list(results[0]['learningData'].iloc[:, 0]), [str(i) for i in range(150)])


if __name__ == '__main__':
    unittest.main()
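By contrast with the k-fold splitter above, the contract these two tests pin down is deliberately trivial: a single "fold" whose train and score views are both the complete 150-row dataset. A hypothetical helper showing just that contract (the real primitive operates on Dataset containers and their metadata, not plain lists):

# Hypothetical stand-in for the no-split contract: one fold, with the
# train and score views being the same, complete dataset.
def no_split(dataset):
    folds = [dataset]
    return folds, folds  # (train datasets, score datasets)

train, score = no_split(list(range(150)))
assert len(train) == 1 and train[0] is score[0] and len(train[0]) == 150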
@@ -0,0 +1,173 @@
import os
import unittest

from d3m import container, utils
from d3m.metadata import base as metadata_base

from common_primitives import redact_columns


class RedactColumnsPrimitiveTestCase(unittest.TestCase):
    def _get_datasets(self):
        dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'data', 'datasets', 'iris_dataset_1', 'datasetDoc.json'))

        dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path))

        # We set semantic types like runtime would.
        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Target')
        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/TrueTarget')
        dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Attribute')

        datasets = container.List([dataset], {
            'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
            'structural_type': container.List,
            'dimension': {
                'length': 1,
            },
        }, generate_metadata=False)

        # We update metadata based on metadata of each dataset.
        # TODO: In the future this might be done automatically by generate_metadata.
        #       See: https://gitlab.com/datadrivendiscovery/d3m/issues/119
        for index, dataset in enumerate(datasets):
            datasets.metadata = dataset.metadata.copy_to(datasets.metadata, (), (index,))

        return dataset_doc_path, datasets
    def test_basic(self):
        dataset_doc_path, datasets = self._get_datasets()

        hyperparams_class = redact_columns.RedactColumnsPrimitive.metadata.get_hyperparams()

        primitive = redact_columns.RedactColumnsPrimitive(hyperparams=hyperparams_class.defaults().replace({
            'semantic_types': ('https://metadata.datadrivendiscovery.org/types/TrueTarget',),
            'add_semantic_types': ('https://metadata.datadrivendiscovery.org/types/RedactedTarget', 'https://metadata.datadrivendiscovery.org/types/MissingData'),
        }))

        redacted_datasets = primitive.produce(inputs=datasets).value

        # "assertTrue(len(redacted_datasets), 1)" would always pass (the second
        # argument is just the failure message), so assert the length explicitly.
        self.assertEqual(len(redacted_datasets), 1)

        redacted_dataset = redacted_datasets[0]

        self.assertIsInstance(redacted_dataset, container.Dataset)

        self.assertEqual(redacted_dataset['learningData']['species'].values.tolist(), [''] * 150)

        self._test_metadata(redacted_datasets.metadata, dataset_doc_path, True)
        self._test_metadata(redacted_dataset.metadata, dataset_doc_path, False)
    def _test_metadata(self, metadata, dataset_doc_path, is_list):
        top_metadata = {
            'structural_type': 'd3m.container.dataset.Dataset',
            'id': 'iris_dataset_1',
            'version': '4.0.0',
            'name': 'Iris Dataset',
            'location_uris': [
                'file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path),
            ],
            'dimension': {
                'name': 'resources',
                'semantic_types': ['https://metadata.datadrivendiscovery.org/types/DatasetResource'],
                'length': 1,
            },
            'digest': '49404bf166238fbdac2b6d6baa899a0d1bf8ed5976525fa7353fd732ac218a85',
            'source': {
                'license': 'CC',
                'redacted': False,
                'human_subjects_research': False,
            },
        }

        if is_list:
            prefix = [0]
            list_metadata = [{
                'selector': [],
                'metadata': {
                    'dimension': {
                        'length': 1,
                    },
                    'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
                    'structural_type': 'd3m.container.list.List',
                },
            }]
        else:
            prefix = []
            list_metadata = []
            top_metadata['schema'] = metadata_base.CONTAINER_SCHEMA_VERSION

        self.assertEqual(utils.to_json_structure(metadata.to_internal_simple_structure()), list_metadata + [{
            'selector': prefix + [],
            'metadata': top_metadata,
        }, {
            'selector': prefix + ['learningData'],
            'metadata': {
                'structural_type': 'd3m.container.pandas.DataFrame',
                'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table', 'https://metadata.datadrivendiscovery.org/types/DatasetEntryPoint'],
                'dimension': {
                    'name': 'rows',
                    'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'],
                    'length': 150,
                },
            },
        }, {
            'selector': prefix + ['learningData', '__ALL_ELEMENTS__'],
            'metadata': {
                'dimension': {
                    'name': 'columns',
                    'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'],
                    'length': 6,
                },
            },
        }, {
            'selector': prefix + ['learningData', '__ALL_ELEMENTS__', 0],
            'metadata': {
                'name': 'd3mIndex',
                'structural_type': 'str',
                'semantic_types': ['http://schema.org/Integer', 'https://metadata.datadrivendiscovery.org/types/PrimaryKey'],
            },
        }, {
            'selector': prefix + ['learningData', '__ALL_ELEMENTS__', 1],
            'metadata': {
                'name': 'sepalLength',
                'structural_type': 'str',
                'semantic_types': ['http://schema.org/Float', 'https://metadata.datadrivendiscovery.org/types/Attribute'],
            },
        }, {
            'selector': prefix + ['learningData', '__ALL_ELEMENTS__', 2],
            'metadata': {
                'name': 'sepalWidth',
                'structural_type': 'str',
                'semantic_types': ['http://schema.org/Float', 'https://metadata.datadrivendiscovery.org/types/Attribute'],
            },
        }, {
            'selector': prefix + ['learningData', '__ALL_ELEMENTS__', 3],
            'metadata': {
                'name': 'petalLength',
                'structural_type': 'str',
                'semantic_types': ['http://schema.org/Float', 'https://metadata.datadrivendiscovery.org/types/Attribute'],
            },
        }, {
            'selector': prefix + ['learningData', '__ALL_ELEMENTS__', 4],
            'metadata': {
                'name': 'petalWidth',
                'structural_type': 'str',
                'semantic_types': ['http://schema.org/Float', 'https://metadata.datadrivendiscovery.org/types/Attribute'],
            },
        }, {
            'selector': prefix + ['learningData', '__ALL_ELEMENTS__', 5],
            'metadata': {
                'name': 'species',
                'structural_type': 'str',
                'semantic_types': [
                    'https://metadata.datadrivendiscovery.org/types/CategoricalData',
                    'https://metadata.datadrivendiscovery.org/types/SuggestedTarget',
                    'https://metadata.datadrivendiscovery.org/types/Target',
                    'https://metadata.datadrivendiscovery.org/types/TrueTarget',
                    'https://metadata.datadrivendiscovery.org/types/RedactedTarget',
                    'https://metadata.datadrivendiscovery.org/types/MissingData',
                ],
            },
        }])


if __name__ == '__main__':
    unittest.main()
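test_basic above asserts that redaction blanks the values of every column carrying the selected semantic type ('species', the TrueTarget) while keeping the column in place and extending its semantic types with RedactedTarget and MissingData. A rough DataFrame-level sketch of the value-blanking half; the helper below is illustrative, not the primitive's implementation, and ignores the metadata side:

import pandas as pd

# Illustrative only: blank the values of the given columns while keeping
# the columns themselves, mirroring what the test asserts for 'species'.
def redact(frame, columns):
    redacted = frame.copy()
    for column in columns:
        redacted[column] = ''
    return redacted

frame = pd.DataFrame({'species': ['setosa', 'virginica'], 'sepalLength': [5.1, 4.9]})
assert redact(frame, ['species'])['species'].tolist() == ['', '']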
@@ -0,0 +1,130 @@
import os
import pickle
import unittest

from d3m import container
from d3m.metadata import base as metadata_base

from common_primitives import train_score_split


class TrainScoreDatasetSplitPrimitiveTestCase(unittest.TestCase):
    def test_produce_train(self):
        dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'tests', 'data', 'datasets', 'iris_dataset_1', 'datasetDoc.json'))

        dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path))

        # We set semantic types like runtime would.
        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Target')
        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/TrueTarget')
        dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Attribute')

        hyperparams_class = train_score_split.TrainScoreDatasetSplitPrimitive.metadata.get_hyperparams()

        primitive = train_score_split.TrainScoreDatasetSplitPrimitive(hyperparams=hyperparams_class.defaults().replace({
            'shuffle': True,
        }))

        primitive.set_training_data(dataset=dataset)
        primitive.fit()

        # To test that pickling works.
        pickle.dumps(primitive)

        results = primitive.produce(inputs=container.List([0], generate_metadata=True)).value

        self.assertEqual(len(results), 1)

        for dataset in results:
            self.assertEqual(len(dataset), 1)

        self.assertEqual(results[0]['learningData'].shape[0], 112)
        self.assertEqual(list(results[0]['learningData'].iloc[:, 0]), [
            '0', '1', '2', '3', '4', '5', '6', '9', '10', '11', '12', '13', '14', '15', '17', '19', '20',
            '21', '23', '25', '28', '29', '30', '31', '32', '34', '35', '36', '38', '39', '41', '42', '43',
            '46', '47', '48', '49', '50', '52', '53', '55', '56', '57', '58', '60', '61', '64', '65', '67',
            '68', '69', '70', '72', '74', '75', '77', '79', '80', '81', '82', '85', '87', '88', '89', '91',
            '92', '94', '95', '96', '98', '99', '101', '102', '103', '104', '105', '106', '108', '109', '110',
            '111', '112', '113', '115', '116', '117', '118', '119', '120', '122', '123', '124', '125', '128',
            '129', '130', '131', '133', '135', '136', '138', '139', '140', '141', '142', '143', '144', '145',
            '146', '147', '148', '149',
        ])

        self.assertEqual(results.metadata.query((0, 'learningData'))['dimension']['length'], 112)

        column_names = ['d3mIndex', 'sepalLength', 'sepalWidth', 'petalLength', 'petalWidth', 'species']
        for i in range(6):
            self.assertEqual(results.metadata.query((0, 'learningData', metadata_base.ALL_ELEMENTS, i))['name'], column_names[i])

        self.assertEqual(results.metadata.query((0, 'learningData', metadata_base.ALL_ELEMENTS, 0))['semantic_types'], (
            'http://schema.org/Integer', 'https://metadata.datadrivendiscovery.org/types/PrimaryKey',
        ))
        for i in range(1, 5):
            self.assertEqual(results.metadata.query((0, 'learningData', metadata_base.ALL_ELEMENTS, i))['semantic_types'], (
                'http://schema.org/Float', 'https://metadata.datadrivendiscovery.org/types/Attribute',
            ))
        self.assertEqual(results.metadata.query((0, 'learningData', metadata_base.ALL_ELEMENTS, 5))['semantic_types'], (
            'https://metadata.datadrivendiscovery.org/types/CategoricalData',
            'https://metadata.datadrivendiscovery.org/types/SuggestedTarget',
            'https://metadata.datadrivendiscovery.org/types/Target',
            'https://metadata.datadrivendiscovery.org/types/TrueTarget',
        ))

    def test_produce_score(self):
        dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'tests', 'data', 'datasets', 'iris_dataset_1', 'datasetDoc.json'))

        dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path))

        # We set semantic types like runtime would.
        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Target')
        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/TrueTarget')
        dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Attribute')

        hyperparams_class = train_score_split.TrainScoreDatasetSplitPrimitive.metadata.get_hyperparams()

        primitive = train_score_split.TrainScoreDatasetSplitPrimitive(hyperparams=hyperparams_class.defaults().replace({
            'shuffle': True,
        }))

        primitive.set_training_data(dataset=dataset)
        primitive.fit()

        results = primitive.produce_score_data(inputs=container.List([0], generate_metadata=True)).value

        self.assertEqual(len(results), 1)

        for dataset in results:
            self.assertEqual(len(dataset), 1)

        self.assertEqual(results[0]['learningData'].shape[0], 38)
        self.assertEqual(list(results[0]['learningData'].iloc[:, 0]), [
            '7', '8', '16', '18', '22', '24', '26', '27', '33', '37', '40', '44', '45', '51', '54',
            '59', '62', '63', '66', '71', '73', '76', '78', '83', '84', '86', '90', '93', '97', '100',
            '107', '114', '121', '126', '127', '132', '134', '137',
        ])

        self.assertEqual(results.metadata.query((0, 'learningData'))['dimension']['length'], 38)

        column_names = ['d3mIndex', 'sepalLength', 'sepalWidth', 'petalLength', 'petalWidth', 'species']
        for i in range(6):
            self.assertEqual(results.metadata.query((0, 'learningData', metadata_base.ALL_ELEMENTS, i))['name'], column_names[i])

        self.assertEqual(results.metadata.query((0, 'learningData', metadata_base.ALL_ELEMENTS, 0))['semantic_types'], (
            'http://schema.org/Integer', 'https://metadata.datadrivendiscovery.org/types/PrimaryKey',
        ))
        for i in range(1, 5):
            self.assertEqual(results.metadata.query((0, 'learningData', metadata_base.ALL_ELEMENTS, i))['semantic_types'], (
                'http://schema.org/Float', 'https://metadata.datadrivendiscovery.org/types/Attribute',
            ))
        self.assertEqual(results.metadata.query((0, 'learningData', metadata_base.ALL_ELEMENTS, 5))['semantic_types'], (
            'https://metadata.datadrivendiscovery.org/types/CategoricalData',
            'https://metadata.datadrivendiscovery.org/types/SuggestedTarget',
            'https://metadata.datadrivendiscovery.org/types/Target',
            'https://metadata.datadrivendiscovery.org/types/TrueTarget',
        ))


if __name__ == '__main__':
    unittest.main()
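The 112/38 row counts asserted in both tests match a shuffled 75/25 split of the 150-row Iris table (38 = ceil(150 * 0.25)), presumably the primitive's default train/score ratio. A sketch with sklearn's train_test_split as a stand-in; the seed, and whether the primitive uses sklearn internally, are assumptions:

from sklearn.model_selection import train_test_split

# Sketch: a shuffled 75/25 split over 150 row indices reproduces the
# asserted 112/38 train/score sizes; random_state here is arbitrary.
train_rows, score_rows = train_test_split(list(range(150)), test_size=0.25, shuffle=True, random_state=0)
assert len(train_rows) == 112 and len(score_rows) == 38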