# wangwei/tods
# TODO: change name
# Top-level description of the pipeline: identity, provenance, and the
# named inputs and outputs it exposes.
id: '5bed1f23-ac17-4b52-9d06-a5b77a6aea51'
schema: 'https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json'
source: {name: 'Jeffrey Gleason'}
# Kept quoted so the timestamp is loaded as a string, not a date object.
created: '2019-12-19T16:29:34.702933Z'
context: 'TESTING'
name: 'K-fold split of timeseries datasets'
description: >
  K-fold split of timeseries datasets for cross-validation.
# The steps in this file refer to these by position (inputs.0, inputs.1),
# so the order of this list matters.
inputs:
  - {name: 'folds'}
  - {name: 'full dataset'}
# Each pipeline output is wired to the `produce` output of one step.
outputs:
  - {name: 'train datasets', data: 'steps.2.produce'}
  - {name: 'test datasets', data: 'steps.4.produce'}
  - {name: 'score datasets', data: 'steps.3.produce'}
steps:
  # Step 0: the Simon column type profiler, included (per the original note)
  # to infer the DateTime column. It declares no arguments or outputs of its
  # own here; step 1 points at it by index through its `primitive`
  # hyperparameter.
  - type: 'PRIMITIVE'
    primitive:
      id: 'd2fa8df2-6517-3c26-bafc-87b701c4043a'
      version: '1.2.2'
      python_path: 'd3m.primitives.data_cleaning.column_type_profiler.Simon'
      name: 'simon'
  # Step 1: the dataset-map operator, configured with the step 0 primitive
  # (the mapped Simon data typing used to infer the DateTime column). It is
  # fed the second pipeline input, inputs.1 ("full dataset").
  - type: 'PRIMITIVE'
    primitive:
      id: '5bef5738-1638-48d6-9935-72445f0eecdc'
      version: '0.1.0'
      python_path: 'd3m.primitives.operator.dataset_map.DataFrameCommon'
      name: 'Map DataFrame resources to new resources using provided primitive'
    arguments:
      inputs: {type: 'CONTAINER', data: 'inputs.1'}
    outputs:
      - {id: 'produce'}
    hyperparams:
      primitive:
        type: 'PRIMITIVE'
        # Deliberately unquoted: this is the integer index of step 0,
        # not a string.
        data: 0
  # Step 2: K-fold cross-validation split for time-series datasets.
  # `inputs` receives the first pipeline input (inputs.0, "folds") and
  # `dataset` receives the output of step 1. Two outputs are exposed:
  # `produce` (used as the pipeline's train datasets) and
  # `produce_score_data` (consumed by step 3).
  - type: 'PRIMITIVE'
    primitive:
      id: '002f9ad1-46e3-40f4-89ed-eeffbb3a102b'
      version: '0.1.0'
      python_path: 'd3m.primitives.evaluation.kfold_time_series_split.Common'
      name: 'K-fold cross-validation timeseries dataset splits'
    arguments:
      inputs: {type: 'CONTAINER', data: 'inputs.0'}
      dataset: {type: 'CONTAINER', data: 'steps.1.produce'}
    outputs:
      - {id: 'produce'}
      - {id: 'produce_score_data'}
  # Step 3: redaction of privileged attributes. It runs on the
  # `produce_score_data` output of step 2. Its result is the pipeline's
  # score datasets and is also the input of step 4, so this redaction
  # covers both the score and the test splits.
  - type: 'PRIMITIVE'
    primitive:
      id: '744c4090-e2f6-489e-8efc-8b1e051bfad6'
      version: '0.2.0'
      python_path: 'd3m.primitives.evaluation.redact_columns.Common'
      name: 'Redact columns for evaluation'
    arguments:
      inputs: {type: 'CONTAINER', data: 'steps.2.produce_score_data'}
    outputs:
      - {id: 'produce'}
    hyperparams:
      # Semantic type selecting what gets redacted.
      semantic_types:
        type: 'VALUE'
        data:
          - 'https://metadata.datadrivendiscovery.org/types/PrivilegedData'
      # Semantic types passed to the primitive as `add_semantic_types`.
      add_semantic_types:
        type: 'VALUE'
        data:
          - 'https://metadata.datadrivendiscovery.org/types/RedactedPrivilegedData'
          - 'https://metadata.datadrivendiscovery.org/types/MissingData'
  # Step 4: a second pass of the same redaction primitive, applied to the
  # output of step 3 and this time selecting TrueTarget, so that targets
  # are redacted as well. Its result is the pipeline's test datasets.
  - type: 'PRIMITIVE'
    primitive:
      id: '744c4090-e2f6-489e-8efc-8b1e051bfad6'
      version: '0.2.0'
      python_path: 'd3m.primitives.evaluation.redact_columns.Common'
      name: 'Redact columns for evaluation'
    arguments:
      inputs: {type: 'CONTAINER', data: 'steps.3.produce'}
    outputs:
      - {id: 'produce'}
    hyperparams:
      # Semantic type selecting what gets redacted.
      semantic_types:
        type: 'VALUE'
        data:
          - 'https://metadata.datadrivendiscovery.org/types/TrueTarget'
      # Semantic types passed to the primitive as `add_semantic_types`.
      add_semantic_types:
        type: 'VALUE'
        data:
          - 'https://metadata.datadrivendiscovery.org/types/RedactedTarget'
          - 'https://metadata.datadrivendiscovery.org/types/MissingData'