"""Evaluate a fixed TODS anomaly-detection pipeline on the yahoo_sub_5 dataset.

Loads the dataset CSV into a D3M dataset/problem pair, reads a pipeline
description from ``example_pipeline.json``, and scores it with the F1 metric
using axolotl's ``SimpleRunner`` backend (no train/test split).
"""
import uuid
import random
import json
from pprint import pprint

import pandas as pd
from sklearn.datasets import make_classification
from d3m import container
from d3m.metadata.pipeline import Pipeline
from d3m.metadata.problem import TaskKeyword, PerformanceMetric
from axolotl.utils import data_problem
from axolotl.backend.simple import SimpleRunner
from axolotl.utils import pipeline as pipeline_utils, schemas as schemas_utils

import tods
from tods.search import BruteForceSearch

# Raw learning data shipped with the yahoo_sub_5 anomaly dataset.
table_path = 'datasets/anomaly/yahoo_sub_5/yahoo_sub_5_dataset/tables/learningData.csv'
df = pd.read_csv(table_path)

# Column index 7 is the target; frame the task as anomaly detection
# scored with F1.
dataset, problem_description = data_problem.generate_dataset_problem(
    df,
    target_index=7,
    task_keywords=[TaskKeyword.ANOMALY_DETECTION],
    performance_metrics=[{'metric': PerformanceMetric.F1}],
)
print(dataset)
print(problem_description)

# Score F1 treating class '1' (anomalous) as the positive label.
metrics = [
    {'metric': PerformanceMetric.F1, 'params': {'pos_label': '1'}},
]

# Load the pre-built pipeline description from disk.
pipeline_path = 'example_pipeline.json'
pipeline = pipeline_utils.load_pipeline(pipeline_path)
print(pipeline)

# Standard axolotl splitting/scoring pipelines; 'no_split' evaluates on
# the full training data without holding anything out.
data_preparation_pipeline = schemas_utils.get_splitting_pipeline("TRAINING_DATA")
scoring_pipeline = schemas_utils.get_scoring_pipeline()
data_preparation_params = schemas_utils.DATA_PREPARATION_PARAMS['no_split']

backend = SimpleRunner(random_seed=0)
pipeline_result = backend.evaluate_pipeline(
    problem_description=problem_description,
    pipeline=pipeline,
    input_data=[dataset],
    metrics=metrics,
    data_preparation_pipeline=data_preparation_pipeline,
    scoring_pipeline=scoring_pipeline,
    data_preparation_params=data_preparation_params,
)
print(pipeline_result)