| @@ -1,68 +0,0 @@ | |||||
| NAB Data Corpus | |||||
| --- | |||||
| Data are ordered, timestamped, single-valued metrics. All data files contain anomalies, unless otherwise noted. | |||||
| ### Real data | |||||
| - realAWSCloudwatch/ | |||||
| AWS server metrics as collected by the Amazon CloudWatch service. Example metrics include CPU Utilization, Network Bytes In, and Disk Read Bytes. | |||||
| - realAdExchange/ | |||||
| Online advertisement clicking rates, where the metrics are cost-per-click (CPC) and cost per thousand impressions (CPM). One of the files is normal, without anomalies. | |||||
| - realKnownCause/ | |||||
| This is data for which we know the anomaly causes; no hand labeling. | |||||
| - ambient_temperature_system_failure.csv: The ambient temperature in an office | |||||
| setting. | |||||
| - cpu_utilization_asg_misconfiguration.csv: From Amazon Web Services (AWS) | |||||
| monitoring CPU usage – i.e. average CPU usage across a given cluster. When | |||||
| usage is high, AWS spins up a new machine, and uses fewer machines when usage | |||||
| is low. | |||||
| - ec2_request_latency_system_failure.csv: CPU usage data from a server in | |||||
| Amazon's East Coast datacenter. The dataset ends with complete system failure | |||||
| resulting from a documented failure of AWS API servers. There's an interesting | |||||
| story behind this data in the [Numenta | |||||
| blog](http://numenta.com/blog/anomaly-of-the-week.html). | |||||
| - machine_temperature_system_failure.csv: Temperature sensor data of an | |||||
| internal component of a large, industrial machine. The first anomaly is a | |||||
| planned shutdown of the machine. The second anomaly is difficult to detect and | |||||
| directly led to the third anomaly, a catastrophic failure of the machine. | |||||
| - nyc_taxi.csv: Number of NYC taxi passengers, where the five anomalies occur | |||||
| during the NYC marathon, Thanksgiving, Christmas, New Year's Day, and a snow | |||||
| storm. The raw data is from the [NYC Taxi and Limousine Commission](http://www.nyc.gov/html/tlc/html/about/trip_record_data.shtml). | |||||
| The data file included here aggregates the total number of | |||||
| taxi passengers into 30-minute buckets. | |||||
| - rogue_agent_key_hold.csv: Timing the key holds for several users of a | |||||
| computer, where the anomalies represent a change in the user. | |||||
| - rogue_agent_key_updown.csv: Timing the key strokes for several users of a | |||||
| computer, where the anomalies represent a change in the user. | |||||
| - realTraffic/ | |||||
| Real time traffic data from the Twin Cities Metro area in Minnesota, collected | |||||
| by the | |||||
| [Minnesota Department of Transportation](http://www.dot.state.mn.us/tmc/trafficinfo/developers.html). | |||||
| The metrics include occupancy, speed, and travel time from specific | |||||
| sensors. | |||||
| - realTweets/ | |||||
| A collection of Twitter mentions of large publicly-traded companies | |||||
| such as Google and IBM. The metric value represents the number of mentions | |||||
| for a given ticker symbol every 5 minutes. | |||||
| ### Artificial data | |||||
| - artificialNoAnomaly/ | |||||
| Artificially-generated data without any anomalies. | |||||
| - artificialWithAnomaly/ | |||||
| Artificially-generated data with varying types of anomalies. | |||||
| @@ -1,232 +0,0 @@ | |||||
| { | |||||
| "artificialNoAnomaly/art_daily_no_noise.csv": [], | |||||
| "artificialNoAnomaly/art_daily_perfect_square_wave.csv": [], | |||||
| "artificialNoAnomaly/art_daily_small_noise.csv": [], | |||||
| "artificialNoAnomaly/art_flatline.csv": [], | |||||
| "artificialNoAnomaly/art_noisy.csv": [], | |||||
| "artificialWithAnomaly/art_daily_flatmiddle.csv": [ | |||||
| "2014-04-11 00:00:00" | |||||
| ], | |||||
| "artificialWithAnomaly/art_daily_jumpsdown.csv": [ | |||||
| "2014-04-11 09:00:00" | |||||
| ], | |||||
| "artificialWithAnomaly/art_daily_jumpsup.csv": [ | |||||
| "2014-04-11 09:00:00" | |||||
| ], | |||||
| "artificialWithAnomaly/art_daily_nojump.csv": [ | |||||
| "2014-04-11 09:00:00" | |||||
| ], | |||||
| "artificialWithAnomaly/art_increase_spike_density.csv": [ | |||||
| "2014-04-07 23:10:00" | |||||
| ], | |||||
| "artificialWithAnomaly/art_load_balancer_spikes.csv": [ | |||||
| "2014-04-11 04:35:00" | |||||
| ], | |||||
| "realAWSCloudwatch/ec2_cpu_utilization_24ae8d.csv": [ | |||||
| "2014-02-26 22:05:00", | |||||
| "2014-02-27 17:15:00" | |||||
| ], | |||||
| "realAWSCloudwatch/ec2_cpu_utilization_53ea38.csv": [ | |||||
| "2014-02-19 19:10:00", | |||||
| "2014-02-23 20:05:00" | |||||
| ], | |||||
| "realAWSCloudwatch/ec2_cpu_utilization_5f5533.csv": [ | |||||
| "2014-02-19 00:22:00", | |||||
| "2014-02-24 18:37:00" | |||||
| ], | |||||
| "realAWSCloudwatch/ec2_cpu_utilization_77c1ca.csv": [ | |||||
| "2014-04-09 10:15:00" | |||||
| ], | |||||
| "realAWSCloudwatch/ec2_cpu_utilization_825cc2.csv": [ | |||||
| "2014-04-15 15:44:00", | |||||
| "2014-04-16 03:34:00" | |||||
| ], | |||||
| "realAWSCloudwatch/ec2_cpu_utilization_ac20cd.csv": [ | |||||
| "2014-04-15 00:49:00" | |||||
| ], | |||||
| "realAWSCloudwatch/ec2_cpu_utilization_c6585a.csv": [], | |||||
| "realAWSCloudwatch/ec2_cpu_utilization_fe7f93.csv": [ | |||||
| "2014-02-17 06:12:00", | |||||
| "2014-02-22 00:02:00", | |||||
| "2014-02-23 15:17:00" | |||||
| ], | |||||
| "realAWSCloudwatch/ec2_disk_write_bytes_1ef3de.csv": [ | |||||
| "2014-03-10 21:09:00" | |||||
| ], | |||||
| "realAWSCloudwatch/ec2_disk_write_bytes_c0d644.csv": [ | |||||
| "2014-04-09 01:30:00", | |||||
| "2014-04-10 14:35:00", | |||||
| "2014-04-13 03:00:00" | |||||
| ], | |||||
| "realAWSCloudwatch/ec2_network_in_257a54.csv": [ | |||||
| "2014-04-15 16:44:00" | |||||
| ], | |||||
| "realAWSCloudwatch/ec2_network_in_5abac7.csv": [ | |||||
| "2014-03-10 18:56:00", | |||||
| "2014-03-12 21:01:00" | |||||
| ], | |||||
| "realAWSCloudwatch/elb_request_count_8c0756.csv": [ | |||||
| "2014-04-12 17:24:00", | |||||
| "2014-04-22 19:34:00" | |||||
| ], | |||||
| "realAWSCloudwatch/grok_asg_anomaly.csv": [ | |||||
| "2014-01-20 08:30:00", | |||||
| "2014-01-21 10:45:00", | |||||
| "2014-01-29 00:45:00" | |||||
| ], | |||||
| "realAWSCloudwatch/iio_us-east-1_i-a2eb1cd9_NetworkIn.csv": [ | |||||
| "2013-10-10 09:35:00", | |||||
| "2013-10-10 20:40:00" | |||||
| ], | |||||
| "realAWSCloudwatch/rds_cpu_utilization_cc0c53.csv": [ | |||||
| "2014-02-25 07:15:00", | |||||
| "2014-02-27 00:50:00" | |||||
| ], | |||||
| "realAWSCloudwatch/rds_cpu_utilization_e47b3b.csv": [ | |||||
| "2014-04-13 06:52:00", | |||||
| "2014-04-18 23:27:00" | |||||
| ], | |||||
| "realAdExchange/exchange-2_cpc_results.csv": [ | |||||
| "2011-07-14 13:00:01" | |||||
| ], | |||||
| "realAdExchange/exchange-2_cpm_results.csv": [ | |||||
| "2011-07-26 06:00:01", | |||||
| "2011-08-10 17:00:01" | |||||
| ], | |||||
| "realAdExchange/exchange-3_cpc_results.csv": [ | |||||
| "2011-07-14 10:15:01", | |||||
| "2011-07-20 10:15:01", | |||||
| "2011-08-13 10:15:01" | |||||
| ], | |||||
| "realAdExchange/exchange-3_cpm_results.csv": [ | |||||
| "2011-08-19 18:15:01" | |||||
| ], | |||||
| "realAdExchange/exchange-4_cpc_results.csv": [ | |||||
| "2011-07-16 09:15:01", | |||||
| "2011-08-02 12:15:01", | |||||
| "2011-08-23 08:15:01" | |||||
| ], | |||||
| "realAdExchange/exchange-4_cpm_results.csv": [ | |||||
| "2011-07-16 09:15:01", | |||||
| "2011-08-01 07:15:01", | |||||
| "2011-08-23 08:15:01", | |||||
| "2011-08-28 13:15:01" | |||||
| ], | |||||
| "realKnownCause/ambient_temperature_system_failure.csv": [ | |||||
| "2013-12-22 20:00:00", | |||||
| "2014-04-13 09:00:00" | |||||
| ], | |||||
| "realKnownCause/cpu_utilization_asg_misconfiguration.csv": [ | |||||
| "2014-07-12 02:04:00", | |||||
| "2014-07-14 21:44:00" | |||||
| ], | |||||
| "realKnownCause/ec2_request_latency_system_failure.csv": [ | |||||
| "2014-03-14 09:06:00", | |||||
| "2014-03-18 22:41:00", | |||||
| "2014-03-21 03:01:00" | |||||
| ], | |||||
| "realKnownCause/machine_temperature_system_failure.csv": [ | |||||
| "2013-12-11 06:00:00", | |||||
| "2013-12-16 17:25:00", | |||||
| "2014-01-28 13:55:00", | |||||
| "2014-02-08 14:30:00" | |||||
| ], | |||||
| "realKnownCause/nyc_taxi.csv": [ | |||||
| "2014-11-01 19:00:00", | |||||
| "2014-11-27 15:30:00", | |||||
| "2014-12-25 15:00:00", | |||||
| "2015-01-01 01:00:00", | |||||
| "2015-01-27 00:00:00" | |||||
| ], | |||||
| "realKnownCause/rogue_agent_key_hold.csv": [ | |||||
| "2014-07-15 08:30:00", | |||||
| "2014-07-17 09:50:00" | |||||
| ], | |||||
| "realKnownCause/rogue_agent_key_updown.csv": [ | |||||
| "2014-07-15 04:00:00", | |||||
| "2014-07-17 08:50:00" | |||||
| ], | |||||
| "realTraffic/TravelTime_387.csv": [ | |||||
| "2015-07-30 12:29:00", | |||||
| "2015-08-18 16:26:00", | |||||
| "2015-09-01 05:34:00" | |||||
| ], | |||||
| "realTraffic/TravelTime_451.csv": [ | |||||
| "2015-08-11 12:07:00" | |||||
| ], | |||||
| "realTraffic/occupancy_6005.csv": [ | |||||
| "2015-09-15 06:55:00" | |||||
| ], | |||||
| "realTraffic/occupancy_t4013.csv": [ | |||||
| "2015-09-16 08:09:00", | |||||
| "2015-09-17 07:55:00" | |||||
| ], | |||||
| "realTraffic/speed_6005.csv": [ | |||||
| "2015-09-17 07:00:00" | |||||
| ], | |||||
| "realTraffic/speed_7578.csv": [ | |||||
| "2015-09-11 16:44:00", | |||||
| "2015-09-15 14:34:00", | |||||
| "2015-09-16 14:14:00", | |||||
| "2015-09-16 17:10:00" | |||||
| ], | |||||
| "realTraffic/speed_t4013.csv": [ | |||||
| "2015-09-16 08:04:00", | |||||
| "2015-09-17 08:15:00" | |||||
| ], | |||||
| "realTweets/Twitter_volume_AAPL.csv": [ | |||||
| "2015-03-03 21:07:53", | |||||
| "2015-03-09 17:32:53", | |||||
| "2015-03-16 02:57:53", | |||||
| "2015-03-31 03:27:53" | |||||
| ], | |||||
| "realTweets/Twitter_volume_AMZN.csv": [ | |||||
| "2015-03-05 19:47:53", | |||||
| "2015-03-11 20:57:53", | |||||
| "2015-04-01 21:57:53", | |||||
| "2015-04-08 04:52:53" | |||||
| ], | |||||
| "realTweets/Twitter_volume_CRM.csv": [ | |||||
| "2015-03-09 19:07:53", | |||||
| "2015-03-19 23:07:53", | |||||
| "2015-03-26 19:07:53" | |||||
| ], | |||||
| "realTweets/Twitter_volume_CVS.csv": [ | |||||
| "2015-03-04 16:02:53", | |||||
| "2015-03-05 19:57:53", | |||||
| "2015-03-26 14:07:53", | |||||
| "2015-04-14 22:37:53" | |||||
| ], | |||||
| "realTweets/Twitter_volume_FB.csv": [ | |||||
| "2015-03-16 07:07:53", | |||||
| "2015-04-03 17:47:53" | |||||
| ], | |||||
| "realTweets/Twitter_volume_GOOG.csv": [ | |||||
| "2015-03-13 20:22:53", | |||||
| "2015-03-14 16:27:53", | |||||
| "2015-03-22 22:52:53", | |||||
| "2015-04-01 05:27:53" | |||||
| ], | |||||
| "realTweets/Twitter_volume_IBM.csv": [ | |||||
| "2015-03-23 22:27:53", | |||||
| "2015-04-20 20:07:53" | |||||
| ], | |||||
| "realTweets/Twitter_volume_KO.csv": [ | |||||
| "2015-03-20 13:12:53", | |||||
| "2015-04-08 23:42:53", | |||||
| "2015-04-14 14:52:53" | |||||
| ], | |||||
| "realTweets/Twitter_volume_PFE.csv": [ | |||||
| "2015-03-02 21:22:53", | |||||
| "2015-03-04 10:32:53", | |||||
| "2015-03-13 19:57:53", | |||||
| "2015-04-07 23:42:53" | |||||
| ], | |||||
| "realTweets/Twitter_volume_UPS.csv": [ | |||||
| "2015-03-03 00:27:53", | |||||
| "2015-03-04 11:07:53", | |||||
| "2015-03-05 15:22:53", | |||||
| "2015-03-24 18:17:53", | |||||
| "2015-03-29 16:27:53" | |||||
| ] | |||||
| } | |||||
| @@ -1 +0,0 @@ | |||||
| 948611b07519538ef036e0ec1c948f6bf97009cf | |||||
| @@ -1 +0,0 @@ | |||||
| 428229640a5466e68014f74649a24f00abb1150b | |||||
| @@ -1 +0,0 @@ | |||||
| 68dd1084ed091fb9affe45b4e0894250c6c62c07 | |||||
| @@ -1 +0,0 @@ | |||||
| 7bebf0fe077dda56f789d644090faf1d2484913c | |||||
| @@ -1 +0,0 @@ | |||||
| 59fdf3c2b8d171704e3de1e10d8ccfca72c8ab9a | |||||
| @@ -1 +0,0 @@ | |||||
| d20453833fc13c681f0b5f5a830f3aba52b774cd | |||||
| @@ -1 +0,0 @@ | |||||
| ed60bba6f53c779335874c39966b7d5e4309e2c3 | |||||
| @@ -1 +0,0 @@ | |||||
| f01b654d9a6a6ebc7efc65da240f83680de2131d | |||||
| @@ -1 +0,0 @@ | |||||
| 8e0088d97641d6ab39b808fe03ac0a7ec9ea99b9 | |||||
| @@ -1 +0,0 @@ | |||||
| d72fffb08da82bb70ecc379bb1fa56316efda557 | |||||
| @@ -1 +0,0 @@ | |||||
| 4c2f8543201c0a66e44815dee128d9044a41c382 | |||||
| @@ -1 +0,0 @@ | |||||
| 25a0dd3110986418d379a887cc575f9fdc45a6da | |||||
| @@ -1 +0,0 @@ | |||||
| 44db328c252a8156434142a37ef65765869e7548 | |||||
| @@ -1 +0,0 @@ | |||||
| bea5d1c052730eaba76b84ff5df854477cdfa80b | |||||
| @@ -1,63 +0,0 @@ | |||||
| { | |||||
| "about": { | |||||
| "datasetID": "kpi_dataset_TEST", | |||||
| "datasetName": "NULL", | |||||
| "description": "Database of baseball players and play statistics, including 'Games_played', 'At_bats', 'Runs', 'Hits', 'Doubles', 'Triples', 'Home_runs', 'RBIs', 'Walks', 'Strikeouts', 'Batting_average', 'On_base_pct', 'Slugging_pct' and 'Fielding_ave'", | |||||
| "citation": " @book{simonoff2003analyzing,title={Analyzing Categorical Data},author={Simonoff, J.S.},isbn={9780387007496},lccn={2003044946},series={Springer Texts in Statistics},url={https://books.google.com/books?id=G8wrifweAoC},year={2003},publisher={Springer New York}} ", | |||||
| "license": " CC Public Domain Mark 1.0 ", | |||||
| "source": "OpenML", | |||||
| "sourceURI": "http://www.openml.org/d/185", | |||||
| "approximateSize": "", | |||||
| "datasetSchemaVersion": "4.0.0", | |||||
| "redacted": false, | |||||
| "datasetVersion": "4.0.0" | |||||
| }, | |||||
| "dataResources": [ | |||||
| { | |||||
| "resID": "learningData", | |||||
| "resPath": "tables/learningData.csv", | |||||
| "resType": "table", | |||||
| "resFormat": { | |||||
| "text/csv": [ | |||||
| "csv" | |||||
| ] | |||||
| }, | |||||
| "isCollection": false, | |||||
| "columns": [ | |||||
| { | |||||
| "colIndex": 0, | |||||
| "colName": "d3mIndex", | |||||
| "colType": "integer", | |||||
| "role": [ | |||||
| "index" | |||||
| ] | |||||
| }, | |||||
| { | |||||
| "colIndex": 1, | |||||
| "colName": "timestamp", | |||||
| "colType": "integer", | |||||
| "role": [ | |||||
| "attribute" | |||||
| ] | |||||
| }, | |||||
| { | |||||
| "colIndex": 2, | |||||
| "colName": "value", | |||||
| "colType": "real", | |||||
| "role": [ | |||||
| "attribute" | |||||
| ] | |||||
| }, | |||||
| { | |||||
| "colIndex": 3, | |||||
| "colName": "ground_truth", | |||||
| "colType": "integer", | |||||
| "role": [ | |||||
| "suggestedTarget" | |||||
| ] | |||||
| } | |||||
| ], | |||||
| "columnsCount": 4 | |||||
| } | |||||
| ] | |||||
| } | |||||
| @@ -1,65 +0,0 @@ | |||||
| { | |||||
| "about": { | |||||
| "problemID": "kpi_problem", | |||||
| "problemName": "kpi_problem", | |||||
| "problemDescription": "Anomaly detection", | |||||
| "problemVersion": "4.0.0", | |||||
| "problemSchemaVersion": "4.0.0", | |||||
| "taskKeywords": [ | |||||
| "classification", | |||||
| "binary", | |||||
| "tabular" | |||||
| ] | |||||
| }, | |||||
| "inputs": { | |||||
| "data": [ | |||||
| { | |||||
| "datasetID": "kpi_dataset", | |||||
| "targets": [ | |||||
| { | |||||
| "targetIndex": 0, | |||||
| "resID": "learningData", | |||||
| "colIndex": 3, | |||||
| "colName": "ground_truth" | |||||
| } | |||||
| ] | |||||
| } | |||||
| ], | |||||
| "dataSplits": { | |||||
| "method": "holdOut", | |||||
| "testSize": 0.2, | |||||
| "stratified": true, | |||||
| "numRepeats": 0, | |||||
| "randomSeed": 42, | |||||
| "splitsFile": "dataSplits.csv", | |||||
| "datasetViewMaps": { | |||||
| "train": [ | |||||
| { | |||||
| "from": "kpi_dataset", | |||||
| "to": "kpi_dataset_TRAIN" | |||||
| } | |||||
| ], | |||||
| "test": [ | |||||
| { | |||||
| "from": "kpi_dataset", | |||||
| "to": "kpi_dataset_TEST" | |||||
| } | |||||
| ], | |||||
| "score": [ | |||||
| { | |||||
| "from": "kpi_dataset", | |||||
| "to": "kpi_dataset_SCORE" | |||||
| } | |||||
| ] | |||||
| } | |||||
| }, | |||||
| "performanceMetrics": [ | |||||
| { | |||||
| "metric": "f1Macro" | |||||
| } | |||||
| ] | |||||
| }, | |||||
| "expectedOutputs": { | |||||
| "predictionsFile": "predictions.csv" | |||||
| } | |||||
| } | |||||
| @@ -1,63 +0,0 @@ | |||||
| { | |||||
| "about": { | |||||
| "datasetID": "kpi_dataset_TEST", | |||||
| "datasetName": "NULL", | |||||
| "description": "Database of baseball players and play statistics, including 'Games_played', 'At_bats', 'Runs', 'Hits', 'Doubles', 'Triples', 'Home_runs', 'RBIs', 'Walks', 'Strikeouts', 'Batting_average', 'On_base_pct', 'Slugging_pct' and 'Fielding_ave'", | |||||
| "citation": " @book{simonoff2003analyzing,title={Analyzing Categorical Data},author={Simonoff, J.S.},isbn={9780387007496},lccn={2003044946},series={Springer Texts in Statistics},url={https://books.google.com/books?id=G8wrifweAoC},year={2003},publisher={Springer New York}} ", | |||||
| "license": " CC Public Domain Mark 1.0 ", | |||||
| "source": "OpenML", | |||||
| "sourceURI": "http://www.openml.org/d/185", | |||||
| "approximateSize": "", | |||||
| "datasetSchemaVersion": "4.0.0", | |||||
| "redacted": false, | |||||
| "datasetVersion": "4.0.0" | |||||
| }, | |||||
| "dataResources": [ | |||||
| { | |||||
| "resID": "learningData", | |||||
| "resPath": "tables/learningData.csv", | |||||
| "resType": "table", | |||||
| "resFormat": { | |||||
| "text/csv": [ | |||||
| "csv" | |||||
| ] | |||||
| }, | |||||
| "isCollection": false, | |||||
| "columns": [ | |||||
| { | |||||
| "colIndex": 0, | |||||
| "colName": "d3mIndex", | |||||
| "colType": "integer", | |||||
| "role": [ | |||||
| "index" | |||||
| ] | |||||
| }, | |||||
| { | |||||
| "colIndex": 1, | |||||
| "colName": "timestamp", | |||||
| "colType": "integer", | |||||
| "role": [ | |||||
| "attribute" | |||||
| ] | |||||
| }, | |||||
| { | |||||
| "colIndex": 2, | |||||
| "colName": "value", | |||||
| "colType": "real", | |||||
| "role": [ | |||||
| "attribute" | |||||
| ] | |||||
| }, | |||||
| { | |||||
| "colIndex": 3, | |||||
| "colName": "ground_truth", | |||||
| "colType": "integer", | |||||
| "role": [ | |||||
| "suggestedTarget" | |||||
| ] | |||||
| } | |||||
| ], | |||||
| "columnsCount": 4 | |||||
| } | |||||
| ] | |||||
| } | |||||
| @@ -1,65 +0,0 @@ | |||||
| { | |||||
| "about": { | |||||
| "problemID": "kpi_problem", | |||||
| "problemName": "kpi_problem", | |||||
| "problemDescription": "Anomaly detection", | |||||
| "problemVersion": "4.0.0", | |||||
| "problemSchemaVersion": "4.0.0", | |||||
| "taskKeywords": [ | |||||
| "classification", | |||||
| "binary", | |||||
| "tabular" | |||||
| ] | |||||
| }, | |||||
| "inputs": { | |||||
| "data": [ | |||||
| { | |||||
| "datasetID": "kpi_dataset", | |||||
| "targets": [ | |||||
| { | |||||
| "targetIndex": 0, | |||||
| "resID": "learningData", | |||||
| "colIndex": 3, | |||||
| "colName": "ground_truth" | |||||
| } | |||||
| ] | |||||
| } | |||||
| ], | |||||
| "dataSplits": { | |||||
| "method": "holdOut", | |||||
| "testSize": 0.2, | |||||
| "stratified": true, | |||||
| "numRepeats": 0, | |||||
| "randomSeed": 42, | |||||
| "splitsFile": "dataSplits.csv", | |||||
| "datasetViewMaps": { | |||||
| "train": [ | |||||
| { | |||||
| "from": "kpi_dataset", | |||||
| "to": "kpi_dataset_TRAIN" | |||||
| } | |||||
| ], | |||||
| "test": [ | |||||
| { | |||||
| "from": "kpi_dataset", | |||||
| "to": "kpi_dataset_TEST" | |||||
| } | |||||
| ], | |||||
| "score": [ | |||||
| { | |||||
| "from": "kpi_dataset", | |||||
| "to": "kpi_dataset_SCORE" | |||||
| } | |||||
| ] | |||||
| } | |||||
| }, | |||||
| "performanceMetrics": [ | |||||
| { | |||||
| "metric": "f1Macro" | |||||
| } | |||||
| ] | |||||
| }, | |||||
| "expectedOutputs": { | |||||
| "predictionsFile": "predictions.csv" | |||||
| } | |||||
| } | |||||
| @@ -1,63 +0,0 @@ | |||||
| { | |||||
| "about": { | |||||
| "datasetID": "kpi_dataset_TRAIN", | |||||
| "datasetName": "NULL", | |||||
| "description": "Database of baseball players and play statistics, including 'Games_played', 'At_bats', 'Runs', 'Hits', 'Doubles', 'Triples', 'Home_runs', 'RBIs', 'Walks', 'Strikeouts', 'Batting_average', 'On_base_pct', 'Slugging_pct' and 'Fielding_ave'", | |||||
| "citation": " @book{simonoff2003analyzing,title={Analyzing Categorical Data},author={Simonoff, J.S.},isbn={9780387007496},lccn={2003044946},series={Springer Texts in Statistics},url={https://books.google.com/books?id=G8wrifweAoC},year={2003},publisher={Springer New York}} ", | |||||
| "license": " CC Public Domain Mark 1.0 ", | |||||
| "source": "OpenML", | |||||
| "sourceURI": "http://www.openml.org/d/185", | |||||
| "approximateSize": "", | |||||
| "datasetSchemaVersion": "4.0.0", | |||||
| "redacted": false, | |||||
| "datasetVersion": "4.0.0" | |||||
| }, | |||||
| "dataResources": [ | |||||
| { | |||||
| "resID": "learningData", | |||||
| "resPath": "tables/learningData.csv", | |||||
| "resType": "table", | |||||
| "resFormat": { | |||||
| "text/csv": [ | |||||
| "csv" | |||||
| ] | |||||
| }, | |||||
| "isCollection": false, | |||||
| "columns": [ | |||||
| { | |||||
| "colIndex": 0, | |||||
| "colName": "d3mIndex", | |||||
| "colType": "integer", | |||||
| "role": [ | |||||
| "index" | |||||
| ] | |||||
| }, | |||||
| { | |||||
| "colIndex": 1, | |||||
| "colName": "timestamp", | |||||
| "colType": "integer", | |||||
| "role": [ | |||||
| "attribute" | |||||
| ] | |||||
| }, | |||||
| { | |||||
| "colIndex": 2, | |||||
| "colName": "value", | |||||
| "colType": "real", | |||||
| "role": [ | |||||
| "attribute" | |||||
| ] | |||||
| }, | |||||
| { | |||||
| "colIndex": 3, | |||||
| "colName": "ground_truth", | |||||
| "colType": "integer", | |||||
| "role": [ | |||||
| "suggestedTarget" | |||||
| ] | |||||
| } | |||||
| ], | |||||
| "columnsCount": 4 | |||||
| } | |||||
| ] | |||||
| } | |||||
| @@ -1 +0,0 @@ | |||||
| 44db328c252a8156434142a37ef65765869e7548 | |||||
| @@ -1,65 +0,0 @@ | |||||
| { | |||||
| "about": { | |||||
| "problemID": "kpi_problem", | |||||
| "problemName": "kpi_problem", | |||||
| "problemDescription": "Anomaly detection", | |||||
| "problemVersion": "4.0.0", | |||||
| "problemSchemaVersion": "4.0.0", | |||||
| "taskKeywords": [ | |||||
| "classification", | |||||
| "binary", | |||||
| "tabular" | |||||
| ] | |||||
| }, | |||||
| "inputs": { | |||||
| "data": [ | |||||
| { | |||||
| "datasetID": "kpi_dataset", | |||||
| "targets": [ | |||||
| { | |||||
| "targetIndex": 0, | |||||
| "resID": "learningData", | |||||
| "colIndex": 3, | |||||
| "colName": "ground_truth" | |||||
| } | |||||
| ] | |||||
| } | |||||
| ], | |||||
| "dataSplits": { | |||||
| "method": "holdOut", | |||||
| "testSize": 0.2, | |||||
| "stratified": true, | |||||
| "numRepeats": 0, | |||||
| "randomSeed": 42, | |||||
| "splitsFile": "dataSplits.csv", | |||||
| "datasetViewMaps": { | |||||
| "train": [ | |||||
| { | |||||
| "from": "kpi_dataset", | |||||
| "to": "kpi_dataset_TRAIN" | |||||
| } | |||||
| ], | |||||
| "test": [ | |||||
| { | |||||
| "from": "kpi_dataset", | |||||
| "to": "kpi_dataset_TEST" | |||||
| } | |||||
| ], | |||||
| "score": [ | |||||
| { | |||||
| "from": "kpi_dataset", | |||||
| "to": "kpi_dataset_SCORE" | |||||
| } | |||||
| ] | |||||
| } | |||||
| }, | |||||
| "performanceMetrics": [ | |||||
| { | |||||
| "metric": "f1Macro" | |||||
| } | |||||
| ] | |||||
| }, | |||||
| "expectedOutputs": { | |||||
| "predictionsFile": "predictions.csv" | |||||
| } | |||||
| } | |||||
| @@ -1,63 +0,0 @@ | |||||
| { | |||||
| "about": { | |||||
| "datasetID": "kpi_dataset", | |||||
| "datasetName": "kpi", | |||||
| "description": "Database of baseball players and play statistics, including 'Games_played', 'At_bats', 'Runs', 'Hits', 'Doubles', 'Triples', 'Home_runs', 'RBIs', 'Walks', 'Strikeouts', 'Batting_average', 'On_base_pct', 'Slugging_pct' and 'Fielding_ave'", | |||||
| "citation": " @book{simonoff2003analyzing,title={Analyzing Categorical Data},author={Simonoff, J.S.},isbn={9780387007496},lccn={2003044946},series={Springer Texts in Statistics},url={https://books.google.com/books?id=G8wrifweAoC},year={2003},publisher={Springer New York}} ", | |||||
| "license": " CC Public Domain Mark 1.0 ", | |||||
| "source": "OpenML", | |||||
| "sourceURI": "http://www.openml.org/d/185", | |||||
| "approximateSize": "", | |||||
| "datasetSchemaVersion": "4.0.0", | |||||
| "redacted": false, | |||||
| "datasetVersion": "4.0.0" | |||||
| }, | |||||
| "dataResources": [ | |||||
| { | |||||
| "resID": "learningData", | |||||
| "resPath": "tables/learningData.csv", | |||||
| "resType": "table", | |||||
| "resFormat": { | |||||
| "text/csv": [ | |||||
| "csv" | |||||
| ] | |||||
| }, | |||||
| "isCollection": false, | |||||
| "columns": [ | |||||
| { | |||||
| "colIndex": 0, | |||||
| "colName": "d3mIndex", | |||||
| "colType": "integer", | |||||
| "role": [ | |||||
| "index" | |||||
| ] | |||||
| }, | |||||
| { | |||||
| "colIndex": 1, | |||||
| "colName": "timestamp", | |||||
| "colType": "integer", | |||||
| "role": [ | |||||
| "attribute" | |||||
| ] | |||||
| }, | |||||
| { | |||||
| "colIndex": 2, | |||||
| "colName": "value", | |||||
| "colType": "real", | |||||
| "role": [ | |||||
| "attribute" | |||||
| ] | |||||
| }, | |||||
| { | |||||
| "colIndex": 3, | |||||
| "colName": "ground_truth", | |||||
| "colType": "integer", | |||||
| "role": [ | |||||
| "suggestedTarget" | |||||
| ] | |||||
| } | |||||
| ], | |||||
| "columnsCount": 4 | |||||
| } | |||||
| ] | |||||
| } | |||||
| @@ -1 +0,0 @@ | |||||
| d80846dc46c173472f646a52005a1fb3670ccd09 | |||||
| @@ -1 +0,0 @@ | |||||
| 44db328c252a8156434142a37ef65765869e7548 | |||||
| @@ -1,65 +0,0 @@ | |||||
| { | |||||
| "about": { | |||||
| "problemID": "kpi_problem", | |||||
| "problemName": "kpi_problem", | |||||
| "problemDescription": "Anomaly detection", | |||||
| "problemVersion": "4.0.0", | |||||
| "problemSchemaVersion": "4.0.0", | |||||
| "taskKeywords": [ | |||||
| "classification", | |||||
| "binary", | |||||
| "tabular" | |||||
| ] | |||||
| }, | |||||
| "inputs": { | |||||
| "data": [ | |||||
| { | |||||
| "datasetID": "kpi_dataset", | |||||
| "targets": [ | |||||
| { | |||||
| "targetIndex": 0, | |||||
| "resID": "learningData", | |||||
| "colIndex": 3, | |||||
| "colName": "ground_truth" | |||||
| } | |||||
| ] | |||||
| } | |||||
| ], | |||||
| "dataSplits": { | |||||
| "method": "holdOut", | |||||
| "testSize": 0.2, | |||||
| "stratified": true, | |||||
| "numRepeats": 0, | |||||
| "randomSeed": 42, | |||||
| "splitsFile": "dataSplits.csv", | |||||
| "datasetViewMaps": { | |||||
| "train": [ | |||||
| { | |||||
| "from": "kpi_dataset", | |||||
| "to": "kpi_dataset_TRAIN" | |||||
| } | |||||
| ], | |||||
| "test": [ | |||||
| { | |||||
| "from": "kpi_dataset", | |||||
| "to": "kpi_dataset_TEST" | |||||
| } | |||||
| ], | |||||
| "score": [ | |||||
| { | |||||
| "from": "kpi_dataset", | |||||
| "to": "kpi_dataset_SCORE" | |||||
| } | |||||
| ] | |||||
| } | |||||
| }, | |||||
| "performanceMetrics": [ | |||||
| { | |||||
| "metric": "f1Macro" | |||||
| } | |||||
| ] | |||||
| }, | |||||
| "expectedOutputs": { | |||||
| "predictionsFile": "predictions.csv" | |||||
| } | |||||
| } | |||||
| @@ -1 +0,0 @@ | |||||
| bea5d1c052730eaba76b84ff5df854477cdfa80b | |||||
| @@ -1,183 +0,0 @@ | |||||
| { | |||||
| "about": { | |||||
| "datasetID": "template", | |||||
| "datasetName": "baseball", | |||||
| "description": "Database of baseball players and play statistics, including 'Games_played', 'At_bats', 'Runs', 'Hits', 'Doubles', 'Triples', 'Home_runs', 'RBIs', 'Walks', 'Strikeouts', 'Batting_average', 'On_base_pct', 'Slugging_pct' and 'Fielding_ave'", | |||||
| "citation": " @book{simonoff2003analyzing,title={Analyzing Categorical Data},author={Simonoff, J.S.},isbn={9780387007496},lccn={2003044946},series={Springer Texts in Statistics},url={https://books.google.com/books?id=G8wrifweAoC},year={2003},publisher={Springer New York}} ", | |||||
| "license": " CC Public Domain Mark 1.0 ", | |||||
| "source": "OpenML", | |||||
| "sourceURI": "http://www.openml.org/d/185", | |||||
| "approximateSize": "", | |||||
| "datasetSchemaVersion": "4.0.0", | |||||
| "redacted": false, | |||||
| "datasetVersion": "4.0.0" | |||||
| }, | |||||
| "dataResources": [ | |||||
| { | |||||
| "resID": "learningData", | |||||
| "resPath": "tables/learningData.csv", | |||||
| "resType": "table", | |||||
| "resFormat": { | |||||
| "text/csv": [ | |||||
| "csv" | |||||
| ] | |||||
| }, | |||||
| "isCollection": false, | |||||
| "columns": [ | |||||
| { | |||||
| "colIndex": 0, | |||||
| "colName": "d3mIndex", | |||||
| "colType": "integer", | |||||
| "role": [ | |||||
| "index" | |||||
| ] | |||||
| }, | |||||
| { | |||||
| "colIndex": 1, | |||||
| "colName": "Player", | |||||
| "colType": "categorical", | |||||
| "role": [ | |||||
| "attribute" | |||||
| ] | |||||
| }, | |||||
| { | |||||
| "colIndex": 2, | |||||
| "colName": "Number_seasons", | |||||
| "colType": "integer", | |||||
| "role": [ | |||||
| "attribute" | |||||
| ] | |||||
| }, | |||||
| { | |||||
| "colIndex": 3, | |||||
| "colName": "Games_played", | |||||
| "colType": "integer", | |||||
| "role": [ | |||||
| "attribute" | |||||
| ] | |||||
| }, | |||||
| { | |||||
| "colIndex": 4, | |||||
| "colName": "At_bats", | |||||
| "colType": "integer", | |||||
| "role": [ | |||||
| "attribute" | |||||
| ] | |||||
| }, | |||||
| { | |||||
| "colIndex": 5, | |||||
| "colName": "Runs", | |||||
| "colType": "integer", | |||||
| "role": [ | |||||
| "attribute" | |||||
| ] | |||||
| }, | |||||
| { | |||||
| "colIndex": 6, | |||||
| "colName": "Hits", | |||||
| "colType": "integer", | |||||
| "role": [ | |||||
| "attribute" | |||||
| ] | |||||
| }, | |||||
| { | |||||
| "colIndex": 7, | |||||
| "colName": "Doubles", | |||||
| "colType": "integer", | |||||
| "role": [ | |||||
| "attribute" | |||||
| ] | |||||
| }, | |||||
| { | |||||
| "colIndex": 8, | |||||
| "colName": "Triples", | |||||
| "colType": "integer", | |||||
| "role": [ | |||||
| "attribute" | |||||
| ] | |||||
| }, | |||||
| { | |||||
| "colIndex": 9, | |||||
| "colName": "Home_runs", | |||||
| "colType": "integer", | |||||
| "role": [ | |||||
| "attribute" | |||||
| ] | |||||
| }, | |||||
| { | |||||
| "colIndex": 10, | |||||
| "colName": "RBIs", | |||||
| "colType": "integer", | |||||
| "role": [ | |||||
| "attribute" | |||||
| ] | |||||
| }, | |||||
| { | |||||
| "colIndex": 11, | |||||
| "colName": "Walks", | |||||
| "colType": "integer", | |||||
| "role": [ | |||||
| "attribute" | |||||
| ] | |||||
| }, | |||||
| { | |||||
| "colIndex": 12, | |||||
| "colName": "Strikeouts", | |||||
| "colType": "integer", | |||||
| "role": [ | |||||
| "attribute" | |||||
| ] | |||||
| }, | |||||
| { | |||||
| "colIndex": 13, | |||||
| "colName": "Batting_average", | |||||
| "colType": "real", | |||||
| "role": [ | |||||
| "attribute" | |||||
| ] | |||||
| }, | |||||
| { | |||||
| "colIndex": 14, | |||||
| "colName": "On_base_pct", | |||||
| "colType": "real", | |||||
| "role": [ | |||||
| "attribute" | |||||
| ] | |||||
| }, | |||||
| { | |||||
| "colIndex": 15, | |||||
| "colName": "Slugging_pct", | |||||
| "colType": "real", | |||||
| "role": [ | |||||
| "attribute" | |||||
| ] | |||||
| }, | |||||
| { | |||||
| "colIndex": 16, | |||||
| "colName": "Fielding_ave", | |||||
| "colType": "real", | |||||
| "role": [ | |||||
| "attribute" | |||||
| ] | |||||
| }, | |||||
| { | |||||
| "colIndex": 17, | |||||
| "colName": "Position", | |||||
| "colType": "categorical", | |||||
| "role": [ | |||||
| "attribute" | |||||
| ] | |||||
| }, | |||||
| { | |||||
| "colIndex": 18, | |||||
| "colName": "Hall_of_Fame", | |||||
| "colType": "categorical", | |||||
| "role": [ | |||||
| "suggestedTarget" | |||||
| ] | |||||
| } | |||||
| ], | |||||
| "columnsCount": 19 | |||||
| } | |||||
| ] | |||||
| } | |||||
| @@ -1,65 +0,0 @@ | |||||
| { | |||||
| "about": { | |||||
| "problemID": "template", | |||||
| "problemName": "baseball_problem", | |||||
| "problemDescription": "**Author**: Jeffrey S. Simonoff \n**Source**: [AnalCatData](http://www.stern.nyu.edu/~jsimonof/AnalCatData) - 2003 \n**Please cite**: Jeffrey S. Simonoff, Analyzing Categorical Data, Springer-Verlag, New York, 2003 \n \nDatabase of baseball players and play statistics, including 'Games_played', 'At_bats', 'Runs', 'Hits', 'Doubles', 'Triples', 'Home_runs', 'RBIs', 'Walks', 'Strikeouts', 'Batting_average', 'On_base_pct', 'Slugging_pct' and 'Fielding_ave' \n\nNotes: \n* Quotes, Single-Quotes and Backslashes were removed, Blanks replaced with Underscores\n* Player is an identifier that should be ignored when modelling the data", | |||||
| "problemVersion": "4.0.0", | |||||
| "problemSchemaVersion": "4.0.0", | |||||
| "taskKeywords": [ | |||||
| "classification", | |||||
| "multiClass", | |||||
| "tabular" | |||||
| ] | |||||
| }, | |||||
| "inputs": { | |||||
| "data": [ | |||||
| { | |||||
| "datasetID": "185_baseball_dataset", | |||||
| "targets": [ | |||||
| { | |||||
| "targetIndex": 0, | |||||
| "resID": "learningData", | |||||
| "colIndex": 18, | |||||
| "colName": "Hall_of_Fame" | |||||
| } | |||||
| ] | |||||
| } | |||||
| ], | |||||
| "dataSplits": { | |||||
| "method": "holdOut", | |||||
| "testSize": 0.2, | |||||
| "stratified": true, | |||||
| "numRepeats": 0, | |||||
| "randomSeed": 42, | |||||
| "splitsFile": "dataSplits.csv", | |||||
| "datasetViewMaps": { | |||||
| "train": [ | |||||
| { | |||||
| "from": "185_baseball_dataset", | |||||
| "to": "185_baseball_dataset_TRAIN" | |||||
| } | |||||
| ], | |||||
| "test": [ | |||||
| { | |||||
| "from": "185_baseball_dataset", | |||||
| "to": "185_baseball_dataset_TEST" | |||||
| } | |||||
| ], | |||||
| "score": [ | |||||
| { | |||||
| "from": "185_baseball_dataset", | |||||
| "to": "185_baseball_dataset_SCORE" | |||||
| } | |||||
| ] | |||||
| } | |||||
| }, | |||||
| "performanceMetrics": [ | |||||
| { | |||||
| "metric": "f1Macro" | |||||
| } | |||||
| ] | |||||
| }, | |||||
| "expectedOutputs": { | |||||
| "predictionsFile": "predictions.csv" | |||||
| } | |||||
| } | |||||
| @@ -1,160 +0,0 @@ | |||||
| # TODO: Wrap it as a class and connect it to GUI | |||||
| # A script to transform anomaly data to d3m format | |||||
| import pandas as pd | |||||
| import numpy as np | |||||
| import os | |||||
| import json | |||||
##############################
# User-editable settings for the dataset to be transformed.
# Designed for time series data.
name = 'kpi'                      # base name of the output root and of the dataset/problem IDs
src_path = './raw_data/kpi.csv'   # raw CSV that is read and converted
label_name = 'label'              # raw column copied into 'ground_truth'
timestamp_name = 'timestamp'      # raw column copied into 'timestamp'
value_names = ['value']           # raw columns copied through under their own names
ratio = 0.8 # Ratio of training data, the rest is for testing
###############################
dst_root = './' + name
dirs = ['./', 'SCORE', 'TEST', 'TRAIN']
# Suffix used for the dataset_*/problem_* folders of each split directory.
# The root ('./') has no suffix and SCORE reuses the TEST folder names.
maps = {'./': None, 'SCORE': 'TEST', 'TEST': 'TEST', 'TRAIN': 'TRAIN'}

# Create the output directory tree: for every split directory a
# '<dataset folder>/tables' directory and a '<problem folder>' directory.
for d in dirs:
    suffix = maps[d]
    if suffix is None:
        dataset_folder, problem_folder = name + '_dataset', name + '_problem'
    else:
        dataset_folder, problem_folder = 'dataset_' + suffix, 'problem_' + suffix
    wanted = (
        os.path.join(dst_root, d, dataset_folder, 'tables'),
        os.path.join(dst_root, d, problem_folder),
    )
    for path in wanted:
        # Only create what is missing so the script can be re-run.
        if not os.path.exists(path):
            os.makedirs(path)
# Process data
# Read the raw CSV and rebuild it with the column layout
# d3mIndex, timestamp, <value columns...>, ground_truth.
raw = pd.read_csv(src_path)
table = pd.DataFrame()
table['d3mIndex'] = raw.index
table['timestamp'] = raw[timestamp_name]
for column in value_names:
    table[column] = raw[column]
table['ground_truth'] = raw[label_name]
df = table
cols = df.columns.tolist()

# Row position separating the training rows (before it) from the test rows.
split_at = int(df.shape[0] * ratio)
train_df = df[:split_at]
test_df = df[split_at:]

# Write the full table first, then the TRAIN/TEST/SCORE copies.
# SCORE receives the same rows as TEST.
learning_data_targets = [
    (df, (name + '_dataset',)),
    (train_df, ('TRAIN', 'dataset_TRAIN')),
    (test_df, ('TEST', 'dataset_TEST')),
    (test_df, ('SCORE', 'dataset_TEST')),
]
for frame, parts in learning_data_targets:
    out_path = os.path.join(dst_root, *(parts + ('tables', 'learningData.csv')))
    frame.to_csv(out_path, index=False)
# Data splits
# Build the dataSplits table with one row per sample of the full dataset:
# the training rows first (type 'TRAIN'), then the test rows (type 'TEST').
# 'repeat' and 'fold' are written as 0 for every row.
n_train = train_df.shape[0]
# The test count must come from test_df; taking it from train_df made the
# table the wrong length and mislabelled the TEST rows.
n_test = test_df.shape[0]
n_total = n_train + n_test
splits_df = pd.DataFrame(
    {
        'd3mIndex': list(range(n_total)),
        'type': ['TRAIN'] * n_train + ['TEST'] * n_test,
        'repeat': [0] * n_total,
        'fold': [0] * n_total,
    },
    columns=['d3mIndex', 'type', 'repeat', 'fold'],
)
# Save data splits for all data (the splits table, not the training data)
splits_df.to_csv(os.path.join(dst_root, name+'_problem', 'dataSplits.csv'), index=False)
# Save training and testing splits; SCORE receives the same rows as TEST
train_splits, test_splits = splits_df[:n_train], splits_df[n_train:]
train_splits.to_csv(os.path.join(dst_root, 'TRAIN', 'problem_TRAIN', 'dataSplits.csv'), index=False)
test_splits.to_csv(os.path.join(dst_root, 'TEST', 'problem_TEST', 'dataSplits.csv'), index=False)
test_splits.to_csv(os.path.join(dst_root, 'SCORE', 'problem_TEST', 'dataSplits.csv'), index=False)
# Dataset JSON files
# Load the datasetDoc template, replace its column descriptions with ones
# derived from `cols`, and write one copy per output dataset folder.
with open('template/datasetDoc.json') as json_file:
    data = json.load(json_file)

# Column metadata is decided purely by position:
#   first column  -> integer index
#   second column -> integer attribute
#   last column   -> integer suggestedTarget
#   anything else -> real attribute
last_index = len(cols) - 1
columns = []
for idx, col_name in enumerate(cols):
    if idx == 0:
        col_type, col_role = 'integer', 'index'
    elif idx == 1:
        col_type, col_role = 'integer', 'attribute'
    elif idx == last_index:
        col_type, col_role = 'integer', 'suggestedTarget'
    else:
        col_type, col_role = 'real', 'attribute'
    columns.append({
        'colIndex': idx,
        'colName': col_name,
        'colType': col_type,
        'role': [col_role],
    })

resource = data['dataResources'][0]
resource['columns'] = columns
resource['columnsCount'] = len(cols)

# (path parts below dst_root, datasetID, datasetName) for every copy.
# Only these two 'about' fields are overwritten; everything else in 'about'
# is written exactly as it appears in the template.
# NOTE(review): the SCORE copy is written with the '_dataset_TEST' ID, while
# the problem doc maps 'score' to '_dataset_SCORE' -- confirm which is intended.
dataset_docs = [
    ((name + '_dataset', 'datasetDoc.json'), name + '_dataset', name),
    (('TRAIN', 'dataset_TRAIN', 'datasetDoc.json'), name + '_dataset_TRAIN', "NULL"),
    (('TEST', 'dataset_TEST', 'datasetDoc.json'), name + '_dataset_TEST', 'NULL'),
    (('SCORE', 'dataset_TEST', 'datasetDoc.json'), name + '_dataset_TEST', 'NULL'),
]
for parts, dataset_id, dataset_label in dataset_docs:
    data['about']['datasetID'] = dataset_id
    data['about']['datasetName'] = dataset_label
    with open(os.path.join(dst_root, *parts), 'w') as outfile:
        json.dump(data, outfile, indent=4)
# Problem JSON files
# Load the problemDoc template, fill in the IDs, target column and dataset
# view maps for this dataset, and write the same document to every problem
# folder.
with open('template/problemDoc.json') as json_file:
    data = json.load(json_file)

about = data['about']
about['problemID'] = name+'_problem'
about['problemName'] = name+'_problem'
about['problemDescription'] = 'Anomaly detection'
about['taskKeywords'] = ['classification', 'binary', 'tabular']

# The target is the last column of the table.
problem_input = data['inputs']['data'][0]
problem_input['datasetID'] = name + '_dataset'
target = problem_input['targets'][0]
target['colIndex'] = len(cols)-1
target['colName'] = cols[-1]

# Every view maps from the full dataset to its split-specific dataset ID.
view_maps = data['inputs']['dataSplits']['datasetViewMaps']
for view, split_suffix in (('train', 'TRAIN'), ('test', 'TEST'), ('score', 'SCORE')):
    mapping = view_maps[view][0]
    mapping['from'] = name+'_dataset'
    mapping['to'] = name+'_dataset_' + split_suffix

# The root, TRAIN, TEST and SCORE problem folders all receive an identical document.
problem_dirs = [
    (name+'_problem',),
    ('TRAIN', 'problem_TRAIN'),
    ('TEST', 'problem_TEST'),
    ('SCORE', 'problem_TEST'),
]
for parts in problem_dirs:
    doc_path = os.path.join(dst_root, *(parts + ('problemDoc.json',)))
    with open(doc_path, 'w') as outfile:
        json.dump(data, outfile, indent=4)

# Make an empty targets.csv
open(os.path.join(dst_root, 'SCORE', 'targets.csv'), 'w').close()
| @@ -1,95 +0,0 @@ | |||||
| { | |||||
| "about": { | |||||
| "datasetID": "yahoo_sub_5_dataset_TEST", | |||||
| "datasetName": "NULL", | |||||
| "description": "Database of baseball players and play statistics, including 'Games_played', 'At_bats', 'Runs', 'Hits', 'Doubles', 'Triples', 'Home_runs', 'RBIs', 'Walks', 'Strikeouts', 'Batting_average', 'On_base_pct', 'Slugging_pct' and 'Fielding_ave'", | |||||
| "citation": " @book{simonoff2003analyzing,title={Analyzing Categorical Data},author={Simonoff, J.S.},isbn={9780387007496},lccn={2003044946},series={Springer Texts in Statistics},url={https://books.google.com/books?id=G8wrifweAoC},year={2003},publisher={Springer New York}} ", | |||||
| "license": " CC Public Domain Mark 1.0 ", | |||||
| "source": "OpenML", | |||||
| "sourceURI": "http://www.openml.org/d/185", | |||||
| "approximateSize": "", | |||||
| "datasetSchemaVersion": "4.0.0", | |||||
| "redacted": false, | |||||
| "datasetVersion": "4.0.0" | |||||
| }, | |||||
| "dataResources": [ | |||||
| { | |||||
| "resID": "learningData", | |||||
| "resPath": "tables/learningData.csv", | |||||
| "resType": "table", | |||||
| "resFormat": { | |||||
| "text/csv": [ | |||||
| "csv" | |||||
| ] | |||||
| }, | |||||
| "isCollection": false, | |||||
| "columns": [ | |||||
| { | |||||
| "colIndex": 0, | |||||
| "colName": "d3mIndex", | |||||
| "colType": "integer", | |||||
| "role": [ | |||||
| "index" | |||||
| ] | |||||
| }, | |||||
| { | |||||
| "colIndex": 1, | |||||
| "colName": "timestamp", | |||||
| "colType": "integer", | |||||
| "role": [ | |||||
| "attribute" | |||||
| ] | |||||
| }, | |||||
| { | |||||
| "colIndex": 2, | |||||
| "colName": "value_0", | |||||
| "colType": "real", | |||||
| "role": [ | |||||
| "attribute" | |||||
| ] | |||||
| }, | |||||
| { | |||||
| "colIndex": 3, | |||||
| "colName": "value_1", | |||||
| "colType": "real", | |||||
| "role": [ | |||||
| "attribute" | |||||
| ] | |||||
| }, | |||||
| { | |||||
| "colIndex": 4, | |||||
| "colName": "value_2", | |||||
| "colType": "real", | |||||
| "role": [ | |||||
| "attribute" | |||||
| ] | |||||
| }, | |||||
| { | |||||
| "colIndex": 5, | |||||
| "colName": "value_3", | |||||
| "colType": "real", | |||||
| "role": [ | |||||
| "attribute" | |||||
| ] | |||||
| }, | |||||
| { | |||||
| "colIndex": 6, | |||||
| "colName": "value_4", | |||||
| "colType": "real", | |||||
| "role": [ | |||||
| "attribute" | |||||
| ] | |||||
| }, | |||||
| { | |||||
| "colIndex": 7, | |||||
| "colName": "ground_truth", | |||||
| "colType": "integer", | |||||
| "role": [ | |||||
| "suggestedTarget" | |||||
| ] | |||||
| } | |||||
| ], | |||||
| "columnsCount": 8 | |||||
| } | |||||
| ] | |||||
| } | |||||
| @@ -1,141 +0,0 @@ | |||||
| d3mIndex,timestamp,value_0,value_1,value_2,value_3,value_4,ground_truth | |||||
| 1260,1261,7782,0.034280386319742985,2.5072222222222003,104,3119,0 | |||||
| 1261,1262,7829,0.039360296791109,2.5927777777778,82,3590,0 | |||||
| 1262,1263,7902,0.0,2.6894444444444,208,3893,0 | |||||
| 1263,1264,8039,0.038944065994356014,2.6291666666667,92,3264,0 | |||||
| 1264,1265,8350,0.18176011684739,2.6469444444444,53,3963,0 | |||||
| 1265,1266,8142,0.18521047165852,2.7461111111111003,65,2757,0 | |||||
| 1266,1267,7886,0.13079770999921,2.9363888888889,62,2306,0 | |||||
| 1267,1268,7743,0.13310058077443,3.2797222222222,73,2549,0 | |||||
| 1268,1269,7707,0.054750658073534006,3.5194444444444,84,2212,0 | |||||
| 1269,1270,7726,0.030588852697706,3.8130555555556,90,2286,0 | |||||
| 1270,1271,7717,0.12998124134227002,3.7941666666667,80,2979,0 | |||||
| 1271,1272,10331,0.09100057249197198,3.6086111111111,90,3158,0 | |||||
| 1272,1273,10515,0.19464543002904006,3.3858333333333,84,2645,0 | |||||
| 1273,1274,10415,0.22178651521516,3.3336111111111,34,3161,0 | |||||
| 1274,1275,10387,0.22983578430825,3.3116666666667003,67,4460,0 | |||||
| 1275,1276,10471,0.298229429356,3.2616666666667005,74,2630,0 | |||||
| 1276,1277,10385,0.12923377484588,3.0044444444444003,44,2593,0 | |||||
| 1277,1278,10439,0.19609416059774,2.6741666666667,64,2625,0 | |||||
| 1278,1279,10516,0.040518533819385014,2.3191666666667,70,4834,0 | |||||
| 1279,1280,10587,0.07099894663641,2.0597222222222,96,4056,0 | |||||
| 1280,1281,10586,0.07584150637714701,2.0547222222222,110,5713,0 | |||||
| 1281,1282,10684,0.08180100127782801,2.1511111111111,68,3940,0 | |||||
| 1282,1283,10880,0.0,2.2602777777778,90,4414,0 | |||||
| 1283,1284,10830,0.0,2.2883333333333,90,5044,0 | |||||
| 1284,1285,10794,0.09140162014739303,2.3736111111111,69,3894,0 | |||||
| 1285,1286,10843,0.0,2.5869444444444,46,3993,0 | |||||
| 1286,1287,10805,0.0,2.6480555555556,74,4404,0 | |||||
| 1287,1288,10996,0.0,2.6077777777777995,68,4072,0 | |||||
| 1288,1289,11327,0.05363316840061,2.6069444444444,67,4182,0 | |||||
| 1289,1290,11090,0.26818151064716,2.6908333333333,51,3351,0 | |||||
| 1290,1291,10578,0.21887772653901,2.9019444444444003,39,4183,0 | |||||
| 1291,1292,10528,0.32371296573811,3.2711111111111,26,4068,0 | |||||
| 1292,1293,10475,0.12565805017257,3.5872222222222,25,8139,0 | |||||
| 1293,1294,10664,0.092277247744574,3.6913888888889,32,11000,0 | |||||
| 1294,1295,10513,0.077016875742983,3.6313888888889,17,2975,0 | |||||
| 1295,1296,9072,0.3714480797312501,3.5605555555556,19,2692,0 | |||||
| 1296,1297,9069,0.19332372237792,3.4402777777778,16,2502,0 | |||||
| 1297,1298,9089,0.06345811641554701,3.35,28,2510,0 | |||||
| 1298,1299,9027,0.2267121559473,3.3469444444444,24,2663,0 | |||||
| 1299,1300,8969,0.053072279964629,3.2708333333333,35,3575,0 | |||||
| 1300,1301,9073,0.13336345197744,3.2519444444444,49,2586,0 | |||||
| 1301,1302,8957,0.1252855094715,2.7311111111111,106,2908,0 | |||||
| 1302,1303,9126,0.096211952864224,2.3875,80,3530,0 | |||||
| 1303,1304,9122,0.096524467517755,2.0847222222222,90,2776,0 | |||||
| 1304,1305,9231,0.08924770147957402,2.0975,169,2962,0 | |||||
| 1305,1306,9368,0.11889606284162,2.1763888888889,98,3441,0 | |||||
| 1306,1307,9458,0.031429841710104,2.2327777777777995,92,4376,0 | |||||
| 1307,1308,9463,0.0,2.2725,91,3857,0 | |||||
| 1308,1309,9356,0.036512411627868,2.3202777777778,99,4685,0 | |||||
| 1309,1310,9340,0.0,2.5425,90,4585,0 | |||||
| 1310,1311,9340,0.0,2.5986111111111,126,3542,0 | |||||
| 1311,1312,9276,0.0,2.6319444444444,102,3370,0 | |||||
| 1312,1313,9611,0.10106696361212,2.5836111111111,132,3515,0 | |||||
| 1313,1314,9532,0.14854949043035,2.675,88,3793,0 | |||||
| 1314,1315,9156,0.08612162048398897,2.8522222222222,135,2954,0 | |||||
| 1315,1316,9222,0.16494200410492002,3.1302777777778,114,2627,0 | |||||
| 1316,1317,9282,0.28637713141253,3.4805555555556,35,2550,0 | |||||
| 1317,1318,9573,0.13206535647488,3.5994444444444,24,2480,0 | |||||
| 1318,1319,9333,0.27364025607799,3.5847222222222,44,2521,0 | |||||
| 1319,1320,9987,0.38382339961227,3.4963888888889,26,2860,0 | |||||
| 1320,1321,10133,0.08426242877623301,3.3825,37,3675,0 | |||||
| 1321,1322,10010,0.3290413568025901,3.2694444444444,45,2704,0 | |||||
| 1322,1323,10028,0.22632868808708,3.2322222222222,42,3121,0 | |||||
| 1323,1324,9984,0.17914189971361,3.1936111111111005,47,2603,0 | |||||
| 1324,1325,10041,0.30046815361859003,3.0536111111111004,34,3984,0 | |||||
| 1325,1326,10072,0.22650915594248,2.7819444444444,56,2537,0 | |||||
| 1326,1327,10025,0.0,2.4152777777778,87,3349,0 | |||||
| 1327,1328,10116,0.1223093269317,2.1569444444444,74,3958,0 | |||||
| 1328,1329,10232,0.1696074188221,2.1125,90,4243,0 | |||||
| 1329,1330,10516,0.0,2.1833333333333003,79,4159,0 | |||||
| 1330,1331,10449,0.028193633007367,2.205,97,5637,0 | |||||
| 1331,1332,10598,0.0,2.1697222222222,90,8142,0 | |||||
| 1332,1333,10337,0.0,2.3075,77,5713,0 | |||||
| 1333,1334,10469,0.097305232437507,2.4575,101,3668,0 | |||||
| 1334,1335,10426,0.11905908868379,2.6077777777777995,74,4307,0 | |||||
| 1335,1336,10531,0.11660374103282,2.6275,439,4354,0 | |||||
| 1336,1337,10875,0.060474297756584014,2.6144444444444,79,4262,0 | |||||
| 1337,1338,10494,0.22568442027805,2.6477777777777995,165,3446,0 | |||||
| 1338,1339,10195,0.14077736537045002,2.8594444444444003,139,2677,0 | |||||
| 1339,1340,9918,0.1924574892026,3.2675,56,4450,0 | |||||
| 1340,1341,9889,0.18922597300629,3.5136111111111004,102,3044,0 | |||||
| 1341,1342,9947,0.041593949118095004,3.5725,101,3428,0 | |||||
| 1342,1343,9977,0.2502095174271,3.6863888888889,41,2845,0 | |||||
| 1343,1344,10835,0.18663972932643,3.5636111111111,94,2781,0 | |||||
| 1344,1345,10765,0.07351854082400297,3.4127777777778,116,2743,0 | |||||
| 1345,1346,10656,0.081949111399618,3.295,94,4470,0 | |||||
| 1346,1347,10485,0.20148511394009,3.2666666666667004,89,2596,0 | |||||
| 1347,1348,10681,0.11515101921294,3.1933333333333,141,3249,0 | |||||
| 1348,1349,10852,0.07797276382811,3.0688888888889,167,2529,0 | |||||
| 1349,1350,10728,0.07244862879413201,2.8102777777778,148,2452,0 | |||||
| 1350,1351,10874,0.07310929970435699,2.42,105,2934,0 | |||||
| 1351,1352,10964,0.066868365737218,2.1358333333333,210,3159,0 | |||||
| 1352,1353,10984,0.05788512501593701,1.9916666666667,145,3974,0 | |||||
| 1353,1354,11055,0.09727414207464803,2.0947222222222,136,4305,0 | |||||
| 1354,1355,11233,0.033270317741558,2.1591666666667,126,5012,0 | |||||
| 1355,1356,11161,0.0,2.2377777777778,157,4455,0 | |||||
| 1356,1357,10966,0.038270957919533,2.2511111111111,105,4108,0 | |||||
| 1357,1358,11193,0.08728058888363299,2.4208333333333,114,4339,0 | |||||
| 1358,1359,11167,0.10536774813238,2.5241666666667,104,5056,0 | |||||
| 1359,1360,11367,0.1233991317089,2.5794444444444,69,5573,0 | |||||
| 1360,1361,51251,0.042565915766552,2.5936111111111,75,3366,1 | |||||
| 1361,1362,17953,0.23147422367229,2.6830555555556,73,2559,1 | |||||
| 1362,1363,170029,0.08983405162538903,2.8188888888889,74,1999,1 | |||||
| 1363,1364,10955,0.07464756469365201,2.9513888888888995,126,1993,0 | |||||
| 1364,1365,10984,0.099244104918934,3.2830555555556,67,1913,0 | |||||
| 1365,1366,10964,0.11535172009194,3.4819444444444,32,1760,0 | |||||
| 1366,1367,10980,0.21774881707852,3.5886111111111005,38,1890,0 | |||||
| 1367,1368,10852,0.1305066423559,3.4836111111111,34,2469,0 | |||||
| 1368,1369,10786,0.10054853030204,3.3955555555556,36,2133,0 | |||||
| 1369,1370,10841,0.02468393737575,3.2847222222222,26,3359,0 | |||||
| 1370,1371,10762,0.10018007414459,3.2383333333332995,74,3783,0 | |||||
| 1371,1372,10419,0.12522619841308,3.2188888888889,85,1809,0 | |||||
| 1372,1373,10467,0.11781887197077,2.9483333333333,67,2143,0 | |||||
| 1373,1374,10502,0.13417256350298,2.5855555555556,84,2567,0 | |||||
| 1374,1375,10519,0.07474686582090599,2.3005555555556003,1630,2176,0 | |||||
| 1375,1376,10579,0.13570963056519,2.0855555555556,1435,1929,0 | |||||
| 1376,1377,10502,0.076431907457478,1.9027777777778,857,2244,0 | |||||
| 1377,1378,10661,0.0,1.9411111111111,31,1810,0 | |||||
| 1378,1379,10818,0.1936428046839,2.0444444444444,500,2088,0 | |||||
| 1379,1380,10918,0.052826773889684014,2.1363888888889,53,2371,0 | |||||
| 1380,1381,10871,0.0,2.22,61,1843,0 | |||||
| 1381,1382,10796,0.054466597481213,2.3530555555556,158,2668,0 | |||||
| 1382,1383,10774,0.057459020289436,2.545,184,2309,0 | |||||
| 1383,1384,10898,0.28750562005936,2.6202777777778,91,1998,0 | |||||
| 1384,1385,11442,0.075538554674309,2.6847222222222,60,2480,0 | |||||
| 1385,1386,11113,0.08112608570492501,2.6591666666667004,107,2147,0 | |||||
| 1386,1387,10888,0.21563803296368,2.7863888888888995,5157,1802,0 | |||||
| 1387,1388,10894,0.095725002305685,3.0269444444444003,28,1789,0 | |||||
| 1388,1389,10888,0.17516056892320994,3.3227777777778,24,1999,0 | |||||
| 1389,1390,10896,0.32902836018586,3.6097222222222,21,2142,0 | |||||
| 1390,1391,10800,0.10216065221678,3.6805555555556,12,1904,0 | |||||
| 1391,1392,11000,0.19741931250852,3.6075,24,1876,0 | |||||
| 1392,1393,10985,0.10149107903671,3.4091666666667004,17,2434,0 | |||||
| 1393,1394,11017,0.17479255893624,3.3666666666667004,48,2472,0 | |||||
| 1394,1395,10863,0.034385029573777,3.3158333333333,41,1744,0 | |||||
| 1395,1396,10875,0.21988771218053,3.1622222222222,1088,2404,0 | |||||
| 1396,1397,10987,0.10149107903671,3.1086111111111,68,1971,0 | |||||
| 1397,1398,10778,0.10269981175445,2.6552777777778,2575,1713,0 | |||||
| 1398,1399,10957,0.11258759940039,2.2730555555556,4688,1765,0 | |||||
| 1399,1400,10832,0.13022351806001,2.0591666666667,477,3156,0 | |||||
| @@ -1,65 +0,0 @@ | |||||
| { | |||||
| "about": { | |||||
| "problemID": "yahoo_sub_5_problem", | |||||
| "problemName": "yahoo_sub_5_problem", | |||||
| "problemDescription": "Anomaly detection", | |||||
| "problemVersion": "4.0.0", | |||||
| "problemSchemaVersion": "4.0.0", | |||||
| "taskKeywords": [ | |||||
| "classification", | |||||
| "binary", | |||||
| "tabular" | |||||
| ] | |||||
| }, | |||||
| "inputs": { | |||||
| "data": [ | |||||
| { | |||||
| "datasetID": "yahoo_sub_5_dataset", | |||||
| "targets": [ | |||||
| { | |||||
| "targetIndex": 0, | |||||
| "resID": "learningData", | |||||
| "colIndex": 7, | |||||
| "colName": "ground_truth" | |||||
| } | |||||
| ] | |||||
| } | |||||
| ], | |||||
| "dataSplits": { | |||||
| "method": "holdOut", | |||||
| "testSize": 0.2, | |||||
| "stratified": true, | |||||
| "numRepeats": 0, | |||||
| "randomSeed": 42, | |||||
| "splitsFile": "dataSplits.csv", | |||||
| "datasetViewMaps": { | |||||
| "train": [ | |||||
| { | |||||
| "from": "yahoo_sub_5_dataset", | |||||
| "to": "yahoo_sub_5_dataset_TRAIN" | |||||
| } | |||||
| ], | |||||
| "test": [ | |||||
| { | |||||
| "from": "yahoo_sub_5_dataset", | |||||
| "to": "yahoo_sub_5_dataset_TEST" | |||||
| } | |||||
| ], | |||||
| "score": [ | |||||
| { | |||||
| "from": "yahoo_sub_5_dataset", | |||||
| "to": "yahoo_sub_5_dataset_SCORE" | |||||
| } | |||||
| ] | |||||
| } | |||||
| }, | |||||
| "performanceMetrics": [ | |||||
| { | |||||
| "metric": "f1Macro" | |||||
| } | |||||
| ] | |||||
| }, | |||||
| "expectedOutputs": { | |||||
| "predictionsFile": "predictions.csv" | |||||
| } | |||||
| } | |||||
| @@ -1,95 +0,0 @@ | |||||
| { | |||||
| "about": { | |||||
| "datasetID": "yahoo_sub_5_dataset_TEST", | |||||
| "datasetName": "NULL", | |||||
| "description": "Database of baseball players and play statistics, including 'Games_played', 'At_bats', 'Runs', 'Hits', 'Doubles', 'Triples', 'Home_runs', 'RBIs', 'Walks', 'Strikeouts', 'Batting_average', 'On_base_pct', 'Slugging_pct' and 'Fielding_ave'", | |||||
| "citation": " @book{simonoff2003analyzing,title={Analyzing Categorical Data},author={Simonoff, J.S.},isbn={9780387007496},lccn={2003044946},series={Springer Texts in Statistics},url={https://books.google.com/books?id=G8wrifweAoC},year={2003},publisher={Springer New York}} ", | |||||
| "license": " CC Public Domain Mark 1.0 ", | |||||
| "source": "OpenML", | |||||
| "sourceURI": "http://www.openml.org/d/185", | |||||
| "approximateSize": "", | |||||
| "datasetSchemaVersion": "4.0.0", | |||||
| "redacted": false, | |||||
| "datasetVersion": "4.0.0" | |||||
| }, | |||||
| "dataResources": [ | |||||
| { | |||||
| "resID": "learningData", | |||||
| "resPath": "tables/learningData.csv", | |||||
| "resType": "table", | |||||
| "resFormat": { | |||||
| "text/csv": [ | |||||
| "csv" | |||||
| ] | |||||
| }, | |||||
| "isCollection": false, | |||||
| "columns": [ | |||||
| { | |||||
| "colIndex": 0, | |||||
| "colName": "d3mIndex", | |||||
| "colType": "integer", | |||||
| "role": [ | |||||
| "index" | |||||
| ] | |||||
| }, | |||||
| { | |||||
| "colIndex": 1, | |||||
| "colName": "timestamp", | |||||
| "colType": "integer", | |||||
| "role": [ | |||||
| "attribute" | |||||
| ] | |||||
| }, | |||||
| { | |||||
| "colIndex": 2, | |||||
| "colName": "value_0", | |||||
| "colType": "real", | |||||
| "role": [ | |||||
| "attribute" | |||||
| ] | |||||
| }, | |||||
| { | |||||
| "colIndex": 3, | |||||
| "colName": "value_1", | |||||
| "colType": "real", | |||||
| "role": [ | |||||
| "attribute" | |||||
| ] | |||||
| }, | |||||
| { | |||||
| "colIndex": 4, | |||||
| "colName": "value_2", | |||||
| "colType": "real", | |||||
| "role": [ | |||||
| "attribute" | |||||
| ] | |||||
| }, | |||||
| { | |||||
| "colIndex": 5, | |||||
| "colName": "value_3", | |||||
| "colType": "real", | |||||
| "role": [ | |||||
| "attribute" | |||||
| ] | |||||
| }, | |||||
| { | |||||
| "colIndex": 6, | |||||
| "colName": "value_4", | |||||
| "colType": "real", | |||||
| "role": [ | |||||
| "attribute" | |||||
| ] | |||||
| }, | |||||
| { | |||||
| "colIndex": 7, | |||||
| "colName": "ground_truth", | |||||
| "colType": "integer", | |||||
| "role": [ | |||||
| "suggestedTarget" | |||||
| ] | |||||
| } | |||||
| ], | |||||
| "columnsCount": 8 | |||||
| } | |||||
| ] | |||||
| } | |||||
| @@ -1,141 +0,0 @@ | |||||
| d3mIndex,timestamp,value_0,value_1,value_2,value_3,value_4,ground_truth | |||||
| 1260,1261,7782,0.034280386319742985,2.5072222222222003,104,3119,0 | |||||
| 1261,1262,7829,0.039360296791109,2.5927777777778,82,3590,0 | |||||
| 1262,1263,7902,0.0,2.6894444444444,208,3893,0 | |||||
| 1263,1264,8039,0.038944065994356014,2.6291666666667,92,3264,0 | |||||
| 1264,1265,8350,0.18176011684739,2.6469444444444,53,3963,0 | |||||
| 1265,1266,8142,0.18521047165852,2.7461111111111003,65,2757,0 | |||||
| 1266,1267,7886,0.13079770999921,2.9363888888889,62,2306,0 | |||||
| 1267,1268,7743,0.13310058077443,3.2797222222222,73,2549,0 | |||||
| 1268,1269,7707,0.054750658073534006,3.5194444444444,84,2212,0 | |||||
| 1269,1270,7726,0.030588852697706,3.8130555555556,90,2286,0 | |||||
| 1270,1271,7717,0.12998124134227002,3.7941666666667,80,2979,0 | |||||
| 1271,1272,10331,0.09100057249197198,3.6086111111111,90,3158,0 | |||||
| 1272,1273,10515,0.19464543002904006,3.3858333333333,84,2645,0 | |||||
| 1273,1274,10415,0.22178651521516,3.3336111111111,34,3161,0 | |||||
| 1274,1275,10387,0.22983578430825,3.3116666666667003,67,4460,0 | |||||
| 1275,1276,10471,0.298229429356,3.2616666666667005,74,2630,0 | |||||
| 1276,1277,10385,0.12923377484588,3.0044444444444003,44,2593,0 | |||||
| 1277,1278,10439,0.19609416059774,2.6741666666667,64,2625,0 | |||||
| 1278,1279,10516,0.040518533819385014,2.3191666666667,70,4834,0 | |||||
| 1279,1280,10587,0.07099894663641,2.0597222222222,96,4056,0 | |||||
| 1280,1281,10586,0.07584150637714701,2.0547222222222,110,5713,0 | |||||
| 1281,1282,10684,0.08180100127782801,2.1511111111111,68,3940,0 | |||||
| 1282,1283,10880,0.0,2.2602777777778,90,4414,0 | |||||
| 1283,1284,10830,0.0,2.2883333333333,90,5044,0 | |||||
| 1284,1285,10794,0.09140162014739303,2.3736111111111,69,3894,0 | |||||
| 1285,1286,10843,0.0,2.5869444444444,46,3993,0 | |||||
| 1286,1287,10805,0.0,2.6480555555556,74,4404,0 | |||||
| 1287,1288,10996,0.0,2.6077777777777995,68,4072,0 | |||||
| 1288,1289,11327,0.05363316840061,2.6069444444444,67,4182,0 | |||||
| 1289,1290,11090,0.26818151064716,2.6908333333333,51,3351,0 | |||||
| 1290,1291,10578,0.21887772653901,2.9019444444444003,39,4183,0 | |||||
| 1291,1292,10528,0.32371296573811,3.2711111111111,26,4068,0 | |||||
| 1292,1293,10475,0.12565805017257,3.5872222222222,25,8139,0 | |||||
| 1293,1294,10664,0.092277247744574,3.6913888888889,32,11000,0 | |||||
| 1294,1295,10513,0.077016875742983,3.6313888888889,17,2975,0 | |||||
| 1295,1296,9072,0.3714480797312501,3.5605555555556,19,2692,0 | |||||
| 1296,1297,9069,0.19332372237792,3.4402777777778,16,2502,0 | |||||
| 1297,1298,9089,0.06345811641554701,3.35,28,2510,0 | |||||
| 1298,1299,9027,0.2267121559473,3.3469444444444,24,2663,0 | |||||
| 1299,1300,8969,0.053072279964629,3.2708333333333,35,3575,0 | |||||
| 1300,1301,9073,0.13336345197744,3.2519444444444,49,2586,0 | |||||
| 1301,1302,8957,0.1252855094715,2.7311111111111,106,2908,0 | |||||
| 1302,1303,9126,0.096211952864224,2.3875,80,3530,0 | |||||
| 1303,1304,9122,0.096524467517755,2.0847222222222,90,2776,0 | |||||
| 1304,1305,9231,0.08924770147957402,2.0975,169,2962,0 | |||||
| 1305,1306,9368,0.11889606284162,2.1763888888889,98,3441,0 | |||||
| 1306,1307,9458,0.031429841710104,2.2327777777777995,92,4376,0 | |||||
| 1307,1308,9463,0.0,2.2725,91,3857,0 | |||||
| 1308,1309,9356,0.036512411627868,2.3202777777778,99,4685,0 | |||||
| 1309,1310,9340,0.0,2.5425,90,4585,0 | |||||
| 1310,1311,9340,0.0,2.5986111111111,126,3542,0 | |||||
| 1311,1312,9276,0.0,2.6319444444444,102,3370,0 | |||||
| 1312,1313,9611,0.10106696361212,2.5836111111111,132,3515,0 | |||||
| 1313,1314,9532,0.14854949043035,2.675,88,3793,0 | |||||
| 1314,1315,9156,0.08612162048398897,2.8522222222222,135,2954,0 | |||||
| 1315,1316,9222,0.16494200410492002,3.1302777777778,114,2627,0 | |||||
| 1316,1317,9282,0.28637713141253,3.4805555555556,35,2550,0 | |||||
| 1317,1318,9573,0.13206535647488,3.5994444444444,24,2480,0 | |||||
| 1318,1319,9333,0.27364025607799,3.5847222222222,44,2521,0 | |||||
| 1319,1320,9987,0.38382339961227,3.4963888888889,26,2860,0 | |||||
| 1320,1321,10133,0.08426242877623301,3.3825,37,3675,0 | |||||
| 1321,1322,10010,0.3290413568025901,3.2694444444444,45,2704,0 | |||||
| 1322,1323,10028,0.22632868808708,3.2322222222222,42,3121,0 | |||||
| 1323,1324,9984,0.17914189971361,3.1936111111111005,47,2603,0 | |||||
| 1324,1325,10041,0.30046815361859003,3.0536111111111004,34,3984,0 | |||||
| 1325,1326,10072,0.22650915594248,2.7819444444444,56,2537,0 | |||||
| 1326,1327,10025,0.0,2.4152777777778,87,3349,0 | |||||
| 1327,1328,10116,0.1223093269317,2.1569444444444,74,3958,0 | |||||
| 1328,1329,10232,0.1696074188221,2.1125,90,4243,0 | |||||
| 1329,1330,10516,0.0,2.1833333333333003,79,4159,0 | |||||
| 1330,1331,10449,0.028193633007367,2.205,97,5637,0 | |||||
| 1331,1332,10598,0.0,2.1697222222222,90,8142,0 | |||||
| 1332,1333,10337,0.0,2.3075,77,5713,0 | |||||
| 1333,1334,10469,0.097305232437507,2.4575,101,3668,0 | |||||
| 1334,1335,10426,0.11905908868379,2.6077777777777995,74,4307,0 | |||||
| 1335,1336,10531,0.11660374103282,2.6275,439,4354,0 | |||||
| 1336,1337,10875,0.060474297756584014,2.6144444444444,79,4262,0 | |||||
| 1337,1338,10494,0.22568442027805,2.6477777777777995,165,3446,0 | |||||
| 1338,1339,10195,0.14077736537045002,2.8594444444444003,139,2677,0 | |||||
| 1339,1340,9918,0.1924574892026,3.2675,56,4450,0 | |||||
| 1340,1341,9889,0.18922597300629,3.5136111111111004,102,3044,0 | |||||
| 1341,1342,9947,0.041593949118095004,3.5725,101,3428,0 | |||||
| 1342,1343,9977,0.2502095174271,3.6863888888889,41,2845,0 | |||||
| 1343,1344,10835,0.18663972932643,3.5636111111111,94,2781,0 | |||||
| 1344,1345,10765,0.07351854082400297,3.4127777777778,116,2743,0 | |||||
| 1345,1346,10656,0.081949111399618,3.295,94,4470,0 | |||||
| 1346,1347,10485,0.20148511394009,3.2666666666667004,89,2596,0 | |||||
| 1347,1348,10681,0.11515101921294,3.1933333333333,141,3249,0 | |||||
| 1348,1349,10852,0.07797276382811,3.0688888888889,167,2529,0 | |||||
| 1349,1350,10728,0.07244862879413201,2.8102777777778,148,2452,0 | |||||
| 1350,1351,10874,0.07310929970435699,2.42,105,2934,0 | |||||
| 1351,1352,10964,0.066868365737218,2.1358333333333,210,3159,0 | |||||
| 1352,1353,10984,0.05788512501593701,1.9916666666667,145,3974,0 | |||||
| 1353,1354,11055,0.09727414207464803,2.0947222222222,136,4305,0 | |||||
| 1354,1355,11233,0.033270317741558,2.1591666666667,126,5012,0 | |||||
| 1355,1356,11161,0.0,2.2377777777778,157,4455,0 | |||||
| 1356,1357,10966,0.038270957919533,2.2511111111111,105,4108,0 | |||||
| 1357,1358,11193,0.08728058888363299,2.4208333333333,114,4339,0 | |||||
| 1358,1359,11167,0.10536774813238,2.5241666666667,104,5056,0 | |||||
| 1359,1360,11367,0.1233991317089,2.5794444444444,69,5573,0 | |||||
| 1360,1361,51251,0.042565915766552,2.5936111111111,75,3366,1 | |||||
| 1361,1362,17953,0.23147422367229,2.6830555555556,73,2559,1 | |||||
| 1362,1363,170029,0.08983405162538903,2.8188888888889,74,1999,1 | |||||
| 1363,1364,10955,0.07464756469365201,2.9513888888888995,126,1993,0 | |||||
| 1364,1365,10984,0.099244104918934,3.2830555555556,67,1913,0 | |||||
| 1365,1366,10964,0.11535172009194,3.4819444444444,32,1760,0 | |||||
| 1366,1367,10980,0.21774881707852,3.5886111111111005,38,1890,0 | |||||
| 1367,1368,10852,0.1305066423559,3.4836111111111,34,2469,0 | |||||
| 1368,1369,10786,0.10054853030204,3.3955555555556,36,2133,0 | |||||
| 1369,1370,10841,0.02468393737575,3.2847222222222,26,3359,0 | |||||
| 1370,1371,10762,0.10018007414459,3.2383333333332995,74,3783,0 | |||||
| 1371,1372,10419,0.12522619841308,3.2188888888889,85,1809,0 | |||||
| 1372,1373,10467,0.11781887197077,2.9483333333333,67,2143,0 | |||||
| 1373,1374,10502,0.13417256350298,2.5855555555556,84,2567,0 | |||||
| 1374,1375,10519,0.07474686582090599,2.3005555555556003,1630,2176,0 | |||||
| 1375,1376,10579,0.13570963056519,2.0855555555556,1435,1929,0 | |||||
| 1376,1377,10502,0.076431907457478,1.9027777777778,857,2244,0 | |||||
| 1377,1378,10661,0.0,1.9411111111111,31,1810,0 | |||||
| 1378,1379,10818,0.1936428046839,2.0444444444444,500,2088,0 | |||||
| 1379,1380,10918,0.052826773889684014,2.1363888888889,53,2371,0 | |||||
| 1380,1381,10871,0.0,2.22,61,1843,0 | |||||
| 1381,1382,10796,0.054466597481213,2.3530555555556,158,2668,0 | |||||
| 1382,1383,10774,0.057459020289436,2.545,184,2309,0 | |||||
| 1383,1384,10898,0.28750562005936,2.6202777777778,91,1998,0 | |||||
| 1384,1385,11442,0.075538554674309,2.6847222222222,60,2480,0 | |||||
| 1385,1386,11113,0.08112608570492501,2.6591666666667004,107,2147,0 | |||||
| 1386,1387,10888,0.21563803296368,2.7863888888888995,5157,1802,0 | |||||
| 1387,1388,10894,0.095725002305685,3.0269444444444003,28,1789,0 | |||||
| 1388,1389,10888,0.17516056892320994,3.3227777777778,24,1999,0 | |||||
| 1389,1390,10896,0.32902836018586,3.6097222222222,21,2142,0 | |||||
| 1390,1391,10800,0.10216065221678,3.6805555555556,12,1904,0 | |||||
| 1391,1392,11000,0.19741931250852,3.6075,24,1876,0 | |||||
| 1392,1393,10985,0.10149107903671,3.4091666666667004,17,2434,0 | |||||
| 1393,1394,11017,0.17479255893624,3.3666666666667004,48,2472,0 | |||||
| 1394,1395,10863,0.034385029573777,3.3158333333333,41,1744,0 | |||||
| 1395,1396,10875,0.21988771218053,3.1622222222222,1088,2404,0 | |||||
| 1396,1397,10987,0.10149107903671,3.1086111111111,68,1971,0 | |||||
| 1397,1398,10778,0.10269981175445,2.6552777777778,2575,1713,0 | |||||
| 1398,1399,10957,0.11258759940039,2.2730555555556,4688,1765,0 | |||||
| 1399,1400,10832,0.13022351806001,2.0591666666667,477,3156,0 | |||||
| @@ -1,65 +0,0 @@ | |||||
| { | |||||
| "about": { | |||||
| "problemID": "yahoo_sub_5_problem", | |||||
| "problemName": "yahoo_sub_5_problem", | |||||
| "problemDescription": "Anomaly detection", | |||||
| "problemVersion": "4.0.0", | |||||
| "problemSchemaVersion": "4.0.0", | |||||
| "taskKeywords": [ | |||||
| "classification", | |||||
| "binary", | |||||
| "tabular" | |||||
| ] | |||||
| }, | |||||
| "inputs": { | |||||
| "data": [ | |||||
| { | |||||
| "datasetID": "yahoo_sub_5_dataset", | |||||
| "targets": [ | |||||
| { | |||||
| "targetIndex": 0, | |||||
| "resID": "learningData", | |||||
| "colIndex": 7, | |||||
| "colName": "ground_truth" | |||||
| } | |||||
| ] | |||||
| } | |||||
| ], | |||||
| "dataSplits": { | |||||
| "method": "holdOut", | |||||
| "testSize": 0.2, | |||||
| "stratified": true, | |||||
| "numRepeats": 0, | |||||
| "randomSeed": 42, | |||||
| "splitsFile": "dataSplits.csv", | |||||
| "datasetViewMaps": { | |||||
| "train": [ | |||||
| { | |||||
| "from": "yahoo_sub_5_dataset", | |||||
| "to": "yahoo_sub_5_dataset_TRAIN" | |||||
| } | |||||
| ], | |||||
| "test": [ | |||||
| { | |||||
| "from": "yahoo_sub_5_dataset", | |||||
| "to": "yahoo_sub_5_dataset_TEST" | |||||
| } | |||||
| ], | |||||
| "score": [ | |||||
| { | |||||
| "from": "yahoo_sub_5_dataset", | |||||
| "to": "yahoo_sub_5_dataset_SCORE" | |||||
| } | |||||
| ] | |||||
| } | |||||
| }, | |||||
| "performanceMetrics": [ | |||||
| { | |||||
| "metric": "f1Macro" | |||||
| } | |||||
| ] | |||||
| }, | |||||
| "expectedOutputs": { | |||||
| "predictionsFile": "predictions.csv" | |||||
| } | |||||
| } | |||||
| @@ -1,95 +0,0 @@ | |||||
| { | |||||
| "about": { | |||||
| "datasetID": "yahoo_sub_5_dataset_TRAIN", | |||||
| "datasetName": "NULL", | |||||
| "description": "Multivariate time series for anomaly detection. Each row has an integer 'timestamp', five real-valued metrics ('value_0' through 'value_4'), and an integer 'ground_truth' label that marks anomalous rows with 1 and normal rows with 0", | |||||
| "citation": " @book{simonoff2003analyzing,title={Analyzing Categorical Data},author={Simonoff, J.S.},isbn={9780387007496},lccn={2003044946},series={Springer Texts in Statistics},url={https://books.google.com/books?id=G8wrifweAoC},year={2003},publisher={Springer New York}} ", | |||||
| "license": " CC Public Domain Mark 1.0 ", | |||||
| "source": "OpenML", | |||||
| "sourceURI": "http://www.openml.org/d/185", | |||||
| "approximateSize": "", | |||||
| "datasetSchemaVersion": "4.0.0", | |||||
| "redacted": false, | |||||
| "datasetVersion": "4.0.0" | |||||
| }, | |||||
| "dataResources": [ | |||||
| { | |||||
| "resID": "learningData", | |||||
| "resPath": "tables/learningData.csv", | |||||
| "resType": "table", | |||||
| "resFormat": { | |||||
| "text/csv": [ | |||||
| "csv" | |||||
| ] | |||||
| }, | |||||
| "isCollection": false, | |||||
| "columns": [ | |||||
| { | |||||
| "colIndex": 0, | |||||
| "colName": "d3mIndex", | |||||
| "colType": "integer", | |||||
| "role": [ | |||||
| "index" | |||||
| ] | |||||
| }, | |||||
| { | |||||
| "colIndex": 1, | |||||
| "colName": "timestamp", | |||||
| "colType": "integer", | |||||
| "role": [ | |||||
| "attribute" | |||||
| ] | |||||
| }, | |||||
| { | |||||
| "colIndex": 2, | |||||
| "colName": "value_0", | |||||
| "colType": "real", | |||||
| "role": [ | |||||
| "attribute" | |||||
| ] | |||||
| }, | |||||
| { | |||||
| "colIndex": 3, | |||||
| "colName": "value_1", | |||||
| "colType": "real", | |||||
| "role": [ | |||||
| "attribute" | |||||
| ] | |||||
| }, | |||||
| { | |||||
| "colIndex": 4, | |||||
| "colName": "value_2", | |||||
| "colType": "real", | |||||
| "role": [ | |||||
| "attribute" | |||||
| ] | |||||
| }, | |||||
| { | |||||
| "colIndex": 5, | |||||
| "colName": "value_3", | |||||
| "colType": "real", | |||||
| "role": [ | |||||
| "attribute" | |||||
| ] | |||||
| }, | |||||
| { | |||||
| "colIndex": 6, | |||||
| "colName": "value_4", | |||||
| "colType": "real", | |||||
| "role": [ | |||||
| "attribute" | |||||
| ] | |||||
| }, | |||||
| { | |||||
| "colIndex": 7, | |||||
| "colName": "ground_truth", | |||||
| "colType": "integer", | |||||
| "role": [ | |||||
| "suggestedTarget" | |||||
| ] | |||||
| } | |||||
| ], | |||||
| "columnsCount": 8 | |||||
| } | |||||
| ] | |||||
| } | |||||
| @@ -1,65 +0,0 @@ | |||||
| { | |||||
| "about": { | |||||
| "problemID": "yahoo_sub_5_problem", | |||||
| "problemName": "yahoo_sub_5_problem", | |||||
| "problemDescription": "Anomaly detection", | |||||
| "problemVersion": "4.0.0", | |||||
| "problemSchemaVersion": "4.0.0", | |||||
| "taskKeywords": [ | |||||
| "classification", | |||||
| "binary", | |||||
| "tabular" | |||||
| ] | |||||
| }, | |||||
| "inputs": { | |||||
| "data": [ | |||||
| { | |||||
| "datasetID": "yahoo_sub_5_dataset", | |||||
| "targets": [ | |||||
| { | |||||
| "targetIndex": 0, | |||||
| "resID": "learningData", | |||||
| "colIndex": 7, | |||||
| "colName": "ground_truth" | |||||
| } | |||||
| ] | |||||
| } | |||||
| ], | |||||
| "dataSplits": { | |||||
| "method": "holdOut", | |||||
| "testSize": 0.2, | |||||
| "stratified": true, | |||||
| "numRepeats": 0, | |||||
| "randomSeed": 42, | |||||
| "splitsFile": "dataSplits.csv", | |||||
| "datasetViewMaps": { | |||||
| "train": [ | |||||
| { | |||||
| "from": "yahoo_sub_5_dataset", | |||||
| "to": "yahoo_sub_5_dataset_TRAIN" | |||||
| } | |||||
| ], | |||||
| "test": [ | |||||
| { | |||||
| "from": "yahoo_sub_5_dataset", | |||||
| "to": "yahoo_sub_5_dataset_TEST" | |||||
| } | |||||
| ], | |||||
| "score": [ | |||||
| { | |||||
| "from": "yahoo_sub_5_dataset", | |||||
| "to": "yahoo_sub_5_dataset_SCORE" | |||||
| } | |||||
| ] | |||||
| } | |||||
| }, | |||||
| "performanceMetrics": [ | |||||
| { | |||||
| "metric": "f1Macro" | |||||
| } | |||||
| ] | |||||
| }, | |||||
| "expectedOutputs": { | |||||
| "predictionsFile": "predictions.csv" | |||||
| } | |||||
| } | |||||