|
- import os.path
- import pickle
- import unittest
-
- from d3m import utils
- from d3m.metadata import problem, pipeline_run
-
-
class TestProblem(unittest.TestCase):
    """Tests for loading, converting, and (un)parsing problem descriptions, plus metric normalization."""

    def test_basic(self):
        # Load the iris problem description shipped next to this test module and
        # compare both of its structure representations against the expected
        # content, then run the canonical JSON structure through both validators.
        self.maxDiff = None

        tests_directory = os.path.dirname(__file__)
        problem_doc_path = os.path.join(tests_directory, 'data', 'problems', 'iris_problem_1', 'problemDoc.json')
        problem_uri = 'file://{problem_doc_path}'.format(problem_doc_path=problem_doc_path)

        loaded_problem = problem.Problem.load(problem_uri)

        def expected_structure(task_keywords, metric):
            # Everything except the task keywords and the metric is identical
            # across the three comparisons below.
            return {
                'id': 'iris_problem_1',
                'digest': '1a12135422967aa0de0c4629f4f58d08d39e97f9133f7b50da71420781aa18a5',
                'version': '4.0.0',
                'location_uris': [
                    problem_uri,
                ],
                'name': 'Distinguish Iris flowers',
                'description': 'Distinguish Iris flowers of three related species.',
                'schema': problem.PROBLEM_SCHEMA_VERSION,
                'problem': {
                    'task_keywords': task_keywords,
                    'performance_metrics': [
                        {
                            'metric': metric,
                        },
                    ],
                },
                'inputs': [
                    {
                        'dataset_id': 'iris_dataset_1',
                        'targets': [
                            {
                                'target_index': 0,
                                'resource_id': 'learningData',
                                'column_index': 5,
                                'column_name': 'species',
                            },
                        ],
                    },
                ],
            }

        # Simple structure, expressed with enumeration members.
        self.assertEqual(
            loaded_problem.to_simple_structure(),
            expected_structure(
                [problem.TaskKeyword.CLASSIFICATION, problem.TaskKeyword.MULTICLASS],
                problem.PerformanceMetric.ACCURACY,
            ),
        )

        # JSON structure compared against enumeration members.
        # NOTE(review): presumably passes because the enumeration members
        # compare equal to their string names -- confirm against the enum class.
        self.assertEqual(
            loaded_problem.to_json_structure(),
            expected_structure(
                [problem.TaskKeyword.CLASSIFICATION, problem.TaskKeyword.MULTICLASS],
                problem.PerformanceMetric.ACCURACY,
            ),
        )

        # JSON structure compared against plain strings.
        self.assertEqual(
            loaded_problem.to_json_structure(),
            expected_structure(['CLASSIFICATION', 'MULTICLASS'], 'ACCURACY'),
        )

        # Each validator receives its own freshly produced canonical structure.
        pipeline_run.validate_problem(loaded_problem.to_json_structure(canonical=True))
        problem.PROBLEM_SCHEMA_VALIDATOR.validate(loaded_problem.to_json_structure(canonical=True))

    def test_conversion(self):
        # Round-trip the loaded problem through its JSON structure (and through
        # the legacy ``utils.to_json_structure`` path) and check nothing is lost.
        tests_directory = os.path.dirname(__file__)
        problem_doc_path = os.path.join(tests_directory, 'data', 'problems', 'iris_problem_1', 'problemDoc.json')
        problem_uri = 'file://{problem_doc_path}'.format(problem_doc_path=problem_doc_path)

        loaded_problem = problem.Problem.load(problem_uri)

        original_simple = loaded_problem.to_simple_structure()
        round_tripped_simple = problem.Problem.from_json_structure(
            loaded_problem.to_json_structure(),
            strict_digest=True,
        ).to_simple_structure()
        self.assertEqual(original_simple, round_tripped_simple)

        # Legacy.
        # The JSON structure is deliberately produced twice so that the value
        # being compared is never the same object that was handed to
        # ``from_json_structure``.
        legacy_json = utils.to_json_structure(loaded_problem.to_simple_structure())
        legacy_round_tripped = problem.Problem.from_json_structure(
            utils.to_json_structure(loaded_problem.to_simple_structure()),
            strict_digest=True,
        ).to_simple_structure()
        self.assertEqual(legacy_json, legacy_round_tripped)

        # After conversion from JSON the first task keyword must be the
        # enumeration member itself (identity check), not merely an equal value.
        reloaded_problem = problem.Problem.from_json_structure(
            loaded_problem.to_json_structure(),
            strict_digest=True,
        )
        first_keyword = reloaded_problem['problem']['task_keywords'][0]
        self.assertIs(first_keyword, problem.TaskKeyword.CLASSIFICATION)

    def test_unparse(self):
        # Each enumeration member must unparse to the listed string.
        unparse_cases = [
            (problem.TaskKeyword.CLASSIFICATION, 'classification'),
            (problem.TaskKeyword.MULTICLASS, 'multiClass'),
            (problem.PerformanceMetric.ACCURACY, 'accuracy'),
        ]

        for member, unparsed in unparse_cases:
            self.assertEqual(member.unparse(), unparsed)

    def test_normalize(self):
        # Exercise ``PerformanceMetric._normalize`` with finite bounds (exact
        # comparison) and with one infinite bound (approximate comparison).
        normalize = problem.PerformanceMetric._normalize
        positive_infinity = float('inf')
        negative_infinity = float('-inf')

        # (first bound, second bound, value, expected result)
        finite_cases = [
            (0, 1, 0.5, 0.5),
            (0, 2, 0.5, 0.25),
            (1, 2, 1.5, 0.5),

            (-1, 0, -0.5, 0.5),
            (-2, 0, -1.5, 0.25),
            (-2, -1, -1.5, 0.5),

            (1, 0, 0.5, 0.5),
            (2, 0, 0.5, 0.75),
            (2, 1, 1.5, 0.5),

            (0, -1, -0.5, 0.5),
            (0, -2, -1.5, 0.75),
            (-1, -2, -1.5, 0.5),
        ]

        for first_bound, second_bound, value, expected in finite_cases:
            self.assertEqual(normalize(first_bound, second_bound, value), expected)

        # (finite bound, value, expected result) when the other bound is +inf
        # and is passed as the first argument.
        positive_infinity_cases = [
            (0, 0.0, 1.0),
            (0, 0.5, 0.9997500000052083),
            (0, 1000.0, 0.5378828427399902),
            (0, 5000.0, 0.013385701848569713),

            (1, 1.0, 1.0),
            (1, 1.5, 0.9997500000052083),
            (1, 1000.0, 0.5382761574524354),
            (1, 5000.0, 0.013399004523107192),

            (-1, -1.0, 1.0),
            (-1, -0.5, 0.9997500000052083),
            (-1, 1000.0, 0.5374897097430198),
            (-1, 5000.0, 0.01337241229216877),
            (-1, 0.0, 0.9995000000416667),
        ]

        # Same layout, for -inf passed as the first argument.
        negative_infinity_cases = [
            (0, 0.0, 1.0),
            (0, -0.5, 0.9997500000052083),
            (0, -1000.0, 0.5378828427399902),
            (0, -5000.0, 0.013385701848569713),

            (1, 1.0, 1.0),
            (1, 0.5, 0.9997500000052083),
            (1, -1000.0, 0.5374897097430198),
            (1, -5000.0, 0.01337241229216877),
            (1, 0.0, 0.9995000000416667),

            (-1, -1.0, 1.0),
            (-1, -1.5, 0.9997500000052083),
            (-1, -1000.0, 0.5382761574524354),
            (-1, -5000.0, 0.013399004523107192),
        ]

        for finite_bound, value, expected in positive_infinity_cases:
            self.assertAlmostEqual(normalize(positive_infinity, finite_bound, value), expected)

        for finite_bound, value, expected in negative_infinity_cases:
            self.assertAlmostEqual(normalize(negative_infinity, finite_bound, value), expected)

        # Swapping the two bounds must give the complement of the result above.
        for finite_bound, value, expected in positive_infinity_cases:
            self.assertAlmostEqual(normalize(finite_bound, positive_infinity, value), 1 - expected)

        for finite_bound, value, expected in negative_infinity_cases:
            self.assertAlmostEqual(normalize(finite_bound, negative_infinity, value), 1 - expected)

    def test_pickle(self):
        # A metric must survive a pickle round trip: the restored value is equal
        # to the original and ``get_class()`` returns the very same object.
        metric = problem.PerformanceMetric.ACCURACY

        restored = pickle.loads(pickle.dumps(metric))

        self.assertEqual(metric, restored)
        self.assertIs(metric.get_class(), restored.get_class())
-
# Run the tests when this module is executed directly rather than imported.
if __name__ == '__main__':
    unittest.main()
|