You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

test_runtime.py 73 kB

first commit Former-commit-id: 08bc23ba02cffbce3cf63962390a65459a132e48 [formerly 0795edd4834b9b7dc66db8d10d4cbaf42bbf82cb] [formerly b5010b42541add7e2ea2578bf2da537efc457757 [formerly a7ca09c2c34c4fc8b3d8e01fcfa08eeeb2cae99d]] [formerly 615058473a2177ca5b89e9edbb797f4c2a59c7e5 [formerly 743d8dfc6843c4c205051a8ab309fbb2116c895e] [formerly bb0ea98b1e14154ef464e2f7a16738705894e54b [formerly 960a69da74b81ef8093820e003f2d6c59a34974c]]] [formerly 2fa3be52c1b44665bc81a7cc7d4cea4bbf0d91d5 [formerly 2054589f0898627e0a17132fd9d4cc78efc91867] [formerly 3b53730e8a895e803dfdd6ca72bc05e17a4164c1 [formerly 8a2fa8ab7baf6686d21af1f322df46fd58c60e69]] [formerly 87d1e3a07a19d03c7d7c94d93ab4fa9f58dada7c [formerly f331916385a5afac1234854ee8d7f160f34b668f] [formerly 69fb3c78a483343f5071da4f7e2891b83a49dd18 [formerly 386086f05aa9487f65bce2ee54438acbdce57650]]]] Former-commit-id: a00aed8c934a6460c4d9ac902b9a74a3d6864697 [formerly 26fdeca29c2f07916d837883983ca2982056c78e] [formerly 0e3170d41a2f99ecf5c918183d361d4399d793bf [formerly 3c12ad4c88ac5192e0f5606ac0d88dd5bf8602dc]] [formerly d5894f84f2fd2e77a6913efdc5ae388cf1be0495 [formerly ad3e7bc670ff92c992730d29c9d3aa1598d844e8] [formerly 69fb3c78a483343f5071da4f7e2891b83a49dd18]] Former-commit-id: 3c19c9fae64f6106415fbc948a4dc613b9ee12f8 [formerly 467ddc0549c74bb007e8f01773bb6dc9103b417d] [formerly 5fa518345d958e2760e443b366883295de6d991c [formerly 3530e130b9fdb7280f638dbc2e785d2165ba82aa]] Former-commit-id: 9f5d473d42a435ec0d60149939d09be1acc25d92 [formerly be0b25c4ec2cde052a041baf0e11f774a158105d] Former-commit-id: 9eca71cb73ba9edccd70ac06a3b636b8d4093b04
5 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534
  1. import json
  2. import os
  3. import pickle
  4. import shutil
  5. import sys
  6. import tempfile
  7. import typing
  8. import unittest
  9. import jsonschema
  10. import pandas
# Location of the `common-primitives` git submodule checkout next to this test file.
COMMON_PRIMITIVES_DIR = os.path.join(os.path.dirname(__file__), 'common-primitives')
# NOTE: This insertion should appear before any code attempting to resolve or load primitives,
# so the git submodule version of `common_primitives` is looked at first.
sys.path.insert(0, COMMON_PRIMITIVES_DIR)
  15. from common_primitives.dataset_to_dataframe import DatasetToDataFramePrimitive
  16. from common_primitives.redact_columns import RedactColumnsPrimitive
  17. from common_primitives.train_score_split import TrainScoreDatasetSplitPrimitive
  18. from common_primitives.random_forest import RandomForestClassifierPrimitive
  19. from common_primitives.column_parser import ColumnParserPrimitive
  20. from common_primitives.construct_predictions import ConstructPredictionsPrimitive
  21. from common_primitives.no_split import NoSplitDatasetSplitPrimitive
  22. from common_primitives.remove_columns import RemoveColumnsPrimitive
  23. from common_primitives.simple_profiler import SimpleProfilerPrimitive
# Location of test-only primitives; prepended to the path so the `test_primitives`
# imports below resolve to this checkout.
TEST_PRIMITIVES_DIR = os.path.join(os.path.dirname(__file__), 'data', 'primitives')
sys.path.insert(0, TEST_PRIMITIVES_DIR)
  26. from test_primitives.monomial import MonomialPrimitive
  27. from test_primitives.random import RandomPrimitive
  28. from test_primitives.sum import SumPrimitive
  29. from test_primitives.increment import IncrementPrimitive, Hyperparams as IncrementHyperparams
  30. from test_primitives.primitive_sum import PrimitiveSumPrimitive
  31. from test_primitives.null import NullUnsupervisedLearnerPrimitive
  32. from test_primitives.null import NullTransformerPrimitive
  33. from test_primitives.random_classifier import RandomClassifierPrimitive
  34. from test_primitives.fail import FailPrimitive
  35. from test_primitives.data_hyperparam import DataHyperparamPrimitive
  36. from test_primitives.abs_sum import AbsSumPrimitive
  37. from test_primitives.container_hyperparam import ContainerHyperparamPrimitive
  38. from test_primitives.multi_data_hyperparam import MultiDataHyperparamPrimitive
  39. from test_primitives.primitive_hyperparam import PrimitiveHyperparamPrimitive
  40. from d3m import container, exceptions, index, runtime, utils
  41. from d3m.metadata import base as metadata_base, hyperparams, pipeline as pipeline_module, problem
  42. from d3m.metadata.pipeline_run import PIPELINE_RUN_SCHEMA_VALIDATOR, PipelineRun, RuntimeEnvironment, _validate_pipeline_run_status_consistency, _validate_pipeline_run_random_seeds, _validate_pipeline_run_timestamps
  43. from d3m.primitive_interfaces import base, transformer
# Pipeline (as JSON) used by `test_argument_list`: the first step's "inputs"
# argument is a *list* of data references ("inputs.0".."inputs.2"), exercising
# list-valued step arguments. Step 0 vertically concatenates the inputs and
# step 1 applies the voting primitive.
TEST_PIPELINE_1 = """
{
    "created": "2018-11-05T04:14:02.720699Z",
    "id": "3ffcc6a0-313e-44ae-b551-2ade1386c11e",
    "inputs": [
        {
            "name": "inputs1"
        },
        {
            "name": "inputs2"
        },
        {
            "name": "inputs3"
        }
    ],
    "outputs": [
        {
            "data": "steps.1.produce",
            "name": "Metafeatures"
        }
    ],
    "schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json",
    "steps": [
        {
            "arguments": {
                "inputs": {
                    "data": [
                        "inputs.0",
                        "inputs.1",
                        "inputs.2"
                    ],
                    "type": "CONTAINER"
                }
            },
            "outputs": [
                {
                    "id": "produce"
                }
            ],
            "primitive": {
                "id": "8a8a8c15-bb69-488e-834c-f129de2dd2f6",
                "name": "Vertical Concatenate Primitive",
                "python_path": "d3m.primitives.data_transformation.vertical_concatenate.Test",
                "version": "0.1.0"
            },
            "type": "PRIMITIVE"
        },
        {
            "arguments": {
                "inputs": {
                    "data": "steps.0.produce",
                    "type": "CONTAINER"
                }
            },
            "outputs": [
                {
                    "id": "produce"
                }
            ],
            "primitive": {
                "id": "aea7fc39-f40b-43ce-b926-89758e560e50",
                "name": "Voting Primitive",
                "python_path": "d3m.primitives.classification.voting.Test",
                "version": "0.1.0"
            },
            "type": "PRIMITIVE"
        }
    ]
}
"""
class Resolver(pipeline_module.Resolver):
    """Pipeline resolver which suppresses logging and stdout while resolving primitives."""

    def _get_primitive(self, primitive_description: typing.Dict) -> typing.Optional[typing.Type[base.PrimitiveBase]]:
        # To hide any logging or stdout output.
        with utils.silence():
            return super()._get_primitive(primitive_description)
class Hyperparams(hyperparams.Hyperparams):
    """Empty hyper-params class shared by the test primitives defined in this module."""

    pass
# Type aliases for `VerticalConcatenatePrimitive`: it maps a list (of DataFrames)
# to a single DataFrame.
DataFramesInputs = container.List
DataFrameOutputs = container.DataFrame
class VerticalConcatenatePrimitive(transformer.TransformerPrimitiveBase[DataFramesInputs, DataFrameOutputs, Hyperparams]):
    """Test primitive which vertically concatenates a list of DataFrames into one DataFrame."""

    metadata = metadata_base.PrimitiveMetadata({
        'id': '8a8a8c15-bb69-488e-834c-f129de2dd2f6',
        'version': '0.1.0',
        'name': "Vertical Concatenate Primitive",
        'python_path': 'd3m.primitives.data_transformation.vertical_concatenate.Test',
        'algorithm_types': [
            metadata_base.PrimitiveAlgorithmType.ARRAY_CONCATENATION,
        ],
        'primitive_family': metadata_base.PrimitiveFamily.DATA_TRANSFORMATION
    })

    def produce(self, *, inputs: DataFramesInputs, timeout: float = None, iterations: int = None) -> base.CallResult[DataFrameOutputs]:
        # Require that every input DataFrame has its column at index 1 marked as a
        # predicted target (tests set this semantic type before calling the pipeline).
        for i in range(len(inputs)):
            if not inputs.metadata.has_semantic_type((i, metadata_base.ALL_ELEMENTS, 1), 'https://metadata.datadrivendiscovery.org/types/PredictedTarget'):
                raise Exception("Required metadata missing.")
        outputs = pandas.concat(inputs, ignore_index=True)
        # Regenerate metadata from scratch for the concatenated result.
        outputs.metadata = outputs.metadata.generate(outputs)
        return base.CallResult(outputs)
# Type aliases for `VotingPrimitive`: DataFrame in, DataFrame out.
VotingInputs = container.DataFrame
VotingOutputs = container.DataFrame
class VotingPrimitive(transformer.TransformerPrimitiveBase[VotingInputs, VotingOutputs, Hyperparams]):
    """Test primitive which selects, per ``d3mIndex``, the modal value of the ``class`` column."""

    metadata = metadata_base.PrimitiveMetadata({
        'id': 'aea7fc39-f40b-43ce-b926-89758e560e50',
        'version': '0.1.0',
        'name': "Voting Primitive",
        'python_path': 'd3m.primitives.classification.voting.Test',
        'algorithm_types': [
            metadata_base.PrimitiveAlgorithmType.AGGREGATE_FUNCTION,
        ],
        'primitive_family': metadata_base.PrimitiveFamily.CLASSIFICATION
    })

    def produce(self, *, inputs: VotingInputs, timeout: float = None, iterations: int = None) -> base.CallResult[VotingOutputs]:
        # Majority vote: group rows by "d3mIndex" and take the mode of "class" per group.
        result = inputs.groupby('d3mIndex').apply(lambda x: x['class'].mode())
        result.columns = ['class']
        # Turn the "d3mIndex" group index back into a regular column.
        result = result.reset_index()
        return base.CallResult(container.DataFrame(result, generate_metadata=True))
  161. def set_additionProperties_False(schema_json):
  162. if isinstance(schema_json, typing.Dict):
  163. if 'additionalProperties' in schema_json:
  164. schema_json['additionalProperties'] = False
  165. for key, value in schema_json.items():
  166. set_additionProperties_False(value)
  167. elif isinstance(schema_json, typing.List):
  168. for item in schema_json:
  169. set_additionProperties_False(item)
  170. class TestRuntime(unittest.TestCase):
    def setUp(self):
        # Fresh temporary directory for each test; removed again in `tearDown`.
        self.test_dir = tempfile.mkdtemp()
    def tearDown(self):
        # Remove the per-test temporary directory and everything inside it.
        shutil.rmtree(self.test_dir)
    @classmethod
    def setUpClass(cls):
        # Register all test and common primitives under d3m python paths so that
        # pipelines in this module can resolve them through `d3m.index`.
        to_register = {
            'd3m.primitives.regression.monomial.Test': MonomialPrimitive,
            'd3m.primitives.data_generation.random.Test': RandomPrimitive,
            'd3m.primitives.operator.sum.Test': SumPrimitive,
            'd3m.primitives.operator.increment.Test': IncrementPrimitive,
            'd3m.primitives.operator.primitive_sum.Test': PrimitiveSumPrimitive,
            'd3m.primitives.classification.voting.Test': VotingPrimitive,
            'd3m.primitives.data_transformation.vertical_concatenate.Test': VerticalConcatenatePrimitive,
            'd3m.primitives.operator.null.FailTest': FailPrimitive,
            'd3m.primitives.operator.sum.ContainerHyperparamTest': ContainerHyperparamPrimitive,
            'd3m.primitives.operator.sum.DataHyperparamTest': DataHyperparamPrimitive,
            'd3m.primitives.operator.sum.MultiDataHyperparamTest': MultiDataHyperparamPrimitive,
            'd3m.primitives.operator.sum.PrimitiveHyperparamTest': PrimitiveHyperparamPrimitive,
            'd3m.primitives.operator.sum.AbsTest': AbsSumPrimitive,
            'd3m.primitives.operator.null.UnsupervisedLearnerTest': NullUnsupervisedLearnerPrimitive,
            'd3m.primitives.operator.null.TransformerTest': NullTransformerPrimitive,
            'd3m.primitives.data_transformation.dataset_to_dataframe.Common': DatasetToDataFramePrimitive,
            'd3m.primitives.classification.random_classifier.Test': RandomClassifierPrimitive,
            'd3m.primitives.evaluation.redact_columns.Common': RedactColumnsPrimitive,
            'd3m.primitives.evaluation.train_score_dataset_split.Common': TrainScoreDatasetSplitPrimitive,
            'd3m.primitives.classification.random_forest.Common': RandomForestClassifierPrimitive,
            'd3m.primitives.data_transformation.column_parser.Common': ColumnParserPrimitive,
            'd3m.primitives.data_transformation.construct_predictions.Common': ConstructPredictionsPrimitive,
            'd3m.primitives.evaluation.no_split_dataset_split.Common': NoSplitDatasetSplitPrimitive,
            'd3m.primitives.data_transformation.remove_columns.Common': RemoveColumnsPrimitive,
            'd3m.primitives.schema_discovery.profiler.Common': SimpleProfilerPrimitive
        }
        # To hide any logging or stdout output.
        with utils.silence():
            for python_path, primitive in to_register.items():
                index.register_primitive(python_path, primitive)
            from common_primitives.dataset_map import DataFrameDatasetMapPrimitive
            # We have to do it here because it depends on other primitives being first registered.
            index.register_primitive('d3m.primitives.operator.dataset_map.DataFrameCommon', DataFrameDatasetMapPrimitive)
        # We create runtime environment ourselves so that it is done only once.
        # NOTE(review): "enviroment" is misspelled, but the attribute name is kept
        # because every test in this class references it.
        with utils.silence():
            cls.runtime_enviroment = RuntimeEnvironment(
                worker_id='test',
                base_docker_image={
                    'image_name': 'test',
                    'image_digest': 'sha256:' + ('0' * 64),
                },
                docker_image={
                    'image_name': 'test',
                    'image_digest': 'sha256:' + ('0' * 64),
                },
            )
    def test_basic(self):
        # Fit and produce a simple pipeline, checking outputs, random-seed
        # sensitivity, per-step hyper-params, and that a fitted runtime survives
        # a pickling round-trip.
        with open(os.path.join(os.path.dirname(__file__), 'data', 'pipelines', 'random-sample.yml'), 'r') as pipeline_file:
            p = pipeline_module.Pipeline.from_yaml(pipeline_file, resolver=Resolver())
        r = runtime.Runtime(p, context=metadata_base.Context.TESTING, environment=self.runtime_enviroment)
        inputs = [container.List([0, 1, 42], generate_metadata=True)]
        result = r.fit(inputs, return_values=['outputs.0'])
        result.check_success()
        self.assertTrue(result.pipeline_run)
        self.assertEqual(len(result.values), 1)
        dataframe = result.values['outputs.0']
        # Presumably sampled values for the default random seed, incremented by 1 — derived from the pipeline in random-sample.yml.
        self.assertEqual(dataframe.values.tolist(), [
            [1.764052345967664 + 1],
            [0.4001572083672233 + 1],
            [-1.7062701906250126 + 1],
        ])
        # Produce after fit must give the same values.
        result = r.produce(inputs, return_values=['outputs.0'])
        result.check_success()
        self.assertEqual(len(result.values), 1)
        self.assertTrue(result.pipeline_run)
        dataframe = result.values['outputs.0']
        self.assertEqual(dataframe.values.tolist(), [
            [1.764052345967664 + 1],
            [0.4001572083672233 + 1],
            [-1.7062701906250126 + 1],
        ])
        # A pickled and restored (fitted) runtime must produce the same results.
        pickled = pickle.dumps(r)
        restored = pickle.loads(pickled)
        result = restored.produce(inputs, return_values=['outputs.0'])
        result.check_success()
        self.assertEqual(len(result.values), 1)
        self.assertTrue(result.pipeline_run)
        dataframe = result.values['outputs.0']
        self.assertEqual(dataframe.values.tolist(), [
            [1.764052345967664 + 1],
            [0.4001572083672233 + 1],
            [-1.7062701906250126 + 1],
        ])
        pickle.dumps(r)
        # A different random seed should change the sampled values.
        r = runtime.Runtime(p, random_seed=42, context=metadata_base.Context.TESTING, environment=self.runtime_enviroment)
        inputs = [container.List([0, 1, 42], generate_metadata=True)]
        result = r.fit(inputs, return_values=['outputs.0'])
        result.check_success()
        self.assertEqual(len(result.values), 1)
        self.assertTrue(result.pipeline_run)
        dataframe = result.values['outputs.0']
        self.assertEqual(dataframe.values.tolist(), [
            [0.4967141530112327 + 1],
            [-0.13826430117118466 + 1],
            [-0.11564828238824053 + 1],
        ])
        # Per-step hyper-params: the second step's "amount" is overridden to 10.
        r = runtime.Runtime(p, [{}, {'amount': 10}], random_seed=42, context=metadata_base.Context.TESTING, environment=self.runtime_enviroment)
        pickle.dumps(r)
        inputs = [container.List([0, 1, 42], generate_metadata=True)]
        result = r.fit(inputs, return_values=['outputs.0'])
        result.check_success()
        self.assertEqual(len(result.values), 1)
        self.assertTrue(result.pipeline_run)
        dataframe = result.values['outputs.0']
        self.assertEqual(dataframe.values.tolist(), [
            [0.4967141530112327 + 10],
            [-0.13826430117118466 + 10],
            [-0.11564828238824053 + 10],
        ])
        pickle.dumps(r)
    def test_argument_list(self):
        # A step argument may be a *list* of data references (see TEST_PIPELINE_1);
        # the runtime passes the referenced values to the primitive as one list input.
        p = pipeline_module.Pipeline.from_json(TEST_PIPELINE_1, resolver=Resolver())
        r = runtime.Runtime(p, context=metadata_base.Context.TESTING, environment=self.runtime_enviroment)
        inputs = [
            container.DataFrame({'d3mIndex': [1, 2, 3], 'class': [0, 0, 0]}, generate_metadata=True),
            container.DataFrame({'d3mIndex': [1, 2, 3], 'class': [0, 0, 1]}, generate_metadata=True),
            container.DataFrame({'d3mIndex': [1, 2, 3], 'class': [0, 1, 1]}, generate_metadata=True),
        ]
        for df in inputs:
            # Mark the "class" column (index 1) as a predicted target, as required
            # by VerticalConcatenatePrimitive.
            df.metadata = df.metadata.add_semantic_type((metadata_base.ALL_ELEMENTS, 1), 'https://metadata.datadrivendiscovery.org/types/PredictedTarget')
        result = r.fit(inputs, return_values=['outputs.0'])
        result.check_success()
        dataframe = result.values['outputs.0']
        # Majority vote per d3mIndex across the three inputs.
        self.assertEqual(dataframe.values.tolist(), [[1, 0], [2, 0], [3, 1]])
        pickle.dumps(r)
  303. def test_pipeline_with_primitives_as_hyperparams_from_pipeline(self):
  304. # We create the pipeline.
  305. pipeline_description = pipeline_module.Pipeline()
  306. pipeline_description.add_input(name='input_0')
  307. pipeline_description.add_input(name='input_1')
  308. step_0_primitive = index.get_primitive('d3m.primitives.regression.monomial.Test')
  309. step_0_primitive_metadata = step_0_primitive.metadata.query()
  310. step_0_primitive_description = {
  311. 'id': step_0_primitive_metadata['id'],
  312. 'version': step_0_primitive_metadata['version'],
  313. 'python_path': step_0_primitive_metadata['python_path'],
  314. 'name': step_0_primitive_metadata['name'],
  315. 'digest': step_0_primitive_metadata['digest'],
  316. }
  317. step_0 = pipeline_module.PrimitiveStep(primitive_description=step_0_primitive_description)
  318. step_0.add_argument(name='inputs', argument_type=metadata_base.ArgumentType.CONTAINER, data_reference='inputs.0')
  319. step_0.add_argument(name='outputs', argument_type=metadata_base.ArgumentType.CONTAINER, data_reference='inputs.1')
  320. step_0.add_output('produce')
  321. pipeline_description.add_step(step_0)
  322. step_1_primitive = index.get_primitive('d3m.primitives.operator.primitive_sum.Test')
  323. step_1_primitive_metadata = step_1_primitive.metadata.query()
  324. step_1_primitive_description = {
  325. 'id': step_1_primitive_metadata['id'],
  326. 'version': step_1_primitive_metadata['version'],
  327. 'python_path': step_1_primitive_metadata['python_path'],
  328. 'name': step_1_primitive_metadata['name'],
  329. 'digest': step_1_primitive_metadata['digest'],
  330. }
  331. step_1 = pipeline_module.PrimitiveStep(primitive_description=step_1_primitive_description)
  332. step_1.add_argument(name='inputs', argument_type=metadata_base.ArgumentType.CONTAINER, data_reference='inputs.0')
  333. step_1.add_hyperparameter(name='primitive_1', argument_type=metadata_base.ArgumentType.PRIMITIVE, data=0)
  334. step_1.add_hyperparameter(name='primitive_2', argument_type=metadata_base.ArgumentType.PRIMITIVE, data=0)
  335. step_1.add_output('produce')
  336. pipeline_description.add_step(step_1)
  337. pipeline_description.add_output(name='output', data_reference='steps.1.produce')
  338. r = runtime.Runtime(pipeline_description, context=metadata_base.Context.TESTING, environment=self.runtime_enviroment)
  339. inputs = [container.List([1, 2, 3, 4, 5], generate_metadata=True), container.List([2, 4, 6, 8, 100], generate_metadata=True)]
  340. result = r.fit(inputs, return_values=['outputs.0'])
  341. result.check_success()
  342. self.assertEqual(len(result.values), 1)
  343. results = result.values['outputs.0']
  344. self.assertEqual(results, [
  345. 11.2,
  346. 22.4,
  347. 33.599999999999994,
  348. 44.8,
  349. 56.0,
  350. ])
  351. result = r.produce(inputs, return_values=['outputs.0'])
  352. result.check_success()
  353. self.assertEqual(len(result.values), 1)
  354. results = result.values['outputs.0']
  355. self.assertEqual(results, [
  356. 11.2,
  357. 22.4,
  358. 33.599999999999994,
  359. 44.8,
  360. 56.0,
  361. ])
  362. # Random seed should be different from 0 for hyper-parameter primitive instance.
  363. self.assertEqual(result.pipeline_run.previous_pipeline_run.steps[1].hyperparams['primitive_1'].random_seed, 1)
  364. # Primitive should not be the same instance.
  365. self.assertIsNot(result.pipeline_run.previous_pipeline_run.steps[1].hyperparams['primitive_1'], result.pipeline_run.previous_pipeline_run.steps[1].hyperparams['primitive_2'])
  366. pickle._dumps(r)
    def test_pipeline_with_primitives_as_hyperparams_as_class_value(self):
        # A primitive *class* can be passed directly as a VALUE hyper-parameter;
        # the runtime must instantiate it separately for each hyper-parameter.
        # We create the pipeline.
        pipeline_description = pipeline_module.Pipeline()
        pipeline_description.add_input(name='input_0')
        null_primitive = index.get_primitive('d3m.primitives.operator.null.TransformerTest')
        step_0_primitive = index.get_primitive('d3m.primitives.operator.primitive_sum.Test')
        step_0_primitive_metadata = step_0_primitive.metadata.query()
        step_0_primitive_description = {
            'id': step_0_primitive_metadata['id'],
            'version': step_0_primitive_metadata['version'],
            'python_path': step_0_primitive_metadata['python_path'],
            'name': step_0_primitive_metadata['name'],
            'digest': step_0_primitive_metadata['digest'],
        }
        step_0 = pipeline_module.PrimitiveStep(primitive_description=step_0_primitive_description)
        step_0.add_argument(name='inputs', argument_type=metadata_base.ArgumentType.CONTAINER, data_reference='inputs.0')
        # The same primitive class is passed for both hyper-parameters.
        step_0.add_hyperparameter(name='primitive_1', argument_type=metadata_base.ArgumentType.VALUE, data=null_primitive)
        step_0.add_hyperparameter(name='primitive_2', argument_type=metadata_base.ArgumentType.VALUE, data=null_primitive)
        step_0.add_output('produce')
        pipeline_description.add_step(step_0)
        pipeline_description.add_output(name='output', data_reference='steps.0.produce')
        r = runtime.Runtime(pipeline_description, context=metadata_base.Context.TESTING, environment=self.runtime_enviroment)
        inputs = [container.List([1, 2, 3, 4, 5], generate_metadata=True)]
        result = r.fit(inputs, return_values=['outputs.0'])
        result.check_success()
        self.assertEqual(len(result.values), 1)
        results = result.values['outputs.0']
        self.assertEqual(results, [
            2, 4, 6, 8, 10,
        ])
        result = r.produce(inputs, return_values=['outputs.0'])
        result.check_success()
        self.assertEqual(len(result.values), 1)
        results = result.values['outputs.0']
        self.assertEqual(results, [
            2, 4, 6, 8, 10,
        ])
        # Primitive should not be the same instance.
        self.assertIsNot(result.pipeline_run.previous_pipeline_run.steps[0].hyperparams['primitive_1'], result.pipeline_run.previous_pipeline_run.steps[0].hyperparams['primitive_2'])
        pickle.dumps(r)
    def test_pipeline_with_primitives_as_hyperparams_as_instance_value(self):
        # A primitive *instance* can be passed as a VALUE hyper-parameter; the
        # runtime must clone it per hyper-parameter rather than share the instance.
        # We create the pipeline.
        pipeline_description = pipeline_module.Pipeline()
        pipeline_description.add_input(name='input_0')
        null_primitive = index.get_primitive('d3m.primitives.operator.null.TransformerTest')
        hyperparams_class = null_primitive.metadata.get_hyperparams()
        # An already-constructed instance (with default hyper-params) is used as the value.
        primitive = null_primitive(hyperparams=hyperparams_class.defaults())
        step_0_primitive = index.get_primitive('d3m.primitives.operator.primitive_sum.Test')
        step_0_primitive_metadata = step_0_primitive.metadata.query()
        step_0_primitive_description = {
            'id': step_0_primitive_metadata['id'],
            'version': step_0_primitive_metadata['version'],
            'python_path': step_0_primitive_metadata['python_path'],
            'name': step_0_primitive_metadata['name'],
            'digest': step_0_primitive_metadata['digest'],
        }
        step_0 = pipeline_module.PrimitiveStep(primitive_description=step_0_primitive_description)
        step_0.add_argument(name='inputs', argument_type=metadata_base.ArgumentType.CONTAINER, data_reference='inputs.0')
        step_0.add_hyperparameter(name='primitive_1', argument_type=metadata_base.ArgumentType.VALUE, data=primitive)
        step_0.add_hyperparameter(name='primitive_2', argument_type=metadata_base.ArgumentType.VALUE, data=primitive)
        step_0.add_output('produce')
        pipeline_description.add_step(step_0)
        pipeline_description.add_output(name='output', data_reference='steps.0.produce')
        r = runtime.Runtime(pipeline_description, context=metadata_base.Context.TESTING, environment=self.runtime_enviroment)
        inputs = [container.List([1, 2, 3, 4, 5], generate_metadata=True)]
        result = r.fit(inputs, return_values=['outputs.0'])
        result.check_success()
        self.assertEqual(len(result.values), 1)
        results = result.values['outputs.0']
        self.assertEqual(results, [
            2, 4, 6, 8, 10,
        ])
        result = r.produce(inputs, return_values=['outputs.0'])
        result.check_success()
        self.assertEqual(len(result.values), 1)
        results = result.values['outputs.0']
        self.assertEqual(results, [
            2, 4, 6, 8, 10,
        ])
        # Primitive should not be the same instance.
        self.assertIsNot(null_primitive, result.pipeline_run.previous_pipeline_run.steps[0].hyperparams['primitive_1'])
        self.assertIsNot(result.pipeline_run.previous_pipeline_run.steps[0].hyperparams['primitive_1'], result.pipeline_run.previous_pipeline_run.steps[0].hyperparams['primitive_2'])
        pickle.dumps(r)
    def _fake_inputs(self, runtime, pipeline_run, inputs):
        # NOTE(review): parameter `runtime` shadows the imported `d3m.runtime`
        # module inside this method; kept as-is to preserve the call interface.
        # We fake that inputs were added even if this is not a standard pipeline.
        # TODO: Make tests not require this.
        for input_dataset in inputs:
            pipeline_run.add_input_dataset(input_dataset)
        if runtime is not None:
            runtime._previous_pipeline_run_id = pipeline_run.get_id()
def _build_pipeline(self, pipeline_id: str, sequence=None):
    """Build a linear test pipeline from ``sequence``.

    Each element of ``sequence`` is either a ``pipeline_module.Pipeline``
    (added as a sub-pipeline step) or a dict with key ``primitive_class``
    and optional ``INPUTS`` (list of ``(argument_name, step_index)`` pairs)
    and ``HYPERPARAMS`` (name -> {'TYPE', 'DATA'}) keys. By default each
    step's 'inputs' argument is wired to the previous step's 'produce'
    output, and the pipeline output is the last step's 'produce'.
    """
    if sequence is None:
        sequence = [{'primitive_class': RandomPrimitive}, {'primitive_class': IncrementPrimitive}]
    pipeline_description = {
        'source': {
            'name': 'Test team'
        },
        'name': 'Test pipeline',
        'description': 'Pipeline created to test pipeline-run'
    }
    pipe = pipeline_module.Pipeline(
        pipeline_id,
        source=pipeline_module.Pipeline._get_source(pipeline_description),
        name=pipeline_description['name'],
        description=pipeline_description['description'],
    )
    pipe.add_input('input_data')
    for index, element in enumerate(sequence):
        # default input, argument name is 'inputs', value specified below
        if index == 0:
            inputs = 'inputs.0'
        else:
            inputs = 'steps.{}.produce'.format(index - 1)
        if isinstance(element, pipeline_module.Pipeline):
            # Nest the given pipeline as a sub-pipeline step.
            step = pipeline_module.SubpipelineStep(element.to_json_structure(nest_subpipelines=True))
            step.add_input(inputs)
        elif isinstance(element, dict):
            primitive_description = element['primitive_class'].metadata.query()
            step = pipeline_module.PrimitiveStep(primitive_description)
            if 'INPUTS' in element:
                # Explicit (argument name, producing step index) pairs.
                for arg_name, value in element['INPUTS']:
                    value_str = 'steps.{}.produce'.format(value)
                    step.add_argument(arg_name, metadata_base.ArgumentType.CONTAINER, value_str)
            else:
                # if not specified, use default
                step.add_argument('inputs', metadata_base.ArgumentType.CONTAINER, inputs)
            if 'HYPERPARAMS' in element:
                for hyperparam_name in element['HYPERPARAMS']:
                    hyperparam = element['HYPERPARAMS'][hyperparam_name]
                    step.add_hyperparameter(hyperparam_name, hyperparam['TYPE'], hyperparam['DATA'])
        else:
            raise exceptions.InvalidArgumentTypeError(
                'Unknown type {} in parameter \'sequence\''.format(type(element)))
        step.add_output('produce')
        pipe.add_step(step)
    pipe.add_output('steps.{}.produce'.format(len(sequence) - 1))
    return pipe
  504. def _get_inputs(self):
  505. # TODO: Make tests use a real Dataset instead of a list. Pipeline runs are defined on standard pipelines.
  506. input_data = container.List([1, 3, 4, 2, 5, 3], generate_metadata=True)
  507. # First have to add dummy metadata to the list, which otherwise exist in the dataset.
  508. input_data.metadata = input_data.metadata.update((), {
  509. 'id': '0000000000000000000000000000000000000000000000000000000000000000',
  510. 'digest': '0000000000000000000000000000000000000000000000000000000000000000'
  511. })
  512. inputs = [input_data]
  513. return inputs
def _fit_pipeline(
    self, pipeline, inputs, problem_description=None, context=metadata_base.Context.TESTING, return_values=None
):
    """Fit ``pipeline`` on ``inputs`` and return the resulting pipeline_run.

    The inputs are also registered on the run as input datasets so the
    run validates as if it came from a standard pipeline.
    """
    r = runtime.Runtime(
        pipeline, problem_description=problem_description, context=context,
        environment=self.runtime_enviroment,
    )
    fit_result = r.fit(inputs, return_values=return_values)
    self.assertTrue(fit_result.pipeline_run)
    # We fake that inputs were added even if this is not a standard pipeline.
    # TODO: Make tests not require this.
    for input_dataset in inputs:
        fit_result.pipeline_run.add_input_dataset(input_dataset)
    return fit_result.pipeline_run
def _fit_and_produce_pipeline(
    self, pipeline, inputs, problem_description=None, context=metadata_base.Context.TESTING
):
    """Fit then produce ``pipeline`` on ``inputs``.

    Both runs are checked to be schema-valid and successful; returns the
    ``(fit_pipeline_run, produce_pipeline_run)`` pair.
    """
    r = runtime.Runtime(
        pipeline, problem_description=problem_description, context=context,
        environment=self.runtime_enviroment,
    )
    fit_result = r.fit(inputs)
    self.assertTrue(fit_result.pipeline_run)
    self._fake_inputs(r, fit_result.pipeline_run, inputs)
    self._check_pipelines_valid_and_succeeded([fit_result.pipeline_run])
    produce_result = r.produce(inputs)
    self.assertTrue(produce_result.pipeline_run)
    self._fake_inputs(r, produce_result.pipeline_run, inputs)
    self._check_pipelines_valid_and_succeeded([produce_result.pipeline_run])
    return (fit_result.pipeline_run, produce_result.pipeline_run)
  544. def _is_pipeline_run_successful(self, pipeline_run_json):
  545. if pipeline_run_json['status']['state'] == metadata_base.PipelineRunStatusState.SUCCESS:
  546. return True
  547. elif pipeline_run_json['status']['state'] == metadata_base.PipelineRunStatusState.FAILURE:
  548. return False
  549. else:
  550. self.fail('Pipeline-run document status state set to invalid value')
def _validate_pipeline_run_structure(self, json_structure):
    """Assert ``json_structure`` validates against the pipeline-run schema and consistency checks.

    On validation failure, the full recursive sub-error tree is printed
    before failing the test, to make schema errors debuggable.
    """
    try:
        PIPELINE_RUN_SCHEMA_VALIDATOR.validate(json_structure)
        _validate_pipeline_run_status_consistency(json_structure)
        _validate_pipeline_run_timestamps(json_structure)
        _validate_pipeline_run_random_seeds(json_structure)
    except jsonschema.exceptions.ValidationError as error:
        print('\n', error, '\n')
        print("##### PRINTING RECURSIVE SUBERRORS #####\n")
        self.print_recursive_suberrors(error, indent='\n')
        self.fail("Pipeline_run document failed to validate against the schema")
  562. def _invalidate_pipeline_run_structure(self, json_structure):
  563. is_valid = False
  564. try:
  565. PIPELINE_RUN_SCHEMA_VALIDATOR.validate(json_structure)
  566. is_valid = True
  567. except jsonschema.exceptions.ValidationError as error:
  568. pass
  569. if is_valid:
  570. self.fail("Pipeline_run document should not have validated against the schema")
  571. def _check_pipelines_valid_and_succeeded(self, pipeline_runs):
  572. for pipeline_run in pipeline_runs:
  573. pipeline_run_json = pipeline_run.to_json_structure()
  574. self._validate_pipeline_run_structure(pipeline_run_json)
  575. self.assertTrue(self._is_pipeline_run_successful(pipeline_run_json), json.dumps(pipeline_run_json, indent=4))
  576. def _check_pipelines_valid_and_failed(self, pipeline_runs):
  577. for pipeline_run in pipeline_runs:
  578. pipeline_run_json = pipeline_run.to_json_structure()
  579. self._validate_pipeline_run_structure(pipeline_run_json)
  580. self.assertFalse(self._is_pipeline_run_successful(pipeline_run_json))
  581. def _check_pipelines_invalid(self, pipeline_runs):
  582. for pipeline_run in pipeline_runs:
  583. pipeline_run_json = pipeline_run.to_json_structure()
  584. self._invalidate_pipeline_run_structure(pipeline_run_json)
  585. def test_basic_pipeline_run(self):
  586. inputs = self._get_inputs()
  587. pipe = self._build_pipeline('1490432b-b48a-4a62-8977-5a56e52a3e85')
  588. pipeline_runs = self._fit_and_produce_pipeline(pipe, inputs)
  589. self._check_pipelines_valid_and_succeeded(pipeline_runs)
  590. def test_pipeline_fit_with_return_values(self):
  591. inputs = self._get_inputs()
  592. pipe = self._build_pipeline('cf2e4f93-4b9a-4a49-9ab5-92927b3125df')
  593. pipeline_runs = self._fit_pipeline(pipe, inputs, return_values=['steps.0.produce'])
  594. self._check_pipelines_valid_and_succeeded([pipeline_runs])
  595. def test_pipeline_run_failure(self):
  596. inputs = self._get_inputs()
  597. for hyperparam in ('__init__', 'set_training_data', 'fit', 'produce'):
  598. failure_pipeline = self._build_pipeline('18e96ab3-e3c5-4b29-a446-3e81982eba9c', sequence=[{'primitive_class': RandomPrimitive},
  599. {'primitive_class': FailPrimitive, 'HYPERPARAMS': {'method_to_fail': {'TYPE': metadata_base.ArgumentType.VALUE, 'DATA': hyperparam}}}])
  600. fit_pipeline_run = self._fit_pipeline(failure_pipeline, inputs)
  601. self._check_pipelines_valid_and_failed([fit_pipeline_run])
def test_pipeline_run_failure_return_error(self):
    """A failing step surfaces as a StepFailedError stored on the fit result."""
    inputs = self._get_inputs()
    pipeline = self._build_pipeline('80dee50d-9ca4-4ad5-9a52-7ea30f3eb3e5', sequence=[{'primitive_class': RandomPrimitive},
        {'primitive_class': FailPrimitive, 'HYPERPARAMS': {'method_to_fail': {'TYPE': metadata_base.ArgumentType.VALUE, 'DATA': 'fit'}}}])
    r = runtime.Runtime(
        pipeline, context=metadata_base.Context.TESTING,
        environment=self.runtime_enviroment,
    )
    fit_result = r.fit(inputs)
    # fit() does not raise; the error is recorded on the result object.
    self.assertTrue(fit_result.error)
    self.assertEqual(str(fit_result.error), 'Step 1 for pipeline 80dee50d-9ca4-4ad5-9a52-7ea30f3eb3e5 failed.')
    self.assertIsInstance(fit_result.error, exceptions.StepFailedError)
    # check_success() re-raises the stored error.
    with self.assertRaises(exceptions.StepFailedError) as cm:
        fit_result.check_success()
    self.assertEqual(str(cm.exception), 'Step 1 for pipeline 80dee50d-9ca4-4ad5-9a52-7ea30f3eb3e5 failed.')
  617. def test_pipeline_run_failure_with_subpipeline(self):
  618. inputs = self._get_inputs()
  619. for hyperparam in ('__init__', 'set_training_data', 'fit', 'produce'):
  620. failure_subpipeline = self._build_pipeline('bcd96144-34ae-4a67-a1b5-b911a07d03ed', sequence=[{'primitive_class': FailPrimitive, 'HYPERPARAMS': {'method_to_fail': {'TYPE': metadata_base.ArgumentType.VALUE, 'DATA': hyperparam}}}])
  621. failure_pipeline = self._build_pipeline('cbec1cb2-64df-4d4a-81ea-a829eeac0612', sequence=[{'primitive_class': RandomPrimitive}, failure_subpipeline, {'primitive_class': IncrementPrimitive}])
  622. fit_pipeline_run = self._fit_pipeline(failure_pipeline, inputs)
  623. self._check_pipelines_valid_and_failed([fit_pipeline_run])
  624. # tests previous_pipeline_run when it should be None, and when it should be full
  625. def test_all_previous_pipeline_run_types(self):
  626. inputs = self._get_inputs()
  627. pipe = self._build_pipeline('2617ca0c-552a-4014-a999-2904184ed648')
  628. fit_pipeline_run, produce_pipeline_run = self._fit_and_produce_pipeline(pipe, inputs)
  629. self._check_pipelines_valid_and_succeeded([fit_pipeline_run, produce_pipeline_run])
  630. fit_pipeline_run_json = fit_pipeline_run.to_json_structure()
  631. self.assertTrue(
  632. 'previous_pipeline_run' not in fit_pipeline_run_json,
  633. 'pipeline_run should not contain previous_pipeline_run'
  634. )
  635. produce_pipeline_run_json = produce_pipeline_run.to_json_structure()
  636. self.assertNotEqual(produce_pipeline_run_json['previous_pipeline_run'], None)
  637. self.assertEqual(fit_pipeline_run_json['id'], produce_pipeline_run_json['previous_pipeline_run']['id'])
# tests pipeline_run given each type of context
def test_all_pipeline_run_context_types(self):
    inputs = self._get_inputs()
    pipe = self._build_pipeline('4fb64b4b-baa6-404a-afe3-1ad68a1993c1')
    # Every valid Context enum value must produce valid, successful runs.
    for context in metadata_base.Context:
        pipeline_runs = self._fit_and_produce_pipeline(
            pipe, inputs, context=context
        )
        self._check_pipelines_valid_and_succeeded(pipeline_runs)

    class InvalidContext:
        # Mimics an enum member (exposes ``name``) without being a valid Context.
        def __init__(self, name):
            self.name = name

    invalid_context = InvalidContext('INVALID_CONTEXT')
    pipe = self._build_pipeline('1c05ae77-1f74-48bd-9341-c31338a9c9f0')
    # An invalid context must be rejected by schema validation.
    with self.assertRaises(jsonschema.exceptions.ValidationError):
        pipeline_runs = self._fit_and_produce_pipeline(pipe, inputs, context=invalid_context)
# tests pipeline_run given primitive steps and given subpipeline steps
def test_all_pipeline_run_step_types(self):
    inputs = self._get_inputs()
    # Pipeline made only of primitive steps.
    pipeline_without_subpipeline = self._build_pipeline('dca8efbe-4daa-47a6-a811-9ca633ffc90b', [{'primitive_class': RandomPrimitive}, {'primitive_class': IncrementPrimitive}, {'primitive_class': IncrementPrimitive}, {'primitive_class': IncrementPrimitive}])
    pipeline_runs = self._fit_and_produce_pipeline(pipeline_without_subpipeline, inputs)
    self._check_pipelines_valid_and_succeeded(pipeline_runs)
    # Pipeline containing a sub-pipeline step.
    subpipeline = self._build_pipeline('06dfb07a-f151-467c-9f1c-51a6bf6378a3', [{'primitive_class': IncrementPrimitive}, {'primitive_class': IncrementPrimitive}])
    pipeline_with_subpipeline = self._build_pipeline('293c1883-f81a-459d-a1a8-ba19467d5ad6', [{'primitive_class': RandomPrimitive}, subpipeline, {'primitive_class': IncrementPrimitive}])
    pipeline_runs = self._fit_and_produce_pipeline(pipeline_with_subpipeline, inputs)
    self._check_pipelines_valid_and_succeeded(pipeline_runs)
  664. # tests when there is a subpipeline within a subpipeline
  665. def test_recursive_subpipeline(self):
  666. inputs = self._get_inputs()
  667. subpipeline = self._build_pipeline('1eba8278-45da-448e-92a8-a6daf780563f', [{'primitive_class': IncrementPrimitive}, {'primitive_class': IncrementPrimitive}])
  668. subpipeline = self._build_pipeline('b350beb3-4421-4627-906c-92cbbe900834', [{'primitive_class': IncrementPrimitive}, subpipeline, {'primitive_class': IncrementPrimitive}])
  669. pipeline_with_recursive_subpipeline = self._build_pipeline('17e3ae59-e132-4c56-8573-20be6f84ea05', [{'primitive_class': RandomPrimitive}, subpipeline, {'primitive_class': IncrementPrimitive}])
  670. pipeline_runs = self._fit_and_produce_pipeline(pipeline_with_recursive_subpipeline, inputs)
  671. self._check_pipelines_valid_and_succeeded(pipeline_runs)
def test_all_pipeline_run_hyperparam_types(self):
    """Exercise every hyper-parameter argument type supported by pipeline steps."""
    inputs = self._get_inputs()
    # test value_argument hyperparams (runtime sets defaults)
    pipeline = self._build_pipeline('301702a9-cf1e-4332-9116-696c9908586a')
    pipeline_runs = self._fit_and_produce_pipeline(pipeline, inputs)
    self._check_pipelines_valid_and_succeeded(pipeline_runs)
    # test container_argument
    pipeline = self._build_pipeline('8390ab6f-d619-4cc5-b343-22b91f81eecd', sequence=[{'primitive_class': RandomPrimitive},
        {'primitive_class': ContainerHyperparamPrimitive, 'HYPERPARAMS': {'dataframe': {'TYPE': metadata_base.ArgumentType.CONTAINER, 'DATA': 'steps.0.produce'}}}])
    pipeline_runs = self._fit_and_produce_pipeline(pipeline, inputs)
    self._check_pipelines_valid_and_succeeded(pipeline_runs)
    # test data_argument
    pipeline = self._build_pipeline('f0e0e370-97db-4e67-9eff-5e9b79f253e6', sequence=[{'primitive_class': RandomPrimitive}, {'primitive_class': AbsSumPrimitive},
        {'primitive_class': DataHyperparamPrimitive, 'INPUTS': [('inputs', 0)], 'HYPERPARAMS': {'value': {'TYPE': metadata_base.ArgumentType.DATA, 'DATA': 'steps.1.produce'}}}])
    pipeline_runs = self._fit_and_produce_pipeline(pipeline, inputs)
    self._check_pipelines_valid_and_succeeded(pipeline_runs)
    # test data_arguments (a list of data references)
    pipeline = self._build_pipeline('ab71ff74-5cd1-4e36-8c63-c2cd79085173', sequence=[{'primitive_class': RandomPrimitive}, {'primitive_class': AbsSumPrimitive}, {'primitive_class': AbsSumPrimitive, 'INPUTS': [('inputs', 0)]},
        {'primitive_class': MultiDataHyperparamPrimitive, 'INPUTS': [('inputs', 0)], 'HYPERPARAMS': {'values': {'TYPE': metadata_base.ArgumentType.DATA, 'DATA': ['steps.1.produce', 'steps.2.produce']}}}])
    pipeline_runs = self._fit_and_produce_pipeline(pipeline, inputs)
    self._check_pipelines_valid_and_succeeded(pipeline_runs)
    # test primitive argument (hyper-parameter referencing another step's primitive)
    pipeline = self._build_pipeline('c8b291f1-ff67-49e0-b8a3-a0e6a2d6f013', sequence=[{'primitive_class': RandomPrimitive}, {'primitive_class': AbsSumPrimitive},
        {'primitive_class': PrimitiveHyperparamPrimitive, 'INPUTS': [('inputs', 0)], 'HYPERPARAMS': {'primitive': {'TYPE': metadata_base.ArgumentType.PRIMITIVE, 'DATA': 1}}}])
    pipeline_runs = self._fit_and_produce_pipeline(pipeline, inputs)
    self._check_pipelines_valid_and_succeeded(pipeline_runs)
def test_all_pipeline_run_method_call_base_metadata_types(self):
    """Drive the PipelineRun method-call recording API by hand and validate the result."""
    pipeline = pipeline_module.Pipeline.from_json(TEST_PIPELINE_1, resolver=Resolver())
    pipeline_run = PipelineRun(
        pipeline, phase=metadata_base.PipelineRunPhase.FIT, context=metadata_base.Context.TESTING,
        environment=self.runtime_enviroment, random_seed=0
    )
    inputs = self._get_inputs()[0]
    pipeline_run.add_input_dataset(inputs)
    # State machine: run -> step -> method call, each started then marked successful.
    pipeline_run.run_started()
    pipeline_run.step_started(0)
    primitive_step_id = pipeline_run.add_primitive_step(pipeline.steps[0])
    method_call_id = pipeline_run.add_method_call_to_primitive_step(primitive_step_id, 'fit')
    pipeline_run.method_call_started(method_call_id)
    result = base.CallResult(inputs)
    pipeline_run.method_call_successful(method_call_id)
    pipeline_run.set_method_call_result_metadata(method_call_id, result)
    pipeline_run.step_successful(primitive_step_id)
    pipeline_run.run_successful()
    self._validate_pipeline_run_structure(pipeline_run.to_json_structure())
  717. # test that the phase is set correctly for fit and produce
  718. def test_all_pipeline_run_phase_types(self):
  719. inputs = self._get_inputs()
  720. pipeline = self._build_pipeline('d95a9816-8ede-4fe2-89c5-f5c9d9f1d9fd')
  721. pipeline_runs = self._fit_and_produce_pipeline(pipeline, inputs)
  722. self._check_pipelines_valid_and_succeeded(pipeline_runs)
  723. fit_pipeline_run = pipeline_runs[0]
  724. fit_pipeline_run_json = fit_pipeline_run.to_json_structure()
  725. self.assertEqual(fit_pipeline_run_json['run']['phase'], 'FIT')
  726. produce_pipeline_run = pipeline_runs[1]
  727. produce_pipeline_run_json = produce_pipeline_run.to_json_structure()
  728. self.assertEqual(produce_pipeline_run_json['run']['phase'], 'PRODUCE')
# tests that the first method_call of each step is __init__()
def test_pipeline_run_init_method_calls(self):
    inputs = self._get_inputs()
    pipeline = self._build_pipeline('5a9321df-7e40-443b-9e12-f1d840a677cd')
    pipeline_runs = self._fit_and_produce_pipeline(pipeline, inputs)
    for pipeline_run in pipeline_runs:
        pipeline_run_json = pipeline_run.to_json_structure()
        # Only the fit phase is checked here; produce runs are skipped.
        if pipeline_run_json['run']['phase'] == 'FIT':
            for step in pipeline_run_json['steps']:
                first_method_call = step['method_calls'][0]
                self.assertEqual(first_method_call['name'], '__init__')
def print_recursive_suberrors(self, error, indent):
    """Recursively print a jsonschema ValidationError's sub-errors, deepening ``indent`` per level."""
    for suberror in sorted(error.context, key=lambda e: e.schema_path):
        print(f'{indent}', list(suberror.schema_path), ", ", suberror.message)
        self.print_recursive_suberrors(suberror, indent + '\t')
  744. def get_data(self, dataset_name='iris_dataset_1', problem_name='iris_problem_1'):
  745. if problem_name:
  746. problem_doc_path = os.path.join(
  747. os.path.dirname(__file__), 'data', 'problems', problem_name, 'problemDoc.json'
  748. )
  749. problem_description = problem.Problem.load('file://' + problem_doc_path)
  750. else:
  751. problem_description = None
  752. datasetDoc_path = 'file://' + os.path.join(os.path.dirname(__file__), 'data', 'datasets', dataset_name, 'datasetDoc.json')
  753. iris_dataset = container.Dataset.load(datasetDoc_path)
  754. return problem_description, iris_dataset
  755. def test_recording_hyperparams(self):
  756. pipeline = self._build_pipeline(
  757. '84d5dbb8-6e82-4187-801e-83a46069608f',
  758. sequence=[
  759. {
  760. 'primitive_class': IncrementPrimitive
  761. },
  762. {
  763. 'primitive_class': IncrementPrimitive,
  764. 'HYPERPARAMS': {
  765. 'amount': {
  766. 'TYPE': metadata_base.ArgumentType.VALUE,
  767. 'DATA': 3.14
  768. }
  769. }
  770. },
  771. {
  772. 'primitive_class': IncrementPrimitive
  773. }
  774. ],
  775. )
  776. runtime_hyperparams = [{}, {}, {'amount': 2.72}]
  777. inputs = [container.DataFrame({'a': [1,2,3], 'b': [3,5,8]}, generate_metadata=True)]
  778. # TODO: Make tests use a real Dataset instead of a dataframe. Pipeline runs are defined on standard pipelines.
  779. # First have to add dummy metadata to the dataframe, which otherwise exist in the dataset.
  780. inputs[0].metadata = inputs[0].metadata.update((), {
  781. 'id': '0000000000000000000000000000000000000000000000000000000000000000',
  782. 'digest': '0000000000000000000000000000000000000000000000000000000000000000'
  783. })
  784. r = runtime.Runtime(pipeline, runtime_hyperparams, context=metadata_base.Context.TESTING, environment=self.runtime_enviroment)
  785. fit_result = r.fit(inputs=inputs)
  786. self._fake_inputs(r, fit_result.pipeline_run, inputs)
  787. fit_pipeline_run_json = fit_result.pipeline_run.to_json_structure()
  788. # test default hyperparams recorded in pipeline_run
  789. self.assertTrue(
  790. 'amount' in fit_pipeline_run_json['steps'][0]['hyperparams'],
  791. 'default hyperparams not recorded in pipeline_run'
  792. )
  793. self.assertEqual(
  794. IncrementHyperparams.defaults().values_to_json_structure()['amount'],
  795. fit_pipeline_run_json['steps'][0]['hyperparams']['amount']['data'],
  796. 'defualt hyperparams incorrectly recorded in pipeline_run'
  797. )
  798. # test hyperparams specified in pipeline not recored in pipeline_run
  799. self.assertFalse(
  800. 'hyperparams' in fit_pipeline_run_json['steps'][1],
  801. 'hyperparams specified in the pipeline should not be recorded in the pipeline_run'
  802. )
  803. # test hyperparams set at runtime recored in pipeline_run
  804. self.assertTrue(
  805. 'amount' in fit_pipeline_run_json['steps'][2]['hyperparams'],
  806. 'runtime hyperparams not recorded in pipeline_run'
  807. )
  808. self.assertEqual(
  809. runtime_hyperparams[2]['amount'],
  810. fit_pipeline_run_json['steps'][2]['hyperparams']['amount']['data'],
  811. 'defualt hyperparams incorrectly recorded in pipeline_run'
  812. )
  813. produce_result = r.produce(inputs=inputs)
  814. self._fake_inputs(r, produce_result.pipeline_run, inputs)
  815. for step in produce_result.pipeline_run.to_json_structure()['steps']:
  816. self.assertFalse(
  817. 'hyperparams' in step,
  818. 'hyperparams should not be set in produce pipeline_runs'
  819. )
  820. def test_recording_arguments(self):
  821. pipeline = self._build_pipeline('46bb32a5-f9a0-4c33-97c8-f426ed147e0a')
  822. inputs = self._get_inputs()
  823. r = runtime.Runtime(pipeline, context=metadata_base.Context.TESTING, environment=self.runtime_enviroment)
  824. fit_result = r.fit(inputs=inputs)
  825. self._fake_inputs(r, fit_result.pipeline_run, inputs)
  826. fit_pipeline_run_json = fit_result.pipeline_run.to_json_structure()
  827. pipeline_json_structure = pipeline.to_json_structure()
  828. for pipeline_step, pipeline_run_step in zip(pipeline_json_structure['steps'], fit_pipeline_run_json['steps']):
  829. if 'arguments' in pipeline_run_step:
  830. for argument_name in pipeline_step['arguments']:
  831. self.assertFalse(
  832. argument_name in pipeline_run_step['arguments'],
  833. 'pipeline step arguments should not be recorded in pipeline_run method_call arguments'
  834. )
  835. produce_result = r.produce(inputs=inputs)
  836. self._fake_inputs(r, produce_result.pipeline_run, inputs)
  837. produce_pipeline_run_json = produce_result.pipeline_run.to_json_structure()
  838. for pipeline_step, pipeline_run_step in zip(pipeline_json_structure['steps'], produce_pipeline_run_json['steps']):
  839. if 'arguments' in pipeline_run_step:
  840. for argument_name in pipeline_step['arguments']:
  841. self.assertFalse(
  842. argument_name in pipeline_run_step['arguments'],
  843. 'pipeline step arguments should not be recorded in pipeline_run method_call arguments'
  844. )
def test_saving_to_file(self):
    """Pipeline runs serialize to YAML files, singly and appended as a multi-document stream."""
    if not os.path.exists(self.test_dir):
        os.makedirs(self.test_dir)
    inputs = self._get_inputs()
    pipeline = self._build_pipeline('4327ce61-0580-48b3-9aeb-d3e35c09376d')
    r = runtime.Runtime(pipeline, context=metadata_base.Context.TESTING, environment=self.runtime_enviroment)
    fit_result = r.fit(inputs=inputs)
    self._fake_inputs(r, fit_result.pipeline_run, inputs)
    fit_pipeline_run = fit_result.pipeline_run
    fit_pipeline_run_json = fit_pipeline_run.to_json_structure()
    # Save the fit run alone and check the round-trip preserves key fields.
    fit_file_name = '{}.json'.format(fit_pipeline_run_json['id'])
    fit_file_path = os.path.join(self.test_dir, fit_file_name)
    with open(fit_file_path, 'w') as fit_file:
        fit_pipeline_run.to_yaml(fit_file)
    self.assertTrue(os.path.exists(fit_file_path), 'The fit pipeline_run object should have been saved to {}'.format(fit_file_path))
    with open(fit_file_path, 'r') as fit_file:
        fit_json = utils.yaml_load(fit_file)
        self._validate_pipeline_run_structure(fit_json)
        self.assertEqual(fit_json['id'], fit_pipeline_run_json['id'])
        self.assertEqual(len(fit_json['steps']), len(fit_pipeline_run.steps))
        self.assertEqual(fit_json['status'], fit_pipeline_run.status)
    produce_result = r.produce(inputs=inputs)
    self._fake_inputs(r, produce_result.pipeline_run, inputs)
    produce_pipeline_run = produce_result.pipeline_run
    produce_pipeline_run_json = produce_pipeline_run.to_json_structure()
    # Save fit + produce runs into one file (produce appended as a second YAML document).
    fit_produce_file_name = 'produce_pipeline.json'
    fit_produce_file_path = os.path.join(self.test_dir, fit_produce_file_name)
    with open(fit_produce_file_path, 'w') as fit_produce_file:
        fit_pipeline_run.to_yaml(fit_produce_file)
        produce_pipeline_run.to_yaml(fit_produce_file, appending=True)
    self.assertTrue(os.path.exists(fit_produce_file_path), 'The fit and produce pipeline_run objects should have been saved to {}'.format(fit_produce_file_path))
    with open(fit_produce_file_path, 'r') as fit_produce_file:
        fit_produce_jsons = list(utils.yaml_load_all(fit_produce_file))
        self.assertIsInstance(fit_produce_jsons, typing.Sequence, 'The fit_produce_file should contain a sequence of pipeline_run objects')
        self.assertEqual(len(fit_produce_jsons), 2, 'The fit_produce_file should contain 2 pipeline_run objects')
        fit_json = fit_produce_jsons[0]
        self._validate_pipeline_run_structure(fit_json)
        self.assertEqual(fit_json['id'], fit_pipeline_run_json['id'])
        self.assertEqual(len(fit_json['steps']), len(fit_pipeline_run.steps))
        self.assertEqual(fit_json['status'], fit_pipeline_run.status)
        produce_json = fit_produce_jsons[1]
        self._validate_pipeline_run_structure(produce_json)
        self.assertEqual(produce_json['id'], produce_pipeline_run_json['id'])
        self.assertEqual(len(produce_json['steps']), len(produce_pipeline_run.steps))
        self.assertEqual(produce_json['status'], produce_pipeline_run.status)
def test_fit(self):
    """runtime.fit on a dataset->dataframe->classifier pipeline yields a valid pipeline_run."""
    pipeline = self._build_pipeline(
        '6e79c2cc-e36d-4f22-9016-8184d3385714',
        sequence=[
            {
                'primitive_class': DatasetToDataFramePrimitive,
            },
            {
                'primitive_class': RandomClassifierPrimitive,
                'INPUTS': [('inputs', 0), ('outputs', 0)],
            },
        ],
    )
    iris_problem, iris_dataset = self.get_data()
    inputs = [iris_dataset]
    hyperparams = None
    random_seed = 0
    volumes_dir: typing.Optional[str] = None
    fitted_pipeline, predictions, fit_result = runtime.fit(
        pipeline, inputs, problem_description=iris_problem, hyperparams=hyperparams, random_seed=random_seed,
        volumes_dir=volumes_dir, context=metadata_base.Context.TESTING,
        runtime_environment=self.runtime_enviroment,
    )
    self._validate_pipeline_run_structure(fit_result.pipeline_run.to_json_structure())
def test_prepare_data(self):
    """A data-preparation pipeline run combines with a fit run into one valid document."""
    with open(
        os.path.join(os.path.dirname(__file__), 'data', 'pipelines', 'data-preparation-no-split.yml'),
        'r',
    ) as data_pipeline_file:
        data_pipeline = pipeline_module.Pipeline.from_yaml(data_pipeline_file, resolver=Resolver())
    with open(
        os.path.join(os.path.dirname(__file__), 'data', 'pipelines', 'random-forest-classifier.yml'),
        'r',
    ) as data_pipeline_file:
        with utils.silence():
            pipeline = pipeline_module.Pipeline.from_yaml(data_pipeline_file, resolver=Resolver())
    iris_problem, iris_dataset = self.get_data(dataset_name='iris_dataset_1', problem_name='iris_problem_1')
    inputs = [iris_dataset]
    outputs, data_result = runtime.prepare_data(
        data_pipeline=data_pipeline, problem_description=iris_problem, inputs=inputs,
        data_params={}, context=metadata_base.Context.TESTING, runtime_environment=self.runtime_enviroment)
    fitted_pipeline, predictions, fit_result = runtime.fit(
        pipeline, inputs, problem_description=iris_problem, context=metadata_base.Context.TESTING,
        runtime_environment=self.runtime_enviroment,
    )
    self.assertFalse(fit_result.has_error(), fit_result.error)
    self.assertFalse(data_result.has_error(), data_result.error)
    # A data-preparation run alone is non-standard and must refuse serialization.
    with self.assertRaisesRegex(exceptions.InvalidStateError, "Pipeline run for a non-standard pipeline cannot be converted to a JSON structure."):
        data_result.pipeline_run.to_json_structure()
    # Combining it with the fit run makes the document serializable and valid.
    runtime.combine_pipeline_runs(
        fit_result.pipeline_run, data_pipeline_run=data_result.pipeline_run,
    )
    self.assertFalse(fit_result.has_error(), fit_result.error)
    self.assertEqual(len(outputs), 3)
    self._validate_pipeline_run_structure(fit_result.pipeline_run.to_json_structure())
def test_multi_input_fit(self):
    """runtime.fit with two input datasets and a multi-dataset problem yields a valid run."""
    with open(
        os.path.join(os.path.dirname(__file__), 'data', 'pipelines', 'multi-input-test.json'), 'r'
    ) as pipeline_file:
        with utils.silence():
            pipeline = pipeline_module.Pipeline.from_json(pipeline_file, resolver=Resolver())
    iris_problem, iris_dataset = self.get_data(dataset_name='iris_dataset_1', problem_name='multi_dataset_problem')
    # Empty problem_name loads the dataset only (see get_data).
    _, boston_dataset = self.get_data(dataset_name='boston_dataset_1', problem_name='')
    inputs = [iris_dataset, boston_dataset]
    hyperparams = None
    random_seed = 0
    volumes_dir: typing.Optional[str] = None
    fitted_pipeline, predictions, fit_result = runtime.fit(
        pipeline, inputs, problem_description=iris_problem, hyperparams=hyperparams, random_seed=random_seed,
        volumes_dir=volumes_dir, context=metadata_base.Context.TESTING,
        runtime_environment=self.runtime_enviroment,
    )
    self._validate_pipeline_run_structure(fit_result.pipeline_run.to_json_structure())
  963. def test_multi_input_fit_without_problem(self):
  964. with open(
  965. os.path.join(os.path.dirname(__file__), 'data', 'pipelines', 'multi-input-test.json'), 'r'
  966. ) as pipeline_file:
  967. with utils.silence():
  968. pipeline = pipeline_module.Pipeline.from_json(pipeline_file, resolver=Resolver())
  969. _, iris_dataset = self.get_data(dataset_name='iris_dataset_1', problem_name='')
  970. _, boston_dataset = self.get_data(dataset_name='boston_dataset_1', problem_name='')
  971. inputs = [iris_dataset, boston_dataset]
  972. hyperparams = None
  973. random_seed = 0
  974. volumes_dir: str = None
  975. r = runtime.Runtime(pipeline, context=metadata_base.Context.TESTING, environment=self.runtime_enviroment,
  976. hyperparams=hyperparams, random_seed=random_seed, volumes_dir=volumes_dir)
  977. r.fit(inputs=inputs)
  978. def test_multi_input_fit_with_one_dataset_associated(self):
  979. with open(
  980. os.path.join(os.path.dirname(__file__), 'data', 'pipelines', 'multi-input-test.json'), 'r'
  981. ) as pipeline_file:
  982. with utils.silence():
  983. pipeline = pipeline_module.Pipeline.from_json(pipeline_file, resolver=Resolver())
  984. _, iris_dataset = self.get_data(dataset_name='iris_dataset_1', problem_name='')
  985. boston_problem, boston_dataset = self.get_data(dataset_name='boston_dataset_1', problem_name='boston_problem_1')
  986. inputs = [iris_dataset, boston_dataset]
  987. hyperparams = None
  988. random_seed = 0
  989. volumes_dir: str = None
  990. r = runtime.Runtime(pipeline, context=metadata_base.Context.TESTING, environment=self.runtime_enviroment,
  991. hyperparams=hyperparams, random_seed=random_seed, volumes_dir=volumes_dir,
  992. problem_description=boston_problem)
  993. r.fit(inputs=inputs)
def test_produce(self):
    """runtime.produce on a fitted pipeline yields a valid produce pipeline_run."""
    pipeline = self._build_pipeline(
        'c99ae185-2a74-4919-88b1-66d02e2e21b2',
        sequence=[
            {
                'primitive_class': DatasetToDataFramePrimitive
            },
            {
                'primitive_class': RandomClassifierPrimitive,
                'INPUTS': [('inputs', 0), ('outputs', 0)],
            },
        ],
    )
    iris_problem, iris_dataset = self.get_data()
    inputs = [iris_dataset]
    hyperparams = None
    random_seed = 0
    volumes_dir: typing.Optional[str] = None
    fitted_pipeline, predictions, fit_result = runtime.fit(
        pipeline, inputs, problem_description=iris_problem, hyperparams=hyperparams, random_seed=random_seed,
        volumes_dir=volumes_dir, context=metadata_base.Context.TESTING,
        runtime_environment=self.runtime_enviroment,
    )
    predictions, produce_result = runtime.produce(fitted_pipeline, inputs)
    self._validate_pipeline_run_structure(produce_result.pipeline_run.to_json_structure())
  1019. def test_multi_input_produce(self):
  1020. with open(
  1021. os.path.join(os.path.dirname(__file__), 'data', 'pipelines', 'multi-input-test.json'), 'r'
  1022. ) as pipeline_file:
  1023. with utils.silence():
  1024. pipeline = pipeline_module.Pipeline.from_json(pipeline_file, resolver=Resolver())
  1025. iris_problem, iris_dataset = self.get_data(dataset_name='iris_dataset_1', problem_name='multi_dataset_problem')
  1026. _, iris_dataset_2 = self.get_data(dataset_name='boston_dataset_1', problem_name='')
  1027. inputs = [iris_dataset, iris_dataset_2]
  1028. hyperparams = None
  1029. random_seed = 0
  1030. volumes_dir: str = None
  1031. r = runtime.Runtime(pipeline, context=metadata_base.Context.TESTING, environment=self.runtime_enviroment,
  1032. hyperparams=hyperparams, random_seed=random_seed, volumes_dir=volumes_dir,
  1033. problem_description=iris_problem)
  1034. r.fit(inputs=inputs)
  1035. r.produce(inputs=inputs)
  1036. def test_multi_input_produce_without_problem(self):
  1037. with open(
  1038. os.path.join(os.path.dirname(__file__), 'data', 'pipelines', 'multi-input-test.json'), 'r'
  1039. ) as pipeline_file:
  1040. with utils.silence():
  1041. pipeline = pipeline_module.Pipeline.from_json(pipeline_file, resolver=Resolver())
  1042. _, iris_dataset = self.get_data(dataset_name='iris_dataset_1', problem_name='')
  1043. _, boston_dataset = self.get_data(dataset_name='boston_dataset_1', problem_name='')
  1044. inputs = [iris_dataset, boston_dataset]
  1045. hyperparams = None
  1046. random_seed = 0
  1047. volumes_dir: str = None
  1048. r = runtime.Runtime(pipeline, context=metadata_base.Context.TESTING, environment=self.runtime_enviroment,
  1049. hyperparams=hyperparams, random_seed=random_seed, volumes_dir=volumes_dir)
  1050. r.fit(inputs=inputs)
  1051. r.produce(inputs=inputs)
  1052. def test_multi_input_produce_with_one_dataset_associated(self):
  1053. with open(
  1054. os.path.join(os.path.dirname(__file__), 'data', 'pipelines', 'multi-input-test.json'), 'r'
  1055. ) as pipeline_file:
  1056. with utils.silence():
  1057. pipeline = pipeline_module.Pipeline.from_json(pipeline_file, resolver=Resolver())
  1058. _, iris_dataset_1 = self.get_data(dataset_name='iris_dataset_1', problem_name='')
  1059. boston_problem, iris_dataset_2 = self.get_data(dataset_name='boston_dataset_1', problem_name='boston_problem_1')
  1060. inputs = [iris_dataset_1, iris_dataset_2]
  1061. hyperparams = None
  1062. random_seed = 0
  1063. volumes_dir: str = None
  1064. r = runtime.Runtime(pipeline, context=metadata_base.Context.TESTING, environment=self.runtime_enviroment,
  1065. hyperparams=hyperparams, random_seed=random_seed, volumes_dir=volumes_dir,
  1066. problem_description=boston_problem)
  1067. r.fit(inputs=inputs)
  1068. r.produce(inputs=inputs)
  1069. @staticmethod
  1070. def _build_fail_runtime(method_name, message):
  1071. class FailRuntime(runtime.Runtime):
  1072. pass
  1073. def fail_method(*args, **kwargs):
  1074. raise Exception(message)
  1075. setattr(FailRuntime, method_name, fail_method)
  1076. return FailRuntime
  1077. def test_error_propgation(self):
  1078. for method_name in [
  1079. '_call_primitive_method', '_create_pipeline_primitive',
  1080. '_run_primitive', '_run_subpipeline', '_run_step', '_do_run_step', '_do_run',
  1081. ]:
  1082. error_message = 'runtime failed in method "{}"'.format(method_name)
  1083. inputs = self._get_inputs()
  1084. subpipeline = self._build_pipeline('06dfb07a-f151-467c-9f1c-51a6bf6378a3', [{'primitive_class': IncrementPrimitive}, {'primitive_class': IncrementPrimitive}])
  1085. pipeline_with_subpipeline = self._build_pipeline('293c1883-f81a-459d-a1a8-ba19467d5ad6', [{'primitive_class': RandomPrimitive}, subpipeline, {'primitive_class': IncrementPrimitive}])
  1086. fail_runtime_class = self._build_fail_runtime(method_name, error_message)
  1087. r = fail_runtime_class(
  1088. pipeline_with_subpipeline, context=metadata_base.Context.TESTING,
  1089. environment=self.runtime_enviroment,
  1090. )
  1091. fit_result = r.fit(inputs)
  1092. self.assertTrue(fit_result.pipeline_run)
  1093. self._fake_inputs(r, fit_result.pipeline_run, inputs)
  1094. self._check_pipelines_valid_and_failed([fit_result.pipeline_run])
  1095. self.assertTrue(
  1096. str(fit_result.error) in [
  1097. error_message,
  1098. 'Step 0 for pipeline 293c1883-f81a-459d-a1a8-ba19467d5ad6 failed.',
  1099. 'Step 1 for pipeline 293c1883-f81a-459d-a1a8-ba19467d5ad6 failed.',
  1100. ],
  1101. 'Unexpected error message: {}'.format(fit_result.error)
  1102. )
  1103. def test_get_singleton_value(self):
  1104. l = container.List([1], generate_metadata=True)
  1105. l.metadata = l.metadata.update((0,), {'custom': 'metadata'})
  1106. s = runtime.get_singleton_value(l)
  1107. self.assertEqual(s, 1)
  1108. l = container.List([container.List([1], generate_metadata=True)], generate_metadata=True)
  1109. l.metadata = l.metadata.update((0,), {'custom': 'metadata1'})
  1110. l.metadata = l.metadata.update((0, 0), {'custom': 'metadata2'})
  1111. s = runtime.get_singleton_value(l)
  1112. self.assertEqual(s, [1])
  1113. self.assertEqual(utils.to_json_structure(s.metadata.to_internal_simple_structure()), [{
  1114. 'selector': [],
  1115. 'metadata': {
  1116. 'custom': 'metadata1',
  1117. 'dimension': {'length': 1},
  1118. 'schema': 'https://metadata.datadrivendiscovery.org/schemas/v0/container.json',
  1119. 'structural_type': 'd3m.container.list.List'
  1120. },
  1121. }, {
  1122. 'selector': ['__ALL_ELEMENTS__'],
  1123. 'metadata': {'structural_type': 'int'},
  1124. }, {
  1125. 'selector': [0],
  1126. 'metadata': {'custom': 'metadata2'},
  1127. }])
  1128. d = container.DataFrame({'a': [1], 'b': ['one']}, generate_metadata=True)
  1129. s = runtime.get_singleton_value(d)
  1130. self.assertEqual(s, [1, 'one'])
  1131. self.assertEqual(utils.to_json_structure(s.metadata.to_internal_simple_structure()), [{
  1132. 'selector': [],
  1133. 'metadata': {
  1134. 'dimension': {
  1135. 'length': 2,
  1136. # TODO: "name" and "semantic_types" here should be removed.
  1137. # See: https://gitlab.com/datadrivendiscovery/d3m/issues/336
  1138. 'name': 'columns',
  1139. 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'],
  1140. },
  1141. 'schema': 'https://metadata.datadrivendiscovery.org/schemas/v0/container.json',
  1142. 'structural_type': 'd3m.container.list.List',
  1143. },
  1144. }, {
  1145. 'selector': [0],
  1146. 'metadata': {'name': 'a', 'structural_type': 'numpy.int64'},
  1147. }, {
  1148. 'selector': [1],
  1149. 'metadata': {'name': 'b', 'structural_type': 'str'},
  1150. }])
  1151. def test_unfitted_primitive(self):
  1152. pipeline = pipeline_module.Pipeline()
  1153. pipeline.add_input()
  1154. step = pipeline_module.PrimitiveStep(
  1155. {
  1156. 'id': '3b09ba74-cc90-4f22-9e0a-0cf4f29a7e28',
  1157. 'version': '0.1.0',
  1158. 'name': "Removes columns",
  1159. 'python_path': 'd3m.primitives.data_transformation.remove_columns.Common',
  1160. },
  1161. resolver=pipeline_module.Resolver(),
  1162. )
  1163. step.add_hyperparameter('columns', metadata_base.ArgumentType.VALUE, [3])
  1164. pipeline.add_step(step)
  1165. step = pipeline_module.PrimitiveStep(
  1166. {
  1167. 'id': '5bef5738-1638-48d6-9935-72445f0eecdc',
  1168. 'version': '0.1.0',
  1169. 'name': "Map DataFrame resources to new resources using provided primitive",
  1170. 'python_path': 'd3m.primitives.operator.dataset_map.DataFrameCommon',
  1171. },
  1172. resolver=pipeline_module.Resolver(),
  1173. )
  1174. step.add_argument('inputs', metadata_base.ArgumentType.CONTAINER, 'inputs.0')
  1175. step.add_output('produce')
  1176. step.add_hyperparameter('primitive', metadata_base.ArgumentType.PRIMITIVE, 0)
  1177. pipeline.add_step(step)
  1178. pipeline.add_output('steps.1.produce')
  1179. pipeline.check(allow_placeholders=False, standard_pipeline=False, input_types={'inputs.0': container.Dataset})
  1180. _, dataset = self.get_data()
  1181. self.assertEqual(dataset['learningData'].shape, (150, 6))
  1182. r = runtime.Runtime(pipeline, context=metadata_base.Context.TESTING, is_standard_pipeline=False, environment=self.runtime_enviroment)
  1183. inputs = [dataset]
  1184. result = r.fit(inputs, return_values=['outputs.0'])
  1185. result.check_success()
  1186. self.assertTrue(result.pipeline_run)
  1187. self.assertEqual(len(result.values), 1)
  1188. output_dataset = result.values['outputs.0']
  1189. self.assertEqual(output_dataset['learningData'].shape, (150, 5))
  1190. result = r.produce(inputs, return_values=['outputs.0'])
  1191. result.check_success()
  1192. self.assertEqual(len(result.values), 1)
  1193. self.assertTrue(result.pipeline_run)
  1194. output_dataset = result.values['outputs.0']
  1195. self.assertEqual(output_dataset['learningData'].shape, (150, 5))
  1196. pickled = pickle.dumps(r)
  1197. restored = pickle.loads(pickled)
  1198. result = restored.produce(inputs, return_values=['outputs.0'])
  1199. result.check_success()
  1200. self.assertEqual(len(result.values), 1)
  1201. self.assertTrue(result.pipeline_run)
  1202. output_dataset = result.values['outputs.0']
  1203. self.assertEqual(output_dataset['learningData'].shape, (150, 5))
  1204. pickle.dumps(r)
def test_pipeline_openml(self):
    """Builds a standard 5-step classification pipeline and runs it end-to-end on the
    OpenML "iris" dataset (id 61), checking prediction shape and target semantic types.

    NOTE(review): this test downloads the dataset from www.openml.org — it requires
    network access to run.
    """
    # Creating pipeline
    pipeline_description = pipeline_module.Pipeline()
    pipeline_description.add_input(name='inputs')
    # Step 0: dataset_to_dataframe
    step_0 = pipeline_module.PrimitiveStep(
        primitive=index.get_primitive('d3m.primitives.data_transformation.dataset_to_dataframe.Common'),
    )
    step_0.add_argument(name='inputs', argument_type=metadata_base.ArgumentType.CONTAINER, data_reference='inputs.0')
    step_0.add_output('produce')
    pipeline_description.add_step(step_0)
    # Step 1: profiler
    step_1 = pipeline_module.PrimitiveStep(
        primitive=index.get_primitive('d3m.primitives.schema_discovery.profiler.Common'),
    )
    step_1.add_argument(name='inputs', argument_type=metadata_base.ArgumentType.CONTAINER, data_reference='steps.0.produce')
    step_1.add_output('produce')
    pipeline_description.add_step(step_1)
    # Step 2: column_parser
    step_2 = pipeline_module.PrimitiveStep(
        primitive=index.get_primitive('d3m.primitives.data_transformation.column_parser.Common'),
    )
    step_2.add_argument(name='inputs', argument_type=metadata_base.ArgumentType.CONTAINER, data_reference='steps.1.produce')
    step_2.add_output('produce')
    pipeline_description.add_step(step_2)
    # Step 3: random_forest (comment previously mislabeled this as "Step 4")
    step_3 = pipeline_module.PrimitiveStep(
        primitive=index.get_primitive('d3m.primitives.classification.random_forest.Common'),
    )
    step_3.add_argument(name='inputs', argument_type=metadata_base.ArgumentType.CONTAINER, data_reference='steps.2.produce')
    step_3.add_argument(name='outputs', argument_type=metadata_base.ArgumentType.CONTAINER, data_reference='steps.2.produce')
    step_3.add_hyperparameter(name='return_result', argument_type=metadata_base.ArgumentType.VALUE, data='replace')
    step_3.add_output('produce')
    pipeline_description.add_step(step_3)
    # Step 4: construct predictions (comment previously mislabeled this as "Step 5")
    step_4 = pipeline_module.PrimitiveStep(
        primitive=index.get_primitive('d3m.primitives.data_transformation.construct_predictions.Common'),
    )
    step_4.add_argument(name='inputs', argument_type=metadata_base.ArgumentType.CONTAINER, data_reference='steps.3.produce')
    step_4.add_argument(name='reference', argument_type=metadata_base.ArgumentType.CONTAINER, data_reference='steps.2.produce')
    step_4.add_output('produce')
    pipeline_description.add_step(step_4)
    # Final Output
    pipeline_description.add_output(name='output predictions', data_reference='steps.4.produce')
    # Load OpenML Dataset
    dataset_id = 61
    dataset_name = 'iris'
    openml_dataset_uri = 'https://www.openml.org/d/{dataset_id}'.format(dataset_id=dataset_id)
    ds = container.Dataset.load(openml_dataset_uri, dataset_id=str(dataset_id), dataset_name=dataset_name)
    with utils.silence():
        r = runtime.Runtime(pipeline=pipeline_description, context=metadata_base.Context.TESTING)
        r.fit(inputs=[ds])
        result = r.produce(inputs=[ds])
    result.check_success()
    predictions = result.values['outputs.0']
    # 150 iris rows, two columns (index + predicted target).
    self.assertEqual(predictions.shape, (150, 2))
    # The prediction column must be marked PredictedTarget, and not TrueTarget.
    self.assertTrue(predictions.metadata.has_semantic_type(
        (metadata_base.ALL_ELEMENTS, 1),
        'https://metadata.datadrivendiscovery.org/types/PredictedTarget'),
    )
    self.assertFalse(predictions.metadata.has_semantic_type(
        (metadata_base.ALL_ELEMENTS, 1),
        'https://metadata.datadrivendiscovery.org/types/TrueTarget'),
    )
# Run the test suite when this file is executed as a script.
if __name__ == '__main__':
    unittest.main()

全栈的自动化机器学习系统,主要针对多变量时间序列数据的异常检测。TODS提供了详尽的用于构建基于机器学习的异常检测系统的模块,它们包括:数据处理(data processing),时间序列处理( time series processing),特征分析(feature analysis),检测算法(detection algorithms),和强化模块( reinforcement module)。这些模块所提供的功能包括常见的数据预处理、时间序列数据的平滑或变换,从时域或频域中抽取特征、多种多样的检测算