You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

test_containers.py 102 kB

first commit Former-commit-id: 08bc23ba02cffbce3cf63962390a65459a132e48 [formerly 0795edd4834b9b7dc66db8d10d4cbaf42bbf82cb] [formerly b5010b42541add7e2ea2578bf2da537efc457757 [formerly a7ca09c2c34c4fc8b3d8e01fcfa08eeeb2cae99d]] [formerly 615058473a2177ca5b89e9edbb797f4c2a59c7e5 [formerly 743d8dfc6843c4c205051a8ab309fbb2116c895e] [formerly bb0ea98b1e14154ef464e2f7a16738705894e54b [formerly 960a69da74b81ef8093820e003f2d6c59a34974c]]] [formerly 2fa3be52c1b44665bc81a7cc7d4cea4bbf0d91d5 [formerly 2054589f0898627e0a17132fd9d4cc78efc91867] [formerly 3b53730e8a895e803dfdd6ca72bc05e17a4164c1 [formerly 8a2fa8ab7baf6686d21af1f322df46fd58c60e69]] [formerly 87d1e3a07a19d03c7d7c94d93ab4fa9f58dada7c [formerly f331916385a5afac1234854ee8d7f160f34b668f] [formerly 69fb3c78a483343f5071da4f7e2891b83a49dd18 [formerly 386086f05aa9487f65bce2ee54438acbdce57650]]]] Former-commit-id: a00aed8c934a6460c4d9ac902b9a74a3d6864697 [formerly 26fdeca29c2f07916d837883983ca2982056c78e] [formerly 0e3170d41a2f99ecf5c918183d361d4399d793bf [formerly 3c12ad4c88ac5192e0f5606ac0d88dd5bf8602dc]] [formerly d5894f84f2fd2e77a6913efdc5ae388cf1be0495 [formerly ad3e7bc670ff92c992730d29c9d3aa1598d844e8] [formerly 69fb3c78a483343f5071da4f7e2891b83a49dd18]] Former-commit-id: 3c19c9fae64f6106415fbc948a4dc613b9ee12f8 [formerly 467ddc0549c74bb007e8f01773bb6dc9103b417d] [formerly 5fa518345d958e2760e443b366883295de6d991c [formerly 3530e130b9fdb7280f638dbc2e785d2165ba82aa]] Former-commit-id: 9f5d473d42a435ec0d60149939d09be1acc25d92 [formerly be0b25c4ec2cde052a041baf0e11f774a158105d] Former-commit-id: 9eca71cb73ba9edccd70ac06a3b636b8d4093b04
5 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608
  1. import copy
  2. import os.path
  3. import pickle
  4. import tempfile
  5. import unittest
  6. import warnings
  7. import numpy
  8. import pandas
  9. import pandas.core.common
  10. from d3m import container, utils
  11. from d3m.container import utils as container_utils
  12. from d3m.metadata import base as metadata_base
  # Maps a human-readable label to a callable that produces a copy of its
  # argument. The test methods iterate over this mapping and use the label
  # as the assertion message, so a failure names the copy strategy involved.
  copy_functions = {
      'obj.copy()': lambda value: value.copy(),
      'obj[:]': lambda value: value[:],
      'copy.copy()': copy.copy,
      'copy.deepcopy()': copy.deepcopy,
      'pickle.loads(pickle.dumps())': lambda value: pickle.loads(pickle.dumps(value)),
  }
  20. class TestContainers(unittest.TestCase):
  def test_list(self):
      """Exercise ``container.List``: construction, copying, indexing and operators.

      Every derived list (copy, slice, sum, product) is expected to stay a
      ``container.List`` and to expose metadata equal to the source list's.
      """
      original_items = [1, 2, 3]

      def check_derived(result, expected_items):
          # Shared checks for a list derived from ``values`` through an operator.
          self.assertSequenceEqual(result, expected_items)
          self.assertIsInstance(result, container.List)
          self.assertTrue(hasattr(result, 'metadata'))
          self.assertEqual(
              values.metadata.to_internal_json_structure(),
              result.metadata.to_internal_json_structure(),
          )

      # An empty container list still has a metadata attribute.
      values = container.List()
      self.assertTrue(hasattr(values, 'metadata'))

      values = container.List([1, 2, 3], generate_metadata=True)
      values.metadata = values.metadata.update((), {
          'test': 'foobar',
      })

      self.assertSequenceEqual(values, original_items)
      self.assertIsInstance(values, container.List)
      self.assertTrue(hasattr(values, 'metadata'))
      self.assertEqual(values.metadata.query(()).get('test'), 'foobar')

      self.assertIsInstance(values, container.List)
      self.assertIsInstance(values, list)
      self.assertNotIsInstance([], container.List)

      # Each copy strategy must keep the type, the content and the metadata.
      for name, copy_function in copy_functions.items():
          duplicate = copy_function(values)

          self.assertIsInstance(duplicate, container.List, name)
          self.assertTrue(hasattr(duplicate, 'metadata'), name)

          self.assertSequenceEqual(values, duplicate, name)
          self.assertEqual(
              values.metadata.to_internal_json_structure(),
              duplicate.metadata.to_internal_json_structure(),
              name,
          )
          self.assertEqual(duplicate.metadata.query(()).get('test'), 'foobar', name)

      # Constructing from an existing list with extra metadata merges both.
      extra_metadata = {
          'test2': 'barfoo',
      }
      duplicate = container.List(values, extra_metadata, generate_metadata=True)

      self.assertIsInstance(duplicate, container.List)
      self.assertTrue(hasattr(duplicate, 'metadata'))

      self.assertSequenceEqual(values, duplicate)
      expected_top_level = {
          'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
          'structural_type': container.List,
          'dimension': {
              'length': 3,
          },
          'test': 'foobar',
          'test2': 'barfoo',
      }
      self.assertEqual(duplicate.metadata.query(()), expected_top_level)

      # Indexing follows plain list semantics.
      self.assertEqual(values[1], 2)

      with self.assertRaisesRegex(TypeError, 'list indices must be integers or slices, not tuple'):
          values[1, 2]

      # Slicing.
      sliced = values[1:3]
      self.assertSequenceEqual(values, original_items)
      check_derived(sliced, [2, 3])

      # Concatenation, then in-place concatenation.
      extended = values + [4, 5]
      self.assertSequenceEqual(values, original_items)
      check_derived(extended, [1, 2, 3, 4, 5])

      extended += [6, 7]
      check_derived(extended, [1, 2, 3, 4, 5, 6, 7])

      # Repetition from the right and from the left, then in place.
      repeated = values * 3
      self.assertSequenceEqual(values, original_items)
      check_derived(repeated, [1, 2, 3, 1, 2, 3, 1, 2, 3])

      repeated = 3 * values
      self.assertSequenceEqual(values, original_items)
      check_derived(repeated, [1, 2, 3, 1, 2, 3, 1, 2, 3])

      repeated *= 2
      check_derived(repeated, [1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3])
  def test_ndarray(self):
      """Exercise ``container.ndarray``: construction, copying and metadata merging."""
      expected_values = [1, 2, 3]

      wrapped = container.ndarray(numpy.array([1, 2, 3]), generate_metadata=True)

      self.assertTrue(numpy.array_equal(wrapped, expected_values))
      self.assertIsInstance(wrapped, container.ndarray)
      self.assertTrue(hasattr(wrapped, 'metadata'))

      # The container is a numpy array, but a plain numpy array is not a container.
      self.assertIsInstance(wrapped, numpy.ndarray)
      self.assertNotIsInstance(numpy.array([]), container.ndarray)

      wrapped.metadata = wrapped.metadata.update((), {
          'test': 'foobar',
      })
      self.assertEqual(wrapped.metadata.query(()).get('test'), 'foobar')

      # Each copy strategy must keep the type, the content and the metadata.
      for name, copy_function in copy_functions.items():
          duplicate = copy_function(wrapped)

          self.assertIsInstance(duplicate, container.ndarray, name)
          self.assertTrue(hasattr(duplicate, 'metadata'), name)

          self.assertTrue(numpy.array_equal(wrapped, duplicate), name)
          self.assertEqual(
              wrapped.metadata.to_internal_json_structure(),
              duplicate.metadata.to_internal_json_structure(),
              name,
          )
          self.assertEqual(duplicate.metadata.query(()).get('test'), 'foobar', name)

      # Constructing from an existing array with extra metadata merges both.
      extra_metadata = {
          'test2': 'barfoo',
      }
      duplicate = container.ndarray(wrapped, extra_metadata, generate_metadata=True)

      self.assertIsInstance(duplicate, container.ndarray)
      self.assertTrue(hasattr(duplicate, 'metadata'))

      self.assertTrue(numpy.array_equal(wrapped, duplicate))
      expected_top_level = {
          'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
          'structural_type': container.ndarray,
          'dimension': {
              'length': 3,
          },
          'test': 'foobar',
          'test2': 'barfoo',
      }
      self.assertEqual(duplicate.metadata.query(()), expected_top_level)

      # A plain Python list is accepted as input as well.
      from_list = container.ndarray([1, 2, 3], generate_metadata=True)
      self.assertTrue(numpy.array_equal(from_list, expected_values))
      self.assertIsInstance(from_list, container.ndarray)
      self.assertTrue(hasattr(from_list, 'metadata'))
  def test_dataframe_to_csv(self):
      """Check that ``to_csv`` writes the column names stored in metadata, not the pandas ones."""
      plain_frame = pandas.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]})
      frame = container.DataFrame(plain_frame, generate_metadata=True)

      # Rename both columns in metadata only; the pandas column labels stay 'A' and 'B'.
      for column_index, metadata_name in enumerate(('E', 'F')):
          frame.metadata = frame.metadata.update(
              (metadata_base.ALL_ELEMENTS, column_index),
              {'name': metadata_name},
          )

      self.assertEqual(frame.columns.tolist(), ['A', 'B'])
      self.assertEqual(frame.to_csv(), 'E,F\n1,4\n2,5\n3,6\n')
  def test_dataframe(self):
      """Exercise ``container.DataFrame``: construction, copying and metadata merging.

      Also guards against column names shadowing the ``metadata`` attribute
      and the ``select_columns`` method.
      """
      def column_data():
          # Fresh dict on every call so no two frames are built from the same object.
          return {'A': [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9]}

      frame = container.DataFrame(pandas.DataFrame(column_data()), generate_metadata=True)

      self.assertTrue(frame._data.equals(pandas.DataFrame(column_data())._data))
      self.assertIsInstance(frame, container.DataFrame)
      self.assertTrue(hasattr(frame, 'metadata'))

      # The container is a pandas frame, but a plain pandas frame is not a container.
      self.assertIsInstance(frame, pandas.DataFrame)
      self.assertNotIsInstance(pandas.DataFrame({'A': [1, 2, 3]}), container.DataFrame)

      frame.metadata = frame.metadata.update((), {
          'test': 'foobar',
      })
      self.assertEqual(frame.metadata.query(()).get('test'), 'foobar')

      # Each copy strategy must keep the type, the content and the metadata.
      for name, copy_function in copy_functions.items():
          duplicate = copy_function(frame)

          self.assertIsInstance(duplicate, container.DataFrame, name)
          self.assertTrue(hasattr(duplicate, 'metadata'), name)

          self.assertTrue(frame.equals(duplicate), name)
          self.assertEqual(
              frame.metadata.to_internal_json_structure(),
              duplicate.metadata.to_internal_json_structure(),
              name,
          )
          self.assertEqual(duplicate.metadata.query(()).get('test'), 'foobar', name)

      # Constructing from an existing frame with extra metadata merges both.
      extra_metadata = {
          'test2': 'barfoo',
      }
      duplicate = container.DataFrame(frame, extra_metadata, generate_metadata=True)

      self.assertIsInstance(duplicate, container.DataFrame)
      self.assertTrue(hasattr(duplicate, 'metadata'))

      self.assertTrue(numpy.array_equal(frame, duplicate))
      expected_top_level = {
          'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
          'structural_type': container.DataFrame,
          'semantic_types': ('https://metadata.datadrivendiscovery.org/types/Table',),
          'dimension': {
              'name': 'rows',
              'semantic_types': ('https://metadata.datadrivendiscovery.org/types/TabularRow',),
              'length': 3
          },
          'test': 'foobar',
          'test2': 'barfoo',
      }
      self.assertEqual(duplicate.metadata.query(()), expected_top_level)

      # A plain dict is accepted as input as well.
      from_dict = container.DataFrame(column_data(), generate_metadata=True)
      self.assertTrue(from_dict._data.equals(pandas.DataFrame(column_data())._data))
      self.assertIsInstance(from_dict, container.DataFrame)
      self.assertTrue(hasattr(from_dict, 'metadata'))

      # Regression tests to make sure column name cannot overwrite DataFrame
      # attributes we use (like metadata and custom methods).
      shadowing = container.DataFrame({'metadata': [0], 'select_columns': [1]})
      self.assertIsInstance(shadowing.metadata, metadata_base.DataMetadata)
      self.assertIsInstance(shadowing.select_columns([0]), container.DataFrame)
      self.assertEqual(shadowing.loc[0, 'metadata'], 0)
      self.assertEqual(shadowing.loc[0, 'select_columns'], 1)
  def test_dataset(self):
      """Check that a loaded ``container.Dataset`` keeps resources and metadata when copied."""
      loaded = container.Dataset.load('sklearn://boston')

      self.assertIsInstance(loaded, container.Dataset)
      self.assertTrue(hasattr(loaded, 'metadata'))

      loaded.metadata = loaded.metadata.update((), {
          'test': 'foobar',
      })
      self.assertEqual(loaded.metadata.query(()).get('test'), 'foobar')

      # Slicing ('obj[:]') is not supported on dicts, so that strategy is left out.
      applicable = [
          (name, copy_function)
          for name, copy_function in copy_functions.items()
          if name != 'obj[:]'
      ]

      for name, copy_function in applicable:
          duplicate = copy_function(loaded)

          self.assertIsInstance(duplicate, container.Dataset, name)
          self.assertTrue(hasattr(duplicate, 'metadata'), name)

          self.assertEqual(len(loaded), len(duplicate), name)
          self.assertEqual(loaded.keys(), duplicate.keys(), name)
          # Compare every resource of the dataset with its copy.
          for resource_name in loaded.keys():
              resources_match = numpy.array_equal(loaded[resource_name], duplicate[resource_name])
              self.assertTrue(resources_match, name)
          self.assertEqual(
              loaded.metadata.to_internal_json_structure(),
              duplicate.metadata.to_internal_json_structure(),
              name,
          )
          self.assertEqual(duplicate.metadata.query(()).get('test'), 'foobar', name)
  def test_list_ndarray_int(self):
      """Convert List -> ndarray -> List and check the generated metadata at each step.

      The custom ``foo`` entry set on the first list is expected to show up
      in the metadata of every later container.
      """
      def simple_structure(value):
          # JSON-compatible view of the container's metadata.
          return utils.to_json_structure(value.metadata.to_internal_simple_structure())

      def expected_structure(container_type, element_type):
          # Expected metadata: one top-level entry and one entry for all elements.
          return [{
              'selector': [],
              'metadata': {
                  'foo': 'bar',
                  'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
                  'structural_type': container_type,
                  'dimension': {
                      'length': 3,
                  },
              },
          }, {
              'selector': ['__ALL_ELEMENTS__'],
              'metadata': {
                  'structural_type': element_type,
              },
          }]

      # With custom metadata which should be preserved.
      source_list = container.List([1, 2, 3], {
          'foo': 'bar',
      }, generate_metadata=True)
      self.assertEqual(
          simple_structure(source_list),
          expected_structure('d3m.container.list.List', 'int'),
      )

      as_array = container.ndarray(source_list, generate_metadata=True)
      self.assertEqual(
          simple_structure(as_array),
          expected_structure('d3m.container.numpy.ndarray', 'numpy.int64'),
      )

      # Going back to a list keeps the numpy element type.
      back_to_list = container.List(as_array, generate_metadata=True)
      self.assertEqual(
          simple_structure(back_to_list),
          expected_structure('d3m.container.list.List', 'numpy.int64'),
      )
  def test_dataframe_ndarray_int_noncompact_metadata(self):
      """Convert DataFrame -> ndarray -> DataFrame with ``compact=False`` metadata generation.

      Metadata is generated explicitly after each construction and compared
      against the expected structure; the custom ``foo`` entry is expected at
      the top level of every container.
      """
      def simple_structure(value):
          # JSON-compatible view of the container's metadata.
          return utils.to_json_structure(value.metadata.to_internal_simple_structure())

      def table_entry(container_type):
          # Top-level entry, including the rows dimension.
          return {
              'selector': [],
              'metadata': {
                  'foo': 'bar',
                  'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
                  'structural_type': container_type,
                  'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'],
                  'dimension': {
                      'name': 'rows',
                      'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'],
                      'length': 3,
                  },
              },
          }

      def columns_entry():
          # Entry describing the columns dimension.
          return {
              'selector': ['__ALL_ELEMENTS__'],
              'metadata': {
                  'dimension': {
                      'name': 'columns',
                      'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'],
                      'length': 3,
                  },
              },
          }

      def all_cells_entry():
          # Entry giving the structural type shared by all cells.
          return {
              'selector': ['__ALL_ELEMENTS__', '__ALL_ELEMENTS__'],
              'metadata': {
                  'structural_type': 'numpy.int64',
              },
          }

      def column_entries(include_type):
          # One entry per column; optionally repeats the structural type on each.
          entries = []
          for column_index, column_name in enumerate(['A', 'B', 'C']):
              column_metadata = {'name': column_name}
              if include_type:
                  column_metadata['structural_type'] = 'numpy.int64'
              entries.append({
                  'selector': ['__ALL_ELEMENTS__', column_index],
                  'metadata': column_metadata,
              })
          return entries

      # With custom metadata which should be preserved.
      frame = container.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9]}, {
          'foo': 'bar',
      }, generate_metadata=False)
      frame.metadata = frame.metadata.generate(frame, compact=False)

      expected_frame = [
          table_entry('d3m.container.pandas.DataFrame'),
          columns_entry(),
      ] + column_entries(include_type=True)
      self.assertEqual(simple_structure(frame), expected_frame)

      as_array = container.ndarray(frame, generate_metadata=False)
      as_array.metadata = as_array.metadata.generate(as_array, compact=False)

      expected_array = [
          table_entry('d3m.container.numpy.ndarray'),
          columns_entry(),
          all_cells_entry(),
      ] + column_entries(include_type=False)
      self.assertEqual(simple_structure(as_array), expected_array)

      back_to_frame = container.DataFrame(as_array, generate_metadata=False)
      back_to_frame.metadata = back_to_frame.metadata.generate(back_to_frame, compact=False)

      expected_round_trip = [
          table_entry('d3m.container.pandas.DataFrame'),
          columns_entry(),
          all_cells_entry(),
      ] + column_entries(include_type=True)
      self.assertEqual(simple_structure(back_to_frame), expected_round_trip)
  def test_dataframe_ndarray_int_compact_metadata(self):
      """Convert DataFrame -> ndarray -> DataFrame with ``compact=True`` metadata generation.

      All three containers are expected to produce the same metadata layout,
      differing only in the top-level structural type.
      """
      def simple_structure(value):
          # JSON-compatible view of the container's metadata.
          return utils.to_json_structure(value.metadata.to_internal_simple_structure())

      def expected_structure(container_type):
          # Top-level entry, columns dimension, shared cell type, then column names.
          structure = [{
              'selector': [],
              'metadata': {
                  'foo': 'bar',
                  'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
                  'structural_type': container_type,
                  'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'],
                  'dimension': {
                      'name': 'rows',
                      'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'],
                      'length': 3,
                  },
              },
          }, {
              'selector': ['__ALL_ELEMENTS__'],
              'metadata': {
                  'dimension': {
                      'name': 'columns',
                      'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'],
                      'length': 3,
                  },
              },
          }, {
              'selector': ['__ALL_ELEMENTS__', '__ALL_ELEMENTS__'],
              'metadata': {
                  'structural_type': 'numpy.int64',
              },
          }]
          for column_index, column_name in enumerate(['A', 'B', 'C']):
              structure.append({
                  'selector': ['__ALL_ELEMENTS__', column_index],
                  'metadata': {
                      'name': column_name,
                  },
              })
          return structure

      # With custom metadata which should be preserved.
      frame = container.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9]}, {
          'foo': 'bar',
      }, generate_metadata=False)
      frame.metadata = frame.metadata.generate(frame, compact=True)
      self.assertEqual(
          simple_structure(frame),
          expected_structure('d3m.container.pandas.DataFrame'),
      )

      as_array = container.ndarray(frame, generate_metadata=False)
      as_array.metadata = as_array.metadata.generate(as_array, compact=True)
      self.assertEqual(
          simple_structure(as_array),
          expected_structure('d3m.container.numpy.ndarray'),
      )

      back_to_frame = container.DataFrame(as_array, generate_metadata=False)
      back_to_frame.metadata = back_to_frame.metadata.generate(back_to_frame, compact=True)
      self.assertEqual(
          simple_structure(back_to_frame),
          expected_structure('d3m.container.pandas.DataFrame'),
      )
  def test_dataframe_list_int_compact_metadata(self):
      # Chain DataFrame -> List -> DataFrame, regenerating compact metadata at
      # every step. The custom top-level "foo" entry given to the first DataFrame
      # is expected in the metadata of all three containers.
      all_elements = '__ALL_ELEMENTS__'
      table_type = 'https://metadata.datadrivendiscovery.org/types/Table'
      row_type = 'https://metadata.datadrivendiscovery.org/types/TabularRow'
      column_type = 'https://metadata.datadrivendiscovery.org/types/TabularColumn'

      rows_dimension = {'name': 'rows', 'semantic_types': [row_type], 'length': 3}
      columns_dimension = {'name': 'columns', 'semantic_types': [column_type], 'length': 3}

      def describe(value):
          # The structure every assertion below compares against.
          return utils.to_json_structure(value.metadata.to_internal_simple_structure())

      def root_entry(structural_type):
          # Top-level entry; only the structural type differs between containers.
          return {
              'selector': [],
              'metadata': {
                  'foo': 'bar',
                  'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
                  'structural_type': structural_type,
                  'semantic_types': [table_type],
                  'dimension': rows_dimension,
              },
          }

      def cell_entry(structural_type):
          return {
              'selector': [all_elements, all_elements],
              'metadata': {'structural_type': structural_type},
          }

      # Per-column names are the same in all three expected structures.
      name_entries = [
          {'selector': [all_elements, position], 'metadata': {'name': label}}
          for position, label in enumerate(['A', 'B', 'C'])
      ]

      frame = container.DataFrame(
          {'A': [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9]},
          {'foo': 'bar'},
          generate_metadata=False,
      )
      frame.metadata = frame.metadata.generate(frame, compact=True)

      self.assertEqual(describe(frame), [
          root_entry('d3m.container.pandas.DataFrame'),
          {'selector': [all_elements], 'metadata': {'dimension': columns_dimension}},
          cell_entry('numpy.int64'),
      ] + name_entries)

      as_list = container.List(frame, generate_metadata=False)
      as_list.metadata = as_list.metadata.generate(as_list, compact=True)

      self.assertEqual(describe(as_list), [
          root_entry('d3m.container.list.List'),
          {
              'selector': [all_elements],
              'metadata': {
                  'structural_type': 'd3m.container.list.List',
                  'dimension': columns_dimension,
              },
          },
          cell_entry('int'),
      ] + name_entries)

      frame_again = container.DataFrame(as_list, generate_metadata=False)
      frame_again.metadata = frame_again.metadata.generate(frame_again, compact=True)

      self.assertEqual(describe(frame_again), [
          root_entry('d3m.container.pandas.DataFrame'),
          {
              'selector': [all_elements],
              'metadata': {
                  'dimension': columns_dimension,
                  # This is not really required, but current implementation adds it.
                  # It is OK if in the future this gets removed.
                  'structural_type': '__NO_VALUE__',
              },
          },
          cell_entry('numpy.int64'),
      ] + name_entries)
  def test_dataframe_list_int_noncompact_metadata(self):
      # Chain DataFrame -> List -> DataFrame, regenerating metadata with
      # compact=False at every step. The custom top-level "foo" entry given to
      # the first DataFrame is expected in the metadata of all three containers.
      all_elements = '__ALL_ELEMENTS__'
      table_type = 'https://metadata.datadrivendiscovery.org/types/Table'
      row_type = 'https://metadata.datadrivendiscovery.org/types/TabularRow'
      column_type = 'https://metadata.datadrivendiscovery.org/types/TabularColumn'

      rows_dimension = {'name': 'rows', 'semantic_types': [row_type], 'length': 3}
      columns_dimension = {'name': 'columns', 'semantic_types': [column_type], 'length': 3}

      def describe(value):
          # The structure every assertion below compares against.
          return utils.to_json_structure(value.metadata.to_internal_simple_structure())

      def root_entry(structural_type):
          # Top-level entry; only the structural type differs between containers.
          return {
              'selector': [],
              'metadata': {
                  'foo': 'bar',
                  'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
                  'structural_type': structural_type,
                  'semantic_types': [table_type],
                  'dimension': rows_dimension,
              },
          }

      # Column entries carrying only a name (expected for the List).
      name_entries = [
          {'selector': [all_elements, position], 'metadata': {'name': label}}
          for position, label in enumerate(['A', 'B', 'C'])
      ]
      # Column entries carrying a name and a structural type (expected for DataFrames).
      typed_name_entries = [
          {
              'selector': [all_elements, position],
              'metadata': {'name': label, 'structural_type': 'numpy.int64'},
          }
          for position, label in enumerate(['A', 'B', 'C'])
      ]

      frame = container.DataFrame(
          {'A': [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9]},
          {'foo': 'bar'},
          generate_metadata=False,
      )
      frame.metadata = frame.metadata.generate(frame, compact=False)

      self.assertEqual(describe(frame), [
          root_entry('d3m.container.pandas.DataFrame'),
          {'selector': [all_elements], 'metadata': {'dimension': columns_dimension}},
      ] + typed_name_entries)

      as_list = container.List(frame, generate_metadata=False)
      as_list.metadata = as_list.metadata.generate(as_list, compact=False)

      self.assertEqual(describe(as_list), [
          root_entry('d3m.container.list.List'),
          {
              'selector': [all_elements],
              'metadata': {
                  'structural_type': 'd3m.container.list.List',
                  'dimension': columns_dimension,
              },
          },
          {
              'selector': [all_elements, all_elements],
              'metadata': {'structural_type': 'int'},
          },
      ] + name_entries)

      frame_again = container.DataFrame(as_list, generate_metadata=False)
      frame_again.metadata = frame_again.metadata.generate(frame_again, compact=False)

      self.assertEqual(describe(frame_again), [
          root_entry('d3m.container.pandas.DataFrame'),
          {
              'selector': [all_elements],
              'metadata': {
                  'dimension': columns_dimension,
                  # This is not really required, but current implementation adds it.
                  # It is OK if in the future this gets removed.
                  'structural_type': '__NO_VALUE__',
              },
          },
          {
              'selector': [all_elements, all_elements],
              'metadata': {'structural_type': 'int'},
          },
      ] + typed_name_entries)
  def test_deep_ndarray_compact_metadata(self):
      # Chain ndarray -> DataFrame -> ndarray for a 5-dimensional array,
      # regenerating compact metadata at every step. The custom top-level "foo"
      # entry given to the first array is expected in all three containers.
      all_elements = '__ALL_ELEMENTS__'
      table_type = 'https://metadata.datadrivendiscovery.org/types/Table'
      row_type = 'https://metadata.datadrivendiscovery.org/types/TabularRow'
      column_type = 'https://metadata.datadrivendiscovery.org/types/TabularColumn'

      def describe(value):
          # The structure every assertion below compares against.
          return utils.to_json_structure(value.metadata.to_internal_simple_structure())

      def length_entry(depth, length):
          # Entry holding only a dimension length, "depth" levels below the root.
          return {
              'selector': [all_elements] * depth,
              'metadata': {'dimension': {'length': length}},
          }

      scalar_entry = {
          'selector': [all_elements] * 5,
          'metadata': {'structural_type': 'numpy.int64'},
      }

      tabular_rows = {'length': 3, 'name': 'rows', 'semantic_types': [row_type]}

      # Everything below the root is expected to be identical for the DataFrame
      # and for the ndarray created from that DataFrame.
      tabular_tail = [
          {
              'selector': [all_elements],
              'metadata': {
                  'dimension': {
                      'length': 4,
                      'name': 'columns',
                      'semantic_types': [column_type],
                  },
              },
          },
          {
              'selector': [all_elements, all_elements],
              'metadata': {
                  'dimension': {'length': 5},
                  'structural_type': 'd3m.container.numpy.ndarray',
              },
          },
          length_entry(3, 5),
          length_entry(4, 5),
          scalar_entry,
      ]

      source = container.ndarray(
          numpy.arange(3 * 4 * 5 * 5 * 5).reshape((3, 4, 5, 5, 5)),
          {'foo': 'bar'},
          generate_metadata=False,
      )
      source.metadata = source.metadata.generate(source, compact=True)

      self.assertEqual(describe(source), [
          {
              'selector': [],
              'metadata': {
                  'foo': 'bar',
                  'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
                  'structural_type': 'd3m.container.numpy.ndarray',
                  'dimension': {'length': 3},
              },
          },
          length_entry(1, 4),
          length_entry(2, 5),
          length_entry(3, 5),
          length_entry(4, 5),
          scalar_entry,
      ])

      frame = container.DataFrame(source, generate_metadata=False)
      frame.metadata = frame.metadata.generate(frame, compact=True)

      self.assertEqual(describe(frame), [
          {
              'selector': [],
              'metadata': {
                  'foo': 'bar',
                  'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
                  'structural_type': 'd3m.container.pandas.DataFrame',
                  'dimension': tabular_rows,
                  'semantic_types': [table_type],
              },
          },
      ] + tabular_tail)

      restored = container.ndarray(frame, generate_metadata=False)
      restored.metadata = restored.metadata.generate(restored, compact=True)

      # We do not automatically compact numpy with nested numpy arrays into one array
      # (there might be an exception if array is jagged).
      self.assertEqual(describe(restored), [
          {
              'selector': [],
              'metadata': {
                  'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
                  'structural_type': 'd3m.container.numpy.ndarray',
                  'dimension': tabular_rows,
                  'foo': 'bar',
                  'semantic_types': [table_type],
              },
          },
      ] + tabular_tail)
  def test_deep_ndarray_noncompact_metadata(self):
      # Chain ndarray -> DataFrame -> ndarray for a 5-dimensional array,
      # regenerating metadata with compact=False at every step. The custom
      # top-level "foo" entry given to the first array is expected in all
      # three containers.
      all_elements = '__ALL_ELEMENTS__'
      table_type = 'https://metadata.datadrivendiscovery.org/types/Table'
      row_type = 'https://metadata.datadrivendiscovery.org/types/TabularRow'
      column_type = 'https://metadata.datadrivendiscovery.org/types/TabularColumn'

      def describe(value):
          # The structure every assertion below compares against.
          return utils.to_json_structure(value.metadata.to_internal_simple_structure())

      def length_entry(selector, length):
          # Entry holding only a dimension length at the given selector.
          return {'selector': selector, 'metadata': {'dimension': {'length': length}}}

      def scalar_entry(selector):
          return {'selector': selector, 'metadata': {'structural_type': 'numpy.int64'}}

      def nested_array_entry(selector):
          # Entry for a cell that itself holds an ndarray of length 5.
          return {
              'selector': selector,
              'metadata': {
                  'dimension': {'length': 5},
                  'structural_type': 'd3m.container.numpy.ndarray',
              },
          }

      tabular_rows = {'length': 3, 'name': 'rows', 'semantic_types': [row_type]}
      columns_entry = {
          'selector': [all_elements],
          'metadata': {
              'dimension': {
                  'length': 4,
                  'name': 'columns',
                  'semantic_types': [column_type],
              },
          },
      }

      source = container.ndarray(
          numpy.arange(3 * 4 * 5 * 5 * 5).reshape((3, 4, 5, 5, 5)),
          {'foo': 'bar'},
          generate_metadata=False,
      )
      source.metadata = source.metadata.generate(source, compact=False)

      self.assertEqual(describe(source), [
          {
              'selector': [],
              'metadata': {
                  'foo': 'bar',
                  'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
                  'structural_type': 'd3m.container.numpy.ndarray',
                  'dimension': {'length': 3},
              },
          },
          length_entry([all_elements], 4),
          length_entry([all_elements] * 2, 5),
          length_entry([all_elements] * 3, 5),
          length_entry([all_elements] * 4, 5),
          scalar_entry([all_elements] * 5),
      ])

      frame = container.DataFrame(source, generate_metadata=False)
      frame.metadata = frame.metadata.generate(frame, compact=False)

      # For each of the four columns the same group of four entries is expected:
      # the nested array itself, its two inner dimensions, and its scalars.
      per_column_entries = []
      for column in range(4):
          per_column_entries.extend([
              nested_array_entry([all_elements, column]),
              length_entry([all_elements, column, all_elements], 5),
              length_entry([all_elements, column, all_elements, all_elements], 5),
              scalar_entry([all_elements, column, all_elements, all_elements, all_elements]),
          ])

      self.assertEqual(describe(frame), [
          {
              'selector': [],
              'metadata': {
                  'foo': 'bar',
                  'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
                  'structural_type': 'd3m.container.pandas.DataFrame',
                  'dimension': tabular_rows,
                  'semantic_types': [table_type],
              },
          },
          columns_entry,
          length_entry([all_elements] * 2, 5),
          length_entry([all_elements] * 3, 5),
          length_entry([all_elements] * 4, 5),
          scalar_entry([all_elements] * 5),
      ] + per_column_entries)

      restored = container.ndarray(frame, generate_metadata=False)
      restored.metadata = restored.metadata.generate(restored, compact=False)

      # We do not automatically compact numpy with nested numpy arrays into one array
      # (there might be an exception if array is jagged).
      self.assertEqual(describe(restored), [
          {
              'selector': [],
              'metadata': {
                  'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
                  'structural_type': 'd3m.container.numpy.ndarray',
                  'dimension': tabular_rows,
                  'foo': 'bar',
                  'semantic_types': [table_type],
              },
          },
          columns_entry,
          nested_array_entry([all_elements] * 2),
          length_entry([all_elements] * 3, 5),
          length_entry([all_elements] * 4, 5),
          scalar_entry([all_elements] * 5),
      ])
  def test_simple_list_to_dataframe(self):
      # A flat List of three ints is converted to a DataFrame, and metadata is
      # generated for that DataFrame both with compact=True and compact=False.
      # The two results are expected to differ only in the selector of the
      # entry describing the scalar values.
      data = container.List([1, 2, 3], generate_metadata=True)
      dataframe = container.DataFrame(data, generate_metadata=False)

      compact_metadata = dataframe.metadata.generate(dataframe, compact=True)
      noncompact_metadata = dataframe.metadata.generate(dataframe, compact=False)

      def expected_metadata(value_selector):
          # Build a fresh expected structure per assertion instead of mutating a
          # shared fixture, so each expectation can be read on its own.
          return [{
              'selector': [],
              'metadata': {
                  # Use the shared constant, as the other tests in this file do,
                  # rather than a hard-coded copy of the schema URL.
                  'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
                  'structural_type': 'd3m.container.pandas.DataFrame',
                  'dimension': {
                      'length': 3,
                      'name': 'rows',
                      'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'],
                  },
                  'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'],
              },
          }, {
              'selector': ['__ALL_ELEMENTS__'],
              'metadata': {
                  'structural_type': '__NO_VALUE__',
                  'dimension': {
                      'name': 'columns',
                      'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'],
                      'length': 1,
                  },
              },
          }, {
              'selector': value_selector,
              'metadata': {
                  'structural_type': 'numpy.int64',
              },
          }]

      self.assertEqual(
          utils.to_json_structure(compact_metadata.to_internal_simple_structure()),
          expected_metadata(['__ALL_ELEMENTS__', '__ALL_ELEMENTS__']),
      )
      self.assertEqual(
          utils.to_json_structure(noncompact_metadata.to_internal_simple_structure()),
          expected_metadata(['__ALL_ELEMENTS__', 0]),
      )
  def test_select_columns_compact_metadata(self):
      # Select columns from a DataFrame whose compact metadata has been extended
      # with per-column names, per-cell entries on column 0 and an all-elements
      # entry on row 0. Checks the values and metadata of each selection, and
      # that the metadata of the original DataFrame is left as it was.
      all_elements = '__ALL_ELEMENTS__'

      def describe(value):
          return utils.to_json_structure(value.metadata.to_internal_simple_structure())

      def columns_entry(count):
          return {
              'selector': [all_elements],
              'metadata': {
                  'dimension': {
                      'name': 'columns',
                      'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'],
                      'length': count,
                  },
              },
          }

      root_entry = {
          'selector': [],
          'metadata': {
              'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
              'structural_type': 'd3m.container.pandas.DataFrame',
              'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'],
              'dimension': {
                  'name': 'rows',
                  'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'],
                  'length': 3,
              },
          },
      }
      cell_type_entry = {
          'selector': [all_elements, all_elements],
          'metadata': {'structural_type': 'numpy.int64'},
      }
      row_a_entry = {
          'selector': [0, all_elements],
          'metadata': {'all_elements_on_row': 'rowA'},
      }

      data = container.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6], 'c': [7, 8, 9]}, generate_metadata=False)
      data.metadata = data.metadata.generate(data, compact=True)

      for column_index, column_name in enumerate(['aaa', 'bbb', 'ccc']):
          data.metadata = data.metadata.update_column(column_index, {'name': column_name})
      for row_index, row_label in enumerate(['1', '2', '3']):
          data.metadata = data.metadata.update((row_index, 0), {'row': row_label})
      data.metadata = data.metadata.update((0, metadata_base.ALL_ELEMENTS), {'all_elements_on_row': 'rowA'})

      snapshot = data.metadata.to_internal_json_structure()

      # Test "select_columns" working with a tuple. Specifically, iloc[:, tuple(1)] does not work
      # (i.e. throws "{IndexingError}Too many indexers"), but iloc[:, 1] and iloc[:, [1]] work.
      reordered = data.select_columns((1, 0, 2, 1))

      self.assertEqual(reordered.values.tolist(), [[4, 1, 7, 4], [5, 2, 8, 5], [6, 3, 9, 6]])
      self.assertEqual(describe(reordered), [
          root_entry,
          columns_entry(4),
          cell_type_entry,
          {'selector': [all_elements, 0], 'metadata': {'name': 'bbb'}},
          {'selector': [all_elements, 1], 'metadata': {'name': 'aaa'}},
          {'selector': [all_elements, 2], 'metadata': {'name': 'ccc'}},
          {'selector': [all_elements, 3], 'metadata': {'name': 'bbb'}},
          row_a_entry,
          {'selector': [0, 1], 'metadata': {'row': '1'}},
          {'selector': [1, 1], 'metadata': {'row': '2'}},
          {'selector': [2, 1], 'metadata': {'row': '3'}},
      ])
      self.assertEqual(data.metadata.to_internal_json_structure(), snapshot)

      single = data.select_columns([1])

      self.assertEqual(single.values.tolist(), [[4], [5], [6]])
      self.assertEqual(describe(single), [
          root_entry,
          columns_entry(1),
          cell_type_entry,
          {'selector': [all_elements, 0], 'metadata': {'name': 'bbb'}},
          row_a_entry,
      ])
      self.assertEqual(data.metadata.to_internal_json_structure(), snapshot)
  def test_select_columns_noncompact_metadata(self):
      # Select columns from a DataFrame whose metadata (generated with
      # compact=False) has been extended with per-column names, per-cell entries
      # on column 0 and an all-elements entry on row 0. Checks the values and
      # metadata of each selection, and that the metadata of the original
      # DataFrame is left as it was.
      all_elements = '__ALL_ELEMENTS__'

      def describe(value):
          return utils.to_json_structure(value.metadata.to_internal_simple_structure())

      def columns_entry(count):
          return {
              'selector': [all_elements],
              'metadata': {
                  'dimension': {
                      'name': 'columns',
                      'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'],
                      'length': count,
                  },
              },
          }

      def named_column(position, label):
          # Column entry carrying both the name and the structural type.
          return {
              'selector': [all_elements, position],
              'metadata': {'name': label, 'structural_type': 'numpy.int64'},
          }

      root_entry = {
          'selector': [],
          'metadata': {
              'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
              'structural_type': 'd3m.container.pandas.DataFrame',
              'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'],
              'dimension': {
                  'name': 'rows',
                  'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'],
                  'length': 3,
              },
          },
      }
      row_a_entry = {
          'selector': [0, all_elements],
          'metadata': {'all_elements_on_row': 'rowA'},
      }

      data = container.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6], 'c': [7, 8, 9]}, generate_metadata=False)
      data.metadata = data.metadata.generate(data, compact=False)

      for column_index, column_name in enumerate(['aaa', 'bbb', 'ccc']):
          data.metadata = data.metadata.update_column(column_index, {'name': column_name})
      for row_index, row_label in enumerate(['1', '2', '3']):
          data.metadata = data.metadata.update((row_index, 0), {'row': row_label})
      data.metadata = data.metadata.update((0, metadata_base.ALL_ELEMENTS), {'all_elements_on_row': 'rowA'})

      snapshot = data.metadata.to_internal_json_structure()

      # Test "select_columns" working with a tuple. Specifically, iloc[:, tuple(1)] does not work
      # (i.e. throws "{IndexingError}Too many indexers"), but iloc[:, 1] and iloc[:, [1]] work.
      reordered = data.select_columns((1, 0, 2, 1))

      self.assertEqual(reordered.values.tolist(), [[4, 1, 7, 4], [5, 2, 8, 5], [6, 3, 9, 6]])
      self.assertEqual(describe(reordered), [
          root_entry,
          columns_entry(4),
          named_column(0, 'bbb'),
          named_column(1, 'aaa'),
          named_column(2, 'ccc'),
          named_column(3, 'bbb'),
          row_a_entry,
          {'selector': [0, 1], 'metadata': {'row': '1'}},
          {'selector': [1, 1], 'metadata': {'row': '2'}},
          {'selector': [2, 1], 'metadata': {'row': '3'}},
      ])
      self.assertEqual(data.metadata.to_internal_json_structure(), snapshot)

      single = data.select_columns([1])

      self.assertEqual(single.values.tolist(), [[4], [5], [6]])
      self.assertEqual(describe(single), [
          root_entry,
          columns_entry(1),
          named_column(0, 'bbb'),
          row_a_entry,
      ])
      self.assertEqual(data.metadata.to_internal_json_structure(), snapshot)
  def test_append_columns_compact_metadata(self):
      """
      Check ``append_columns`` on data frames whose metadata was generated with ``compact=True``.

      Both ``use_right_metadata=False`` and ``use_right_metadata=True`` are exercised. For each
      call the combined values and the combined metadata are compared against expected
      structures. At the end the metadata of ``right`` is compared against a snapshot
      taken before the calls.
      """

      all_elements = '__ALL_ELEMENTS__'
      int64 = 'numpy.int64'

      def entry(selector, metadata):
          # Build one expected item of the simple metadata structure.
          return {'selector': selector, 'metadata': metadata}

      def table_entry(top_level):
          # Expected top-level entry; only "top_level" differs between the two calls.
          return entry([], {
              'top_level': top_level,
              'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
              'structural_type': 'd3m.container.pandas.DataFrame',
              'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'],
              'dimension': {
                  'name': 'rows',
                  'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'],
                  'length': 3,
              },
          })

      # Expected entry describing the columns dimension (six columns after appending).
      columns_entry = entry([all_elements], {
          'dimension': {
              'name': 'columns',
              'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'],
              'length': 6,
          },
      })

      expected_values = [[1, 4, 7, 11, 14, 17], [2, 5, 8, 12, 15, 18], [3, 6, 9, 13, 16, 19]]

      # Left input: custom column names, per-cell "row" metadata on column 0,
      # and metadata for all elements of row 0.
      left = container.DataFrame(
          {'a1': [1, 2, 3], 'b1': [4, 5, 6], 'c1': [7, 8, 9]},
          {'top_level': 'left'},
          generate_metadata=False,
      )
      left.metadata = left.metadata.generate(left, compact=True)
      for column_index, column_name in enumerate(('aaa111', 'bbb111', 'ccc111')):
          left.metadata = left.metadata.update_column(column_index, {'name': column_name})
      for row_index, row_label in enumerate(('1a', '2a', '3a')):
          left.metadata = left.metadata.update((row_index, 0), {'row': row_label})
      left.metadata = left.metadata.update((0, metadata_base.ALL_ELEMENTS), {'all_elements_on_row': 'rowA'})

      # Right input: same shape of metadata, but per-cell "row" metadata is on column 1.
      right = container.DataFrame(
          {'a2': [11, 12, 13], 'b2': [14, 15, 16], 'c2': [17, 18, 19]},
          {'top_level': 'right'},
          generate_metadata=False,
      )
      right.metadata = right.metadata.generate(right, compact=True)
      for column_index, column_name in enumerate(('aaa222', 'bbb222', 'ccc222')):
          right.metadata = right.metadata.update_column(column_index, {'name': column_name})
      for row_index, row_label in enumerate(('1b', '2b', '3b')):
          right.metadata = right.metadata.update((row_index, 1), {'row': row_label})
      right.metadata = right.metadata.update((0, metadata_base.ALL_ELEMENTS), {'all_elements_on_row': 'rowB'})

      right_metadata_before = right.metadata.to_internal_json_structure()

      # Case 1: metadata of the left data frame is used as the base.
      combined = left.append_columns(right, use_right_metadata=False)

      self.assertEqual(combined.values.tolist(), expected_values)
      self.assertEqual(utils.to_json_structure(combined.metadata.to_internal_simple_structure()), [
          table_entry('left'),
          columns_entry,
          entry([all_elements, all_elements], {'structural_type': int64}),
          entry([all_elements, 0], {'name': 'aaa111'}),
          entry([all_elements, 1], {'name': 'bbb111'}),
          entry([all_elements, 2], {'name': 'ccc111'}),
          entry([all_elements, 3], {'name': 'aaa222', 'structural_type': int64}),
          entry([all_elements, 4], {'name': 'bbb222', 'structural_type': int64}),
          entry([all_elements, 5], {'name': 'ccc222', 'structural_type': int64}),
          entry([0, all_elements], {'all_elements_on_row': 'rowA'}),
          entry([0, 0], {'row': '1a'}),
          entry([0, 3], {'all_elements_on_row': 'rowB'}),
          entry([0, 4], {'row': '1b', 'all_elements_on_row': 'rowB'}),
          entry([0, 5], {'all_elements_on_row': 'rowB'}),
          entry([1, 0], {'row': '2a'}),
          entry([1, 4], {'row': '2b'}),
          entry([2, 0], {'row': '3a'}),
          entry([2, 4], {'row': '3b'}),
      ])

      # Case 2: metadata of the right data frame is used as the base.
      combined = left.append_columns(right, use_right_metadata=True)

      self.assertEqual(combined.values.tolist(), expected_values)
      self.assertEqual(utils.to_json_structure(combined.metadata.to_internal_simple_structure()), [
          table_entry('right'),
          columns_entry,
          entry([all_elements, all_elements], {'structural_type': int64}),
          entry([all_elements, 0], {'name': 'aaa111', 'structural_type': int64}),
          entry([all_elements, 1], {'name': 'bbb111', 'structural_type': int64}),
          entry([all_elements, 2], {'name': 'ccc111', 'structural_type': int64}),
          entry([all_elements, 3], {'name': 'aaa222'}),
          entry([all_elements, 4], {'name': 'bbb222'}),
          entry([all_elements, 5], {'name': 'ccc222'}),
          entry([0, all_elements], {'all_elements_on_row': 'rowB'}),
          entry([0, 0], {'row': '1a', 'all_elements_on_row': 'rowA'}),
          entry([0, 1], {'all_elements_on_row': 'rowA'}),
          entry([0, 2], {'all_elements_on_row': 'rowA'}),
          entry([0, 4], {'row': '1b'}),
          entry([1, 0], {'row': '2a'}),
          entry([1, 4], {'row': '2b'}),
          entry([2, 0], {'row': '3a'}),
          entry([2, 4], {'row': '3b'}),
      ])

      # Appending must not have modified the metadata of the right data frame.
      self.assertEqual(right.metadata.to_internal_json_structure(), right_metadata_before)
  def test_append_columns_noncompact_metadata(self):
      """
      Check ``append_columns`` on data frames whose metadata was generated with ``compact=False``.

      Both ``use_right_metadata=False`` and ``use_right_metadata=True`` are exercised. For each
      call the combined values and the combined metadata are compared against expected
      structures. At the end the metadata of ``right`` is compared against a snapshot
      taken before the calls.
      """

      all_elements = '__ALL_ELEMENTS__'
      int64 = 'numpy.int64'

      def entry(selector, metadata):
          # Build one expected item of the simple metadata structure.
          return {'selector': selector, 'metadata': metadata}

      def table_entry(top_level):
          # Expected top-level entry; only "top_level" differs between the two calls.
          return entry([], {
              'top_level': top_level,
              'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
              'structural_type': 'd3m.container.pandas.DataFrame',
              'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'],
              'dimension': {
                  'name': 'rows',
                  'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'],
                  'length': 3,
              },
          })

      # Expected entry describing the columns dimension (six columns after appending).
      columns_entry = entry([all_elements], {
          'dimension': {
              'name': 'columns',
              'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'],
              'length': 6,
          },
      })

      # In both cases every column is expected to carry its name and its structural type.
      all_column_names = ('aaa111', 'bbb111', 'ccc111', 'aaa222', 'bbb222', 'ccc222')
      column_entries = [
          entry([all_elements, column_index], {'name': column_name, 'structural_type': int64})
          for column_index, column_name in enumerate(all_column_names)
      ]

      expected_values = [[1, 4, 7, 11, 14, 17], [2, 5, 8, 12, 15, 18], [3, 6, 9, 13, 16, 19]]

      # Left input: custom column names, per-cell "row" metadata on column 0,
      # and metadata for all elements of row 0.
      left = container.DataFrame(
          {'a1': [1, 2, 3], 'b1': [4, 5, 6], 'c1': [7, 8, 9]},
          {'top_level': 'left'},
          generate_metadata=False,
      )
      left.metadata = left.metadata.generate(left, compact=False)
      for column_index, column_name in enumerate(('aaa111', 'bbb111', 'ccc111')):
          left.metadata = left.metadata.update_column(column_index, {'name': column_name})
      for row_index, row_label in enumerate(('1a', '2a', '3a')):
          left.metadata = left.metadata.update((row_index, 0), {'row': row_label})
      left.metadata = left.metadata.update((0, metadata_base.ALL_ELEMENTS), {'all_elements_on_row': 'rowA'})

      # Right input: same shape of metadata, but per-cell "row" metadata is on column 1.
      right = container.DataFrame(
          {'a2': [11, 12, 13], 'b2': [14, 15, 16], 'c2': [17, 18, 19]},
          {'top_level': 'right'},
          generate_metadata=False,
      )
      right.metadata = right.metadata.generate(right, compact=False)
      for column_index, column_name in enumerate(('aaa222', 'bbb222', 'ccc222')):
          right.metadata = right.metadata.update_column(column_index, {'name': column_name})
      for row_index, row_label in enumerate(('1b', '2b', '3b')):
          right.metadata = right.metadata.update((row_index, 1), {'row': row_label})
      right.metadata = right.metadata.update((0, metadata_base.ALL_ELEMENTS), {'all_elements_on_row': 'rowB'})

      right_metadata_before = right.metadata.to_internal_json_structure()

      # Case 1: metadata of the left data frame is used as the base.
      combined = left.append_columns(right, use_right_metadata=False)

      self.assertEqual(combined.values.tolist(), expected_values)
      self.assertEqual(
          utils.to_json_structure(combined.metadata.to_internal_simple_structure()),
          [table_entry('left'), columns_entry] + column_entries + [
              entry([0, all_elements], {'all_elements_on_row': 'rowA'}),
              entry([0, 0], {'row': '1a'}),
              entry([0, 3], {'all_elements_on_row': 'rowB'}),
              entry([0, 4], {'row': '1b', 'all_elements_on_row': 'rowB'}),
              entry([0, 5], {'all_elements_on_row': 'rowB'}),
              entry([1, 0], {'row': '2a'}),
              entry([1, 4], {'row': '2b'}),
              entry([2, 0], {'row': '3a'}),
              entry([2, 4], {'row': '3b'}),
          ],
      )

      # Case 2: metadata of the right data frame is used as the base.
      combined = left.append_columns(right, use_right_metadata=True)

      self.assertEqual(combined.values.tolist(), expected_values)
      self.assertEqual(
          utils.to_json_structure(combined.metadata.to_internal_simple_structure()),
          [table_entry('right'), columns_entry] + column_entries + [
              entry([0, all_elements], {'all_elements_on_row': 'rowB'}),
              entry([0, 0], {'row': '1a', 'all_elements_on_row': 'rowA'}),
              entry([0, 1], {'all_elements_on_row': 'rowA'}),
              entry([0, 2], {'all_elements_on_row': 'rowA'}),
              entry([0, 4], {'row': '1b'}),
              entry([1, 0], {'row': '2a'}),
              entry([1, 4], {'row': '2b'}),
              entry([2, 0], {'row': '3a'}),
              entry([2, 4], {'row': '3b'}),
          ],
      )

      # Appending must not have modified the metadata of the right data frame.
      self.assertEqual(right.metadata.to_internal_json_structure(), right_metadata_before)
  def test_replace_columns_compact_metadata(self):
      """
      Check ``replace_columns`` on data frames whose metadata was generated with ``compact=True``.

      A two-column data frame replaces the column index lists ``[1, 2]``, ``[0, 2]``,
      ``[1]`` and ``[0, 1, 2]`` of a three-column data frame. After each call the resulting
      values and metadata are compared against expected structures, and the metadata of
      both inputs is compared against snapshots taken before the first call.
      """

      all_elements = '__ALL_ELEMENTS__'
      int64 = 'numpy.int64'

      def entry(selector, metadata):
          # Build one expected item of the simple metadata structure.
          return {'selector': selector, 'metadata': metadata}

      def columns_dimension_entry(length):
          # Expected entry describing the columns dimension with the given number of columns.
          return entry([all_elements], {
              'dimension': {
                  'name': 'columns',
                  'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'],
                  'length': length,
              },
          })

      # Expected top-level entry; it is the same for every call in this test.
      table_entry = entry([], {
          'top_level': 'main',
          'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
          'structural_type': 'd3m.container.pandas.DataFrame',
          'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'],
          'dimension': {
              'name': 'rows',
              'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'],
              'length': 3,
          },
      })
      all_cells_entry = entry([all_elements, all_elements], {'structural_type': int64})

      # Data frame whose columns get replaced.
      main = container.DataFrame(
          {'a1': [1, 2, 3], 'b1': [4, 5, 6], 'c1': [7, 8, 9]},
          {'top_level': 'main'},
          generate_metadata=False,
      )
      main.metadata = main.metadata.generate(main, compact=True)
      main_column_updates = (
          {'name': 'aaa111'},
          {'name': 'bbb111', 'extra': 'b_column'},
          {'name': 'ccc111'},
      )
      for column_index, column_metadata in enumerate(main_column_updates):
          main.metadata = main.metadata.update_column(column_index, column_metadata)
      for row_index, row_label in enumerate(('1a', '2a', '3a')):
          main.metadata = main.metadata.update((row_index, 0), {'row': row_label})
      main.metadata = main.metadata.update((0, metadata_base.ALL_ELEMENTS), {'all_elements_on_row': 'rowA'})

      main_metadata_before = main.metadata.to_internal_json_structure()

      # Data frame providing the replacement columns.
      columns = container.DataFrame(
          {'a2': [11, 12, 13], 'b2': [14, 15, 16]},
          {'top_level': 'columns'},
          generate_metadata=False,
      )
      columns.metadata = columns.metadata.generate(columns, compact=True)
      for column_index, column_name in enumerate(('aaa222', 'bbb222')):
          columns.metadata = columns.metadata.update_column(column_index, {'name': column_name})
      for row_index, row_label in enumerate(('1b', '2b', '3b')):
          columns.metadata = columns.metadata.update((row_index, 1), {'row': row_label})
      columns.metadata = columns.metadata.update((0, metadata_base.ALL_ELEMENTS), {'all_elements_on_row': 'rowB'})

      columns_metadata_before = columns.metadata.to_internal_json_structure()

      def assert_inputs_unchanged():
          # Metadata of both inputs must still match the snapshots taken above.
          self.assertEqual(main_metadata_before, main.metadata.to_internal_json_structure())
          self.assertEqual(columns_metadata_before, columns.metadata.to_internal_json_structure())

      # Replace columns 1 and 2.
      new_main = main.replace_columns(columns, [1, 2])

      self.assertEqual(new_main.values.tolist(), [[1, 11, 14], [2, 12, 15], [3, 13, 16]])
      self.assertEqual(utils.to_json_structure(new_main.metadata.to_internal_simple_structure()), [
          table_entry,
          columns_dimension_entry(3),
          all_cells_entry,
          entry([all_elements, 0], {'name': 'aaa111'}),
          entry([all_elements, 1], {'name': 'aaa222', 'structural_type': int64}),
          entry([all_elements, 2], {'name': 'bbb222', 'structural_type': int64}),
          entry([0, all_elements], {'all_elements_on_row': 'rowA'}),
          entry([0, 0], {'row': '1a'}),
          entry([0, 1], {'all_elements_on_row': 'rowB'}),
          entry([0, 2], {'row': '1b', 'all_elements_on_row': 'rowB'}),
          entry([1, 0], {'row': '2a'}),
          entry([1, 2], {'row': '2b'}),
          entry([2, 0], {'row': '3a'}),
          entry([2, 2], {'row': '3b'}),
      ])
      assert_inputs_unchanged()

      # Replace columns 0 and 2; column 1 of "main" stays in between.
      new_main = main.replace_columns(columns, [0, 2])

      self.assertEqual(new_main.values.tolist(), [[11, 4, 14], [12, 5, 15], [13, 6, 16]])
      self.assertEqual(utils.to_json_structure(new_main.metadata.to_internal_simple_structure()), [
          table_entry,
          columns_dimension_entry(3),
          all_cells_entry,
          entry([all_elements, 0], {'name': 'aaa222', 'structural_type': int64}),
          entry([all_elements, 1], {'name': 'bbb111', 'extra': 'b_column'}),
          entry([all_elements, 2], {'name': 'bbb222', 'structural_type': int64}),
          entry([0, all_elements], {'all_elements_on_row': 'rowA'}),
          entry([0, 0], {'all_elements_on_row': 'rowB'}),
          entry([0, 2], {'row': '1b', 'all_elements_on_row': 'rowB'}),
          entry([1, 2], {'row': '2b'}),
          entry([2, 2], {'row': '3b'}),
      ])
      assert_inputs_unchanged()

      # Replace only column 1; the result has four columns.
      new_main = main.replace_columns(columns, [1])

      self.assertEqual(new_main.values.tolist(), [[1, 11, 14, 7], [2, 12, 15, 8], [3, 13, 16, 9]])
      self.assertEqual(utils.to_json_structure(new_main.metadata.to_internal_simple_structure()), [
          table_entry,
          columns_dimension_entry(4),
          all_cells_entry,
          entry([all_elements, 0], {'name': 'aaa111'}),
          entry([all_elements, 1], {'name': 'aaa222', 'structural_type': int64}),
          entry([all_elements, 2], {'name': 'bbb222', 'structural_type': int64}),
          entry([all_elements, 3], {'name': 'ccc111', 'structural_type': int64}),
          entry([0, all_elements], {'all_elements_on_row': 'rowA'}),
          entry([0, 0], {'row': '1a'}),
          entry([0, 1], {'all_elements_on_row': 'rowB'}),
          entry([0, 2], {'row': '1b', 'all_elements_on_row': 'rowB'}),
          entry([0, 3], {'all_elements_on_row': 'rowA'}),
          entry([1, 0], {'row': '2a'}),
          entry([1, 2], {'row': '2b'}),
          entry([2, 0], {'row': '3a'}),
          entry([2, 2], {'row': '3b'}),
      ])
      assert_inputs_unchanged()

      # Replace all three columns; the result has two columns.
      new_main = main.replace_columns(columns, [0, 1, 2])

      self.assertEqual(new_main.values.tolist(), [[11, 14], [12, 15], [13, 16]])
      self.assertEqual(utils.to_json_structure(new_main.metadata.to_internal_simple_structure()), [
          table_entry,
          columns_dimension_entry(2),
          all_cells_entry,
          entry([all_elements, 0], {'name': 'aaa222', 'structural_type': int64}),
          entry([all_elements, 1], {'name': 'bbb222', 'structural_type': int64}),
          entry([0, all_elements], {'all_elements_on_row': 'rowA'}),
          entry([0, 0], {'all_elements_on_row': 'rowB'}),
          entry([0, 1], {'row': '1b', 'all_elements_on_row': 'rowB'}),
          entry([1, 1], {'row': '2b'}),
          entry([2, 1], {'row': '3b'}),
      ])
      assert_inputs_unchanged()
  2081. def test_replace_columns_noncompact_metadata(self):
  2082. main = container.DataFrame({'a1': [1, 2, 3], 'b1': [4, 5, 6], 'c1': [7, 8, 9]}, {
  2083. 'top_level': 'main',
  2084. }, generate_metadata=False)
  2085. main.metadata = main.metadata.generate(main, compact=False)
  2086. main.metadata = main.metadata.update_column(0, {'name': 'aaa111'})
  2087. main.metadata = main.metadata.update_column(1, {'name': 'bbb111', 'extra': 'b_column'})
  2088. main.metadata = main.metadata.update_column(2, {'name': 'ccc111'})
  2089. main.metadata = main.metadata.update((0, 0), {'row': '1a'})
  2090. main.metadata = main.metadata.update((1, 0), {'row': '2a'})
  2091. main.metadata = main.metadata.update((2, 0), {'row': '3a'})
  2092. main.metadata = main.metadata.update((0, metadata_base.ALL_ELEMENTS), {'all_elements_on_row': 'rowA'})
  2093. main_metadata_before = main.metadata.to_internal_json_structure()
  2094. columns = container.DataFrame({'a2': [11, 12, 13], 'b2': [14, 15, 16]}, {
  2095. 'top_level': 'columns',
  2096. }, generate_metadata=False)
  2097. columns.metadata = columns.metadata.generate(columns, compact=False)
  2098. columns.metadata = columns.metadata.update_column(0, {'name': 'aaa222'})
  2099. columns.metadata = columns.metadata.update_column(1, {'name': 'bbb222'})
  2100. columns.metadata = columns.metadata.update((0, 1), {'row': '1b'})
  2101. columns.metadata = columns.metadata.update((1, 1), {'row': '2b'})
  2102. columns.metadata = columns.metadata.update((2, 1), {'row': '3b'})
  2103. columns.metadata = columns.metadata.update((0, metadata_base.ALL_ELEMENTS), {'all_elements_on_row': 'rowB'})
  2104. columns_metadata_before = columns.metadata.to_internal_json_structure()
  2105. new_main = main.replace_columns(columns, [1, 2])
  2106. self.assertEqual(new_main.values.tolist(), [[1, 11, 14], [2, 12, 15], [3, 13, 16]])
  2107. self.assertEqual(utils.to_json_structure(new_main.metadata.to_internal_simple_structure()), [{
  2108. 'selector': [], 'metadata': {
  2109. 'top_level': 'main',
  2110. 'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
  2111. 'structural_type': 'd3m.container.pandas.DataFrame',
  2112. 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'],
  2113. 'dimension': {
  2114. 'name': 'rows',
  2115. 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'],
  2116. 'length': 3,
  2117. },
  2118. },
  2119. }, {
  2120. 'selector': ['__ALL_ELEMENTS__'],
  2121. 'metadata': {
  2122. 'dimension': {
  2123. 'name': 'columns',
  2124. 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'],
  2125. 'length': 3,
  2126. },
  2127. },
  2128. }, {
  2129. 'selector': ['__ALL_ELEMENTS__', 0],
  2130. 'metadata': {'name': 'aaa111', 'structural_type': 'numpy.int64'},
  2131. }, {
  2132. 'selector': ['__ALL_ELEMENTS__', 1],
  2133. 'metadata': {'name': 'aaa222', 'structural_type': 'numpy.int64'},
  2134. }, {
  2135. 'selector': ['__ALL_ELEMENTS__', 2],
  2136. 'metadata': {'name': 'bbb222', 'structural_type': 'numpy.int64'},
  2137. }, {
  2138. 'selector': [0, '__ALL_ELEMENTS__'],
  2139. 'metadata': {'all_elements_on_row': 'rowA'},
  2140. }, {
  2141. 'selector': [0, 0],
  2142. 'metadata': {'row': '1a'},
  2143. }, {
  2144. 'selector': [0, 1],
  2145. 'metadata': {'all_elements_on_row': 'rowB'},
  2146. }, {
  2147. 'selector': [0, 2],
  2148. 'metadata': {'row': '1b', 'all_elements_on_row': 'rowB'},
  2149. }, {
  2150. 'selector': [1, 0],
  2151. 'metadata': {'row': '2a'},
  2152. }, {
  2153. 'selector': [1, 2],
  2154. 'metadata': {'row': '2b'},
  2155. }, {
  2156. 'selector': [2, 0],
  2157. 'metadata': {'row': '3a'},
  2158. }, {
  2159. 'selector': [2, 2],
  2160. 'metadata': {'row': '3b'},
  2161. }])
  2162. self.assertEqual(main_metadata_before, main.metadata.to_internal_json_structure())
  2163. self.assertEqual(columns_metadata_before, columns.metadata.to_internal_json_structure())
  2164. new_main = main.replace_columns(columns, [0, 2])
  2165. self.assertEqual(new_main.values.tolist(), [[11, 4, 14], [12, 5, 15], [13, 6, 16]])
  2166. self.assertEqual(utils.to_json_structure(new_main.metadata.to_internal_simple_structure()), [{
  2167. 'selector': [], 'metadata': {
  2168. 'top_level': 'main',
  2169. 'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
  2170. 'structural_type': 'd3m.container.pandas.DataFrame',
  2171. 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'],
  2172. 'dimension': {
  2173. 'name': 'rows',
  2174. 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'],
  2175. 'length': 3,
  2176. },
  2177. },
  2178. }, {
  2179. 'selector': ['__ALL_ELEMENTS__'],
  2180. 'metadata': {
  2181. 'dimension': {
  2182. 'name': 'columns',
  2183. 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'],
  2184. 'length': 3,
  2185. },
  2186. },
  2187. }, {
  2188. 'selector': ['__ALL_ELEMENTS__', 0],
  2189. 'metadata': {'name': 'aaa222', 'structural_type': 'numpy.int64'},
  2190. }, {
  2191. 'selector': ['__ALL_ELEMENTS__', 1],
  2192. 'metadata': {
  2193. 'name': 'bbb111',
  2194. 'extra': 'b_column',
  2195. 'structural_type': 'numpy.int64',
  2196. },
  2197. }, {
  2198. 'selector': ['__ALL_ELEMENTS__', 2],
  2199. 'metadata': {'name': 'bbb222', 'structural_type': 'numpy.int64'},
  2200. }, {
  2201. 'selector': [0, '__ALL_ELEMENTS__'],
  2202. 'metadata': {'all_elements_on_row': 'rowA'},
  2203. }, {
  2204. 'selector': [0, 0],
  2205. 'metadata': {'all_elements_on_row': 'rowB'},
  2206. }, {
  2207. 'selector': [0, 2],
  2208. 'metadata': {'row': '1b', 'all_elements_on_row': 'rowB'},
  2209. }, {
  2210. 'selector': [1, 2],
  2211. 'metadata': {'row': '2b'},
  2212. }, {
  2213. 'selector': [2, 2],
  2214. 'metadata': {'row': '3b'},
  2215. }])
  2216. self.assertEqual(main_metadata_before, main.metadata.to_internal_json_structure())
  2217. self.assertEqual(columns_metadata_before, columns.metadata.to_internal_json_structure())
  2218. new_main = main.replace_columns(columns, [1])
  2219. self.assertEqual(new_main.values.tolist(), [[1, 11, 14, 7], [2, 12, 15, 8], [3, 13, 16, 9]])
  2220. self.assertEqual(utils.to_json_structure(new_main.metadata.to_internal_simple_structure()), [{
  2221. 'selector': [], 'metadata': {
  2222. 'top_level': 'main',
  2223. 'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
  2224. 'structural_type': 'd3m.container.pandas.DataFrame',
  2225. 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'],
  2226. 'dimension': {
  2227. 'name': 'rows',
  2228. 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'],
  2229. 'length': 3,
  2230. },
  2231. },
  2232. }, {
  2233. 'selector': ['__ALL_ELEMENTS__'],
  2234. 'metadata': {
  2235. 'dimension': {
  2236. 'name': 'columns',
  2237. 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'],
  2238. 'length': 4,
  2239. },
  2240. },
  2241. }, {
  2242. 'selector': ['__ALL_ELEMENTS__', 0],
  2243. 'metadata': {'name': 'aaa111', 'structural_type': 'numpy.int64'},
  2244. }, {
  2245. 'selector': ['__ALL_ELEMENTS__', 1],
  2246. 'metadata': {'name': 'aaa222', 'structural_type': 'numpy.int64'},
  2247. }, {
  2248. 'selector': ['__ALL_ELEMENTS__', 2],
  2249. 'metadata': {'name': 'bbb222', 'structural_type': 'numpy.int64'},
  2250. }, {
  2251. 'selector': ['__ALL_ELEMENTS__', 3],
  2252. 'metadata': {'name': 'ccc111', 'structural_type': 'numpy.int64'},
  2253. }, {
  2254. 'selector': [0, '__ALL_ELEMENTS__'],
  2255. 'metadata': {'all_elements_on_row': 'rowA'},
  2256. }, {
  2257. 'selector': [0, 0],
  2258. 'metadata': {'row': '1a'},
  2259. }, {
  2260. 'selector': [0, 1],
  2261. 'metadata': {'all_elements_on_row': 'rowB'},
  2262. }, {
  2263. 'selector': [0, 2],
  2264. 'metadata': {'row': '1b', 'all_elements_on_row': 'rowB'},
  2265. }, {
  2266. 'selector': [0, 3],
  2267. 'metadata': {'all_elements_on_row': 'rowA'},
  2268. }, {
  2269. 'selector': [1, 0],
  2270. 'metadata': {'row': '2a'},
  2271. }, {
  2272. 'selector': [1, 2],
  2273. 'metadata': {'row': '2b'},
  2274. }, {
  2275. 'selector': [2, 0],
  2276. 'metadata': {'row': '3a'},
  2277. }, {
  2278. 'selector': [2, 2],
  2279. 'metadata': {'row': '3b'},
  2280. }])
  2281. self.assertEqual(main_metadata_before, main.metadata.to_internal_json_structure())
  2282. self.assertEqual(columns_metadata_before, columns.metadata.to_internal_json_structure())
  2283. new_main = main.replace_columns(columns, [0, 1, 2])
  2284. self.assertEqual(new_main.values.tolist(), [[11, 14], [12, 15], [13, 16]])
  2285. self.assertEqual(utils.to_json_structure(new_main.metadata.to_internal_simple_structure()), [{
  2286. 'selector': [], 'metadata': {
  2287. 'top_level': 'main',
  2288. 'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
  2289. 'structural_type': 'd3m.container.pandas.DataFrame',
  2290. 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'],
  2291. 'dimension': {
  2292. 'name': 'rows',
  2293. 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'],
  2294. 'length': 3,
  2295. },
  2296. },
  2297. }, {
  2298. 'selector': ['__ALL_ELEMENTS__'],
  2299. 'metadata': {
  2300. 'dimension': {
  2301. 'name': 'columns',
  2302. 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'],
  2303. 'length': 2,
  2304. },
  2305. },
  2306. }, {
  2307. 'selector': ['__ALL_ELEMENTS__', 0],
  2308. 'metadata': {'name': 'aaa222', 'structural_type': 'numpy.int64'},
  2309. }, {
  2310. 'selector': ['__ALL_ELEMENTS__', 1],
  2311. 'metadata': {'name': 'bbb222', 'structural_type': 'numpy.int64'},
  2312. }, {
  2313. 'selector': [0, '__ALL_ELEMENTS__'],
  2314. 'metadata': {'all_elements_on_row': 'rowA'},
  2315. }, {
  2316. 'selector': [0, 0],
  2317. 'metadata': {'all_elements_on_row': 'rowB'},
  2318. }, {
  2319. 'selector': [0, 1],
  2320. 'metadata': {'row': '1b', 'all_elements_on_row': 'rowB'},
  2321. }, {
  2322. 'selector': [1, 1],
  2323. 'metadata': {'row': '2b'},
  2324. }, {
  2325. 'selector': [2, 1],
  2326. 'metadata': {'row': '3b'},
  2327. }])
  2328. self.assertEqual(main_metadata_before, main.metadata.to_internal_json_structure())
  2329. self.assertEqual(columns_metadata_before, columns.metadata.to_internal_json_structure())
  def test_select_columns_empty(self):
      """Selecting zero columns is rejected unless ``allow_empty_columns=True`` is passed.

      Checks both the DataFrame-level and the metadata-level ``select_columns``
      for the default rejection, then verifies the shape and the metadata of
      the result when an empty selection is explicitly allowed.
      """
      data = container.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6], 'c': [7, 8, 9]}, generate_metadata=True)

      # By default an empty list of columns must raise, on the value and on its metadata.
      with self.assertRaises(Exception):
          data.select_columns([])

      with self.assertRaises(Exception):
          data.metadata.select_columns([])

      selected = data.select_columns([], allow_empty_columns=True)

      # All three rows are kept, no columns remain.
      self.assertEqual(selected.shape, (3, 0))

      self.assertEqual(utils.to_json_structure(selected.metadata.to_internal_simple_structure()), [{
          'selector': [],
          'metadata': {
              'dimension': {
                  'length': 3,
                  'name': 'rows',
                  'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'],
              },
              # Use the shared constant, like the other metadata assertions in this file,
              # instead of a hard-coded schema URL.
              'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
              'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'],
              'structural_type': 'd3m.container.pandas.DataFrame',
          },
      }, {
          'selector': ['__ALL_ELEMENTS__'],
          'metadata': {
              'dimension': {
                  'length': 0,
                  'name': 'columns',
                  'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'],
              },
          },
      }])
  def test_dataframe_select_copy(self):
      """Assigning into a ``select_columns`` result emits no warning and leaves the source unchanged."""
      df = container.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]})
      selection = df.select_columns([0])

      with warnings.catch_warnings(record=True) as w:
          # Record every warning: without resetting the filter, an active "ignore" filter or a
          # "once per location" registry entry left by an earlier test could hide the warning
          # and the length check below would pass vacuously.
          warnings.simplefilter('always')

          selection.iloc[:, 0] = selection.iloc[:, 0].map(lambda x: x + 1)

      # Show what was captured if the assertion fails.
      self.assertEqual(len(w), 0, [str(caught.message) for caught in w])

      # The selection was modified in place...
      self.assertEqual(selection.values.tolist(), [[2], [3], [4]])
      # ...while the DataFrame it was selected from still has its original values.
      self.assertEqual(df.values.tolist(), [[1, 4], [2, 5], [3, 6]])
  def test_save_container_empty_dataset(self):
      """Saving a Dataset built from an empty dict into a temporary directory must not raise."""
      empty_dataset = container.Dataset({}, generate_metadata=True)

      # The temporary directory (and anything saved under it) is removed on exit.
      with tempfile.TemporaryDirectory() as scratch_dir:
          target_path = os.path.join(scratch_dir, 'dataset')
          container_utils.save_container(empty_dataset, target_path)
  # Run the test cases of this module when the file is executed directly as a script.
  if __name__ == '__main__':
      unittest.main()

全栈的自动化机器学习系统,主要针对多变量时间序列数据的异常检测。TODS提供了详尽的用于构建基于机器学习的异常检测系统的模块,它们包括:数据处理(data processing),时间序列处理(time series processing),特征分析(feature analysis),检测算法(detection algorithms)和强化模块(reinforcement module)。这些模块所提供的功能包括常见的数据预处理、时间序列数据的平滑或变换,从时域或频域中抽取特征、多种多样的检测算