You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

test_base_utils.py 40 kB

first commit Former-commit-id: 08bc23ba02cffbce3cf63962390a65459a132e48 [formerly 0795edd4834b9b7dc66db8d10d4cbaf42bbf82cb] [formerly b5010b42541add7e2ea2578bf2da537efc457757 [formerly a7ca09c2c34c4fc8b3d8e01fcfa08eeeb2cae99d]] [formerly 615058473a2177ca5b89e9edbb797f4c2a59c7e5 [formerly 743d8dfc6843c4c205051a8ab309fbb2116c895e] [formerly bb0ea98b1e14154ef464e2f7a16738705894e54b [formerly 960a69da74b81ef8093820e003f2d6c59a34974c]]] [formerly 2fa3be52c1b44665bc81a7cc7d4cea4bbf0d91d5 [formerly 2054589f0898627e0a17132fd9d4cc78efc91867] [formerly 3b53730e8a895e803dfdd6ca72bc05e17a4164c1 [formerly 8a2fa8ab7baf6686d21af1f322df46fd58c60e69]] [formerly 87d1e3a07a19d03c7d7c94d93ab4fa9f58dada7c [formerly f331916385a5afac1234854ee8d7f160f34b668f] [formerly 69fb3c78a483343f5071da4f7e2891b83a49dd18 [formerly 386086f05aa9487f65bce2ee54438acbdce57650]]]] Former-commit-id: a00aed8c934a6460c4d9ac902b9a74a3d6864697 [formerly 26fdeca29c2f07916d837883983ca2982056c78e] [formerly 0e3170d41a2f99ecf5c918183d361d4399d793bf [formerly 3c12ad4c88ac5192e0f5606ac0d88dd5bf8602dc]] [formerly d5894f84f2fd2e77a6913efdc5ae388cf1be0495 [formerly ad3e7bc670ff92c992730d29c9d3aa1598d844e8] [formerly 69fb3c78a483343f5071da4f7e2891b83a49dd18]] Former-commit-id: 3c19c9fae64f6106415fbc948a4dc613b9ee12f8 [formerly 467ddc0549c74bb007e8f01773bb6dc9103b417d] [formerly 5fa518345d958e2760e443b366883295de6d991c [formerly 3530e130b9fdb7280f638dbc2e785d2165ba82aa]] Former-commit-id: 9f5d473d42a435ec0d60149939d09be1acc25d92 [formerly be0b25c4ec2cde052a041baf0e11f774a158105d] Former-commit-id: 9eca71cb73ba9edccd70ac06a3b636b8d4093b04
5 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035
  1. import unittest
  2. from d3m import container, utils as d3m_utils
  3. from d3m.base import utils
  4. from d3m.metadata import base as metadata_base
  5. class TestBaseUtils(unittest.TestCase):
  def test_combine_columns_compact_metadata(self):
      """Check ``utils.combine_columns`` on frames whose metadata was generated with ``compact=True``.

      A five-column ``main`` frame is combined with two two-column frames
      (``columns2`` and ``columns3``) using ``return_result`` values
      ``'append'``, ``'new'`` and ``'replace'`` and several selections of
      column indices. For every scenario both the resulting cell values and
      the full internal metadata structure are compared with the expectation.

      Each scenario runs inside its own ``subTest`` so that a failure in one
      scenario is reported without hiding the outcome of the others.
      """

      def make_frame(data, top_level, column_updates):
          # Build a DataFrame with compact metadata, then apply per-column
          # metadata updates in column order (index 0, 1, ...).
          frame = container.DataFrame(data, {'top_level': top_level}, generate_metadata=False)
          frame.metadata = frame.metadata.generate(frame, compact=True)
          for index, update in enumerate(column_updates):
              frame.metadata = frame.metadata.update_column(index, update)
          return frame

      def inherited(name, **extra):
          # Expected metadata of a column that lists no structural type of
          # its own (only the catch-all entry carries one).
          return {'name': name, **extra}

      def explicit(name, **extra):
          # Expected metadata of a column that lists "numpy.int64" itself.
          return {'name': name, 'structural_type': 'numpy.int64', **extra}

      def expected_metadata(top_level, columns):
          # Expected internal structure: table entry, columns dimension,
          # catch-all cell entry, then one entry per column in order.
          header = [{
              'selector': [],
              'metadata': {
                  'top_level': top_level,
                  'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
                  'structural_type': 'd3m.container.pandas.DataFrame',
                  'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'],
                  'dimension': {
                      'name': 'rows',
                      'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'],
                      'length': 3,
                  },
              },
          }, {
              'selector': ['__ALL_ELEMENTS__'],
              'metadata': {
                  'dimension': {
                      'name': 'columns',
                      'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'],
                      'length': len(columns),
                  },
              },
          }, {
              'selector': ['__ALL_ELEMENTS__', '__ALL_ELEMENTS__'],
              'metadata': {
                  'structural_type': 'numpy.int64',
              },
          }]
          per_column = [
              {'selector': ['__ALL_ELEMENTS__', index], 'metadata': metadata}
              for index, metadata in enumerate(columns)
          ]
          return header + per_column

      main = make_frame(
          {'a1': [1, 2, 3], 'b1': [4, 5, 6], 'c1': [7, 8, 9], 'd1': [10, 11, 12], 'e1': [13, 14, 15]},
          'main',
          [{'name': 'aaa111'}, {'name': 'bbb111', 'extra': 'b_column'}, {'name': 'ccc111'}],
      )
      columns2 = make_frame(
          {'a2': [21, 22, 23], 'b2': [24, 25, 26]},
          'columns2',
          [{'name': 'aaa222'}, {'name': 'bbb222'}],
      )
      columns3 = make_frame(
          {'a3': [31, 32, 33], 'b3': [34, 35, 36]},
          'columns3',
          [{'name': 'aaa333'}, {'name': 'bbb333'}],
      )

      # (column indices, return_result, expected values, expected top_level, expected columns)
      scenarios = [
          ([1, 2], 'append', [
              [1, 4, 7, 10, 13, 21, 24, 31, 34],
              [2, 5, 8, 11, 14, 22, 25, 32, 35],
              [3, 6, 9, 12, 15, 23, 26, 33, 36],
          ], 'main', [
              inherited('aaa111'),
              inherited('bbb111', extra='b_column'),
              inherited('ccc111'),
              inherited('d1'),
              inherited('e1'),
              explicit('aaa222'),
              explicit('bbb222'),
              explicit('aaa333'),
              explicit('bbb333'),
          ]),
          ([1, 2], 'new', [
              [21, 24, 31, 34],
              [22, 25, 32, 35],
              [23, 26, 33, 36],
          ], 'columns2', [
              inherited('aaa222'),
              inherited('bbb222'),
              explicit('aaa333'),
              explicit('bbb333'),
          ]),
          ([1, 2], 'replace', [
              [1, 21, 24, 31, 34, 10, 13],
              [2, 22, 25, 32, 35, 11, 14],
              [3, 23, 26, 33, 36, 12, 15],
          ], 'main', [
              inherited('aaa111'),
              explicit('aaa222'),
              explicit('bbb222'),
              explicit('aaa333'),
              explicit('bbb333'),
              explicit('d1'),
              explicit('e1'),
          ]),
          ([0, 1, 2, 3, 4], 'replace', [
              [21, 24, 31, 34],
              [22, 25, 32, 35],
              [23, 26, 33, 36],
          ], 'main', [
              explicit('aaa222'),
              explicit('bbb222'),
              explicit('aaa333'),
              explicit('bbb333'),
          ]),
          ([4], 'replace', [
              [1, 4, 7, 10, 21, 24, 31, 34],
              [2, 5, 8, 11, 22, 25, 32, 35],
              [3, 6, 9, 12, 23, 26, 33, 36],
          ], 'main', [
              inherited('aaa111'),
              inherited('bbb111', extra='b_column'),
              inherited('ccc111'),
              inherited('d1'),
              explicit('aaa222'),
              explicit('bbb222'),
              explicit('aaa333'),
              explicit('bbb333'),
          ]),
          ([0, 2, 4], 'replace', [
              [21, 4, 24, 10, 31, 34],
              [22, 5, 25, 11, 32, 35],
              [23, 6, 26, 12, 33, 36],
          ], 'main', [
              explicit('aaa222'),
              inherited('bbb111', extra='b_column'),
              explicit('bbb222'),
              inherited('d1'),
              explicit('aaa333'),
              explicit('bbb333'),
          ]),
      ]

      for column_indices, return_result, expected_values, top_level, expected_columns in scenarios:
          with self.subTest(column_indices=column_indices, return_result=return_result):
              result = utils.combine_columns(
                  main, column_indices, [columns2, columns3],
                  return_result=return_result, add_index_columns=False,
              )

              self.assertEqual(result.values.tolist(), expected_values)
              self.assertEqual(
                  d3m_utils.to_json_structure(result.metadata.to_internal_simple_structure()),
                  expected_metadata(top_level, expected_columns),
              )
  def test_combine_columns_noncompact_metadata(self):
      """Check ``utils.combine_columns`` on frames whose metadata was generated with ``compact=False``.

      A five-column ``main`` frame is combined with two two-column frames
      (``columns2`` and ``columns3``) using ``return_result`` values
      ``'append'``, ``'new'`` and ``'replace'`` and several selections of
      column indices. For every scenario both the resulting cell values and
      the full internal metadata structure are compared with the expectation.
      In these expectations every column entry lists its own
      ``structural_type`` and there is no catch-all cell entry.

      Each scenario runs inside its own ``subTest`` so that a failure in one
      scenario is reported without hiding the outcome of the others.
      """

      def make_frame(data, top_level, column_updates):
          # Build a DataFrame with non-compact metadata, then apply
          # per-column metadata updates in column order (index 0, 1, ...).
          frame = container.DataFrame(data, {'top_level': top_level}, generate_metadata=False)
          frame.metadata = frame.metadata.generate(frame, compact=False)
          for index, update in enumerate(column_updates):
              frame.metadata = frame.metadata.update_column(index, update)
          return frame

      def typed(name, **extra):
          # Expected metadata of one column: its name, any extra keys, and
          # an explicit "numpy.int64" structural type.
          return {'name': name, 'structural_type': 'numpy.int64', **extra}

      def expected_metadata(top_level, columns):
          # Expected internal structure: table entry, columns dimension,
          # then one entry per column in order.
          header = [{
              'selector': [],
              'metadata': {
                  'top_level': top_level,
                  'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
                  'structural_type': 'd3m.container.pandas.DataFrame',
                  'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'],
                  'dimension': {
                      'name': 'rows',
                      'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'],
                      'length': 3,
                  },
              },
          }, {
              'selector': ['__ALL_ELEMENTS__'],
              'metadata': {
                  'dimension': {
                      'name': 'columns',
                      'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'],
                      'length': len(columns),
                  },
              },
          }]
          per_column = [
              {'selector': ['__ALL_ELEMENTS__', index], 'metadata': metadata}
              for index, metadata in enumerate(columns)
          ]
          return header + per_column

      main = make_frame(
          {'a1': [1, 2, 3], 'b1': [4, 5, 6], 'c1': [7, 8, 9], 'd1': [10, 11, 12], 'e1': [13, 14, 15]},
          'main',
          [{'name': 'aaa111'}, {'name': 'bbb111', 'extra': 'b_column'}, {'name': 'ccc111'}],
      )
      columns2 = make_frame(
          {'a2': [21, 22, 23], 'b2': [24, 25, 26]},
          'columns2',
          [{'name': 'aaa222'}, {'name': 'bbb222'}],
      )
      columns3 = make_frame(
          {'a3': [31, 32, 33], 'b3': [34, 35, 36]},
          'columns3',
          [{'name': 'aaa333'}, {'name': 'bbb333'}],
      )

      # (column indices, return_result, expected values, expected top_level, expected columns)
      scenarios = [
          ([1, 2], 'append', [
              [1, 4, 7, 10, 13, 21, 24, 31, 34],
              [2, 5, 8, 11, 14, 22, 25, 32, 35],
              [3, 6, 9, 12, 15, 23, 26, 33, 36],
          ], 'main', [
              typed('aaa111'),
              typed('bbb111', extra='b_column'),
              typed('ccc111'),
              typed('d1'),
              typed('e1'),
              typed('aaa222'),
              typed('bbb222'),
              typed('aaa333'),
              typed('bbb333'),
          ]),
          ([1, 2], 'new', [
              [21, 24, 31, 34],
              [22, 25, 32, 35],
              [23, 26, 33, 36],
          ], 'columns2', [
              typed('aaa222'),
              typed('bbb222'),
              typed('aaa333'),
              typed('bbb333'),
          ]),
          ([1, 2], 'replace', [
              [1, 21, 24, 31, 34, 10, 13],
              [2, 22, 25, 32, 35, 11, 14],
              [3, 23, 26, 33, 36, 12, 15],
          ], 'main', [
              typed('aaa111'),
              typed('aaa222'),
              typed('bbb222'),
              typed('aaa333'),
              typed('bbb333'),
              typed('d1'),
              typed('e1'),
          ]),
          ([0, 1, 2, 3, 4], 'replace', [
              [21, 24, 31, 34],
              [22, 25, 32, 35],
              [23, 26, 33, 36],
          ], 'main', [
              typed('aaa222'),
              typed('bbb222'),
              typed('aaa333'),
              typed('bbb333'),
          ]),
          ([4], 'replace', [
              [1, 4, 7, 10, 21, 24, 31, 34],
              [2, 5, 8, 11, 22, 25, 32, 35],
              [3, 6, 9, 12, 23, 26, 33, 36],
          ], 'main', [
              typed('aaa111'),
              typed('bbb111', extra='b_column'),
              typed('ccc111'),
              typed('d1'),
              typed('aaa222'),
              typed('bbb222'),
              typed('aaa333'),
              typed('bbb333'),
          ]),
          ([0, 2, 4], 'replace', [
              [21, 4, 24, 10, 31, 34],
              [22, 5, 25, 11, 32, 35],
              [23, 6, 26, 12, 33, 36],
          ], 'main', [
              typed('aaa222'),
              typed('bbb111', extra='b_column'),
              typed('bbb222'),
              typed('d1'),
              typed('aaa333'),
              typed('bbb333'),
          ]),
      ]

      for column_indices, return_result, expected_values, top_level, expected_columns in scenarios:
          with self.subTest(column_indices=column_indices, return_result=return_result):
              result = utils.combine_columns(
                  main, column_indices, [columns2, columns3],
                  return_result=return_result, add_index_columns=False,
              )

              self.assertEqual(result.values.tolist(), expected_values)
              self.assertEqual(
                  d3m_utils.to_json_structure(result.metadata.to_internal_simple_structure()),
                  expected_metadata(top_level, expected_columns),
              )
  872. def test_combine_columns_new_with_index_compact_metadata(self):
  873. main = container.DataFrame({'d3mIndex': [1, 2, 3], 'b1': [4, 5, 6], 'c1': [7, 8, 9]}, columns=['d3mIndex', 'b1', 'c1'], generate_metadata=False)
  874. main.metadata = main.metadata.generate(main, compact=True)
  875. main.metadata = main.metadata.update_column(0, {'name': 'd3mIndex', 'semantic_types': ['http://schema.org/Integer', 'https://metadata.datadrivendiscovery.org/types/PrimaryKey']})
  876. main.metadata = main.metadata.update_column(1, {'name': 'b1', 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute']})
  877. main.metadata = main.metadata.update_column(2, {'name': 'c1', 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute']})
  878. columns = container.DataFrame({'d3mIndex': [1, 2, 3], 'b2': [4, 5, 6]}, columns=['d3mIndex', 'b2'], generate_metadata=False)
  879. columns.metadata = columns.metadata.generate(columns, compact=True)
  880. columns.metadata = columns.metadata.update_column(0, {'name': 'd3mIndex', 'semantic_types': ['http://schema.org/Integer', 'https://metadata.datadrivendiscovery.org/types/PrimaryKey']})
  881. columns.metadata = columns.metadata.update_column(1, {'name': 'b2', 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute']})
  882. result = utils.combine_columns(main, [], [columns], return_result='new', add_index_columns=True)
  883. self.assertEqual(result.values.tolist(), [
  884. [1, 4],
  885. [2, 5],
  886. [3, 6],
  887. ])
  888. self.assertEqual(d3m_utils.to_json_structure(result.metadata.to_internal_simple_structure()), [{
  889. 'selector': [],
  890. 'metadata': {
  891. 'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
  892. 'structural_type': 'd3m.container.pandas.DataFrame',
  893. 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'],
  894. 'dimension': {
  895. 'name': 'rows',
  896. 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'],
  897. 'length': 3,
  898. },
  899. },
  900. }, {
  901. 'selector': ['__ALL_ELEMENTS__'],
  902. 'metadata': {
  903. 'dimension': {
  904. 'name': 'columns',
  905. 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'],
  906. 'length': 2,
  907. },
  908. },
  909. }, {
  910. 'selector': ['__ALL_ELEMENTS__', '__ALL_ELEMENTS__'],
  911. 'metadata': {
  912. 'structural_type': 'numpy.int64',
  913. },
  914. }, {
  915. 'selector': ['__ALL_ELEMENTS__', 0],
  916. 'metadata': {
  917. 'name': 'd3mIndex',
  918. 'semantic_types': ['http://schema.org/Integer', 'https://metadata.datadrivendiscovery.org/types/PrimaryKey'],
  919. },
  920. }, {
  921. 'selector': ['__ALL_ELEMENTS__', 1],
  922. 'metadata': {
  923. 'name': 'b2',
  924. 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute'],
  925. },
  926. }])
  927. def test_combine_columns_new_with_index_noncompact_metadata(self):
  928. main = container.DataFrame({'d3mIndex': [1, 2, 3], 'b1': [4, 5, 6], 'c1': [7, 8, 9]}, columns=['d3mIndex', 'b1', 'c1'], generate_metadata=False)
  929. main.metadata = main.metadata.generate(main, compact=False)
  930. main.metadata = main.metadata.update_column(0, {'name': 'd3mIndex', 'semantic_types': ['http://schema.org/Integer', 'https://metadata.datadrivendiscovery.org/types/PrimaryKey']})
  931. main.metadata = main.metadata.update_column(1, {'name': 'b1', 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute']})
  932. main.metadata = main.metadata.update_column(2, {'name': 'c1', 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute']})
  933. columns = container.DataFrame({'d3mIndex': [1, 2, 3], 'b2': [4, 5, 6]}, columns=['d3mIndex', 'b2'], generate_metadata=False)
  934. columns.metadata = columns.metadata.generate(columns, compact=False)
  935. columns.metadata = columns.metadata.update_column(0, {'name': 'd3mIndex', 'semantic_types': ['http://schema.org/Integer', 'https://metadata.datadrivendiscovery.org/types/PrimaryKey']})
  936. columns.metadata = columns.metadata.update_column(1, {'name': 'b2', 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute']})
  937. result = utils.combine_columns(main, [], [columns], return_result='new', add_index_columns=True)
  938. self.assertEqual(result.values.tolist(), [
  939. [1, 4],
  940. [2, 5],
  941. [3, 6],
  942. ])
  943. self.assertEqual(d3m_utils.to_json_structure(result.metadata.to_internal_simple_structure()), [{
  944. 'selector': [],
  945. 'metadata': {
  946. 'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
  947. 'structural_type': 'd3m.container.pandas.DataFrame',
  948. 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'],
  949. 'dimension': {
  950. 'name': 'rows',
  951. 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'],
  952. 'length': 3,
  953. },
  954. },
  955. }, {
  956. 'selector': ['__ALL_ELEMENTS__'],
  957. 'metadata': {
  958. 'dimension': {
  959. 'name': 'columns',
  960. 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'],
  961. 'length': 2,
  962. },
  963. },
  964. }, {
  965. 'selector': ['__ALL_ELEMENTS__', 0],
  966. 'metadata': {
  967. 'name': 'd3mIndex',
  968. 'semantic_types': ['http://schema.org/Integer', 'https://metadata.datadrivendiscovery.org/types/PrimaryKey'],
  969. 'structural_type': 'numpy.int64',
  970. },
  971. }, {
  972. 'selector': ['__ALL_ELEMENTS__', 1],
  973. 'metadata': {
  974. 'name': 'b2',
  975. 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute'],
  976. 'structural_type': 'numpy.int64',
  977. },
  978. }])
  979. if __name__ == '__main__':
  980. unittest.main()

全栈的自动化机器学习系统,主要针对多变量时间序列数据的异常检测。TODS提供了详尽的用于构建基于机器学习的异常检测系统的模块,它们包括:数据处理(data processing)、时间序列处理(time series processing)、特征分析(feature analysis)、检测算法(detection algorithms)和强化模块(reinforcement module)。这些模块所提供的功能包括常见的数据预处理、时间序列数据的平滑或变换、从时域或频域中抽取特征、多种多样的检测算