|
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035 |
import typing
import unittest

from d3m import container, utils as d3m_utils
from d3m.base import utils
from d3m.metadata import base as metadata_base


class TestBaseUtils(unittest.TestCase):
    """
    Tests for ``utils.combine_columns`` (``utils`` is ``d3m.base.utils``).

    Every scenario is checked twice: once with metadata generated with
    ``compact=True`` and once with ``compact=False``. The two variants expect
    the same values and column names and differ only in where the
    ``structural_type`` of the cells is recorded, so the expectations are
    described once (``_COMBINE_SCENARIOS``) and expanded by
    ``_expected_metadata``.
    """

    # The compared metadata structures are large; never truncate the diff on failure.
    maxDiff = None

    _ALL_ELEMENTS = '__ALL_ELEMENTS__'
    _CELL_TYPE = 'numpy.int64'
    # All fixture frames used in this class have exactly three rows.
    _ROW_COUNT = 3

    # One entry per ``combine_columns`` call made by the non-index tests:
    #
    #   (return_result, column_indices, expected "top_level", expected rows, expected columns)
    #
    # Each expected column is ``(metadata, explicit_type)``. ``metadata`` is the column metadata
    # without ``structural_type``. ``explicit_type`` is ``True`` when the column is expected to carry
    # its own ``structural_type`` entry even with compact metadata (with non-compact metadata every
    # column is expected to carry it).
    _COMBINE_SCENARIOS = (
        ('append', (1, 2), 'main', [
            [1, 4, 7, 10, 13, 21, 24, 31, 34],
            [2, 5, 8, 11, 14, 22, 25, 32, 35],
            [3, 6, 9, 12, 15, 23, 26, 33, 36],
        ], (
            ({'name': 'aaa111'}, False),
            ({'name': 'bbb111', 'extra': 'b_column'}, False),
            ({'name': 'ccc111'}, False),
            ({'name': 'd1'}, False),
            ({'name': 'e1'}, False),
            ({'name': 'aaa222'}, True),
            ({'name': 'bbb222'}, True),
            ({'name': 'aaa333'}, True),
            ({'name': 'bbb333'}, True),
        )),
        ('new', (1, 2), 'columns2', [
            [21, 24, 31, 34],
            [22, 25, 32, 35],
            [23, 26, 33, 36],
        ], (
            ({'name': 'aaa222'}, False),
            ({'name': 'bbb222'}, False),
            ({'name': 'aaa333'}, True),
            ({'name': 'bbb333'}, True),
        )),
        ('replace', (1, 2), 'main', [
            [1, 21, 24, 31, 34, 10, 13],
            [2, 22, 25, 32, 35, 11, 14],
            [3, 23, 26, 33, 36, 12, 15],
        ], (
            ({'name': 'aaa111'}, False),
            ({'name': 'aaa222'}, True),
            ({'name': 'bbb222'}, True),
            ({'name': 'aaa333'}, True),
            ({'name': 'bbb333'}, True),
            ({'name': 'd1'}, True),
            ({'name': 'e1'}, True),
        )),
        ('replace', (0, 1, 2, 3, 4), 'main', [
            [21, 24, 31, 34],
            [22, 25, 32, 35],
            [23, 26, 33, 36],
        ], (
            ({'name': 'aaa222'}, True),
            ({'name': 'bbb222'}, True),
            ({'name': 'aaa333'}, True),
            ({'name': 'bbb333'}, True),
        )),
        ('replace', (4,), 'main', [
            [1, 4, 7, 10, 21, 24, 31, 34],
            [2, 5, 8, 11, 22, 25, 32, 35],
            [3, 6, 9, 12, 23, 26, 33, 36],
        ], (
            ({'name': 'aaa111'}, False),
            ({'name': 'bbb111', 'extra': 'b_column'}, False),
            ({'name': 'ccc111'}, False),
            ({'name': 'd1'}, False),
            ({'name': 'aaa222'}, True),
            ({'name': 'bbb222'}, True),
            ({'name': 'aaa333'}, True),
            ({'name': 'bbb333'}, True),
        )),
        ('replace', (0, 2, 4), 'main', [
            [21, 4, 24, 10, 31, 34],
            [22, 5, 25, 11, 32, 35],
            [23, 6, 26, 12, 33, 36],
        ], (
            ({'name': 'aaa222'}, True),
            ({'name': 'bbb111', 'extra': 'b_column'}, False),
            ({'name': 'bbb222'}, True),
            ({'name': 'd1'}, False),
            ({'name': 'aaa333'}, True),
            ({'name': 'bbb333'}, True),
        )),
    )

    @staticmethod
    def _index_column() -> typing.Dict[str, typing.Any]:
        """Return fresh column metadata for the ``d3mIndex`` column."""
        return {
            'name': 'd3mIndex',
            'semantic_types': ['http://schema.org/Integer', 'https://metadata.datadrivendiscovery.org/types/PrimaryKey'],
        }

    @staticmethod
    def _attribute_column(name: str) -> typing.Dict[str, typing.Any]:
        """Return fresh column metadata for an attribute column called ``name``."""
        return {
            'name': name,
            'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute'],
        }

    @staticmethod
    def _generate_metadata(frame: typing.Any, column_metadata: typing.Sequence[typing.Dict[str, typing.Any]], *, compact: bool) -> typing.Any:
        """
        Generate metadata for ``frame`` and then update its leading columns.

        ``column_metadata[i]`` is applied to column ``i``; columns beyond the
        end of the sequence keep whatever ``generate`` produced for them.
        Returns ``frame`` for convenience.
        """
        frame.metadata = frame.metadata.generate(frame, compact=compact)
        for column_index, metadata in enumerate(column_metadata):
            frame.metadata = frame.metadata.update_column(column_index, metadata)
        return frame

    def _combine_frames(self, compact: bool) -> typing.Tuple[typing.Any, typing.Any, typing.Any]:
        """Build the ``main``, ``columns2`` and ``columns3`` fixture frames."""
        main = container.DataFrame({'a1': [1, 2, 3], 'b1': [4, 5, 6], 'c1': [7, 8, 9], 'd1': [10, 11, 12], 'e1': [13, 14, 15]}, {
            'top_level': 'main',
        }, generate_metadata=False)
        # Only the first three columns are renamed; 'd1' and 'e1' keep their generated names.
        self._generate_metadata(main, [
            {'name': 'aaa111'},
            {'name': 'bbb111', 'extra': 'b_column'},
            {'name': 'ccc111'},
        ], compact=compact)

        columns2 = container.DataFrame({'a2': [21, 22, 23], 'b2': [24, 25, 26]}, {
            'top_level': 'columns2',
        }, generate_metadata=False)
        self._generate_metadata(columns2, [{'name': 'aaa222'}, {'name': 'bbb222'}], compact=compact)

        columns3 = container.DataFrame({'a3': [31, 32, 33], 'b3': [34, 35, 36]}, {
            'top_level': 'columns3',
        }, generate_metadata=False)
        self._generate_metadata(columns3, [{'name': 'aaa333'}, {'name': 'bbb333'}], compact=compact)

        return main, columns2, columns3

    def _expected_metadata(
        self,
        columns: typing.Sequence[typing.Tuple[typing.Dict[str, typing.Any], bool]],
        *,
        compact: bool,
        top_level: typing.Optional[str] = None,
    ) -> typing.List[typing.Dict[str, typing.Any]]:
        """
        Build the expected JSON structure of a result's metadata.

        Parameters
        ----------
        columns:
            ``(metadata, explicit_type)`` pairs, one per expected column, in
            column order (see ``_COMBINE_SCENARIOS``).
        compact:
            Whether the fixtures' metadata was generated compactly. When true,
            a shared ``structural_type`` entry for all cells is expected and
            only columns flagged ``explicit_type`` repeat it; otherwise every
            column is expected to carry it.
        top_level:
            Expected ``top_level`` value of the table metadata, or ``None``
            when the table metadata is expected to have no such key.

        Returns
        -------
        A new list of ``{'selector': ..., 'metadata': ...}`` entries; the dicts
        passed in ``columns`` are copied, never modified.
        """
        table_metadata = {
            'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
            'structural_type': 'd3m.container.pandas.DataFrame',
            'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'],
            'dimension': {
                'name': 'rows',
                'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'],
                'length': self._ROW_COUNT,
            },
        }  # type: typing.Dict[str, typing.Any]
        if top_level is not None:
            table_metadata['top_level'] = top_level

        expected = [{
            'selector': [],
            'metadata': table_metadata,
        }, {
            'selector': [self._ALL_ELEMENTS],
            'metadata': {
                'dimension': {
                    'name': 'columns',
                    'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'],
                    'length': len(columns),
                },
            },
        }]  # type: typing.List[typing.Dict[str, typing.Any]]

        if compact:
            expected.append({
                'selector': [self._ALL_ELEMENTS, self._ALL_ELEMENTS],
                'metadata': {
                    'structural_type': self._CELL_TYPE,
                },
            })

        for column_index, (metadata, explicit_type) in enumerate(columns):
            # Copy so that the shared scenario table is never mutated.
            column_metadata = dict(metadata)
            if explicit_type or not compact:
                column_metadata['structural_type'] = self._CELL_TYPE
            expected.append({
                'selector': [self._ALL_ELEMENTS, column_index],
                'metadata': column_metadata,
            })

        return expected

    def _check_combine_columns(self, compact: bool) -> None:
        """
        Run every entry of ``_COMBINE_SCENARIOS`` against one set of fixtures.

        The same fixture frames are reused for all scenarios, so the
        expectations also depend on earlier calls leaving them unchanged.
        """
        main, columns2, columns3 = self._combine_frames(compact)

        for return_result, column_indices, top_level, rows, columns in self._COMBINE_SCENARIOS:
            # Each scenario is reported on its own, so one failure does not hide the others.
            with self.subTest(return_result=return_result, column_indices=column_indices, compact=compact):
                # Fresh lists are passed on every call, as independent literals would be.
                result = utils.combine_columns(main, list(column_indices), [columns2, columns3], return_result=return_result, add_index_columns=False)

                self.assertEqual(result.values.tolist(), rows)
                self.assertEqual(
                    d3m_utils.to_json_structure(result.metadata.to_internal_simple_structure()),
                    self._expected_metadata(columns, compact=compact, top_level=top_level),
                )

    def _check_combine_columns_new_with_index(self, compact: bool) -> None:
        """
        Check ``return_result='new'`` with ``add_index_columns=True``.

        Both frames carry a ``d3mIndex`` primary-key column. The expected
        result holds exactly the two columns of ``columns`` (index and ``b2``)
        and has no ``top_level`` key in its table metadata.
        """
        main = container.DataFrame({'d3mIndex': [1, 2, 3], 'b1': [4, 5, 6], 'c1': [7, 8, 9]}, columns=['d3mIndex', 'b1', 'c1'], generate_metadata=False)
        self._generate_metadata(main, [
            self._index_column(),
            self._attribute_column('b1'),
            self._attribute_column('c1'),
        ], compact=compact)

        columns = container.DataFrame({'d3mIndex': [1, 2, 3], 'b2': [4, 5, 6]}, columns=['d3mIndex', 'b2'], generate_metadata=False)
        self._generate_metadata(columns, [
            self._index_column(),
            self._attribute_column('b2'),
        ], compact=compact)

        result = utils.combine_columns(main, [], [columns], return_result='new', add_index_columns=True)

        self.assertEqual(result.values.tolist(), [
            [1, 4],
            [2, 5],
            [3, 6],
        ])

        self.assertEqual(
            d3m_utils.to_json_structure(result.metadata.to_internal_simple_structure()),
            self._expected_metadata([
                (self._index_column(), False),
                (self._attribute_column('b2'), False),
            ], compact=compact),
        )

    def test_combine_columns_compact_metadata(self):
        """All append/new/replace scenarios with compactly generated metadata."""
        self._check_combine_columns(compact=True)

    def test_combine_columns_noncompact_metadata(self):
        """All append/new/replace scenarios with non-compactly generated metadata."""
        self._check_combine_columns(compact=False)

    def test_combine_columns_new_with_index_compact_metadata(self):
        """``new`` result with index columns added, compactly generated metadata."""
        self._check_combine_columns_new_with_index(compact=True)

    def test_combine_columns_new_with_index_noncompact_metadata(self):
        """``new`` result with index columns added, non-compactly generated metadata."""
        self._check_combine_columns_new_with_index(compact=False)
# Run the whole test module when this file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
|