import logging
import os
import unittest
from cmath import polar

import nimfa
import numpy as np
import pandas as pd
from common_primitives import column_parser, dataset_to_dataframe
from d3m import container, utils
from d3m.container import DataFrame as d3m_dataframe
from d3m.metadata import base as metadata_base
from scipy.fft import fft

import utils as test_utils
from feature_analysis import NonNegativeMatrixFactorization
-
# Module-level constant. Nothing in the visible test references it; it is
# kept so that any external user of this name keeps working.
LENGTH = 1400
-
class NmfTestCase(unittest.TestCase):
    """Check ``NonNegativeMatrixFactorization.produce`` on a tiny two-column frame.

    The test verifies the metadata generated for the input frame, the values
    of the produced frame, the metadata of the produced frame, and finally
    that the primitive's params can be read and written back without error.
    """

    @staticmethod
    def _table_metadata(n_rows, n_columns):
        """Return the expected table-level and column-dimension metadata entries.

        Args:
            n_rows: Value expected under the ``rows`` dimension ``length``.
            n_columns: Value expected under the ``columns`` dimension ``length``.

        Returns:
            A list with two entries: the one for selector ``[]`` and the one
            for selector ``['__ALL_ELEMENTS__']``.
        """
        return [{
            'selector': [],
            'metadata': {
                'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
                'structural_type': 'd3m.container.pandas.DataFrame',
                'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'],
                'dimension': {
                    'name': 'rows',
                    'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'],
                    'length': n_rows,
                },
            },
        }, {
            'selector': ['__ALL_ELEMENTS__'],
            'metadata': {
                'dimension': {
                    'name': 'columns',
                    'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'],
                    'length': n_columns,
                },
            },
        }]

    @staticmethod
    def _column_metadata(index, name, structural_type, semantic_types=None):
        """Return the expected metadata entry for a single column.

        Args:
            index: Column position used in the ``['__ALL_ELEMENTS__', index]`` selector.
            name: Expected column name.
            structural_type: Expected structural type string.
            semantic_types: Optional iterable of semantic type URIs; the
                ``semantic_types`` key is omitted entirely when this is ``None``.

        Returns:
            A dict with ``selector`` and ``metadata`` keys.
        """
        metadata = {'name': name, 'structural_type': structural_type}
        if semantic_types is not None:
            metadata['semantic_types'] = list(semantic_types)
        return {'selector': ['__ALL_ELEMENTS__', index], 'metadata': metadata}

    def test_basic(self):
        """Run the primitive with fixed ``W``/``H`` hyperparameters and compare results."""
        # Show full diffs when one of the large metadata comparisons fails.
        self.maxDiff = None

        main = container.DataFrame(
            {'A': [1, 2, 3], 'B': [4, 5, 6]},
            columns=['A', 'B'],
            generate_metadata=True,
        )

        # The two input columns keep the same expected metadata before and
        # after ``produce``, so the entries are built once and reused.
        input_columns = [
            self._column_metadata(0, 'A', 'numpy.int64'),
            self._column_metadata(1, 'B', 'numpy.int64'),
        ]
        self.assertEqual(
            utils.to_json_structure(main.metadata.to_internal_simple_structure()),
            self._table_metadata(n_rows=3, n_columns=2) + input_columns,
        )

        # Matrices handed to the primitive through the 'W' and 'H' hyperparameters.
        w_matrix = np.array([[1, 0, 1, 0, 1], [1, 0, 1, 0, 1], [1, 0, 1, 0, 1]])
        h_matrix = np.array([[1, 0], [1, 0], [1, 0], [1, 0], [1, 0]])

        nmf_class = NonNegativeMatrixFactorization.NonNegativeMatrixFactorization
        hyperparams_class = nmf_class.metadata.get_hyperparams()
        hp = hyperparams_class.defaults().replace({
            'use_semantic_types': True,
            'use_columns': (0, 1,),
            'return_result': 'append',
            'rank': 5,
            'seed': 'fixed',
            'W': w_matrix,
            'H': h_matrix,
        })
        primitive = nmf_class(hyperparams=hp)
        new_main = primitive.produce(inputs=main).value

        # Expected output: the input columns and the row latent vectors are
        # padded with NaN up to the five rows of the column latent vectors.
        row_latent_large = [0.816725, 1.078965, 1.341205, np.nan, np.nan]
        row_latent_tiny = [3.514284e-16, 2.383547e-16, 2.227207e-16, np.nan, np.nan]
        expected = pd.DataFrame({
            'A': [1, 2, 3, np.nan, np.nan],
            'B': [4, 5, 6, np.nan, np.nan],
            'row_latent_vector_0': row_latent_large,
            'row_latent_vector_1': row_latent_tiny,
            'row_latent_vector_2': row_latent_large,
            'row_latent_vector_3': row_latent_tiny,
            'row_latent_vector_4': row_latent_large,
            'column_latent_vector_0': [0.642626, 0.542312, 0.642626, 0.542312, 0.642626],
            'column_latent_vector_1': [1.534324, 1.848782, 1.534324, 1.848782, 1.534324],
        })
        pd.testing.assert_frame_equal(new_main, expected)

        # The seven appended columns follow the two input columns, hence the
        # selector indices start at 2.
        latent_names = (
            ['row_latent_vector_{}'.format(i) for i in range(5)]
            + ['column_latent_vector_{}'.format(i) for i in range(2)]
        )
        attribute_type = 'https://metadata.datadrivendiscovery.org/types/Attribute'
        latent_columns = [
            self._column_metadata(index, name, 'numpy.float64', [attribute_type])
            for index, name in enumerate(latent_names, start=2)
        ]

        # NOTE: the asserted row dimension is 3 even though the expected frame
        # above has five rows; this expectation is kept exactly as it was.
        self.assertEqual(
            utils.to_json_structure(new_main.metadata.to_internal_simple_structure()),
            self._table_metadata(n_rows=3, n_columns=9) + input_columns + latent_columns,
        )

        # Smoke check only: reading the params and writing them back must not raise.
        params = primitive.get_params()
        primitive.set_params(params=params)
-
-
# Run the test suite when this file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()