| @@ -2209,7 +2209,7 @@ class ConcatDataset(DatasetOp): | |||
| Number, number of batches. | |||
| """ | |||
| children_sizes = [c.get_dataset_size() for c in self.input] | |||
| dataset_size = np.sum(children_sizes) | |||
| dataset_size = sum(children_sizes) | |||
| return dataset_size | |||
| @@ -2219,8 +2219,8 @@ class RenameDataset(DatasetOp): | |||
| Args: | |||
| input_dataset (Dataset): Input Dataset to be Renamed. | |||
| input_column_names (list[str]): list of names of the input columns. | |||
| output_column_names (list[str]): list of names of the output columns. | |||
| input_columns (list[str]): list of names of the input columns. | |||
| output_columns (list[str]): list of names of the output columns. | |||
| """ | |||
| def __init__(self, input_dataset, input_columns, output_columns): | |||
| @@ -4736,15 +4736,15 @@ class _NumpySlicesDataset: | |||
| def __init__(self, data, column_list=None): | |||
| self.column_list = None | |||
| # Convert dict data into tuple | |||
| if isinstance(data, dict) or isinstance(data[0], dict): | |||
| if isinstance(data, dict): | |||
| data = self.process_dict(data) | |||
| if isinstance(data[0], tuple) or isinstance(data, tuple): | |||
| if isinstance(data, tuple): | |||
| self.is_tuple = True | |||
| self.data = data | |||
| if isinstance(data[0], tuple): | |||
| for i in range(len(self.data)): | |||
| self.data[i] = np.array(self.data[i]) | |||
| self.data = () | |||
| data_len = len(data) | |||
| for i in range(data_len): | |||
| self.data = self.data + (np.array(data[i]),) | |||
| else: | |||
| self.is_tuple = False | |||
| self.data = np.array(data) | |||
| @@ -4780,14 +4780,7 @@ class _NumpySlicesDataset: | |||
| """ | |||
| Convert the dict like data into tuple format, when input is a tuple of dict then compose it into a dict first. | |||
| """ | |||
| # When input is a tuple of dict, composing it | |||
| if isinstance(input_data, tuple) and isinstance(input_data[0], dict): | |||
| data_dict = {} | |||
| for d in input_data: | |||
| data_dict.update(d) | |||
| input_data = data_dict | |||
| # convert pandas like dict(has "values" column) into General dict | |||
| # Convert pandas like dict(has "values" column) into General dict | |||
| data_keys = list(input_data.keys()) | |||
| data_col = input_data[data_keys[0]] | |||
| if hasattr(data_col, "values"): | |||
| @@ -4798,13 +4791,13 @@ class _NumpySlicesDataset: | |||
| input_data = new_dict | |||
| # Convert the data in dict into tuple | |||
| data = [] | |||
| data = () | |||
| self.column_list = [] | |||
| keys = input_data.keys() | |||
| for key in keys: | |||
| self.column_list.append(key) | |||
| value = input_data[key] | |||
| data.append(tuple(value)) | |||
| data = data + (list(value),) | |||
| return data | |||
| @@ -4843,7 +4836,7 @@ class NumpySlicesDataset(GeneratorDataset): | |||
| - not allowed | |||
| Args: | |||
| data(list, tuple or dict)Input of Given data, supported data type includes list, tuple, dict and other numpy | |||
| data (list, tuple or dict): Input data. Supported data types include list, tuple, dict and other numpy | |||
| formats. Input data will be sliced along the first dimension to generate rows; loading large data this way | |||
| is not recommended, as all of the data is loaded into memory. | |||
| column_names (list[str], optional): List of column names of the dataset (default=None). If column_names not | |||
| @@ -4867,8 +4860,8 @@ class NumpySlicesDataset(GeneratorDataset): | |||
| >>> # 2) Input data can be a dict, and column_names will be its key | |||
| >>> data = {"a": [1, 2], "b": [3, 4]} | |||
| >>> dataset2 = ds.NumpySlicesDataset(data) | |||
| >>> # 3) Input data can be a tuple (or list of tuple), and each tuple element refers to data in each column | |||
| >>> data = ((1, 2), (3, 4), (5, 6)) | |||
| >>> # 3) Input data can be a tuple of lists (or numpy arrays), each tuple element refers to data in each column | |||
| >>> data = ([1, 2], [3, 4], [5, 6]) | |||
| >>> dataset3 = ds.NumpySlicesDataset(data, column_names=["column_1", "column_2", "column_3"]) | |||
| >>> # 4) Load data from csv file | |||
| >>> import pandas as pd | |||
| @@ -1482,8 +1482,11 @@ def check_numpyslicesdataset(method): | |||
| # check data; required argument | |||
| data = param_dict.get('data') | |||
| if not isinstance(data, (list, tuple, dict, np.ndarray)): | |||
| raise TypeError("Unsupported data type: {}, only support some common python data type, \ | |||
| like list, tuple, dict, and numpy array.".format(type(data))) | |||
| raise TypeError("Unsupported data type: {}, only support some common python data type, " | |||
| "like list, tuple, dict, and numpy array.".format(type(data))) | |||
| if isinstance(data, tuple) and not isinstance(data[0], (list, np.ndarray)): | |||
| raise TypeError("Unsupported data type: when input is tuple, only support some common python " | |||
| "data type, like tuple of lists and tuple of numpy arrays.") | |||
| if not data: | |||
| raise ValueError("Input data is empty.") | |||
| @@ -1497,20 +1500,17 @@ def check_numpyslicesdataset(method): | |||
| if isinstance(data, dict): | |||
| data_column = len(list(data.keys())) | |||
| if column_num != data_column: | |||
| raise ValueError("Num of column is {0}, but required is {1}.".format(column_num, data_column)) | |||
| raise ValueError("Num of input column names is {0}, but required is {1}." | |||
| .format(column_num, data_column)) | |||
| # Consider input is a tuple of dict | |||
| elif isinstance(data[0], dict): | |||
| data_column = sum(len(list(data[i].keys())) for i in range(len(data))) | |||
| if column_num != data_column: | |||
| raise ValueError("Num of column is {0}, but required is {1}.".format(column_num, data_column)) | |||
| elif isinstance(data[0], tuple) or isinstance(data, tuple): | |||
| elif isinstance(data, tuple): | |||
| if column_num != len(data): | |||
| raise ValueError("Num of column is {0}, but required is {1}.".format(column_num, len(data))) | |||
| raise ValueError("Num of input column names is {0}, but required is {1}." | |||
| .format(column_num, len(data))) | |||
| else: | |||
| if column_num != 1: | |||
| raise ValueError("Num of column is {0}, but required is {1} as data is list.".format(column_num, 1)) | |||
| raise ValueError("Num of input column names is {0}, but required is {1} as data is list." | |||
| .format(column_num, 1)) | |||
| return method(*args, **kwargs) | |||
| @@ -81,34 +81,32 @@ def test_numpy_slices_dict_1(): | |||
| assert data[1] == res[i][1] | |||
| def test_numpy_slices_dict_2(): | |||
| logger.info("Test input data is a tuple of Dictionary structure data.") | |||
| def test_numpy_slices_tuple_1(): | |||
| logger.info("Test slicing a list of tuple.") | |||
| data1, data2 = {"a": [1, 2]}, {"b": [3, 4]} | |||
| ds = de.NumpySlicesDataset((data1, data2), column_names=["col1", "col2"], shuffle=False) | |||
| res = [[1, 3], [2, 4]] | |||
| np_data = [([1, 2], [3, 4]), ([11, 12], [13, 14]), ([21, 22], [23, 24])] | |||
| ds = de.NumpySlicesDataset(np_data, shuffle=False) | |||
| for i, data in enumerate(ds): | |||
| assert data[0] == res[i][0] | |||
| assert data[1] == res[i][1] | |||
| assert np.equal(data, np_data[i]).all() | |||
| assert sum([1 for _ in ds]) == 3 | |||
| def test_numpy_slices_tuple_1(): | |||
| logger.info("Test slicing a list of tuple.") | |||
| np_data = [([1, 2], [3, 4]), ([11, 12], [13, 14]), ([21, 22], [23, 24])] | |||
| res = [[[1, 2], [11, 12], [21, 22]], [[3, 4], [13, 14], [23, 24]]] | |||
| def test_numpy_slices_tuple_2(): | |||
| logger.info("Test slicing a tuple of list.") | |||
| np_data = ([1, 2], [3, 4], [5, 6]) | |||
| expected = [[1, 3, 5], [2, 4, 6]] | |||
| ds = de.NumpySlicesDataset(np_data, shuffle=False) | |||
| for i, data in enumerate(ds): | |||
| assert np.equal(data[0], res[i][0]).all() | |||
| assert np.equal(data[1], res[i][1]).all() | |||
| assert np.equal(data[2], res[i][2]).all() | |||
| assert np.equal(data, expected[i]).all() | |||
| assert sum([1 for _ in ds]) == 2 | |||
| def test_numpy_slices_tuple_2(): | |||
| def test_numpy_slices_tuple_3(): | |||
| logger.info("Test reading different dimension of tuple data.") | |||
| features, labels = np.random.sample((5, 2)), np.random.sample((5, 1)) | |||
| data = (features, labels) | |||
| @@ -191,9 +189,9 @@ if __name__ == "__main__": | |||
| test_numpy_slices_list_3() | |||
| test_numpy_slices_list_append() | |||
| test_numpy_slices_dict_1() | |||
| test_numpy_slices_dict_2() | |||
| test_numpy_slices_tuple_1() | |||
| test_numpy_slices_tuple_2() | |||
| test_numpy_slices_tuple_3() | |||
| test_numpy_slices_csv_value() | |||
| test_numpy_slices_csv_dict() | |||
| test_numpy_slices_num_samplers() | |||