Merge pull request !5801 from xiefangqi/md_modify_output_tensor (tags/v1.0.0)
| @@ -394,7 +394,7 @@ class Dataset: | |||
| logger.error("func must be a function.") | |||
| raise TypeError("func must be a function.") | |||
| for row_data in self: | |||
| for row_data in self.create_tuple_iterator(output_numpy=True): | |||
| if dataset is None: | |||
| dataset = func(row_data) | |||
| else: | |||
| @@ -1133,7 +1133,7 @@ class Dataset: | |||
| return SaveOp(self).save(file_names, file_type) | |||
| def create_tuple_iterator(self, columns=None, num_epochs=-1): | |||
| def create_tuple_iterator(self, columns=None, num_epochs=-1, output_numpy=False): | |||
| """ | |||
| Create an Iterator over the dataset. The data retrieved will be a list of ndarray of data. | |||
| @@ -1143,8 +1143,11 @@ class Dataset: | |||
| Args: | |||
| columns (list[str], optional): List of columns to be used to specify the order of columns | |||
| (default=None, means all columns). | |||
| num_epochs (int, optional): max epochs that iterator can be iteratered, | |||
| if num_epochs = -1, iterator can be iteratered infinit epochs (default=-1) | |||
| num_epochs (int, optional): maximum number of epochs that the iterator can be iterated, | |||
| if num_epochs = -1, the iterator can be iterated for an infinite number of epochs (default=-1). | |||
| output_numpy (bool, optional): Whether or not to output NumPy data (numpy.ndarray); | |||
| if output_numpy=False, the iterator will output MSTensor (default=False). | |||
| Returns: | |||
| Iterator, list of ndarray. | |||
| @@ -1161,9 +1164,9 @@ class Dataset: | |||
| """ | |||
| if self._noop_mode(): | |||
| return DummyIterator(self, 'tuple') | |||
| return TupleIterator(self, columns, num_epochs) | |||
| return TupleIterator(self, columns, num_epochs, output_numpy) | |||
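A minimal usage sketch of the new flag (the NumpySlicesDataset construction below is only an illustration, not part of this change): with the default output_numpy=False the tuple iterator now yields mindspore.Tensor elements, while output_numpy=True preserves the previous NumPy behaviour.

    import numpy as np
    import mindspore.dataset as ds
    from mindspore import Tensor

    data = ds.NumpySlicesDataset([1, 2, 3], column_names=["col1"], shuffle=False)

    # Default after this change: each row is a list of mindspore.Tensor values.
    for row in data.create_tuple_iterator(num_epochs=1):
        assert isinstance(row[0], Tensor)

    # output_numpy=True keeps the previous behaviour: plain numpy.ndarray values.
    for row in data.create_tuple_iterator(num_epochs=1, output_numpy=True):
        assert isinstance(row[0], np.ndarray)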
| def create_dict_iterator(self, num_epochs=-1): | |||
| def create_dict_iterator(self, num_epochs=-1, output_numpy=False): | |||
| """ | |||
| Create an Iterator over the dataset. | |||
| @@ -1171,8 +1174,10 @@ class Dataset: | |||
| of the columns in the dictionary may not be the same as the original order. | |||
| Args: | |||
| num_epochs (int, optional): max epochs that iterator can be iteratered, | |||
| if num_epochs = -1, iterator can be iteratered infinit epochs (default=-1) | |||
| num_epochs (int, optional): maximum number of epochs that the iterator can be iterated, | |||
| if num_epochs = -1, the iterator can be iterated for an infinite number of epochs (default=-1). | |||
| output_numpy (bool, optional): Whether or not to output NumPy data (numpy.ndarray); | |||
| if output_numpy=False, the iterator will output MSTensor (default=False). | |||
| Returns: | |||
| Iterator, dictionary of column_name-ndarray pair. | |||
| @@ -1190,7 +1195,7 @@ class Dataset: | |||
| """ | |||
| if self._noop_mode(): | |||
| return DummyIterator(self, 'dict') | |||
| return DictIterator(self, num_epochs) | |||
| return DictIterator(self, num_epochs, output_numpy) | |||
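A matching sketch for the dict iterator, assuming a dataset with an "image" column: with the default the values are MSTensor and can be converted back on demand with .asnumpy(); passing output_numpy=True yields raw NumPy arrays directly.

    # Default: values are mindspore.Tensor; convert explicitly when NumPy is needed.
    for item in data.create_dict_iterator(num_epochs=1):
        image_np = item["image"].asnumpy()

    # Raw NumPy output, matching the behaviour before this change.
    for item in data.create_dict_iterator(num_epochs=1, output_numpy=True):
        image_np = item["image"]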
| def __iter__(self): | |||
| """Create an Iterator over the dataset.""" | |||
| @@ -1617,7 +1622,7 @@ class BucketBatchByLengthDataset(DatasetOp): | |||
| """ | |||
| if self.dataset_size is None: | |||
| num_rows = 0 | |||
| for _ in self.create_dict_iterator(num_epochs=1): | |||
| for _ in self.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| num_rows += 1 | |||
| self.dataset_size = num_rows | |||
| return self.dataset_size | |||
| @@ -2163,7 +2168,7 @@ class FilterDataset(DatasetOp): | |||
| """ | |||
| if self.dataset_size is None: | |||
| num_rows = 0 | |||
| for _ in self.create_dict_iterator(num_epochs=1): | |||
| for _ in self.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| num_rows += 1 | |||
| self.dataset_size = num_rows | |||
| return self.dataset_size | |||
| @@ -2400,7 +2405,7 @@ class ConcatDataset(DatasetOp): | |||
| """ | |||
| if self.dataset_size is None: | |||
| num_rows = 0 | |||
| for _ in self.create_dict_iterator(num_epochs=1): | |||
| for _ in self.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| num_rows += 1 | |||
| self.dataset_size = num_rows | |||
| return self.dataset_size | |||
| @@ -3495,7 +3500,7 @@ class GeneratorDataset(MappableDataset): | |||
| self.dataset_size = rows_from_sampler | |||
| else: | |||
| num_rows = 0 | |||
| for _ in self.create_dict_iterator(num_epochs=1): | |||
| for _ in self.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| num_rows += 1 | |||
| self.dataset_size = num_rows | |||
| return self.dataset_size | |||
| @@ -67,8 +67,9 @@ class Iterator: | |||
| dataset: Dataset to be iterated over | |||
| """ | |||
| def __init__(self, dataset, num_epochs=-1): | |||
| def __init__(self, dataset, num_epochs=-1, output_numpy=False): | |||
| self.num_epochs = num_epochs | |||
| self.output_numpy = output_numpy | |||
| ITERATORS_LIST.append(weakref.ref(self)) | |||
| # create a copy of tree and work on it. | |||
| self.dataset = copy.deepcopy(dataset) | |||
| @@ -305,8 +306,8 @@ class DictIterator(Iterator): | |||
| """ | |||
| The derived class of Iterator with dict type. | |||
| """ | |||
| def __init__(self, dataset, num_epochs=-1): | |||
| super().__init__(dataset, num_epochs) | |||
| def __init__(self, dataset, num_epochs=-1, output_numpy=False): | |||
| super().__init__(dataset, num_epochs, output_numpy) | |||
| self.depipeline.LaunchTreeExec() | |||
| def check_node_type(self, node): | |||
| @@ -323,7 +324,9 @@ class DictIterator(Iterator): | |||
| Dict, the next record in the dataset. | |||
| """ | |||
| return {k: v.as_array() for k, v in self.depipeline.GetNextAsMap().items()} | |||
| if self.output_numpy: | |||
| return {k: v.as_array() for k, v in self.depipeline.GetNextAsMap().items()} | |||
| return {k: Tensor(v.as_array()) for k, v in self.depipeline.GetNextAsMap().items()} | |||
| class TupleIterator(Iterator): | |||
| @@ -333,12 +336,12 @@ class TupleIterator(Iterator): | |||
| def check_node_type(self, node): | |||
| pass | |||
| def __init__(self, dataset, columns=None, num_epochs=-1): | |||
| def __init__(self, dataset, columns=None, num_epochs=-1, output_numpy=False): | |||
| if columns is not None: | |||
| if not isinstance(columns, list): | |||
| columns = [columns] | |||
| dataset = dataset.project(columns) | |||
| super().__init__(dataset, num_epochs) | |||
| super().__init__(dataset, num_epochs, output_numpy) | |||
| self.depipeline.LaunchTreeExec() | |||
| def __iter__(self): | |||
| @@ -352,7 +355,9 @@ class TupleIterator(Iterator): | |||
| List, the next record in the dataset. | |||
| """ | |||
| return [t.as_array() for t in self.depipeline.GetNextAsList()] | |||
| if self.output_numpy: | |||
| return [t.as_array() for t in self.depipeline.GetNextAsList()] | |||
| return [Tensor(t.as_array()) for t in self.depipeline.GetNextAsList()] | |||
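Both __next__ implementations above assume the Tensor class is in scope in iterators.py; a hedged sketch of the import this wrapping relies on (the path mirrors the imports used elsewhere in this diff):

    from mindspore.common.tensor import Tensor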
| class DummyIterator(): | |||
| @@ -18,8 +18,7 @@ import os | |||
| from mindspore._checkparam import check_bool, check_int | |||
| from .. import context, nn | |||
| from ._utils import _exec_datagraph, _get_types_and_shapes, _to_tensor, \ | |||
| _construct_tensor_list | |||
| from ._utils import _exec_datagraph, _get_types_and_shapes, _construct_tensor_list | |||
| from ..nn.wrap import GetNextSingleOp | |||
| from ..parallel._utils import _get_device_num, _get_global_rank, _need_to_full, _to_full_shapes | |||
| from ..ops import operations as P | |||
| @@ -297,4 +296,4 @@ class _DatasetIterNormal: | |||
| def __next__(self): | |||
| data = self.iter.__next__() | |||
| return _to_tensor(data) | |||
| return data | |||
| @@ -19,7 +19,7 @@ import argparse | |||
| import time | |||
| import numpy as np | |||
| from pycocotools.coco import COCO | |||
| from mindspore import context, Tensor | |||
| from mindspore import context | |||
| from mindspore.train.serialization import load_checkpoint, load_param_into_net | |||
| from mindspore.common import set_seed | |||
| @@ -68,7 +68,7 @@ def FasterRcnn_eval(dataset_path, ckpt_path, ann_file): | |||
| start = time.time() | |||
| # run net | |||
| output = net(Tensor(img_data), Tensor(img_metas), Tensor(gt_bboxes), Tensor(gt_labels), Tensor(gt_num)) | |||
| output = net(img_data, img_metas, gt_bboxes, gt_labels, gt_num) | |||
| end = time.time() | |||
| print("Iter {} cost time {}".format(eval_iter, end - start)) | |||
| @@ -57,7 +57,7 @@ def MaskRcnn_eval(dataset_path, ckpt_path, ann_file): | |||
| print("total images num: ", total) | |||
| print("Processing, please wait a moment.") | |||
| max_num = 128 | |||
| for data in ds.create_dict_iterator(): | |||
| for data in ds.create_dict_iterator(output_numpy=True): | |||
| eval_iter = eval_iter + 1 | |||
| img_data = data['image'] | |||
| @@ -109,7 +109,7 @@ def extract_features(net, dataset_path, config): | |||
| config=config, | |||
| repeat_num=1) | |||
| step_size = dataset.get_dataset_size() | |||
| pbar = tqdm(list(dataset.create_dict_iterator())) | |||
| pbar = tqdm(list(dataset.create_dict_iterator(output_numpy=True))) | |||
| model = Model(net) | |||
| i = 0 | |||
| for data in pbar: | |||
| @@ -146,7 +146,7 @@ def test(cloud_args=None): | |||
| per_batch_size=args.per_batch_size, | |||
| max_epoch=1, rank=args.rank, group_size=args.group_size, | |||
| mode='eval') | |||
| eval_dataloader = de_dataset.create_tuple_iterator() | |||
| eval_dataloader = de_dataset.create_tuple_iterator(output_numpy=True) | |||
| network = get_network(args.backbone, args.num_classes, platform=args.platform) | |||
| if network is None: | |||
| raise NotImplementedError('not implement {}'.format(args.backbone)) | |||
| @@ -44,7 +44,7 @@ def ssd_eval(dataset_path, ckpt_path): | |||
| print("\n========================================\n") | |||
| print("total images num: ", total) | |||
| print("Processing, please wait a moment.") | |||
| for data in ds.create_dict_iterator(): | |||
| for data in ds.create_dict_iterator(output_numpy=True): | |||
| img_id = data['img_id'] | |||
| img_np = data['image'] | |||
| image_shape = data['image_shape'] | |||
| @@ -159,7 +159,7 @@ def test(cloud_args=None): | |||
| for model in args.models: | |||
| dataset = classification_dataset(args.data_path, args.image_size, args.per_batch_size, mode='eval') | |||
| eval_dataloader = dataset.create_tuple_iterator() | |||
| eval_dataloader = dataset.create_tuple_iterator(output_numpy=True) | |||
| network = vgg16(args.num_classes, args, phase="test") | |||
| # pre_trained | |||
| @@ -300,10 +300,10 @@ def test(): | |||
| input_shape = Tensor(tuple(config.test_img_shape), ms.float32) | |||
| args.logger.info('Start inference....') | |||
| for i, data in enumerate(ds.create_dict_iterator()): | |||
| image = Tensor(data["image"]) | |||
| image = data["image"] | |||
| image_shape = Tensor(data["image_shape"]) | |||
| image_id = Tensor(data["img_id"]) | |||
| image_shape = data["image_shape"] | |||
| image_id = data["img_id"] | |||
| prediction = network(image, input_shape) | |||
| output_big, output_me, output_small = prediction | |||
| @@ -299,7 +299,7 @@ def train(): | |||
| old_progress = -1 | |||
| t_end = time.time() | |||
| data_loader = ds.create_dict_iterator() | |||
| data_loader = ds.create_dict_iterator(output_numpy=True) | |||
| for i, data in enumerate(data_loader): | |||
| images = data["image"] | |||
| @@ -306,10 +306,10 @@ def test(): | |||
| input_shape = Tensor(tuple(config.test_img_shape), ms.float32) | |||
| args.logger.info('Start inference....') | |||
| for i, data in enumerate(ds.create_dict_iterator()): | |||
| image = Tensor(data["image"]) | |||
| image = data["image"] | |||
| image_shape = Tensor(data["image_shape"]) | |||
| image_id = Tensor(data["img_id"]) | |||
| image_shape = data["image_shape"] | |||
| image_id = data["img_id"] | |||
| prediction = network(image, input_shape) | |||
| output_big, output_me, output_small = prediction | |||
| @@ -303,7 +303,7 @@ def train(): | |||
| old_progress = -1 | |||
| t_end = time.time() | |||
| data_loader = ds.create_dict_iterator() | |||
| data_loader = ds.create_dict_iterator(output_numpy=True) | |||
| shape_record = ShapeRecord() | |||
| for i, data in enumerate(data_loader): | |||
| @@ -44,7 +44,7 @@ def yolo_eval(dataset_path, ckpt_path): | |||
| print("\n========================================\n") | |||
| print("total images num: ", total) | |||
| print("Processing, please wait a moment.") | |||
| for data in ds.create_dict_iterator(): | |||
| for data in ds.create_dict_iterator(output_numpy=True): | |||
| img_np = data['image'] | |||
| image_shape = data['image_shape'] | |||
| annotation = data['annotation'] | |||
| @@ -52,7 +52,7 @@ def train_and_eval(): | |||
| eval_class = BGCFEvaluate(parser, train_graph, test_graph, parser.Ks) | |||
| itr = train_ds.create_dict_iterator(parser.num_epoch) | |||
| itr = train_ds.create_dict_iterator(parser.num_epoch, output_numpy=True) | |||
| num_iter = int(num_pairs / parser.batch_pairs) | |||
| for _epoch in range(1, parser.num_epoch + 1): | |||
| @@ -29,7 +29,6 @@ from mindspore import context | |||
| from mindspore import log as logger | |||
| from mindspore.nn.wrap.loss_scale import DynamicLossScaleUpdateCell | |||
| from mindspore.nn.optim import AdamWeightDecay, Lamb, Momentum | |||
| from mindspore.common.tensor import Tensor | |||
| from mindspore.train.model import Model | |||
| from mindspore.train.callback import CheckpointConfig, ModelCheckpoint, TimeMonitor | |||
| from mindspore.train.serialization import load_checkpoint, load_param_into_net | |||
| @@ -123,7 +122,7 @@ def do_eval(dataset=None, network=None, num_class=2, assessment_method="accuracy | |||
| for data in dataset.create_dict_iterator(): | |||
| input_data = [] | |||
| for i in columns_list: | |||
| input_data.append(Tensor(data[i])) | |||
| input_data.append(data[i]) | |||
| input_ids, input_mask, token_type_id, label_ids = input_data | |||
| logits = model.predict(input_ids, input_mask, token_type_id, label_ids) | |||
| callback.update(logits, label_ids) | |||
| @@ -30,7 +30,6 @@ from mindspore import context | |||
| from mindspore import log as logger | |||
| from mindspore.nn.wrap.loss_scale import DynamicLossScaleUpdateCell | |||
| from mindspore.nn.optim import AdamWeightDecay, Lamb, Momentum | |||
| from mindspore.common.tensor import Tensor | |||
| from mindspore.train.model import Model | |||
| from mindspore.train.callback import CheckpointConfig, ModelCheckpoint, TimeMonitor | |||
| from mindspore.train.serialization import load_checkpoint, load_param_into_net | |||
| @@ -132,7 +131,7 @@ def do_eval(dataset=None, network=None, use_crf="", num_class=2, assessment_meth | |||
| for data in dataset.create_dict_iterator(): | |||
| input_data = [] | |||
| for i in columns_list: | |||
| input_data.append(Tensor(data[i])) | |||
| input_data.append(data[i]) | |||
| input_ids, input_mask, token_type_id, label_ids = input_data | |||
| logits = model.predict(input_ids, input_mask, token_type_id, label_ids) | |||
| callback.update(logits, label_ids) | |||
| @@ -112,7 +112,7 @@ def do_eval(dataset=None, vocab_file="", eval_json="", load_checkpoint_path="", | |||
| for data in dataset.create_dict_iterator(): | |||
| input_data = [] | |||
| for i in columns_list: | |||
| input_data.append(Tensor(data[i])) | |||
| input_data.append(data[i]) | |||
| input_ids, input_mask, segment_ids, unique_ids = input_data | |||
| start_positions = Tensor([1], mstype.float32) | |||
| end_positions = Tensor([1], mstype.float32) | |||
| @@ -107,7 +107,7 @@ def transformer_infer(config, dataset): | |||
| probs = [] | |||
| source_sentences = [] | |||
| target_sentences = [] | |||
| for batch in dataset.create_dict_iterator(): | |||
| for batch in dataset.create_dict_iterator(output_numpy=True): | |||
| source_sentences.append(batch["source_eos_ids"]) | |||
| target_sentences.append(batch["target_eos_ids"]) | |||
| @@ -232,7 +232,7 @@ def transformer_infer_ppl(config, dataset): | |||
| lengths = [] | |||
| source_sentences = [] | |||
| target_sentences = [] | |||
| for batch in dataset.create_dict_iterator(): | |||
| for batch in dataset.create_dict_iterator(output_numpy=True): | |||
| source_sentences.append(batch["source_eos_ids"]) | |||
| target_sentences.append(batch["target_eos_ids"]) | |||
| @@ -19,7 +19,6 @@ import os | |||
| import re | |||
| import argparse | |||
| import mindspore.common.dtype as mstype | |||
| from mindspore import Tensor | |||
| from mindspore import context | |||
| from mindspore.train.model import Model | |||
| from mindspore.train.callback import TimeMonitor | |||
| @@ -282,7 +281,7 @@ def do_eval_standalone(): | |||
| for data in eval_dataset.create_dict_iterator(): | |||
| input_data = [] | |||
| for i in columns_list: | |||
| input_data.append(Tensor(data[i])) | |||
| input_data.append(data[i]) | |||
| input_ids, input_mask, token_type_id, label_ids = input_data | |||
| logits = eval_model(input_ids, token_type_id, input_mask) | |||
| callback.update(logits[3], label_ids) | |||
| @@ -96,7 +96,7 @@ class EvalCallBack(Callback): | |||
| for data in self.dataset.create_dict_iterator(): | |||
| input_data = [] | |||
| for i in columns_list: | |||
| input_data.append(Tensor(data[i])) | |||
| input_data.append(data[i]) | |||
| input_ids, input_mask, token_type_id, label_ids = input_data | |||
| self.network.set_train(False) | |||
| logits = self.network(input_ids, token_type_id, input_mask) | |||
| @@ -113,7 +113,7 @@ def run_transformer_eval(): | |||
| predictions = [] | |||
| source_sents = [] | |||
| target_sents = [] | |||
| for batch in dataset.create_dict_iterator(): | |||
| for batch in dataset.create_dict_iterator(output_numpy=True): | |||
| source_sents.append(batch["source_eos_ids"]) | |||
| target_sents.append(batch["target_eos_ids"]) | |||
| source_ids = Tensor(batch["source_eos_ids"], mstype.int32) | |||
| @@ -22,7 +22,7 @@ def create_dataset(data_file): | |||
| num_parallel_workers=num_readers, | |||
| shuffle=True) | |||
| index = 0 | |||
| for item in data_set.create_dict_iterator(): | |||
| for item in data_set.create_dict_iterator(output_numpy=True): | |||
| print("example {}: {}".format(index, item)) | |||
| index += 1 | |||
| if index % 1000 == 0: | |||
| @@ -28,7 +28,7 @@ args = parser.parse_args() | |||
| data_set = ds.MindDataset(args.path) | |||
| num_iter = 0 | |||
| for item in data_set.create_dict_iterator(): | |||
| for item in data_set.create_dict_iterator(output_numpy=True): | |||
| print(item) | |||
| num_iter += 1 | |||
| print("Total items # is {}".format(num_iter)) | |||
| @@ -22,7 +22,7 @@ def create_dataset(data_file): | |||
| num_parallel_workers=num_readers, | |||
| shuffle=True) | |||
| index = 0 | |||
| for item in data_set.create_dict_iterator(): | |||
| for item in data_set.create_dict_iterator(output_numpy=True): | |||
| print("example {}: {}".format(index, item)) | |||
| index += 1 | |||
| if index % 1000 == 0: | |||
| @@ -22,7 +22,7 @@ def create_dataset(data_file): | |||
| num_parallel_workers=num_readers, | |||
| shuffle=True) | |||
| index = 0 | |||
| for item in data_set.create_dict_iterator(): | |||
| for item in data_set.create_dict_iterator(output_numpy=True): | |||
| print("example {}: {}".format(index, item)) | |||
| index += 1 | |||
| if index % 1000 == 0: | |||
| @@ -96,7 +96,7 @@ if __name__ == '__main__': | |||
| dataset_types, dataset_shapes, (), 'dataset') | |||
| ds1.send() | |||
| for data in data_set.create_tuple_iterator(): | |||
| for data in data_set.create_tuple_iterator(output_numpy=True): | |||
| output = net() | |||
| print(data[0].any()) | |||
| print( | |||
| @@ -92,7 +92,7 @@ class BNNLeNet5(nn.Cell): | |||
| def train_model(train_net, net, dataset): | |||
| accs = [] | |||
| loss_sum = 0 | |||
| for _, data in enumerate(dataset.create_dict_iterator()): | |||
| for _, data in enumerate(dataset.create_dict_iterator(output_numpy=True)): | |||
| train_x = Tensor(data['image'].astype(np.float32)) | |||
| label = Tensor(data['label'].astype(np.int32)) | |||
| loss = train_net(train_x, label) | |||
| @@ -109,7 +109,7 @@ def train_model(train_net, net, dataset): | |||
| def validate_model(net, dataset): | |||
| accs = [] | |||
| for _, data in enumerate(dataset.create_dict_iterator()): | |||
| for _, data in enumerate(dataset.create_dict_iterator(output_numpy=True)): | |||
| train_x = Tensor(data['image'].astype(np.float32)) | |||
| label = Tensor(data['label'].astype(np.int32)) | |||
| output = net(train_x) | |||
| @@ -122,7 +122,7 @@ def test_svi_cvae(): | |||
| sample_label = Tensor([i for i in range(0, 8)] * 8, dtype=mstype.int32) | |||
| generated_sample = cvae.generate_sample(sample_label, 64, IMAGE_SHAPE) | |||
| # test function: reconstruct_sample | |||
| for sample in ds_train.create_dict_iterator(): | |||
| for sample in ds_train.create_dict_iterator(output_numpy=True): | |||
| sample_x = Tensor(sample['image'], dtype=mstype.float32) | |||
| sample_y = Tensor(sample['label'], dtype=mstype.int32) | |||
| reconstructed_sample = cvae.reconstruct_sample(sample_x, sample_y) | |||
| @@ -110,7 +110,7 @@ def test_svi_vae(): | |||
| # test function: generate_sample | |||
| generated_sample = vae.generate_sample(64, IMAGE_SHAPE) | |||
| # test function: reconstruct_sample | |||
| for sample in ds_train.create_dict_iterator(): | |||
| for sample in ds_train.create_dict_iterator(output_numpy=True): | |||
| sample_x = Tensor(sample['image'], dtype=mstype.float32) | |||
| reconstructed_sample = vae.reconstruct_sample(sample_x) | |||
| print('The loss of the trained network is ', trained_loss) | |||
| @@ -93,7 +93,7 @@ class LeNet5(nn.Cell): | |||
| def train_model(train_net, net, dataset): | |||
| accs = [] | |||
| loss_sum = 0 | |||
| for _, data in enumerate(dataset.create_dict_iterator()): | |||
| for _, data in enumerate(dataset.create_dict_iterator(output_numpy=True)): | |||
| train_x = Tensor(data['image'].astype(np.float32)) | |||
| label = Tensor(data['label'].astype(np.int32)) | |||
| loss = train_net(train_x, label) | |||
| @@ -110,7 +110,7 @@ def train_model(train_net, net, dataset): | |||
| def validate_model(net, dataset): | |||
| accs = [] | |||
| for _, data in enumerate(dataset.create_dict_iterator()): | |||
| for _, data in enumerate(dataset.create_dict_iterator(output_numpy=True)): | |||
| train_x = Tensor(data['image'].astype(np.float32)) | |||
| label = Tensor(data['label'].astype(np.int32)) | |||
| output = net(train_x) | |||
| @@ -92,7 +92,7 @@ class LeNet5(nn.Cell): | |||
| def train_model(train_net, net, dataset): | |||
| accs = [] | |||
| loss_sum = 0 | |||
| for _, data in enumerate(dataset.create_dict_iterator()): | |||
| for _, data in enumerate(dataset.create_dict_iterator(output_numpy=True)): | |||
| train_x = Tensor(data['image'].astype(np.float32)) | |||
| label = Tensor(data['label'].astype(np.int32)) | |||
| loss = train_net(train_x, label) | |||
| @@ -109,7 +109,7 @@ def train_model(train_net, net, dataset): | |||
| def validate_model(net, dataset): | |||
| accs = [] | |||
| for _, data in enumerate(dataset.create_dict_iterator()): | |||
| for _, data in enumerate(dataset.create_dict_iterator(output_numpy=True)): | |||
| train_x = Tensor(data['image'].astype(np.float32)) | |||
| label = Tensor(data['label'].astype(np.int32)) | |||
| output = net(train_x) | |||
| @@ -129,7 +129,7 @@ if __name__ == '__main__': | |||
| epi_uncer_model_path=None, | |||
| ale_uncer_model_path=None, | |||
| save_model=False) | |||
| for eval_data in ds_eval.create_dict_iterator(): | |||
| for eval_data in ds_eval.create_dict_iterator(output_numpy=True): | |||
| eval_data = Tensor(eval_data['image'], mstype.float32) | |||
| epistemic_uncertainty = evaluation.eval_epistemic_uncertainty(eval_data) | |||
| aleatoric_uncertainty = evaluation.eval_aleatoric_uncertainty(eval_data) | |||
| @@ -423,8 +423,8 @@ def test_pynative_resnet50(): | |||
| if step > max_step: | |||
| break | |||
| start_time = time.time() | |||
| input_data = Tensor(element["image"]) | |||
| input_label = Tensor(element["label"]) | |||
| input_data = element["image"] | |||
| input_label = element["label"] | |||
| loss_output = net_with_criterion(input_data, input_label) | |||
| grads = train_network(input_data, input_label) | |||
| optimizer(grads) | |||
| @@ -48,7 +48,8 @@ def test_HWC2CHW(plot=False): | |||
| image_transposed = [] | |||
| image = [] | |||
| for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): | |||
| for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), | |||
| data2.create_dict_iterator(num_epochs=1, output_numpy=True)): | |||
| transposed_item = item1["image"].copy() | |||
| original_item = item2["image"].copy() | |||
| image_transposed.append(transposed_item.transpose(1, 2, 0)) | |||
| @@ -105,7 +106,8 @@ def test_HWC2CHW_comp(plot=False): | |||
| image_c_transposed = [] | |||
| image_py_transposed = [] | |||
| for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): | |||
| for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), | |||
| data2.create_dict_iterator(num_epochs=1, output_numpy=True)): | |||
| c_image = item1["image"] | |||
| py_image = (item2["image"].transpose(1, 2, 0) * 255).astype(np.uint8) | |||
| @@ -40,7 +40,8 @@ def test_apply_generator_case(): | |||
| data2 = data2.repeat(2) | |||
| data2 = data2.batch(4) | |||
| for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): | |||
| for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), | |||
| data2.create_dict_iterator(num_epochs=1, output_numpy=True)): | |||
| np.testing.assert_array_equal(item1["data"], item2["data"]) | |||
| @@ -63,7 +64,8 @@ def test_apply_imagefolder_case(): | |||
| data2 = data2.map(operations=normalize_op) | |||
| data2 = data2.repeat(2) | |||
| for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): | |||
| for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), | |||
| data2.create_dict_iterator(num_epochs=1, output_numpy=True)): | |||
| np.testing.assert_array_equal(item1["image"], item2["image"]) | |||
| @@ -48,10 +48,10 @@ def test_auto_contrast_py(plot=False): | |||
| for idx, (image, _) in enumerate(ds_original): | |||
| if idx == 0: | |||
| images_original = np.transpose(image, (0, 2, 3, 1)) | |||
| images_original = np.transpose(image.asnumpy(), (0, 2, 3, 1)) | |||
| else: | |||
| images_original = np.append(images_original, | |||
| np.transpose(image, (0, 2, 3, 1)), | |||
| np.transpose(image.asnumpy(), (0, 2, 3, 1)), | |||
| axis=0) | |||
| # AutoContrast Images | |||
| @@ -69,10 +69,10 @@ def test_auto_contrast_py(plot=False): | |||
| for idx, (image, _) in enumerate(ds_auto_contrast): | |||
| if idx == 0: | |||
| images_auto_contrast = np.transpose(image, (0, 2, 3, 1)) | |||
| images_auto_contrast = np.transpose(image.asnumpy(), (0, 2, 3, 1)) | |||
| else: | |||
| images_auto_contrast = np.append(images_auto_contrast, | |||
| np.transpose(image, (0, 2, 3, 1)), | |||
| np.transpose(image.asnumpy(), (0, 2, 3, 1)), | |||
| axis=0) | |||
| num_samples = images_original.shape[0] | |||
| @@ -110,10 +110,10 @@ def test_auto_contrast_c(plot=False): | |||
| for idx, (image, _) in enumerate(ds_auto_contrast_py): | |||
| if idx == 0: | |||
| images_auto_contrast_py = image | |||
| images_auto_contrast_py = image.asnumpy() | |||
| else: | |||
| images_auto_contrast_py = np.append(images_auto_contrast_py, | |||
| image, | |||
| image.asnumpy(), | |||
| axis=0) | |||
| ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False) | |||
| @@ -125,10 +125,10 @@ def test_auto_contrast_c(plot=False): | |||
| for idx, (image, _) in enumerate(ds_auto_contrast_c): | |||
| if idx == 0: | |||
| images_auto_contrast_c = image | |||
| images_auto_contrast_c = image.asnumpy() | |||
| else: | |||
| images_auto_contrast_c = np.append(images_auto_contrast_c, | |||
| image, | |||
| image.asnumpy(), | |||
| axis=0) | |||
| num_samples = images_auto_contrast_c.shape[0] | |||
| @@ -170,10 +170,10 @@ def test_auto_contrast_one_channel_c(plot=False): | |||
| for idx, (image, _) in enumerate(ds_auto_contrast_py): | |||
| if idx == 0: | |||
| images_auto_contrast_py = image | |||
| images_auto_contrast_py = image.asnumpy() | |||
| else: | |||
| images_auto_contrast_py = np.append(images_auto_contrast_py, | |||
| image, | |||
| image.asnumpy(), | |||
| axis=0) | |||
| ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False) | |||
| @@ -186,10 +186,10 @@ def test_auto_contrast_one_channel_c(plot=False): | |||
| for idx, (image, _) in enumerate(ds_auto_contrast_c): | |||
| if idx == 0: | |||
| images_auto_contrast_c = image | |||
| images_auto_contrast_c = image.asnumpy() | |||
| else: | |||
| images_auto_contrast_c = np.append(images_auto_contrast_c, | |||
| image, | |||
| image.asnumpy(), | |||
| axis=0) | |||
| num_samples = images_auto_contrast_c.shape[0] | |||
| @@ -218,9 +218,9 @@ def test_auto_contrast_mnist_c(plot=False): | |||
| for _, (data_orig, data_trans) in enumerate(zip(ds_orig, ds_auto_contrast_c)): | |||
| image_orig, label_orig = data_orig | |||
| image_trans, _ = data_trans | |||
| images.append(image_orig) | |||
| labels.append(label_orig) | |||
| images_trans.append(image_trans) | |||
| images.append(image_orig.asnumpy()) | |||
| labels.append(label_orig.asnumpy()) | |||
| images_trans.append(image_trans.asnumpy()) | |||
| # Compare with expected md5 from images | |||
| filename = "autocontrast_mnist_result_c.npz" | |||
| @@ -58,7 +58,8 @@ def test_bounding_box_augment_with_rotation_op(plot_vis=False): | |||
| unaugSamp, augSamp = [], [] | |||
| for unAug, Aug in zip(dataVoc1.create_dict_iterator(num_epochs=1), dataVoc2.create_dict_iterator(num_epochs=1)): | |||
| for unAug, Aug in zip(dataVoc1.create_dict_iterator(num_epochs=1, output_numpy=True), | |||
| dataVoc2.create_dict_iterator(num_epochs=1, output_numpy=True)): | |||
| unaugSamp.append(unAug) | |||
| augSamp.append(Aug) | |||
| @@ -96,7 +97,8 @@ def test_bounding_box_augment_with_crop_op(plot_vis=False): | |||
| unaugSamp, augSamp = [], [] | |||
| for unAug, Aug in zip(dataVoc1.create_dict_iterator(num_epochs=1), dataVoc2.create_dict_iterator(num_epochs=1)): | |||
| for unAug, Aug in zip(dataVoc1.create_dict_iterator(num_epochs=1, output_numpy=True), | |||
| dataVoc2.create_dict_iterator(num_epochs=1, output_numpy=True)): | |||
| unaugSamp.append(unAug) | |||
| augSamp.append(Aug) | |||
| @@ -133,7 +135,8 @@ def test_bounding_box_augment_valid_ratio_c(plot_vis=False): | |||
| unaugSamp, augSamp = [], [] | |||
| for unAug, Aug in zip(dataVoc1.create_dict_iterator(num_epochs=1), dataVoc2.create_dict_iterator(num_epochs=1)): | |||
| for unAug, Aug in zip(dataVoc1.create_dict_iterator(num_epochs=1, output_numpy=True), | |||
| dataVoc2.create_dict_iterator(num_epochs=1, output_numpy=True)): | |||
| unaugSamp.append(unAug) | |||
| augSamp.append(Aug) | |||
| @@ -166,7 +169,8 @@ def test_bounding_box_augment_op_coco_c(plot_vis=False): | |||
| unaugSamp, augSamp = [], [] | |||
| for unAug, Aug in zip(dataCoco1.create_dict_iterator(num_epochs=1), dataCoco2.create_dict_iterator(num_epochs=1)): | |||
| for unAug, Aug in zip(dataCoco1.create_dict_iterator(num_epochs=1, output_numpy=True), | |||
| dataCoco2.create_dict_iterator(num_epochs=1, output_numpy=True)): | |||
| unaugSamp.append(unAug) | |||
| augSamp.append(Aug) | |||
| @@ -209,7 +213,8 @@ def test_bounding_box_augment_valid_edge_c(plot_vis=False): | |||
| unaugSamp, augSamp = [], [] | |||
| for unAug, Aug in zip(dataVoc1.create_dict_iterator(num_epochs=1), dataVoc2.create_dict_iterator(num_epochs=1)): | |||
| for unAug, Aug in zip(dataVoc1.create_dict_iterator(num_epochs=1, output_numpy=True), | |||
| dataVoc2.create_dict_iterator(num_epochs=1, output_numpy=True)): | |||
| unaugSamp.append(unAug) | |||
| augSamp.append(Aug) | |||
| @@ -135,7 +135,7 @@ def test_bucket_batch_multi_bucket_no_padding(): | |||
| [[1], [5], [9]]] | |||
| output = [] | |||
| for data in dataset.create_dict_iterator(num_epochs=1): | |||
| for data in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| output.append(data["col1"].tolist()) | |||
| assert output == expected_output | |||
| @@ -166,7 +166,7 @@ def test_bucket_batch_multi_bucket_with_padding(): | |||
| [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]]] | |||
| output = [] | |||
| for data in dataset.create_dict_iterator(num_epochs=1): | |||
| for data in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| output.append(data["col1"].tolist()) | |||
| assert output == expected_output | |||
| @@ -187,7 +187,7 @@ def test_bucket_batch_single_bucket_no_padding(): | |||
| [[5], [6], [7], [8], [9]]] | |||
| output = [] | |||
| for data in dataset.create_dict_iterator(num_epochs=1): | |||
| for data in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| output.append(data["col1"].tolist()) | |||
| assert output == expected_output | |||
| @@ -217,7 +217,7 @@ def test_bucket_batch_single_bucket_with_padding(): | |||
| [0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0]]] | |||
| output = [] | |||
| for data in dataset.create_dict_iterator(num_epochs=1): | |||
| for data in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| output.append(data["col1"].tolist()) | |||
| assert output == expected_output | |||
| @@ -248,7 +248,7 @@ def test_bucket_batch_pad_to_bucket_boundary(): | |||
| [0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0]]] | |||
| output = [] | |||
| for data in dataset.create_dict_iterator(num_epochs=1): | |||
| for data in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| output.append(data["col1"].tolist()) | |||
| assert output == expected_output | |||
| @@ -284,7 +284,7 @@ def test_bucket_batch_default_pad(): | |||
| [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]]] | |||
| output = [] | |||
| for data in dataset.create_dict_iterator(num_epochs=1): | |||
| for data in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| output.append(data["col1"].tolist()) | |||
| assert output == expected_output | |||
| @@ -315,7 +315,7 @@ def test_bucket_batch_drop_remainder(): | |||
| [[19], [22], [25]]] | |||
| output = [] | |||
| for data in dataset.create_dict_iterator(num_epochs=1): | |||
| for data in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| output.append(data["col1"].tolist()) | |||
| assert output == expected_output | |||
| @@ -345,7 +345,7 @@ def test_bucket_batch_default_length_function(): | |||
| [0, 1, 2, 3, 4, 5, 6, 7, 8]]] | |||
| output = [] | |||
| for data in dataset.create_dict_iterator(num_epochs=1): | |||
| for data in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| output.append(data["col1"].tolist()) | |||
| assert output == expected_output | |||
| @@ -380,7 +380,7 @@ def test_bucket_batch_multi_column(): | |||
| same_shape_output = [] | |||
| variable_shape_output = [] | |||
| for data in dataset.create_dict_iterator(num_epochs=1): | |||
| for data in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| same_shape_output.append(data["same_shape"].tolist()) | |||
| variable_shape_output.append(data["variable_shape"].tolist()) | |||
| @@ -419,7 +419,7 @@ def test_bucket_batch_three_columns(): | |||
| same_shape_output = [] | |||
| same_shape2_output = [] | |||
| variable_shape_output = [] | |||
| for data in dataset.create_dict_iterator(num_epochs=1): | |||
| for data in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| same_shape_output.append(data["same_shape"].tolist()) | |||
| same_shape2_output.append(data["same_shape2"].tolist()) | |||
| variable_shape_output.append(data["variable_shape"].tolist()) | |||
| @@ -27,7 +27,7 @@ def test_compose(): | |||
| data = ds.NumpySlicesDataset(arr, column_names="col", shuffle=False) | |||
| data = data.map(operations=ops.Compose(op_list), input_columns=["col"]) | |||
| res = [] | |||
| for i in data.create_dict_iterator(num_epochs=1): | |||
| for i in data.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| res.append(i["col"].tolist()) | |||
| return res | |||
| except (TypeError, ValueError) as e: | |||
| @@ -26,7 +26,7 @@ def test_random_apply(): | |||
| data = ds.NumpySlicesDataset(arr, column_names="col", shuffle=False) | |||
| data = data.map(operations=ops.RandomApply(op_list, prob), input_columns=["col"]) | |||
| res = [] | |||
| for i in data.create_dict_iterator(num_epochs=1): | |||
| for i in data.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| res.append(i["col"].tolist()) | |||
| return res | |||
| except (TypeError, ValueError) as e: | |||
| @@ -29,7 +29,7 @@ def test_random_choice(): | |||
| data = ds.NumpySlicesDataset(arr, column_names="col", shuffle=False) | |||
| data = data.map(operations=ops.RandomChoice(op_list), input_columns=["col"]) | |||
| res = [] | |||
| for i in data.create_dict_iterator(num_epochs=1): | |||
| for i in data.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| res.append(i["col"].tolist()) | |||
| return res | |||
| except (TypeError, ValueError) as e: | |||
| @@ -49,7 +49,8 @@ def test_center_crop_op(height=375, width=375, plot=False): | |||
| image_cropped = [] | |||
| image = [] | |||
| for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): | |||
| for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), | |||
| data2.create_dict_iterator(num_epochs=1, output_numpy=True)): | |||
| image_cropped.append(item1["image"].copy()) | |||
| image.append(item2["image"].copy()) | |||
| if plot: | |||
| @@ -99,7 +100,8 @@ def test_center_crop_comp(height=375, width=375, plot=False): | |||
| image_c_cropped = [] | |||
| image_py_cropped = [] | |||
| for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): | |||
| for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), | |||
| data2.create_dict_iterator(num_epochs=1, output_numpy=True)): | |||
| c_image = item1["image"] | |||
| py_image = (item2["image"].transpose(1, 2, 0) * 255).astype(np.uint8) | |||
| # Note: The images aren't exactly the same due to rounding error | |||
| @@ -132,7 +134,7 @@ def test_crop_grayscale(height=375, width=375): | |||
| crop_gray = vision.CenterCrop([height, width]) | |||
| data1 = data1.map(operations=crop_gray, input_columns=["image"]) | |||
| for item1 in data1.create_dict_iterator(num_epochs=1): | |||
| for item1 in data1.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| c_image = item1["image"] | |||
| # Check that the image is grayscale | |||
| @@ -50,9 +50,10 @@ def test_concat_01(): | |||
| data3 = data1 + data2 | |||
| # Here i refers to index, d refers to data element | |||
| for i, d in enumerate(data3): | |||
| logger.info("data: %i", d[0][0]) | |||
| assert i == d[0][0] | |||
| for i, d in enumerate(data3.create_tuple_iterator(output_numpy=True)): | |||
| t = d | |||
| logger.info("data: %i", t[0][0]) | |||
| assert i == t[0][0] | |||
| assert sum([1 for _ in data3]) == 10 | |||
| @@ -68,9 +69,10 @@ def test_concat_02(): | |||
| data3 = data1.concat(data2) | |||
| # Here i refers to index, d refers to data element | |||
| for i, d in enumerate(data3): | |||
| logger.info("data: %i", d[0][0]) | |||
| assert i == d[0][0] | |||
| for i, d in enumerate(data3.create_tuple_iterator(output_numpy=True)): | |||
| t = d | |||
| logger.info("data: %i", t[0][0]) | |||
| assert i == t[0][0] | |||
| assert sum([1 for _ in data3]) == 10 | |||
| @@ -145,9 +147,10 @@ def test_concat_06(): | |||
| dataset = data1 + data2 + data3 | |||
| # Here i refers to index, d refers to data element | |||
| for i, d in enumerate(dataset): | |||
| logger.info("data: %i", d[0][0]) | |||
| assert i == d[0][0] | |||
| for i, d in enumerate(dataset.create_tuple_iterator(output_numpy=True)): | |||
| t = d | |||
| logger.info("data: %i", t[0][0]) | |||
| assert i == t[0][0] | |||
| assert sum([1 for _ in dataset]) == 20 | |||
| @@ -165,9 +168,10 @@ def test_concat_07(): | |||
| data4 = data1 + dataset | |||
| # Here i refers to index, d refers to data element | |||
| for i, d in enumerate(data4): | |||
| logger.info("data: %i", d[0][0]) | |||
| assert i == d[0][0] | |||
| for i, d in enumerate(data4.create_tuple_iterator(output_numpy=True)): | |||
| t = d | |||
| logger.info("data: %i", t[0][0]) | |||
| assert i == t[0][0] | |||
| assert sum([1 for _ in data4]) == 20 | |||
| @@ -184,9 +188,10 @@ def test_concat_08(): | |||
| data3 = data3.repeat(2) | |||
| # Here i refers to index, d refers to data element | |||
| for i, d in enumerate(data3): | |||
| logger.info("data: %i", d[0][0]) | |||
| assert i % 10 == d[0][0] | |||
| for i, d in enumerate(data3.create_tuple_iterator(output_numpy=True)): | |||
| t = d | |||
| logger.info("data: %i", t[0][0]) | |||
| assert i % 10 == t[0][0] | |||
| assert sum([1 for _ in data3]) == 20 | |||
| @@ -205,9 +210,10 @@ def test_concat_09(): | |||
| res = [0, 1, 2, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 3, 4, 5, 6, 7, 8, 9] | |||
| # Here i refers to index, d refers to data element | |||
| for i, d in enumerate(data3): | |||
| logger.info("data: %i", d[0][0]) | |||
| assert res[i] == d[0][0] | |||
| for i, d in enumerate(data3.create_tuple_iterator(output_numpy=True)): | |||
| t = d | |||
| logger.info("data: %i", t[0][0]) | |||
| assert res[i] == t[0][0] | |||
| assert sum([1 for _ in data3]) == 20 | |||
| @@ -225,9 +231,10 @@ def test_concat_10(): | |||
| res = [0, 1, 2, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] | |||
| # Here i refers to index, d refers to data element | |||
| for i, d in enumerate(data3): | |||
| logger.info("data: %i", d[0][0]) | |||
| assert res[i] == d[0][0] | |||
| for i, d in enumerate(data3.create_tuple_iterator(output_numpy=True)): | |||
| t = d | |||
| logger.info("data: %i", t[0][0]) | |||
| assert res[i] == t[0][0] | |||
| assert sum([1 for _ in data3]) == 13 | |||
| @@ -247,9 +254,10 @@ def test_concat_11(): | |||
| res = [0, 10, 15, 20] | |||
| # Here i refers to index, d refers to data element | |||
| for i, d in enumerate(data3): | |||
| logger.info("data: %i", d[0][0]) | |||
| assert res[i] == d[0][0] | |||
| for i, d in enumerate(data3.create_tuple_iterator(output_numpy=True)): | |||
| t = d | |||
| logger.info("data: %i", t[0][0]) | |||
| assert res[i] == t[0][0] | |||
| assert sum([1 for _ in data3]) == 3 | |||
| @@ -270,9 +278,10 @@ def test_concat_12(): | |||
| data3 = data3.shuffle(buffer_size=10) | |||
| # Here i refers to index, d refers to data element | |||
| for i, d in enumerate(data3): | |||
| logger.info("data: %i", d[0][0]) | |||
| assert res[i] == d[0][0] | |||
| for i, d in enumerate(data3.create_tuple_iterator(output_numpy=True)): | |||
| t = d | |||
| logger.info("data: %i", t[0][0]) | |||
| assert res[i] == t[0][0] | |||
| assert sum([1 for _ in data3]) == 10 | |||
| @@ -297,9 +306,10 @@ def test_concat_13(): | |||
| data3 = data3.shuffle(buffer_size=int(data3.get_dataset_size())) | |||
| # Here i refers to index, d refers to data element | |||
| for i, d in enumerate(data3): | |||
| logger.info("data: %i", d[0][0]) | |||
| assert res[i] == d[0][0] | |||
| for i, d in enumerate(data3.create_tuple_iterator(output_numpy=True)): | |||
| t = d | |||
| logger.info("data: %i", t[0][0]) | |||
| assert res[i] == t[0][0] | |||
| assert sum([1 for _ in data3]) == 3 | |||
| @@ -324,11 +334,11 @@ def test_concat_14(): | |||
| data3 = data1 + data2 | |||
| expected, output = [], [] | |||
| for d in data1: | |||
| for d in data1.create_tuple_iterator(output_numpy=True): | |||
| expected.append(d[0]) | |||
| for d in data2: | |||
| for d in data2.create_tuple_iterator(output_numpy=True): | |||
| expected.append(d[0]) | |||
| for d in data3: | |||
| for d in data3.create_tuple_iterator(output_numpy=True): | |||
| output.append(d[0]) | |||
| assert len(expected) == len(output) | |||
| @@ -34,7 +34,7 @@ def test_concatenate_op_all(): | |||
| data = data.map(operations=concatenate_op, input_columns=["col"]) | |||
| expected = np.array([1.4, 2., 3., 4., 4.5, 5., 6., 7., 8., 9., 10.3, | |||
| 11., 12.]) | |||
| for data_row in data: | |||
| for data_row in data.create_tuple_iterator(output_numpy=True): | |||
| np.testing.assert_array_equal(data_row[0], expected) | |||
| @@ -46,7 +46,7 @@ def test_concatenate_op_none(): | |||
| concatenate_op = data_trans.Concatenate() | |||
| data = data.map(operations=concatenate_op, input_columns=["col"]) | |||
| for data_row in data: | |||
| for data_row in data.create_tuple_iterator(output_numpy=True): | |||
| np.testing.assert_array_equal(data_row[0], np.array([5., 6., 7., 8.], dtype=np.float)) | |||
| @@ -61,7 +61,7 @@ def test_concatenate_op_string(): | |||
| data = data.map(operations=concatenate_op, input_columns=["col"]) | |||
| expected = np.array(["dw", "df", "ss", "ad", "dwsdf", "df"], dtype='S') | |||
| for data_row in data: | |||
| for data_row in data.create_tuple_iterator(output_numpy=True): | |||
| np.testing.assert_array_equal(data_row[0], expected) | |||
| @@ -77,7 +77,7 @@ def test_concatenate_op_multi_input_string(): | |||
| data = data.map(operations=concatenate_op, input_columns=["col1", "col2"], column_order=["out1"], | |||
| output_columns=["out1"]) | |||
| expected = np.array(["dw", "df", "1", "2", "d", "3", "4", "e", "dwsdf", "df"], dtype='S') | |||
| for data_row in data: | |||
| for data_row in data.create_tuple_iterator(output_numpy=True): | |||
| np.testing.assert_array_equal(data_row[0], expected) | |||
| @@ -92,7 +92,7 @@ def test_concatenate_op_multi_input_numeric(): | |||
| data = data.map(operations=concatenate_op, input_columns=["col1", "col2"], column_order=["out1"], | |||
| output_columns=["out1"]) | |||
| expected = np.array([3, 5, 1, 2, 3, 4]) | |||
| for data_row in data: | |||
| for data_row in data.create_tuple_iterator(output_numpy=True): | |||
| np.testing.assert_array_equal(data_row[0], expected) | |||
| @@ -158,7 +158,7 @@ def test_concatenate_op_negative_axis(): | |||
| data = data.map(operations=concatenate_op, input_columns=["col"]) | |||
| expected = np.array([1.4, 2., 3., 4., 4.5, 5., 6., 7., 8., 9., 10.3, | |||
| 11., 12.]) | |||
| for data_row in data: | |||
| for data_row in data.create_tuple_iterator(output_numpy=True): | |||
| np.testing.assert_array_equal(data_row[0], expected) | |||
| @@ -288,7 +288,7 @@ def test_deterministic_python_seed(): | |||
| data1 = data1.map(operations=transform, input_columns=["image"]) | |||
| data1_output = [] | |||
| # config.set_seed() calls random.seed() | |||
| for data_one in data1.create_dict_iterator(num_epochs=1): | |||
| for data_one in data1.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| data1_output.append(data_one["image"]) | |||
| # Second dataset | |||
| @@ -298,7 +298,7 @@ def test_deterministic_python_seed(): | |||
| ds.config.set_seed(0) | |||
| data2_output = [] | |||
| for data_two in data2.create_dict_iterator(num_epochs=1): | |||
| for data_two in data2.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| data2_output.append(data_two["image"]) | |||
| np.testing.assert_equal(data1_output, data2_output) | |||
| @@ -331,7 +331,7 @@ def test_deterministic_python_seed_multi_thread(): | |||
| data1 = data1.map(operations=transform, input_columns=["image"], python_multiprocessing=True) | |||
| data1_output = [] | |||
| # config.set_seed() calls random.seed() | |||
| for data_one in data1.create_dict_iterator(num_epochs=1): | |||
| for data_one in data1.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| data1_output.append(data_one["image"]) | |||
| # Second dataset | |||
| @@ -342,7 +342,7 @@ def test_deterministic_python_seed_multi_thread(): | |||
| ds.config.set_seed(0) | |||
| data2_output = [] | |||
| for data_two in data2.create_dict_iterator(num_epochs=1): | |||
| for data_two in data2.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| data2_output.append(data_two["image"]) | |||
| try: | |||
| @@ -61,7 +61,8 @@ def test_cut_out_op(plot=False): | |||
| data2 = data2.map(operations=transforms_2, input_columns=["image"]) | |||
| num_iter = 0 | |||
| for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): | |||
| for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), | |||
| data2.create_dict_iterator(num_epochs=1, output_numpy=True)): | |||
| num_iter += 1 | |||
| image_1 = (item1["image"].transpose(1, 2, 0) * 255).astype(np.uint8) | |||
| # C image doesn't require transpose | |||
| @@ -108,7 +109,8 @@ def test_cut_out_op_multicut(plot=False): | |||
| num_iter = 0 | |||
| image_list_1, image_list_2 = [], [] | |||
| for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): | |||
| for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), | |||
| data2.create_dict_iterator(num_epochs=1, output_numpy=True)): | |||
| num_iter += 1 | |||
| image_1 = (item1["image"].transpose(1, 2, 0) * 255).astype(np.uint8) | |||
| # C image doesn't require transpose | |||
| @@ -189,7 +191,8 @@ def test_cut_out_comp(plot=False): | |||
| num_iter = 0 | |||
| image_list_1, image_list_2 = [], [] | |||
| for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): | |||
| for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), | |||
| data2.create_dict_iterator(num_epochs=1, output_numpy=True)): | |||
| num_iter += 1 | |||
| image_1 = (item1["image"].transpose(1, 2, 0) * 255).astype(np.uint8) | |||
| # C image doesn't require transpose | |||
| @@ -44,9 +44,9 @@ def test_cutmix_batch_success1(plot=False): | |||
| images_original = None | |||
| for idx, (image, _) in enumerate(ds_original): | |||
| if idx == 0: | |||
| images_original = image | |||
| images_original = image.asnumpy() | |||
| else: | |||
| images_original = np.append(images_original, image, axis=0) | |||
| images_original = np.append(images_original, image.asnumpy(), axis=0) | |||
| # CutMix Images | |||
| data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False) | |||
| @@ -61,9 +61,9 @@ def test_cutmix_batch_success1(plot=False): | |||
| images_cutmix = None | |||
| for idx, (image, _) in enumerate(data1): | |||
| if idx == 0: | |||
| images_cutmix = image.transpose(0, 2, 3, 1) | |||
| images_cutmix = image.asnumpy().transpose(0, 2, 3, 1) | |||
| else: | |||
| images_cutmix = np.append(images_cutmix, image.transpose(0, 2, 3, 1), axis=0) | |||
| images_cutmix = np.append(images_cutmix, image.asnumpy().transpose(0, 2, 3, 1), axis=0) | |||
| if plot: | |||
| visualize_list(images_original, images_cutmix) | |||
| @@ -87,9 +87,9 @@ def test_cutmix_batch_success2(plot=False): | |||
| images_original = None | |||
| for idx, (image, _) in enumerate(ds_original): | |||
| if idx == 0: | |||
| images_original = image | |||
| images_original = image.asnumpy() | |||
| else: | |||
| images_original = np.append(images_original, image, axis=0) | |||
| images_original = np.append(images_original, image.asnumpy(), axis=0) | |||
| # CutMix Images | |||
| data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False) | |||
| @@ -104,9 +104,9 @@ def test_cutmix_batch_success2(plot=False): | |||
| images_cutmix = None | |||
| for idx, (image, _) in enumerate(data1): | |||
| if idx == 0: | |||
| images_cutmix = image | |||
| images_cutmix = image.asnumpy() | |||
| else: | |||
| images_cutmix = np.append(images_cutmix, image, axis=0) | |||
| images_cutmix = np.append(images_cutmix, image.asnumpy(), axis=0) | |||
| if plot: | |||
| visualize_list(images_original, images_cutmix) | |||
| @@ -131,9 +131,9 @@ def test_cutmix_batch_success3(plot=False): | |||
| images_original = None | |||
| for idx, (image, _) in enumerate(ds_original): | |||
| if idx == 0: | |||
| images_original = image | |||
| images_original = image.asnumpy() | |||
| else: | |||
| images_original = np.append(images_original, image, axis=0) | |||
| images_original = np.append(images_original, image.asnumpy(), axis=0) | |||
| # CutMix Images | |||
| data1 = ds.ImageFolderDataset(dataset_dir=DATA_DIR2, shuffle=False) | |||
| @@ -151,9 +151,9 @@ def test_cutmix_batch_success3(plot=False): | |||
| images_cutmix = None | |||
| for idx, (image, _) in enumerate(data1): | |||
| if idx == 0: | |||
| images_cutmix = image | |||
| images_cutmix = image.asnumpy() | |||
| else: | |||
| images_cutmix = np.append(images_cutmix, image, axis=0) | |||
| images_cutmix = np.append(images_cutmix, image.asnumpy(), axis=0) | |||
| if plot: | |||
| visualize_list(images_original, images_cutmix) | |||
| @@ -178,9 +178,9 @@ def test_cutmix_batch_success4(plot=False): | |||
| images_original = None | |||
| for idx, (image, _) in enumerate(ds_original): | |||
| if idx == 0: | |||
| images_original = image | |||
| images_original = image.asnumpy() | |||
| else: | |||
| images_original = np.append(images_original, image, axis=0) | |||
| images_original = np.append(images_original, image.asnumpy(), axis=0) | |||
| # CutMix Images | |||
| data1 = ds.CelebADataset(dataset_dir=DATA_DIR3, shuffle=False) | |||
| @@ -198,9 +198,9 @@ def test_cutmix_batch_success4(plot=False): | |||
| images_cutmix = None | |||
| for idx, (image, _) in enumerate(data1): | |||
| if idx == 0: | |||
| images_cutmix = image | |||
| images_cutmix = image.asnumpy() | |||
| else: | |||
| images_cutmix = np.append(images_cutmix, image, axis=0) | |||
| images_cutmix = np.append(images_cutmix, image.asnumpy(), axis=0) | |||
| if plot: | |||
| visualize_list(images_original, images_cutmix) | |||
| @@ -279,9 +279,9 @@ def test_cutmix_batch_fail1(): | |||
| data1 = data1.map(operations=cutmix_batch_op, input_columns=["image", "label"]) | |||
| for idx, (image, _) in enumerate(data1): | |||
| if idx == 0: | |||
| images_cutmix = image | |||
| images_cutmix = image.asnumpy() | |||
| else: | |||
| images_cutmix = np.append(images_cutmix, image, axis=0) | |||
| images_cutmix = np.append(images_cutmix, image.asnumpy(), axis=0) | |||
| error_message = "You must make sure images are HWC or CHW and batch " | |||
| assert error_message in str(error.value) | |||
| @@ -360,9 +360,9 @@ def test_cutmix_batch_fail5(): | |||
| images_cutmix = np.array([]) | |||
| for idx, (image, _) in enumerate(data1): | |||
| if idx == 0: | |||
| images_cutmix = image | |||
| images_cutmix = image.asnumpy() | |||
| else: | |||
| images_cutmix = np.append(images_cutmix, image, axis=0) | |||
| images_cutmix = np.append(images_cutmix, image.asnumpy(), axis=0) | |||
| error_message = "Both images and labels columns are required" | |||
| assert error_message in str(error.value) | |||
| @@ -387,9 +387,9 @@ def test_cutmix_batch_fail6(): | |||
| images_cutmix = np.array([]) | |||
| for idx, (image, _) in enumerate(data1): | |||
| if idx == 0: | |||
| images_cutmix = image | |||
| images_cutmix = image.asnumpy() | |||
| else: | |||
| images_cutmix = np.append(images_cutmix, image, axis=0) | |||
| images_cutmix = np.append(images_cutmix, image.asnumpy(), axis=0) | |||
| error_message = "CutMixBatch: Image doesn't match the given image format." | |||
| assert error_message in str(error.value) | |||
| @@ -412,9 +412,9 @@ def test_cutmix_batch_fail7(): | |||
| images_cutmix = np.array([]) | |||
| for idx, (image, _) in enumerate(data1): | |||
| if idx == 0: | |||
| images_cutmix = image | |||
| images_cutmix = image.asnumpy() | |||
| else: | |||
| images_cutmix = np.append(images_cutmix, image, axis=0) | |||
| images_cutmix = np.append(images_cutmix, image.asnumpy(), axis=0) | |||
| error_message = "CutMixBatch: Wrong labels shape. The second column (labels) must have a shape of NC or NLC" | |||
| assert error_message in str(error.value) | |||
| @@ -28,7 +28,7 @@ def test_numpy_slices_list_1(): | |||
| ds = de.NumpySlicesDataset(np_data, shuffle=False) | |||
| for i, data in enumerate(ds): | |||
| assert data[0] == np_data[i] | |||
| assert data[0].asnumpy() == np_data[i] | |||
| def test_numpy_slices_list_2(): | |||
| @@ -38,7 +38,7 @@ def test_numpy_slices_list_2(): | |||
| ds = de.NumpySlicesDataset(np_data, column_names=["col1"], shuffle=False) | |||
| for i, data in enumerate(ds): | |||
| assert np.equal(data[0], np_data[i]).all() | |||
| assert np.equal(data[0].asnumpy(), np_data[i]).all() | |||
| def test_numpy_slices_list_3(): | |||
| @@ -48,7 +48,7 @@ def test_numpy_slices_list_3(): | |||
| ds = de.NumpySlicesDataset(np_data, column_names=["col1"], shuffle=False) | |||
| for i, data in enumerate(ds): | |||
| assert np.equal(data[0], np_data[i]).all() | |||
| assert np.equal(data[0].asnumpy(), np_data[i]).all() | |||
| def test_numpy_slices_list_append(): | |||
| @@ -62,12 +62,12 @@ def test_numpy_slices_list_append(): | |||
| data1 = data1.map(operations=[vision.Decode(True), resize_op], input_columns=["image"]) | |||
| res = [] | |||
| for data in data1.create_dict_iterator(num_epochs=1): | |||
| for data in data1.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| res.append(data["image"]) | |||
| ds = de.NumpySlicesDataset(res, column_names=["col1"], shuffle=False) | |||
| for i, data in enumerate(ds): | |||
| for i, data in enumerate(ds.create_tuple_iterator(output_numpy=True)): | |||
| assert np.equal(data, res[i]).all() | |||
| @@ -79,8 +79,8 @@ def test_numpy_slices_dict_1(): | |||
| res = [[1, 3], [2, 4]] | |||
| for i, data in enumerate(ds): | |||
| assert data[0] == res[i][0] | |||
| assert data[1] == res[i][1] | |||
| assert data[0].asnumpy() == res[i][0] | |||
| assert data[1].asnumpy() == res[i][1] | |||
| def test_numpy_slices_tuple_1(): | |||
| @@ -89,7 +89,7 @@ def test_numpy_slices_tuple_1(): | |||
| np_data = [([1, 2], [3, 4]), ([11, 12], [13, 14]), ([21, 22], [23, 24])] | |||
| ds = de.NumpySlicesDataset(np_data, shuffle=False) | |||
| for i, data in enumerate(ds): | |||
| for i, data in enumerate(ds.create_tuple_iterator(output_numpy=True)): | |||
| assert np.equal(data, np_data[i]).all() | |||
| assert sum([1 for _ in ds]) == 3 | |||
| @@ -102,7 +102,7 @@ def test_numpy_slices_tuple_2(): | |||
| expected = [[1, 3, 5], [2, 4, 6]] | |||
| ds = de.NumpySlicesDataset(np_data, shuffle=False) | |||
| for i, data in enumerate(ds): | |||
| for i, data in enumerate(ds.create_tuple_iterator(output_numpy=True)): | |||
| assert np.equal(data, expected[i]).all() | |||
| assert sum([1 for _ in ds]) == 2 | |||
| @@ -116,8 +116,8 @@ def test_numpy_slices_tuple_3(): | |||
| ds = de.NumpySlicesDataset(data, column_names=["col1", "col2"], shuffle=False) | |||
| for i, data in enumerate(ds): | |||
| assert np.equal(data[0], features[i]).all() | |||
| assert data[1] == labels[i] | |||
| assert np.equal(data[0].asnumpy(), features[i]).all() | |||
| assert data[1].asnumpy() == labels[i] | |||
| def test_numpy_slices_csv_value(): | |||
| @@ -132,8 +132,8 @@ def test_numpy_slices_csv_value(): | |||
| ds = de.NumpySlicesDataset(np_data, column_names=["col1", "col2"], shuffle=False) | |||
| for i, data in enumerate(ds): | |||
| assert np.equal(np_data[0][i], data[0]).all() | |||
| assert np.equal(np_data[1][i], data[1]).all() | |||
| assert np.equal(np_data[0][i], data[0].asnumpy()).all() | |||
| assert np.equal(np_data[1][i], data[1].asnumpy()).all() | |||
| def test_numpy_slices_csv_dict(): | |||
| @@ -146,7 +146,7 @@ def test_numpy_slices_csv_dict(): | |||
| ds = de.NumpySlicesDataset(dict(df), shuffle=False) | |||
| for i, data in enumerate(ds): | |||
| for i, data in enumerate(ds.create_tuple_iterator(output_numpy=True)): | |||
| assert np.equal(data, res[i]).all() | |||
| @@ -157,7 +157,7 @@ def test_numpy_slices_num_samplers(): | |||
| ds = de.NumpySlicesDataset(np_data, shuffle=False, num_samples=2) | |||
| for i, data in enumerate(ds): | |||
| assert np.equal(data[0], np_data[i]).all() | |||
| assert np.equal(data[0].asnumpy(), np_data[i]).all() | |||
| assert sum([1 for _ in ds]) == 2 | |||
| @@ -169,7 +169,7 @@ def test_numpy_slices_distributed_sampler(): | |||
| ds = de.NumpySlicesDataset(np_data, shuffle=False, shard_id=0, num_shards=4) | |||
| for i, data in enumerate(ds): | |||
| assert np.equal(data[0], np_data[i * 4]).all() | |||
| assert np.equal(data[0].asnumpy(), np_data[i * 4]).all() | |||
| assert sum([1 for _ in ds]) == 2 | |||
| @@ -200,7 +200,7 @@ def test_numpy_slices_sequential_sampler(): | |||
| ds = de.NumpySlicesDataset(np_data, sampler=de.SequentialSampler()).repeat(2) | |||
| for i, data in enumerate(ds): | |||
| assert np.equal(data[0], np_data[i % 8]).all() | |||
| assert np.equal(data[0].asnumpy(), np_data[i % 8]).all() | |||
| def test_numpy_slices_invalid_column_names_type(): | |||
| @@ -27,7 +27,7 @@ def test_celeba_dataset_label(): | |||
| [0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, | |||
| 0, 0, 1]] | |||
| count = 0 | |||
| for item in data.create_dict_iterator(num_epochs=1): | |||
| for item in data.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| logger.info("----------image--------") | |||
| logger.info(item["image"]) | |||
| logger.info("----------attr--------") | |||
| @@ -63,7 +63,7 @@ def test_celeba_dataset_ext(): | |||
| expect_labels = [0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, | |||
| 0, 1, 0, 1, 0, 0, 1], | |||
| count = 0 | |||
| for item in data.create_dict_iterator(num_epochs=1): | |||
| for item in data.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| logger.info("----------image--------") | |||
| logger.info(item["image"]) | |||
| logger.info("----------attr--------") | |||
| @@ -75,7 +75,7 @@ def test_cifar10_content_check(): | |||
| images, labels = load_cifar(DATA_DIR_10) | |||
| num_iter = 0 | |||
| # in this example, each dictionary has keys "image" and "label" | |||
| for i, d in enumerate(data1.create_dict_iterator(num_epochs=1)): | |||
| for i, d in enumerate(data1.create_dict_iterator(num_epochs=1, output_numpy=True)): | |||
| np.testing.assert_array_equal(d["image"], images[i]) | |||
| np.testing.assert_array_equal(d["label"], labels[i]) | |||
| num_iter += 1 | |||
| @@ -153,7 +153,7 @@ def test_cifar10_pk_sampler(): | |||
| data = ds.Cifar10Dataset(DATA_DIR_10, sampler=sampler) | |||
| num_iter = 0 | |||
| label_list = [] | |||
| for item in data.create_dict_iterator(num_epochs=1): | |||
| for item in data.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| label_list.append(item["label"]) | |||
| num_iter += 1 | |||
| np.testing.assert_array_equal(golden, label_list) | |||
| @@ -170,7 +170,8 @@ def test_cifar10_sequential_sampler(): | |||
| data1 = ds.Cifar10Dataset(DATA_DIR_10, sampler=sampler) | |||
| data2 = ds.Cifar10Dataset(DATA_DIR_10, shuffle=False, num_samples=num_samples) | |||
| num_iter = 0 | |||
| for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): | |||
| for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), | |||
| data2.create_dict_iterator(num_epochs=1, output_numpy=True)): | |||
| np.testing.assert_equal(item1["label"], item2["label"]) | |||
| num_iter += 1 | |||
| assert num_iter == num_samples | |||
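Where two pipelines are compared row by row, as in the Cifar10 sequential-sampler hunk above, both dict iterators now take output_numpy=True so the np.testing assertions operate on plain ndarrays. A short sketch of that pattern over two NumpySlicesDataset pipelines built from the same source (the data here is a stand-in, not a test fixture):

    import numpy as np
    import mindspore.dataset as ds

    np_data = [[1, 2], [3, 4], [5, 6]]
    data1 = ds.NumpySlicesDataset(np_data, column_names=["col1"], shuffle=False)
    data2 = ds.NumpySlicesDataset(np_data, column_names=["col1"], shuffle=False)

    it1 = data1.create_dict_iterator(num_epochs=1, output_numpy=True)
    it2 = data2.create_dict_iterator(num_epochs=1, output_numpy=True)
    for row1, row2 in zip(it1, it2):
        # Both values are numpy.ndarray, so no conversion is needed.
        np.testing.assert_array_equal(row1["col1"], row2["col1"])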
| @@ -225,7 +226,7 @@ def test_cifar10_visualize(plot=False): | |||
| data1 = ds.Cifar10Dataset(DATA_DIR_10, num_samples=10, shuffle=False) | |||
| num_iter = 0 | |||
| image_list, label_list = [], [] | |||
| for item in data1.create_dict_iterator(num_epochs=1): | |||
| for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| image = item["image"] | |||
| label = item["label"] | |||
| image_list.append(image) | |||
| @@ -251,7 +252,7 @@ def test_cifar100_content_check(): | |||
| images, labels = load_cifar(DATA_DIR_100, kind="cifar100") | |||
| num_iter = 0 | |||
| # in this example, each dictionary has keys "image", "coarse_label" and "fine_label" | |||
| for i, d in enumerate(data1.create_dict_iterator(num_epochs=1)): | |||
| for i, d in enumerate(data1.create_dict_iterator(num_epochs=1, output_numpy=True)): | |||
| np.testing.assert_array_equal(d["image"], images[i]) | |||
| np.testing.assert_array_equal(d["coarse_label"], labels[i][0]) | |||
| np.testing.assert_array_equal(d["fine_label"], labels[i][1]) | |||
| @@ -319,7 +320,7 @@ def test_cifar100_pk_sampler(): | |||
| data = ds.Cifar100Dataset(DATA_DIR_100, sampler=sampler) | |||
| num_iter = 0 | |||
| label_list = [] | |||
| for item in data.create_dict_iterator(num_epochs=1): | |||
| for item in data.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| label_list.append(item["coarse_label"]) | |||
| num_iter += 1 | |||
| np.testing.assert_array_equal(golden, label_list) | |||
| @@ -375,7 +376,7 @@ def test_cifar100_visualize(plot=False): | |||
| data1 = ds.Cifar100Dataset(DATA_DIR_100, num_samples=10, shuffle=False) | |||
| num_iter = 0 | |||
| image_list, label_list = [], [] | |||
| for item in data1.create_dict_iterator(num_epochs=1): | |||
| for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| image = item["image"] | |||
| coarse_label = item["coarse_label"] | |||
| fine_label = item["fine_label"] | |||
| @@ -26,7 +26,7 @@ def test_clue(): | |||
| data = ds.CLUEDataset(TRAIN_FILE, task='AFQMC', usage='train', shuffle=False) | |||
| data = data.repeat(2) | |||
| data = data.skip(3) | |||
| for d in data.create_dict_iterator(num_epochs=1): | |||
| for d in data.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| buffer.append({ | |||
| 'label': d['label'].item().decode("utf8"), | |||
| 'sentence1': d['sentence1'].item().decode("utf8"), | |||
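For text-valued columns (the CLUE, CSV, and TextFile tests in this section), output_numpy=True yields NumPy arrays holding bytes, which is why these hunks keep the .item().decode("utf8") step to recover Python strings. A small sketch of reading one string column this way; the file path is a placeholder rather than a test fixture:

    import mindspore.dataset as ds

    data = ds.TextFileDataset("lines.txt", shuffle=False)  # placeholder path
    for row in data.create_dict_iterator(num_epochs=1, output_numpy=True):
        # row["text"] is a NumPy scalar array of bytes; decode to get a str.
        line = row["text"].item().decode("utf8")
        print(line)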
| @@ -43,7 +43,7 @@ def test_clue_num_shards(): | |||
| buffer = [] | |||
| data = ds.CLUEDataset(TRAIN_FILE, task='AFQMC', usage='train', num_shards=3, shard_id=1) | |||
| for d in data.create_dict_iterator(num_epochs=1): | |||
| for d in data.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| buffer.append({ | |||
| 'label': d['label'].item().decode("utf8"), | |||
| 'sentence1': d['sentence1'].item().decode("utf8"), | |||
| @@ -60,7 +60,7 @@ def test_clue_num_samples(): | |||
| data = ds.CLUEDataset(TRAIN_FILE, task='AFQMC', usage='train', num_samples=2) | |||
| count = 0 | |||
| for _ in data.create_dict_iterator(num_epochs=1): | |||
| for _ in data.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| count += 1 | |||
| assert count == 2 | |||
| @@ -87,7 +87,7 @@ def test_clue_afqmc(): | |||
| # train | |||
| buffer = [] | |||
| data = ds.CLUEDataset(TRAIN_FILE, task='AFQMC', usage='train', shuffle=False) | |||
| for d in data.create_dict_iterator(num_epochs=1): | |||
| for d in data.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| buffer.append({ | |||
| 'label': d['label'].item().decode("utf8"), | |||
| 'sentence1': d['sentence1'].item().decode("utf8"), | |||
| @@ -98,7 +98,7 @@ def test_clue_afqmc(): | |||
| # test | |||
| buffer = [] | |||
| data = ds.CLUEDataset(TEST_FILE, task='AFQMC', usage='test', shuffle=False) | |||
| for d in data.create_dict_iterator(num_epochs=1): | |||
| for d in data.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| buffer.append({ | |||
| 'id': d['id'], | |||
| 'sentence1': d['sentence1'].item().decode("utf8"), | |||
| @@ -109,7 +109,7 @@ def test_clue_afqmc(): | |||
| # evaluation | |||
| buffer = [] | |||
| data = ds.CLUEDataset(EVAL_FILE, task='AFQMC', usage='eval', shuffle=False) | |||
| for d in data.create_dict_iterator(num_epochs=1): | |||
| for d in data.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| buffer.append({ | |||
| 'label': d['label'].item().decode("utf8"), | |||
| 'sentence1': d['sentence1'].item().decode("utf8"), | |||
| @@ -129,7 +129,7 @@ def test_clue_cmnli(): | |||
| # train | |||
| buffer = [] | |||
| data = ds.CLUEDataset(TRAIN_FILE, task='CMNLI', usage='train', shuffle=False) | |||
| for d in data.create_dict_iterator(num_epochs=1): | |||
| for d in data.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| buffer.append({ | |||
| 'label': d['label'].item().decode("utf8"), | |||
| 'sentence1': d['sentence1'].item().decode("utf8"), | |||
| @@ -140,7 +140,7 @@ def test_clue_cmnli(): | |||
| # test | |||
| buffer = [] | |||
| data = ds.CLUEDataset(TEST_FILE, task='CMNLI', usage='test', shuffle=False) | |||
| for d in data.create_dict_iterator(num_epochs=1): | |||
| for d in data.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| buffer.append({ | |||
| 'id': d['id'], | |||
| 'sentence1': d['sentence1'], | |||
| @@ -151,7 +151,7 @@ def test_clue_cmnli(): | |||
| # eval | |||
| buffer = [] | |||
| data = ds.CLUEDataset(EVAL_FILE, task='CMNLI', usage='eval', shuffle=False) | |||
| for d in data.create_dict_iterator(num_epochs=1): | |||
| for d in data.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| buffer.append({ | |||
| 'label': d['label'], | |||
| 'sentence1': d['sentence1'], | |||
| @@ -171,7 +171,7 @@ def test_clue_csl(): | |||
| # train | |||
| buffer = [] | |||
| data = ds.CLUEDataset(TRAIN_FILE, task='CSL', usage='train', shuffle=False) | |||
| for d in data.create_dict_iterator(num_epochs=1): | |||
| for d in data.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| buffer.append({ | |||
| 'id': d['id'], | |||
| 'abst': d['abst'].item().decode("utf8"), | |||
| @@ -183,7 +183,7 @@ def test_clue_csl(): | |||
| # test | |||
| buffer = [] | |||
| data = ds.CLUEDataset(TEST_FILE, task='CSL', usage='test', shuffle=False) | |||
| for d in data.create_dict_iterator(num_epochs=1): | |||
| for d in data.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| buffer.append({ | |||
| 'id': d['id'], | |||
| 'abst': d['abst'].item().decode("utf8"), | |||
| @@ -194,7 +194,7 @@ def test_clue_csl(): | |||
| # eval | |||
| buffer = [] | |||
| data = ds.CLUEDataset(EVAL_FILE, task='CSL', usage='eval', shuffle=False) | |||
| for d in data.create_dict_iterator(num_epochs=1): | |||
| for d in data.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| buffer.append({ | |||
| 'id': d['id'], | |||
| 'abst': d['abst'].item().decode("utf8"), | |||
| @@ -215,7 +215,7 @@ def test_clue_iflytek(): | |||
| # train | |||
| buffer = [] | |||
| data = ds.CLUEDataset(TRAIN_FILE, task='IFLYTEK', usage='train', shuffle=False) | |||
| for d in data.create_dict_iterator(num_epochs=1): | |||
| for d in data.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| buffer.append({ | |||
| 'label': d['label'].item().decode("utf8"), | |||
| 'label_des': d['label_des'].item().decode("utf8"), | |||
| @@ -226,7 +226,7 @@ def test_clue_iflytek(): | |||
| # test | |||
| buffer = [] | |||
| data = ds.CLUEDataset(TEST_FILE, task='IFLYTEK', usage='test', shuffle=False) | |||
| for d in data.create_dict_iterator(num_epochs=1): | |||
| for d in data.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| buffer.append({ | |||
| 'id': d['id'], | |||
| 'sentence': d['sentence'].item().decode("utf8") | |||
| @@ -236,7 +236,7 @@ def test_clue_iflytek(): | |||
| # eval | |||
| buffer = [] | |||
| data = ds.CLUEDataset(EVAL_FILE, task='IFLYTEK', usage='eval', shuffle=False) | |||
| for d in data.create_dict_iterator(num_epochs=1): | |||
| for d in data.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| buffer.append({ | |||
| 'label': d['label'].item().decode("utf8"), | |||
| 'label_des': d['label_des'].item().decode("utf8"), | |||
| @@ -256,7 +256,7 @@ def test_clue_tnews(): | |||
| # train | |||
| buffer = [] | |||
| data = ds.CLUEDataset(TRAIN_FILE, task='TNEWS', usage='train', shuffle=False) | |||
| for d in data.create_dict_iterator(num_epochs=1): | |||
| for d in data.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| buffer.append({ | |||
| 'label': d['label'].item().decode("utf8"), | |||
| 'label_desc': d['label_desc'].item().decode("utf8"), | |||
| @@ -269,7 +269,7 @@ def test_clue_tnews(): | |||
| # test | |||
| buffer = [] | |||
| data = ds.CLUEDataset(TEST_FILE, task='TNEWS', usage='test', shuffle=False) | |||
| for d in data.create_dict_iterator(num_epochs=1): | |||
| for d in data.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| buffer.append({ | |||
| 'id': d['id'], | |||
| 'sentence': d['sentence'].item().decode("utf8"), | |||
| @@ -281,7 +281,7 @@ def test_clue_tnews(): | |||
| # eval | |||
| buffer = [] | |||
| data = ds.CLUEDataset(EVAL_FILE, task='TNEWS', usage='eval', shuffle=False) | |||
| for d in data.create_dict_iterator(num_epochs=1): | |||
| for d in data.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| buffer.append({ | |||
| 'label': d['label'].item().decode("utf8"), | |||
| 'label_desc': d['label_desc'].item().decode("utf8"), | |||
| @@ -303,7 +303,7 @@ def test_clue_wsc(): | |||
| # train | |||
| buffer = [] | |||
| data = ds.CLUEDataset(TRAIN_FILE, task='WSC', usage='train') | |||
| for d in data.create_dict_iterator(num_epochs=1): | |||
| for d in data.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| buffer.append({ | |||
| 'span1_index': d['span1_index'], | |||
| 'span2_index': d['span2_index'], | |||
| @@ -318,7 +318,7 @@ def test_clue_wsc(): | |||
| # test | |||
| buffer = [] | |||
| data = ds.CLUEDataset(TEST_FILE, task='WSC', usage='test') | |||
| for d in data.create_dict_iterator(num_epochs=1): | |||
| for d in data.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| buffer.append({ | |||
| 'span1_index': d['span1_index'], | |||
| 'span2_index': d['span2_index'], | |||
| @@ -332,7 +332,7 @@ def test_clue_wsc(): | |||
| # eval | |||
| buffer = [] | |||
| data = ds.CLUEDataset(EVAL_FILE, task='WSC', usage='eval') | |||
| for d in data.create_dict_iterator(num_epochs=1): | |||
| for d in data.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| buffer.append({ | |||
| 'span1_index': d['span1_index'], | |||
| 'span2_index': d['span2_index'], | |||
| @@ -33,7 +33,7 @@ def test_coco_detection(): | |||
| image_shape = [] | |||
| bbox = [] | |||
| category_id = [] | |||
| for data in data1.create_dict_iterator(num_epochs=1): | |||
| for data in data1.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| image_shape.append(data["image"].shape) | |||
| bbox.append(data["bbox"]) | |||
| category_id.append(data["category_id"]) | |||
| @@ -66,7 +66,7 @@ def test_coco_stuff(): | |||
| image_shape = [] | |||
| segmentation = [] | |||
| iscrowd = [] | |||
| for data in data1.create_dict_iterator(num_epochs=1): | |||
| for data in data1.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| image_shape.append(data["image"].shape) | |||
| segmentation.append(data["segmentation"]) | |||
| iscrowd.append(data["iscrowd"]) | |||
| @@ -107,7 +107,7 @@ def test_coco_keypoint(): | |||
| image_shape = [] | |||
| keypoints = [] | |||
| num_keypoints = [] | |||
| for data in data1.create_dict_iterator(num_epochs=1): | |||
| for data in data1.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| image_shape.append(data["image"].shape) | |||
| keypoints.append(data["keypoints"]) | |||
| num_keypoints.append(data["num_keypoints"]) | |||
| @@ -136,7 +136,7 @@ def test_coco_panoptic(): | |||
| category_id = [] | |||
| iscrowd = [] | |||
| area = [] | |||
| for data in data1.create_dict_iterator(num_epochs=1): | |||
| for data in data1.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| image_shape.append(data["image"].shape) | |||
| bbox.append(data["bbox"]) | |||
| category_id.append(data["category_id"]) | |||
| @@ -33,7 +33,7 @@ def test_csv_dataset_basic(): | |||
| shuffle=False) | |||
| data = data.repeat(2) | |||
| data = data.skip(2) | |||
| for d in data.create_dict_iterator(num_epochs=1): | |||
| for d in data.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| buffer.append(d) | |||
| assert len(buffer) == 4 | |||
| @@ -45,7 +45,7 @@ def test_csv_dataset_one_file(): | |||
| column_names=['col1', 'col2', 'col3', 'col4'], | |||
| shuffle=False) | |||
| buffer = [] | |||
| for d in data.create_dict_iterator(num_epochs=1): | |||
| for d in data.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| buffer.append(d) | |||
| assert len(buffer) == 3 | |||
| @@ -58,7 +58,7 @@ def test_csv_dataset_all_file(): | |||
| column_names=['col1', 'col2', 'col3', 'col4'], | |||
| shuffle=False) | |||
| buffer = [] | |||
| for d in data.create_dict_iterator(num_epochs=1): | |||
| for d in data.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| buffer.append(d) | |||
| assert len(buffer) == 10 | |||
| @@ -70,7 +70,7 @@ def test_csv_dataset_num_samples(): | |||
| column_names=['col1', 'col2', 'col3', 'col4'], | |||
| shuffle=False, num_samples=2) | |||
| count = 0 | |||
| for _ in data.create_dict_iterator(num_epochs=1): | |||
| for _ in data.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| count += 1 | |||
| assert count == 2 | |||
| @@ -83,7 +83,7 @@ def test_csv_dataset_distribution(): | |||
| column_names=['col1', 'col2', 'col3', 'col4'], | |||
| shuffle=False, num_shards=2, shard_id=0) | |||
| count = 0 | |||
| for _ in data.create_dict_iterator(num_epochs=1): | |||
| for _ in data.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| count += 1 | |||
| assert count == 2 | |||
| @@ -96,7 +96,7 @@ def test_csv_dataset_quoted(): | |||
| column_names=['col1', 'col2', 'col3', 'col4'], | |||
| shuffle=False) | |||
| buffer = [] | |||
| for d in data.create_dict_iterator(num_epochs=1): | |||
| for d in data.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| buffer.extend([d['col1'].item().decode("utf8"), | |||
| d['col2'].item().decode("utf8"), | |||
| d['col3'].item().decode("utf8"), | |||
| @@ -113,7 +113,7 @@ def test_csv_dataset_separated(): | |||
| column_names=['col1', 'col2', 'col3', 'col4'], | |||
| shuffle=False) | |||
| buffer = [] | |||
| for d in data.create_dict_iterator(num_epochs=1): | |||
| for d in data.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| buffer.extend([d['col1'].item().decode("utf8"), | |||
| d['col2'].item().decode("utf8"), | |||
| d['col3'].item().decode("utf8"), | |||
| @@ -129,7 +129,7 @@ def test_csv_dataset_embedded(): | |||
| column_names=['col1', 'col2', 'col3', 'col4'], | |||
| shuffle=False) | |||
| buffer = [] | |||
| for d in data.create_dict_iterator(num_epochs=1): | |||
| for d in data.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| buffer.extend([d['col1'].item().decode("utf8"), | |||
| d['col2'].item().decode("utf8"), | |||
| d['col3'].item().decode("utf8"), | |||
| @@ -145,7 +145,7 @@ def test_csv_dataset_chinese(): | |||
| column_names=['col1', 'col2', 'col3', 'col4', 'col5'], | |||
| shuffle=False) | |||
| buffer = [] | |||
| for d in data.create_dict_iterator(num_epochs=1): | |||
| for d in data.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| buffer.extend([d['col1'].item().decode("utf8"), | |||
| d['col2'].item().decode("utf8"), | |||
| d['col3'].item().decode("utf8"), | |||
| @@ -161,7 +161,7 @@ def test_csv_dataset_header(): | |||
| column_defaults=["", "", "", ""], | |||
| shuffle=False) | |||
| buffer = [] | |||
| for d in data.create_dict_iterator(num_epochs=1): | |||
| for d in data.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| buffer.extend([d['col1'].item().decode("utf8"), | |||
| d['col2'].item().decode("utf8"), | |||
| d['col3'].item().decode("utf8"), | |||
| @@ -177,7 +177,7 @@ def test_csv_dataset_number(): | |||
| column_names=['col1', 'col2', 'col3', 'col4'], | |||
| shuffle=False) | |||
| buffer = [] | |||
| for d in data.create_dict_iterator(num_epochs=1): | |||
| for d in data.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| buffer.extend([d['col1'].item(), | |||
| d['col2'].item(), | |||
| d['col3'].item(), | |||
| @@ -203,7 +203,7 @@ def test_csv_dataset_exception(): | |||
| column_names=['col1', 'col2', 'col3', 'col4'], | |||
| shuffle=False) | |||
| with pytest.raises(Exception) as err: | |||
| for _ in data.create_dict_iterator(num_epochs=1): | |||
| for _ in data.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| pass | |||
| assert "Failed to parse file" in str(err.value) | |||
| @@ -216,7 +216,7 @@ def test_csv_dataset_type_error(): | |||
| column_names=['col1', 'col2', 'col3', 'col4'], | |||
| shuffle=False) | |||
| with pytest.raises(Exception) as err: | |||
| for _ in data.create_dict_iterator(num_epochs=1): | |||
| for _ in data.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| pass | |||
| assert "type does not match" in str(err.value) | |||
| @@ -47,7 +47,7 @@ def test_generator_0(): | |||
| data1 = ds.GeneratorDataset(generator_1d, ["data"]) | |||
| i = 0 | |||
| for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary | |||
| for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary | |||
| golden = np.array([i]) | |||
| np.testing.assert_array_equal(item["data"], golden) | |||
| i = i + 1 | |||
| @@ -69,7 +69,7 @@ def test_generator_1(): | |||
| data1 = ds.GeneratorDataset(generator_md, ["data"]) | |||
| i = 0 | |||
| for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary | |||
| for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary | |||
| golden = np.array([[i, i + 1], [i + 2, i + 3]]) | |||
| np.testing.assert_array_equal(item["data"], golden) | |||
| i = i + 1 | |||
| @@ -91,7 +91,7 @@ def test_generator_2(): | |||
| data1 = ds.GeneratorDataset(generator_mc, ["col0", "col1"]) | |||
| i = 0 | |||
| for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary | |||
| for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary | |||
| golden = np.array([i]) | |||
| np.testing.assert_array_equal(item["col0"], golden) | |||
| golden = np.array([[i, i + 1], [i + 2, i + 3]]) | |||
| @@ -111,7 +111,7 @@ def test_generator_3(): | |||
| data1 = data1.repeat(4) | |||
| i = 0 | |||
| for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary | |||
| for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary | |||
| golden = np.array([i]) | |||
| np.testing.assert_array_equal(item["data"], golden) | |||
| i = i + 1 | |||
| @@ -131,7 +131,7 @@ def test_generator_4(): | |||
| data1 = data1.batch(4) | |||
| i = 0 | |||
| for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary | |||
| for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary | |||
| golden = np.array([[i], [i + 1], [i + 2], [i + 3]]) | |||
| np.testing.assert_array_equal(item["data"], golden) | |||
| i = i + 4 | |||
| @@ -151,7 +151,7 @@ def type_tester(t): | |||
| data1 = data1.batch(4) | |||
| i = 0 | |||
| for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary | |||
| for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary | |||
| golden = np.array([[i], [i + 1], [i + 2], [i + 3]], dtype=t) | |||
| np.testing.assert_array_equal(item["data"], golden) | |||
| i = i + 4 | |||
| @@ -178,7 +178,7 @@ def type_tester_with_type_check(t, c): | |||
| data1 = data1.batch(4) | |||
| i = 0 | |||
| for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary | |||
| for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary | |||
| golden = np.array([[i], [i + 1], [i + 2], [i + 3]], dtype=t) | |||
| np.testing.assert_array_equal(item["data"], golden) | |||
| i = i + 4 | |||
| @@ -213,7 +213,7 @@ def type_tester_with_type_check_2c(t, c): | |||
| data1 = data1.batch(4) | |||
| i = 0 | |||
| for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary | |||
| for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary | |||
| golden = np.array([[i], [i + 1], [i + 2], [i + 3]], dtype=t) | |||
| np.testing.assert_array_equal(item["data0"], golden) | |||
| i = i + 4 | |||
| @@ -250,7 +250,7 @@ def test_generator_8(): | |||
| num_parallel_workers=2) | |||
| i = 0 | |||
| for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary | |||
| for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary | |||
| golden = np.array([i * 3]) | |||
| np.testing.assert_array_equal(item["out0"], golden) | |||
| golden = np.array([[i * 7, (i + 1) * 7], [(i + 2) * 7, (i + 3) * 7]]) | |||
| @@ -280,14 +280,14 @@ def test_generator_9(): | |||
| i = 0 | |||
| for data1, data2 in zip(data1, data2): # each data is a dictionary | |||
| golden = np.array([i]) | |||
| np.testing.assert_array_equal(data1[0], golden) | |||
| np.testing.assert_array_equal(data1[0].asnumpy(), golden) | |||
| golden = np.array([[i * 3, (i + 1) * 3], [(i + 2) * 3, (i + 3) * 3]]) | |||
| np.testing.assert_array_equal(data1[1], golden) | |||
| np.testing.assert_array_equal(data1[1].asnumpy(), golden) | |||
| golden = np.array([i * 3]) | |||
| np.testing.assert_array_equal(data2[0], golden) | |||
| np.testing.assert_array_equal(data2[0].asnumpy(), golden) | |||
| golden = np.array([[i, i + 1], [i + 2, i + 3]]) | |||
| np.testing.assert_array_equal(data2[1], golden) | |||
| np.testing.assert_array_equal(data2[1].asnumpy(), golden) | |||
| i = i + 1 | |||
| @@ -304,7 +304,7 @@ def test_generator_10(): | |||
| # Expected column order is |col0|out1|out2| | |||
| i = 0 | |||
| for item in data1.create_tuple_iterator(num_epochs=1): | |||
| for item in data1.create_tuple_iterator(num_epochs=1, output_numpy=True): | |||
| golden = np.array([i]) | |||
| np.testing.assert_array_equal(item[0], golden) | |||
| golden = np.array([[i, i + 1], [i + 2, i + 3]]) | |||
| @@ -328,7 +328,7 @@ def test_generator_11(): | |||
| # Expected column order is |out1|out2| | |||
| i = 0 | |||
| for item in data1.create_tuple_iterator(num_epochs=1): | |||
| for item in data1.create_tuple_iterator(num_epochs=1, output_numpy=True): | |||
| # len should be 2 because col0 is dropped (not included in column_order) | |||
| assert len(item) == 2 | |||
| golden = np.array([[i, i + 1], [i + 2, i + 3]]) | |||
| @@ -350,7 +350,7 @@ def test_generator_12(): | |||
| # Expected column order is |col0|col1| | |||
| i = 0 | |||
| for item in data1.create_tuple_iterator(num_epochs=1): | |||
| for item in data1.create_tuple_iterator(num_epochs=1, output_numpy=True): | |||
| assert len(item) == 2 | |||
| golden = np.array([i * 5]) | |||
| np.testing.assert_array_equal(item[0], golden) | |||
| @@ -363,7 +363,7 @@ def test_generator_12(): | |||
| # Expected column order is |col0|col1| | |||
| i = 0 | |||
| for item in data1.create_tuple_iterator(num_epochs=1): | |||
| for item in data1.create_tuple_iterator(num_epochs=1, output_numpy=True): | |||
| assert len(item) == 2 | |||
| golden = np.array([i * 5]) | |||
| np.testing.assert_array_equal(item[1], golden) | |||
| @@ -384,7 +384,7 @@ def test_generator_13(): | |||
| # Expected column order is |out0|col1| | |||
| i = 0 | |||
| for item in data1.create_tuple_iterator(num_epochs=1): | |||
| for item in data1.create_tuple_iterator(num_epochs=1, output_numpy=True): | |||
| assert len(item) == 2 | |||
| golden = np.array([i * 5]) | |||
| np.testing.assert_array_equal(item[0], golden) | |||
| @@ -392,7 +392,7 @@ def test_generator_13(): | |||
| np.testing.assert_array_equal(item[1], golden) | |||
| i = i + 1 | |||
| for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary | |||
| for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary | |||
| # len should be 2 because col0 is dropped (not included in column_order) | |||
| assert len(item) == 2 | |||
| golden = np.array([i * 5]) | |||
| @@ -411,7 +411,7 @@ def test_generator_14(): | |||
| source = [(np.array([x]),) for x in range(256)] | |||
| ds1 = ds.GeneratorDataset(source, ["data"], sampler=ds.SequentialSampler(), num_parallel_workers=4).repeat(2) | |||
| i = 0 | |||
| for data in ds1.create_dict_iterator(num_epochs=1): # each data is a dictionary | |||
| for data in ds1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary | |||
| golden = np.array([i]) | |||
| np.testing.assert_array_equal(data["data"], golden) | |||
| i = i + 1 | |||
| @@ -429,7 +429,7 @@ def test_generator_15(): | |||
| source = [(np.array([x]),) for x in range(256)] | |||
| ds1 = ds.GeneratorDataset(source, ["data"], sampler=sampler, num_parallel_workers=4).repeat(2) | |||
| i = 0 | |||
| for data in ds1.create_dict_iterator(num_epochs=1): # each data is a dictionary | |||
| for data in ds1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary | |||
| golden = np.array([i]) | |||
| np.testing.assert_array_equal(data["data"], golden) | |||
| i = i + 1 | |||
| @@ -448,7 +448,7 @@ def test_generator_16(): | |||
| data1 = ds.GeneratorDataset(source, ["col0", "col1"], sampler=ds.SequentialSampler()) | |||
| i = 0 | |||
| for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary | |||
| for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary | |||
| golden = np.array([i]) | |||
| np.testing.assert_array_equal(item["col0"], golden) | |||
| golden = np.array([i + 1]) | |||
| @@ -468,7 +468,7 @@ def test_generator_17(): | |||
| data1 = ds.GeneratorDataset(source, ["col0", "col1"], sampler=sampler) | |||
| i = 0 | |||
| for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary | |||
| for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary | |||
| golden = np.array([i]) | |||
| np.testing.assert_array_equal(item["col0"], golden) | |||
| golden = np.array([i + 1]) | |||
| @@ -528,7 +528,7 @@ def test_generator_sequential_sampler(): | |||
| source = [(np.array([x]),) for x in range(64)] | |||
| ds1 = ds.GeneratorDataset(source, ["data"], sampler=ds.SequentialSampler()) | |||
| i = 0 | |||
| for data in ds1.create_dict_iterator(num_epochs=1): # each data is a dictionary | |||
| for data in ds1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary | |||
| golden = np.array([i]) | |||
| np.testing.assert_array_equal(data["data"], golden) | |||
| i = i + 1 | |||
| @@ -546,7 +546,7 @@ def test_generator_distributed_sampler(): | |||
| for sid in range(8): | |||
| ds1 = ds.GeneratorDataset(source, ["data"], shuffle=False, num_shards=8, shard_id=sid) | |||
| i = sid | |||
| for data in ds1.create_dict_iterator(num_epochs=1): # each data is a dictionary | |||
| for data in ds1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary | |||
| golden = np.array([i]) | |||
| np.testing.assert_array_equal(data["data"], golden) | |||
| i = i + 8 | |||
| @@ -605,7 +605,7 @@ def type_tester_with_type_check_2c_schema(t, c): | |||
| data1 = data1.batch(4) | |||
| i = 0 | |||
| for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary | |||
| for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary | |||
| golden = np.array([[i], [i + 1], [i + 2], [i + 3]], dtype=t) | |||
| np.testing.assert_array_equal(item["data0"], golden) | |||
| i = i + 4 | |||
| @@ -636,7 +636,7 @@ def test_generator_dataset_size_0(): | |||
| data_size = data1.get_dataset_size() | |||
| num_rows = 0 | |||
| for _ in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary | |||
| for _ in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary | |||
| num_rows = num_rows + 1 | |||
| assert data_size == num_rows | |||
| @@ -171,7 +171,7 @@ def test_imagefolder_classindex(): | |||
| 333, 333, 333, 333, 333, 333, 333, 333, 333, 333, 333] | |||
| num_iter = 0 | |||
| for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary | |||
| for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary | |||
| # in this example, each dictionary has keys "image" and "label" | |||
| logger.info("image is {}".format(item["image"])) | |||
| logger.info("label is {}".format(item["label"])) | |||
| @@ -196,7 +196,7 @@ def test_imagefolder_negative_classindex(): | |||
| -333, -333, -333, -333, -333, -333, -333, -333, -333, -333, -333] | |||
| num_iter = 0 | |||
| for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary | |||
| for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary | |||
| # in this example, each dictionary has keys "image" and "label" | |||
| logger.info("image is {}".format(item["image"])) | |||
| logger.info("label is {}".format(item["label"])) | |||
| @@ -267,7 +267,7 @@ def test_sequential_sampler(): | |||
| result = [] | |||
| num_iter = 0 | |||
| for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary | |||
| for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary | |||
| # in this example, each dictionary has keys "image" and "label" | |||
| result.append(item["label"]) | |||
| num_iter += 1 | |||
| @@ -26,7 +26,7 @@ def test_manifest_dataset_train(): | |||
| count = 0 | |||
| cat_count = 0 | |||
| dog_count = 0 | |||
| for item in data.create_dict_iterator(num_epochs=1): | |||
| for item in data.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| logger.info("item[image] is {}".format(item["image"])) | |||
| count = count + 1 | |||
| if item["label"].size == 1 and item["label"] == 0: | |||
| @@ -41,7 +41,7 @@ def test_manifest_dataset_train(): | |||
| def test_manifest_dataset_eval(): | |||
| data = ds.ManifestDataset(DATA_FILE, "eval", decode=True) | |||
| count = 0 | |||
| for item in data.create_dict_iterator(num_epochs=1): | |||
| for item in data.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| logger.info("item[image] is {}".format(item["image"])) | |||
| count = count + 1 | |||
| if item["label"] != 0 and item["label"] != 1: | |||
| @@ -55,7 +55,7 @@ def test_manifest_dataset_class_index(): | |||
| out_class_indexing = data.get_class_indexing() | |||
| assert out_class_indexing == {"dog": 11} | |||
| count = 0 | |||
| for item in data.create_dict_iterator(num_epochs=1): | |||
| for item in data.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| logger.info("item[image] is {}".format(item["image"])) | |||
| count = count + 1 | |||
| if item["label"] != 11: | |||
| @@ -81,7 +81,7 @@ def test_manifest_dataset_multi_label(): | |||
| data = ds.ManifestDataset(DATA_FILE, decode=True, shuffle=False) | |||
| count = 0 | |||
| expect_label = [1, 0, 0, [0, 2]] | |||
| for item in data.create_dict_iterator(num_epochs=1): | |||
| for item in data.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| assert item["label"].tolist() == expect_label[count] | |||
| logger.info("item[image] is {}".format(item["image"])) | |||
| count = count + 1 | |||
| @@ -107,7 +107,7 @@ def test_manifest_dataset_multi_label_onehot(): | |||
| data = data.map(operations=multi_label_hot, input_columns=["label"]) | |||
| data = data.batch(2) | |||
| count = 0 | |||
| for item in data.create_dict_iterator(num_epochs=1): | |||
| for item in data.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| assert item["label"].tolist() == expect_label[count] | |||
| logger.info("item[image] is {}".format(item["image"])) | |||
| count = count + 1 | |||
| @@ -64,7 +64,7 @@ def test_mnist_content_check(): | |||
| num_iter = 0 | |||
| # in this example, each dictionary has keys "image" and "label" | |||
| image_list, label_list = [], [] | |||
| for i, data in enumerate(data1.create_dict_iterator(num_epochs=1)): | |||
| for i, data in enumerate(data1.create_dict_iterator(num_epochs=1, output_numpy=True)): | |||
| image_list.append(data["image"]) | |||
| label_list.append("label {}".format(data["label"])) | |||
| np.testing.assert_array_equal(data["image"], images[i]) | |||
| @@ -137,7 +137,7 @@ def test_mnist_pk_sampler(): | |||
| data = ds.MnistDataset(DATA_DIR, sampler=sampler) | |||
| num_iter = 0 | |||
| label_list = [] | |||
| for item in data.create_dict_iterator(num_epochs=1): | |||
| for item in data.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| label_list.append(item["label"]) | |||
| num_iter += 1 | |||
| np.testing.assert_array_equal(golden, label_list) | |||
| @@ -156,8 +156,8 @@ def test_mnist_sequential_sampler(): | |||
| label_list1, label_list2 = [], [] | |||
| num_iter = 0 | |||
| for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): | |||
| label_list1.append(item1["label"]) | |||
| label_list2.append(item2["label"]) | |||
| label_list1.append(item1["label"].asnumpy()) | |||
| label_list2.append(item2["label"].asnumpy()) | |||
| num_iter += 1 | |||
| np.testing.assert_array_equal(label_list1, label_list2) | |||
| assert num_iter == num_samples | |||
| @@ -214,7 +214,7 @@ def test_mnist_visualize(plot=False): | |||
| data1 = ds.MnistDataset(DATA_DIR, num_samples=10, shuffle=False) | |||
| num_iter = 0 | |||
| image_list, label_list = [], [] | |||
| for item in data1.create_dict_iterator(num_epochs=1): | |||
| for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| image = item["image"] | |||
| label = item["label"] | |||
| image_list.append(image) | |||
| @@ -25,7 +25,7 @@ def test_imagefolder_shardings(print_res=False): | |||
| shuffle=shuffle, class_indexing=class_index, decode=True) | |||
| data1 = data1.repeat(repeat_cnt) | |||
| res = [] | |||
| for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary | |||
| for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary | |||
| res.append(item["label"].item()) | |||
| if print_res: | |||
| logger.info("labels of dataset: {}".format(res)) | |||
| @@ -59,7 +59,7 @@ def test_tfrecord_shardings1(print_res=False): | |||
| shuffle=ds.Shuffle.FILES, num_parallel_workers=1) | |||
| data1 = data1.repeat(repeat_cnt) | |||
| res = [] | |||
| for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary | |||
| for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary | |||
| res.append(item["scalars"][0]) | |||
| if print_res: | |||
| logger.info("scalars of dataset: {}".format(res)) | |||
| @@ -97,7 +97,7 @@ def test_tfrecord_shardings4(print_res=False): | |||
| shuffle=ds.Shuffle.FILES, num_parallel_workers=4) | |||
| data1 = data1.repeat(repeat_cnt) | |||
| res = [] | |||
| for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary | |||
| for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary | |||
| res.append(item["scalars"][0]) | |||
| if print_res: | |||
| logger.info("scalars of dataset: {}".format(res)) | |||
| @@ -141,7 +141,7 @@ def test_manifest_shardings(print_res=False): | |||
| shuffle=shuffle, decode=True) | |||
| data1 = data1.repeat(repeat_cnt) | |||
| res = [] | |||
| for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary | |||
| for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary | |||
| res.append(item["label"].item()) | |||
| if print_res: | |||
| logger.info("labels of dataset: {}".format(res)) | |||
| @@ -166,7 +166,7 @@ def test_voc_shardings(print_res=False): | |||
| data1 = ds.VOCDataset(voc_dir, decode=True, sampler=sampler) | |||
| data1 = data1.repeat(repeat_cnt) | |||
| res = [] | |||
| for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary | |||
| for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary | |||
| res.append(item["image"].shape[0]) | |||
| if print_res: | |||
| logger.info("labels of dataset: {}".format(res)) | |||
| @@ -194,7 +194,7 @@ def test_cifar10_shardings(print_res=False): | |||
| shuffle=shuffle) | |||
| data1 = data1.repeat(repeat_cnt) | |||
| res = [] | |||
| for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary | |||
| for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary | |||
| res.append(item["label"].item()) | |||
| if print_res: | |||
| logger.info("labels of dataset: {}".format(res)) | |||
| @@ -214,7 +214,7 @@ def test_cifar100_shardings(print_res=False): | |||
| shuffle=shuffle) | |||
| data1 = data1.repeat(repeat_cnt) | |||
| res = [] | |||
| for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary | |||
| for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary | |||
| res.append(item["coarse_label"].item()) | |||
| if print_res: | |||
| logger.info("labels of dataset: {}".format(res)) | |||
| @@ -233,7 +233,7 @@ def test_mnist_shardings(print_res=False): | |||
| shuffle=shuffle) | |||
| data1 = data1.repeat(repeat_cnt) | |||
| res = [] | |||
| for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary | |||
| for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary | |||
| res.append(item["label"].item()) | |||
| if print_res: | |||
| logger.info("labels of dataset: {}".format(res)) | |||
| @@ -25,7 +25,7 @@ DATA_ALL_FILE = "../data/dataset/testTextFileDataset/*" | |||
| def test_textline_dataset_one_file(): | |||
| data = ds.TextFileDataset(DATA_FILE) | |||
| count = 0 | |||
| for i in data.create_dict_iterator(num_epochs=1): | |||
| for i in data.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| logger.info("{}".format(i["text"])) | |||
| count += 1 | |||
| assert count == 3 | |||
| @@ -34,7 +34,7 @@ def test_textline_dataset_one_file(): | |||
| def test_textline_dataset_all_file(): | |||
| data = ds.TextFileDataset(DATA_ALL_FILE) | |||
| count = 0 | |||
| for i in data.create_dict_iterator(num_epochs=1): | |||
| for i in data.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| logger.info("{}".format(i["text"])) | |||
| count += 1 | |||
| assert count == 5 | |||
| @@ -43,7 +43,7 @@ def test_textline_dataset_all_file(): | |||
| def test_textline_dataset_num_samples_zero(): | |||
| data = ds.TextFileDataset(DATA_FILE, num_samples=0) | |||
| count = 0 | |||
| for i in data.create_dict_iterator(num_epochs=1): | |||
| for i in data.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| logger.info("{}".format(i["text"])) | |||
| count += 1 | |||
| assert count == 3 | |||
| @@ -56,7 +56,7 @@ def test_textline_dataset_shuffle_false4(): | |||
| count = 0 | |||
| line = ["This is a text file.", "Another file.", | |||
| "Be happy every day.", "End of file.", "Good luck to everyone."] | |||
| for i in data.create_dict_iterator(num_epochs=1): | |||
| for i in data.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| strs = i["text"].item().decode("utf8") | |||
| assert strs == line[count] | |||
| count += 1 | |||
| @@ -73,7 +73,7 @@ def test_textline_dataset_shuffle_false1(): | |||
| count = 0 | |||
| line = ["This is a text file.", "Be happy every day.", "Good luck to everyone.", | |||
| "Another file.", "End of file."] | |||
| for i in data.create_dict_iterator(num_epochs=1): | |||
| for i in data.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| strs = i["text"].item().decode("utf8") | |||
| assert strs == line[count] | |||
| count += 1 | |||
| @@ -90,7 +90,7 @@ def test_textline_dataset_shuffle_files4(): | |||
| count = 0 | |||
| line = ["This is a text file.", "Another file.", | |||
| "Be happy every day.", "End of file.", "Good luck to everyone."] | |||
| for i in data.create_dict_iterator(num_epochs=1): | |||
| for i in data.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| strs = i["text"].item().decode("utf8") | |||
| assert strs == line[count] | |||
| count += 1 | |||
| @@ -107,7 +107,7 @@ def test_textline_dataset_shuffle_files1(): | |||
| count = 0 | |||
| line = ["This is a text file.", "Be happy every day.", "Good luck to everyone.", | |||
| "Another file.", "End of file."] | |||
| for i in data.create_dict_iterator(num_epochs=1): | |||
| for i in data.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| strs = i["text"].item().decode("utf8") | |||
| assert strs == line[count] | |||
| count += 1 | |||
| @@ -124,7 +124,7 @@ def test_textline_dataset_shuffle_global4(): | |||
| count = 0 | |||
| line = ["Another file.", "Good luck to everyone.", "End of file.", | |||
| "This is a text file.", "Be happy every day."] | |||
| for i in data.create_dict_iterator(num_epochs=1): | |||
| for i in data.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| strs = i["text"].item().decode("utf8") | |||
| assert strs == line[count] | |||
| count += 1 | |||
| @@ -141,7 +141,7 @@ def test_textline_dataset_shuffle_global1(): | |||
| count = 0 | |||
| line = ["Another file.", "Good luck to everyone.", "This is a text file.", | |||
| "End of file.", "Be happy every day."] | |||
| for i in data.create_dict_iterator(num_epochs=1): | |||
| for i in data.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| strs = i["text"].item().decode("utf8") | |||
| assert strs == line[count] | |||
| count += 1 | |||
| @@ -154,7 +154,7 @@ def test_textline_dataset_shuffle_global1(): | |||
| def test_textline_dataset_num_samples(): | |||
| data = ds.TextFileDataset(DATA_FILE, num_samples=2) | |||
| count = 0 | |||
| for _ in data.create_dict_iterator(num_epochs=1): | |||
| for _ in data.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| count += 1 | |||
| assert count == 2 | |||
| @@ -162,7 +162,7 @@ def test_textline_dataset_num_samples(): | |||
| def test_textline_dataset_distribution(): | |||
| data = ds.TextFileDataset(DATA_ALL_FILE, num_shards=2, shard_id=1) | |||
| count = 0 | |||
| for _ in data.create_dict_iterator(num_epochs=1): | |||
| for _ in data.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| count += 1 | |||
| assert count == 3 | |||
| @@ -174,7 +174,7 @@ def test_textline_dataset_repeat(): | |||
| line = ["This is a text file.", "Be happy every day.", "Good luck to everyone.", | |||
| "This is a text file.", "Be happy every day.", "Good luck to everyone.", | |||
| "This is a text file.", "Be happy every day.", "Good luck to everyone."] | |||
| for i in data.create_dict_iterator(num_epochs=1): | |||
| for i in data.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| strs = i["text"].item().decode("utf8") | |||
| assert strs == line[count] | |||
| count += 1 | |||
| @@ -39,7 +39,7 @@ def test_tfrecord_shape(): | |||
| schema_file = "../data/dataset/testTFTestAllTypes/datasetSchemaRank0.json" | |||
| ds1 = ds.TFRecordDataset(FILES, schema_file) | |||
| ds1 = ds1.batch(2) | |||
| for data in ds1.create_dict_iterator(num_epochs=1): | |||
| for data in ds1.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| logger.info(data) | |||
| output_shape = ds1.output_shapes() | |||
| assert len(output_shape[-1]) == 1 | |||
| @@ -162,7 +162,7 @@ def test_tfrecord_schema(): | |||
| for d1, d2 in zip(data1, data2): | |||
| for t1, t2 in zip(d1, d2): | |||
| np.testing.assert_array_equal(t1, t2) | |||
| np.testing.assert_array_equal(t1.asnumpy(), t2.asnumpy()) | |||
| def test_tfrecord_shuffle(): | |||
| @@ -174,7 +174,7 @@ def test_tfrecord_shuffle(): | |||
| for d1, d2 in zip(data1, data2): | |||
| for t1, t2 in zip(d1, d2): | |||
| np.testing.assert_array_equal(t1, t2) | |||
| np.testing.assert_array_equal(t1.asnumpy(), t2.asnumpy()) | |||
| def test_tfrecord_shard(): | |||
| @@ -187,7 +187,7 @@ def test_tfrecord_shard(): | |||
| shuffle=ds.Shuffle.FILES) | |||
| data1 = data1.repeat(num_repeats) | |||
| res = list() | |||
| for item in data1.create_dict_iterator(num_epochs=1): | |||
| for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| res.append(item["scalars"][0]) | |||
| return res | |||
| @@ -215,7 +215,7 @@ def test_tfrecord_shard_equal_rows(): | |||
| ds1 = ds.TFRecordDataset(tf_files, num_shards=num_shards, shard_id=shard_id, shard_equal_rows=True) | |||
| ds1 = ds1.repeat(num_repeats) | |||
| res = list() | |||
| for data in ds1.create_dict_iterator(num_epochs=1): | |||
| for data in ds1.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| res.append(data["scalars"][0]) | |||
| return res | |||
| @@ -238,7 +238,7 @@ def test_tfrecord_shard_equal_rows(): | |||
| def test_tfrecord_no_schema_columns_list(): | |||
| logger.info("test_tfrecord_no_schema_columns_list") | |||
| data = ds.TFRecordDataset(FILES, shuffle=False, columns_list=["col_sint16"]) | |||
| row = data.create_dict_iterator(num_epochs=1).__next__() | |||
| row = data.create_dict_iterator(num_epochs=1, output_numpy=True).__next__() | |||
| assert row["col_sint16"] == [-32768] | |||
| with pytest.raises(KeyError) as info: | |||
| @@ -258,7 +258,7 @@ def test_tfrecord_schema_columns_list(): | |||
| schema.add_column('col_sint32', de_type=mstype.int64, shape=[1]) | |||
| schema.add_column('col_sint64', de_type=mstype.int64, shape=[1]) | |||
| data = ds.TFRecordDataset(FILES, schema=schema, shuffle=False, columns_list=["col_sint16"]) | |||
| row = data.create_dict_iterator(num_epochs=1).__next__() | |||
| row = data.create_dict_iterator(num_epochs=1, output_numpy=True).__next__() | |||
| assert row["col_sint16"] == [-32768] | |||
| with pytest.raises(KeyError) as info: | |||
| @@ -275,7 +275,7 @@ def test_tfrecord_invalid_files(): | |||
| data = ds.TFRecordDataset(files, SCHEMA_FILE, shuffle=ds.Shuffle.FILES) | |||
| with pytest.raises(RuntimeError) as info: | |||
| _ = data.create_dict_iterator(num_epochs=1).get_next() | |||
| _ = data.create_dict_iterator(num_epochs=1, output_numpy=True).get_next() | |||
| assert "cannot be opened" in str(info.value) | |||
| assert "not valid tfrecord files" in str(info.value) | |||
| assert valid_file not in str(info.value) | |||
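When only the first row is needed, as in the TFRecord column-list checks above, the dict iterator is advanced once with __next__(); passing output_numpy=True keeps that single fetched row as ndarrays. A sketch with a small in-memory dataset standing in for the TFRecord fixtures:

    import mindspore.dataset as ds

    data = ds.NumpySlicesDataset([[-32768], [0], [1]], column_names=["col_sint16"], shuffle=False)
    row = data.create_dict_iterator(num_epochs=1, output_numpy=True).__next__()
    assert row["col_sint16"] == [-32768]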
| @@ -23,7 +23,7 @@ TARGET_SHAPE = [680, 680, 680, 680, 642, 607, 561, 596, 612, 680] | |||
| def test_voc_segmentation(): | |||
| data1 = ds.VOCDataset(DATA_DIR, task="Segmentation", usage="train", shuffle=False, decode=True) | |||
| num = 0 | |||
| for item in data1.create_dict_iterator(num_epochs=1): | |||
| for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| assert item["image"].shape[0] == IMAGE_SHAPE[num] | |||
| assert item["target"].shape[0] == TARGET_SHAPE[num] | |||
| num += 1 | |||
| @@ -34,7 +34,7 @@ def test_voc_detection(): | |||
| data1 = ds.VOCDataset(DATA_DIR, task="Detection", usage="train", shuffle=False, decode=True) | |||
| num = 0 | |||
| count = [0, 0, 0, 0, 0, 0] | |||
| for item in data1.create_dict_iterator(num_epochs=1): | |||
| for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| assert item["image"].shape[0] == IMAGE_SHAPE[num] | |||
| for label in item["label"]: | |||
| count[label[0]] += 1 | |||
| @@ -53,7 +53,7 @@ def test_voc_class_index(): | |||
| assert (class_index2 == {'car': 0, 'cat': 1, 'train': 5}) | |||
| num = 0 | |||
| count = [0, 0, 0, 0, 0, 0] | |||
| for item in data1.create_dict_iterator(num_epochs=1): | |||
| for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| for label in item["label"]: | |||
| count[label[0]] += 1 | |||
| assert label[0] in (0, 1, 5) | |||
| @@ -71,7 +71,7 @@ def test_voc_get_class_indexing(): | |||
| assert (class_index2 == {'car': 0, 'cat': 1, 'chair': 2, 'dog': 3, 'person': 4, 'train': 5}) | |||
| num = 0 | |||
| count = [0, 0, 0, 0, 0, 0] | |||
| for item in data1.create_dict_iterator(num_epochs=1): | |||
| for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| for label in item["label"]: | |||
| count[label[0]] += 1 | |||
| assert label[0] in (0, 1, 2, 3, 4, 5) | |||
| @@ -40,7 +40,8 @@ def test_decode_op(): | |||
| # Second dataset | |||
| data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) | |||
| for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): | |||
| for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), | |||
| data2.create_dict_iterator(num_epochs=1, output_numpy=True)): | |||
| actual = item1["image"] | |||
| expected = cv2.imdecode(item2["image"], cv2.IMREAD_COLOR) | |||
| expected = cv2.cvtColor(expected, cv2.COLOR_BGR2RGB) | |||
| @@ -65,7 +66,8 @@ def test_decode_op_tf_file_dataset(): | |||
| # Second dataset | |||
| data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) | |||
| for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): | |||
| for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), | |||
| data2.create_dict_iterator(num_epochs=1, output_numpy=True)): | |||
| actual = item1["image"] | |||
| expected = cv2.imdecode(item2["image"], cv2.IMREAD_COLOR) | |||
| expected = cv2.cvtColor(expected, cv2.COLOR_BGR2RGB) | |||
| @@ -26,7 +26,7 @@ def compare(array): | |||
| array = np.array(array) | |||
| data = data.map(operations=ops.Duplicate(), input_columns=["x"], output_columns=["x", "y"], | |||
| column_order=["x", "y"]) | |||
| for d in data.create_dict_iterator(num_epochs=1): | |||
| for d in data.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| np.testing.assert_array_equal(array, d["x"]) | |||
| np.testing.assert_array_equal(array, d["y"]) | |||
| @@ -86,9 +86,9 @@ def test_decode_op(): | |||
| num_epoch = 5 | |||
| # iter1 will always assume there is a next epoch and never shutdown. | |||
| iter1 = data1.create_dict_iterator() | |||
| iter1 = data1.create_dict_iterator(output_numpy=True) | |||
| # iter 2 will stop and shutdown pipeline after num_epoch | |||
| iter2 = data2.create_dict_iterator(num_epoch) | |||
| iter2 = data2.create_dict_iterator(num_epoch, output_numpy=True) | |||
| for _ in range(num_epoch): | |||
| i = 0 | |||
| for item1, item2 in itertools.zip_longest(iter1, iter2): | |||
| @@ -135,7 +135,7 @@ def test_generator_dict_0(): | |||
| i = 0 | |||
| # create the iterator inside the loop declaration | |||
| for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary | |||
| for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary | |||
| golden = np.array([i]) | |||
| np.testing.assert_array_equal(item["data"], golden) | |||
| i = i + 1 | |||
| @@ -154,7 +154,7 @@ def test_generator_dict_1(): | |||
| i = 0 | |||
| # BAD. Do not create iterator every time inside. | |||
| # Create iterator outside the epoch for loop. | |||
| for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary | |||
| for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary | |||
| golden = np.array([i]) | |||
| np.testing.assert_array_equal(item["data"], golden) | |||
| i = i + 1 | |||
| @@ -174,7 +174,7 @@ def test_generator_dict_2(): | |||
| i = 0 | |||
| for item in iter1: # each data is a dictionary | |||
| golden = np.array([i]) | |||
| np.testing.assert_array_equal(item["data"], golden) | |||
| np.testing.assert_array_equal(item["data"].asnumpy(), golden) | |||
| i = i + 1 | |||
| assert i == 64 | |||
| @@ -197,7 +197,7 @@ def test_generator_dict_3(): | |||
| i = 0 | |||
| for item in iter1: # each data is a dictionary | |||
| golden = np.array([i]) | |||
| np.testing.assert_array_equal(item["data"], golden) | |||
| np.testing.assert_array_equal(item["data"].asnumpy(), golden) | |||
| i = i + 1 | |||
| assert i == 64 | |||
| # optional | |||
| @@ -221,7 +221,7 @@ def test_generator_dict_4(): | |||
| i = 0 | |||
| for item in iter1: # each data is a dictionary | |||
| golden = np.array([i]) | |||
| np.testing.assert_array_equal(item["data"], golden) | |||
| np.testing.assert_array_equal(item["data"].asnumpy(), golden) | |||
| i = i + 1 | |||
| assert i == 64 | |||
| @@ -240,7 +240,7 @@ def test_generator_dict_4_1(): | |||
| # apply dataset operations | |||
| data1 = ds.GeneratorDataset(generator_1d, ["data"]) | |||
| # epoch ctrl op will not be injected if num_epochs is 1. | |||
| iter1 = data1.create_dict_iterator(num_epochs=1) | |||
| iter1 = data1.create_dict_iterator(num_epochs=1, output_numpy=True) | |||
| for _ in range(1): | |||
| i = 0 | |||
| for item in iter1: # each data is a dictionary | |||
| @@ -266,7 +266,7 @@ def test_generator_dict_4_2(): | |||
| # repeat will not be injected when num repeat is 1. | |||
| data1 = data1.repeat(1) | |||
| # epoch ctrl op will not be injected if num_epochs is 1. | |||
| iter1 = data1.create_dict_iterator(num_epochs=1) | |||
| iter1 = data1.create_dict_iterator(num_epochs=1, output_numpy=True) | |||
| for _ in range(1): | |||
| i = 0 | |||
| for item in iter1: # each data is a dictionary | |||
| @@ -289,7 +289,7 @@ def test_generator_dict_5(): | |||
| # apply dataset operations | |||
| data1 = ds.GeneratorDataset(generator_1d, ["data"]) | |||
| iter1 = data1.create_dict_iterator(num_epochs=11) | |||
| iter1 = data1.create_dict_iterator(num_epochs=11, output_numpy=True) | |||
| for _ in range(10): | |||
| i = 0 | |||
| for item in iter1: # each data is a dictionary | |||
| @@ -326,7 +326,7 @@ def test_generator_tuple_0(): | |||
| i = 0 | |||
| # create the iterator inside the loop declaration | |||
| for item in data1.create_tuple_iterator(num_epochs=1): # each data is a dictionary | |||
| for item in data1.create_tuple_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary | |||
| golden = np.array([i]) | |||
| np.testing.assert_array_equal(item[0], golden) | |||
| i = i + 1 | |||
| @@ -345,7 +345,7 @@ def test_generator_tuple_1(): | |||
| i = 0 | |||
| # BAD. Do not create iterator every time inside. | |||
| # Create iterator outside the epoch for loop. | |||
| for item in data1.create_tuple_iterator(num_epochs=1): # each data is a dictionary | |||
| for item in data1.create_tuple_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary | |||
| golden = np.array([i]) | |||
| np.testing.assert_array_equal(item[0], golden) | |||
| i = i + 1 | |||
| @@ -360,7 +360,7 @@ def test_generator_tuple_2(): | |||
| # apply dataset operations | |||
| data1 = ds.GeneratorDataset(generator_1d, ["data"]) | |||
| iter1 = data1.create_tuple_iterator() | |||
| iter1 = data1.create_tuple_iterator(output_numpy=True) | |||
| for _ in range(10): | |||
| i = 0 | |||
| for item in iter1: # each data is a dictionary | |||
| @@ -383,7 +383,7 @@ def test_generator_tuple_3(): | |||
| # apply dataset operations | |||
| data1 = ds.GeneratorDataset(generator_1d, ["data"]) | |||
| iter1 = data1.create_tuple_iterator() | |||
| iter1 = data1.create_tuple_iterator(output_numpy=True) | |||
| for _ in range(10): | |||
| i = 0 | |||
| for item in iter1: # each data is a dictionary | |||
| @@ -407,7 +407,7 @@ def test_generator_tuple_4(): | |||
| # apply dataset operations | |||
| data1 = ds.GeneratorDataset(generator_1d, ["data"]) | |||
| iter1 = data1.create_tuple_iterator(num_epochs=10) | |||
| iter1 = data1.create_tuple_iterator(num_epochs=10, output_numpy=True) | |||
| for _ in range(10): | |||
| i = 0 | |||
| for item in iter1: # each data is a dictionary | |||
| @@ -430,7 +430,7 @@ def test_generator_tuple_5(): | |||
| # apply dataset operations | |||
| data1 = ds.GeneratorDataset(generator_1d, ["data"]) | |||
| iter1 = data1.create_tuple_iterator(num_epochs=11) | |||
| iter1 = data1.create_tuple_iterator(num_epochs=11, output_numpy=True) | |||
| for _ in range(10): | |||
| i = 0 | |||
| for item in iter1: # each data is a dictionary | |||
| @@ -464,7 +464,7 @@ def test_generator_tuple_repeat_1(): | |||
| # apply dataset operations | |||
| data1 = ds.GeneratorDataset(generator_1d, ["data"]) | |||
| data1 = data1.repeat(2) | |||
| iter1 = data1.create_tuple_iterator(num_epochs=11) | |||
| iter1 = data1.create_tuple_iterator(num_epochs=11, output_numpy=True) | |||
| for _ in range(10): | |||
| i = 0 | |||
| for item in iter1: # each data is a dictionary | |||
| @@ -499,7 +499,7 @@ def test_generator_tuple_repeat_repeat_1(): | |||
| data1 = ds.GeneratorDataset(generator_1d, ["data"]) | |||
| data1 = data1.repeat(2) | |||
| data1 = data1.repeat(3) | |||
| iter1 = data1.create_tuple_iterator(num_epochs=11) | |||
| iter1 = data1.create_tuple_iterator(num_epochs=11, output_numpy=True) | |||
| for _ in range(10): | |||
| i = 0 | |||
| for item in iter1: # each data is a dictionary | |||
| @@ -533,7 +533,7 @@ def test_generator_tuple_repeat_repeat_2(): | |||
| data1 = ds.GeneratorDataset(generator_1d, ["data"]) | |||
| data1 = data1.repeat(2) | |||
| data1 = data1.repeat(3) | |||
| iter1 = data1.create_tuple_iterator() | |||
| iter1 = data1.create_tuple_iterator(output_numpy=True) | |||
| for _ in range(10): | |||
| i = 0 | |||
| for item in iter1: # each data is a dictionary | |||
| @@ -559,7 +559,7 @@ def test_generator_tuple_repeat_repeat_3(): | |||
| data1 = ds.GeneratorDataset(generator_1d, ["data"]) | |||
| data1 = data1.repeat(2) | |||
| data1 = data1.repeat(3) | |||
| iter1 = data1.create_tuple_iterator() | |||
| iter1 = data1.create_tuple_iterator(output_numpy=True) | |||
| for _ in range(10): | |||
| i = 0 | |||
| for item in iter1: # each data is a dictionary | |||
| @@ -589,7 +589,7 @@ def test_generator_tuple_infinite_repeat_repeat_1(): | |||
| data1 = ds.GeneratorDataset(generator_1d, ["data"]) | |||
| data1 = data1.repeat() | |||
| data1 = data1.repeat(3) | |||
| iter1 = data1.create_tuple_iterator(num_epochs=11) | |||
| iter1 = data1.create_tuple_iterator(num_epochs=11, output_numpy=True) | |||
| i = 0 | |||
| for item in iter1: # each data is a dictionary | |||
| @@ -612,7 +612,7 @@ def test_generator_tuple_infinite_repeat_repeat_2(): | |||
| data1 = ds.GeneratorDataset(generator_1d, ["data"]) | |||
| data1 = data1.repeat(3) | |||
| data1 = data1.repeat() | |||
| iter1 = data1.create_tuple_iterator(num_epochs=11) | |||
| iter1 = data1.create_tuple_iterator(num_epochs=11, output_numpy=True) | |||
| i = 0 | |||
| for item in iter1: # each data is a dictionary | |||
| @@ -635,7 +635,7 @@ def test_generator_tuple_infinite_repeat_repeat_3(): | |||
| data1 = ds.GeneratorDataset(generator_1d, ["data"]) | |||
| data1 = data1.repeat() | |||
| data1 = data1.repeat() | |||
| iter1 = data1.create_tuple_iterator(num_epochs=11) | |||
| iter1 = data1.create_tuple_iterator(num_epochs=11, output_numpy=True) | |||
| i = 0 | |||
| for item in iter1: # each data is a dictionary | |||
| @@ -658,7 +658,7 @@ def test_generator_tuple_infinite_repeat_repeat_4(): | |||
| data1 = ds.GeneratorDataset(generator_1d, ["data"]) | |||
| data1 = data1.repeat() | |||
| data1 = data1.repeat() | |||
| iter1 = data1.create_tuple_iterator() | |||
| iter1 = data1.create_tuple_iterator(output_numpy=True) | |||
| i = 0 | |||
| for item in iter1: # each data is a dictionary | |||
| @@ -680,7 +680,7 @@ def test_generator_reusedataset(): | |||
| # apply dataset operations | |||
| data1 = ds.GeneratorDataset(generator_1d, ["data"]) | |||
| data1 = data1.repeat(2) | |||
| iter1 = data1.create_tuple_iterator() | |||
| iter1 = data1.create_tuple_iterator(output_numpy=True) | |||
| for _ in range(10): | |||
| i = 0 | |||
| for item in iter1: # each data is a dictionary | |||
| @@ -690,7 +690,7 @@ def test_generator_reusedataset(): | |||
| assert i == 64 * 2 | |||
| data1 = data1.repeat(3) | |||
| iter1 = data1.create_tuple_iterator() | |||
| iter1 = data1.create_tuple_iterator(output_numpy=True) | |||
| for _ in range(5): | |||
| i = 0 | |||
| for item in iter1: # each data is a dictionary | |||
| @@ -700,7 +700,7 @@ def test_generator_reusedataset(): | |||
| assert i == 64 * 2 * 3 | |||
| data1 = data1.batch(2) | |||
| iter1 = data1.create_dict_iterator() | |||
| iter1 = data1.create_dict_iterator(output_numpy=True) | |||
| for _ in range(5): | |||
| i = 0 | |||
| sample = 0 | |||
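The generator and epoch-control hunks above all reuse one recipe: build the iterator once, outside the epoch loop, with `num_epochs` bounding how many passes it allows and `output_numpy=True` for the NumPy comparisons. A sketch of that recipe (again not from the PR; `generator_1d` is a stand-in for the test's generator):

```python
# Sketch of the epoch-control pattern exercised above: one iterator created outside
# the loop and reused for a fixed number of epochs.
import numpy as np
import mindspore.dataset as ds

def generator_1d():
    for i in range(64):
        yield (np.array([i]),)

data1 = ds.GeneratorDataset(generator_1d, ["data"])

num_epoch = 3
# Create the iterator once; it stops at each epoch boundary and shuts down after num_epoch passes.
iter1 = data1.create_tuple_iterator(num_epochs=num_epoch, output_numpy=True)
for _ in range(num_epoch):
    i = 0
    for item in iter1:  # each row is a list of ndarray
        np.testing.assert_array_equal(item[0], np.array([i]))
        i += 1
    assert i == 64
```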
| @@ -49,10 +49,10 @@ def test_equalize_py(plot=False): | |||
| for idx, (image, _) in enumerate(ds_original): | |||
| if idx == 0: | |||
| images_original = np.transpose(image, (0, 2, 3, 1)) | |||
| images_original = np.transpose(image.asnumpy(), (0, 2, 3, 1)) | |||
| else: | |||
| images_original = np.append(images_original, | |||
| np.transpose(image, (0, 2, 3, 1)), | |||
| np.transpose(image.asnumpy(), (0, 2, 3, 1)), | |||
| axis=0) | |||
| # Color Equalized Images | |||
| @@ -69,10 +69,10 @@ def test_equalize_py(plot=False): | |||
| for idx, (image, _) in enumerate(ds_equalize): | |||
| if idx == 0: | |||
| images_equalize = np.transpose(image, (0, 2, 3, 1)) | |||
| images_equalize = np.transpose(image.asnumpy(), (0, 2, 3, 1)) | |||
| else: | |||
| images_equalize = np.append(images_equalize, | |||
| np.transpose(image, (0, 2, 3, 1)), | |||
| np.transpose(image.asnumpy(), (0, 2, 3, 1)), | |||
| axis=0) | |||
| num_samples = images_original.shape[0] | |||
| @@ -102,10 +102,10 @@ def test_equalize_c(plot=False): | |||
| for idx, (image, _) in enumerate(ds_original): | |||
| if idx == 0: | |||
| images_original = image | |||
| images_original = image.asnumpy() | |||
| else: | |||
| images_original = np.append(images_original, | |||
| image, | |||
| image.asnumpy(), | |||
| axis=0) | |||
| # Equalize Images | |||
| @@ -120,10 +120,10 @@ def test_equalize_c(plot=False): | |||
| for idx, (image, _) in enumerate(ds_equalize): | |||
| if idx == 0: | |||
| images_equalize = image | |||
| images_equalize = image.asnumpy() | |||
| else: | |||
| images_equalize = np.append(images_equalize, | |||
| image, | |||
| image.asnumpy(), | |||
| axis=0) | |||
| if plot: | |||
| visualize_list(images_original, images_equalize) | |||
| @@ -151,10 +151,10 @@ def test_equalize_py_c(plot=False): | |||
| for idx, (image, _) in enumerate(ds_c_equalize): | |||
| if idx == 0: | |||
| images_c_equalize = image | |||
| images_c_equalize = image.asnumpy() | |||
| else: | |||
| images_c_equalize = np.append(images_c_equalize, | |||
| image, | |||
| image.asnumpy(), | |||
| axis=0) | |||
| # Equalize images in python | |||
| @@ -172,10 +172,10 @@ def test_equalize_py_c(plot=False): | |||
| for idx, (image, _) in enumerate(ds_p_equalize): | |||
| if idx == 0: | |||
| images_p_equalize = image | |||
| images_p_equalize = image.asnumpy() | |||
| else: | |||
| images_p_equalize = np.append(images_p_equalize, | |||
| image, | |||
| image.asnumpy(), | |||
| axis=0) | |||
| num_samples = images_c_equalize.shape[0] | |||
| @@ -223,9 +223,9 @@ def test_equalize_mnist_c(plot=False): | |||
| for _, (data_orig, data_trans) in enumerate(zip(ds_orig, ds_equalize_c)): | |||
| image_orig, label_orig = data_orig | |||
| image_trans, _ = data_trans | |||
| images.append(image_orig) | |||
| labels.append(label_orig) | |||
| images_trans.append(image_trans) | |||
| images.append(image_orig.asnumpy()) | |||
| labels.append(label_orig.asnumpy()) | |||
| images_trans.append(image_trans.asnumpy()) | |||
| # Compare with expected md5 from images | |||
| filename = "equalize_mnist_result_c.npz" | |||
| @@ -31,7 +31,7 @@ def test_fillop_basic(): | |||
| data = data.map(operations=fill_op, input_columns=["col"]) | |||
| expected = np.array([3, 3, 3, 3], dtype=np.uint8) | |||
| for data_row in data: | |||
| np.testing.assert_array_equal(data_row[0], expected) | |||
| np.testing.assert_array_equal(data_row[0].asnumpy(), expected) | |||
| def test_fillop_down_type_cast(): | |||
| @@ -44,7 +44,7 @@ def test_fillop_down_type_cast(): | |||
| data = data.map(operations=fill_op, input_columns=["col"]) | |||
| expected = np.array([253, 253, 253, 253], dtype=np.uint8) | |||
| for data_row in data: | |||
| np.testing.assert_array_equal(data_row[0], expected) | |||
| np.testing.assert_array_equal(data_row[0].asnumpy(), expected) | |||
| def test_fillop_up_type_cast(): | |||
| @@ -57,7 +57,7 @@ def test_fillop_up_type_cast(): | |||
| data = data.map(operations=fill_op, input_columns=["col"]) | |||
| expected = np.array([3., 3., 3., 3.], dtype=np.float) | |||
| for data_row in data: | |||
| np.testing.assert_array_equal(data_row[0], expected) | |||
| np.testing.assert_array_equal(data_row[0].asnumpy(), expected) | |||
| def test_fillop_string(): | |||
| @@ -69,7 +69,7 @@ def test_fillop_string(): | |||
| data = data.map(operations=fill_op, input_columns=["col"]) | |||
| expected = np.array(['error', 'error'], dtype='S') | |||
| for data_row in data: | |||
| for data_row in data.create_tuple_iterator(output_numpy=True): | |||
| np.testing.assert_array_equal(data_row[0], expected) | |||
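In the fill-op hunks above, plain `for data_row in data:` iteration now yields rows of MSTensor, so the numeric asserts add `.asnumpy()`; the string case instead switches to `create_tuple_iterator(output_numpy=True)`, presumably because string columns are easier to check as NumPy arrays. A rough sketch of the distinction (hypothetical generators, not the test's fill pipeline):

```python
# Sketch contrasting the two fixes used in the fill-op tests.
import numpy as np
import mindspore.dataset as ds

def numeric_gen():
    yield (np.array([3, 3, 3, 3], dtype=np.uint8),)

numeric = ds.GeneratorDataset(numeric_gen, ["col"])
for row in numeric:  # default iteration now yields rows of MSTensor
    np.testing.assert_array_equal(row[0].asnumpy(), np.array([3, 3, 3, 3], dtype=np.uint8))

def string_gen():
    yield (np.array(["error", "error"], dtype='S'),)

strings = ds.GeneratorDataset(string_gen, ["col"])
for row in strings.create_tuple_iterator(output_numpy=True):
    assert isinstance(row[0], np.ndarray)  # string columns come back as NumPy arrays
```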
| @@ -35,7 +35,7 @@ def test_diff_predicate_func(): | |||
| num_iter = 0 | |||
| label_list = [] | |||
| for data in dataset.create_dict_iterator(num_epochs=1): | |||
| for data in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| num_iter += 1 | |||
| label = data["label"] | |||
| label_list.append(label) | |||
| @@ -64,7 +64,7 @@ def test_filter_by_generator_with_no(): | |||
| dataset_f = dataset.filter(predicate=lambda data: data < 11, num_parallel_workers=4) | |||
| num_iter = 0 | |||
| expected_rs = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] | |||
| for item in dataset_f.create_dict_iterator(num_epochs=1): | |||
| for item in dataset_f.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| assert item["data"] == expected_rs[num_iter] | |||
| num_iter += 1 | |||
| @@ -77,7 +77,7 @@ def test_filter_by_generator_with_repeat(): | |||
| num_iter = 0 | |||
| ret_data = [] | |||
| expected_rs = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] | |||
| for item in dataset_f.create_dict_iterator(num_epochs=1): | |||
| for item in dataset_f.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| num_iter += 1 | |||
| ret_data.append(item["data"]) | |||
| assert num_iter == 44 | |||
| @@ -95,7 +95,7 @@ def test_filter_by_generator_with_repeat_after(): | |||
| num_iter = 0 | |||
| ret_data = [] | |||
| expected_rs = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] | |||
| for item in dataset_r.create_dict_iterator(num_epochs=1): | |||
| for item in dataset_r.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| num_iter += 1 | |||
| ret_data.append(item["data"]) | |||
| assert num_iter == 44 | |||
| @@ -120,7 +120,7 @@ def test_filter_by_generator_with_batch(): | |||
| dataset_f = dataset_b.filter(predicate=filter_func_batch, num_parallel_workers=4) | |||
| num_iter = 0 | |||
| ret_data = [] | |||
| for item in dataset_f.create_dict_iterator(num_epochs=1): | |||
| for item in dataset_f.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| num_iter += 1 | |||
| ret_data.append(item["data"]) | |||
| assert num_iter == 3 | |||
| @@ -136,7 +136,7 @@ def test_filter_by_generator_with_batch_after(): | |||
| dataset_b = dataset_f.batch(4) | |||
| num_iter = 0 | |||
| ret_data = [] | |||
| for item in dataset_b.create_dict_iterator(num_epochs=1): | |||
| for item in dataset_b.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| num_iter += 1 | |||
| ret_data.append(item["data"]) | |||
| assert num_iter == 6 | |||
| @@ -202,7 +202,7 @@ def test_filter_by_generator_with_zip(): | |||
| dataset_f = dataz.filter(predicate=filter_func_zip, num_parallel_workers=1) | |||
| num_iter = 0 | |||
| ret_data = [] | |||
| for item in dataset_f.create_dict_iterator(num_epochs=1): | |||
| for item in dataset_f.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| num_iter += 1 | |||
| ret_data.append({"data1": item["data1"], "data2": item["data2"]}) | |||
| assert num_iter == 21 | |||
| @@ -221,7 +221,7 @@ def test_filter_by_generator_with_zip_after(): | |||
| dataz = ds.zip((dt1, dt2)) | |||
| num_iter = 0 | |||
| ret_data = [] | |||
| for item in dataz.create_dict_iterator(num_epochs=1): | |||
| for item in dataz.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| num_iter += 1 | |||
| ret_data.append({"data1": item["data1"], "data2": item["data2"]}) | |||
| assert num_iter == 21 | |||
| @@ -266,7 +266,7 @@ def test_filter_by_generator_with_map_all_col(): | |||
| dataset_f = dataset_map.filter(input_columns=["col1"], predicate=filter_func_map_part, num_parallel_workers=1) | |||
| num_iter = 0 | |||
| ret_data = [] | |||
| for item in dataset_f.create_dict_iterator(num_epochs=1): | |||
| for item in dataset_f.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| num_iter += 1 | |||
| ret_data.append(item["col1"]) | |||
| assert num_iter == 3 | |||
| @@ -282,7 +282,7 @@ def test_filter_by_generator_with_map_part_col(): | |||
| dataset_f = dataset_map.filter(input_columns=["out1", "col2"], predicate=filter_func_map, num_parallel_workers=4) | |||
| num_iter = 0 | |||
| ret_data = [] | |||
| for item in dataset_f.create_dict_iterator(num_epochs=1): | |||
| for item in dataset_f.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| num_iter += 1 | |||
| print(item) | |||
| ret_data.append(item["out1"]) | |||
| @@ -302,7 +302,7 @@ def test_filter_by_generator_with_rename(): | |||
| dataset_f = dataset_b.filter(predicate=filter_func_rename, num_parallel_workers=4) | |||
| num_iter = 0 | |||
| ret_data = [] | |||
| for item in dataset_f.create_dict_iterator(num_epochs=1): | |||
| for item in dataset_f.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| num_iter += 1 | |||
| ret_data.append(item["col1"]) | |||
| assert num_iter == 55 | |||
| @@ -336,7 +336,7 @@ def test_filter_by_generator_with_input_column(): | |||
| dataset_f4 = dataset_f3.filter(predicate=filter_func_input_column1, num_parallel_workers=4) | |||
| num_iter = 0 | |||
| ret_data = [] | |||
| for item in dataset_f4.create_dict_iterator(num_epochs=1): | |||
| for item in dataset_f4.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| num_iter += 1 | |||
| ret_data.append(item["out1"]) | |||
| assert num_iter == 8 | |||
| @@ -370,7 +370,7 @@ def test_filter_by_generator_Partial0(): | |||
| dataset_zip = ds.zip((dataset1, dataset2)) | |||
| dataset_f1 = dataset_zip.filter(predicate=filter_func_Partial_0, num_parallel_workers=2) | |||
| ret = [] | |||
| for item in dataset_f1.create_dict_iterator(num_epochs=1): | |||
| for item in dataset_f1.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| ret.append(item["col1"]) | |||
| assert ret[0] == 5 | |||
| assert ret[6] == 12 | |||
| @@ -384,7 +384,7 @@ def test_filter_by_generator_Partial1(): | |||
| dataset_f1 = dataset_zip.filter(predicate=filter_func_Partial_0, num_parallel_workers=2) | |||
| dataset_map = dataset_f1.map(operations=lambda x1: x1 + 400, input_columns=["col1"], output_columns=["out1"]) | |||
| ret = [] | |||
| for item in dataset_map.create_dict_iterator(num_epochs=1): | |||
| for item in dataset_map.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| ret.append(item["out1"]) | |||
| assert ret[0] == 405 | |||
| assert ret[6] == 412 | |||
| @@ -403,7 +403,7 @@ def test_filter_by_generator_Partial2(): | |||
| output_columns=["out1", "out3"]) | |||
| ret1 = [] | |||
| ret3 = [] | |||
| for item in dataset_map.create_dict_iterator(num_epochs=1): | |||
| for item in dataset_map.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| ret1.append(item["out1"]) | |||
| ret3.append(item["out3"]) | |||
| assert ret1[0] == 400 | |||
| @@ -428,7 +428,7 @@ def test_filter_by_generator_Partial(): | |||
| dataset_s = dataset.shuffle(4) | |||
| dataset_f1 = dataset_s.filter(input_columns=["col1", "col2"], predicate=filter_func_Partial, num_parallel_workers=1) | |||
| for item in dataset_f1.create_dict_iterator(num_epochs=1): | |||
| for item in dataset_f1.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| assert item["col1"] % 3 == 0 | |||
| @@ -442,7 +442,7 @@ def test_filte_case_dataset_cifar10(): | |||
| DATA_DIR_10 = "../data/dataset/testCifar10Data" | |||
| dataset_c = ds.Cifar10Dataset(dataset_dir=DATA_DIR_10, num_samples=100000, shuffle=False) | |||
| dataset_f1 = dataset_c.filter(input_columns=["image", "label"], predicate=filter_func_cifar, num_parallel_workers=1) | |||
| for item in dataset_f1.create_dict_iterator(num_epochs=1): | |||
| for item in dataset_f1.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| # in this example, each dictionary has keys "image" and "label" | |||
| assert item["label"] % 3 == 0 | |||
| @@ -476,7 +476,7 @@ def test_filter_by_generator_with_map_all_sort(): | |||
| dataset_f = dataz.filter(predicate=filter_func_part_sort, num_parallel_workers=1) | |||
| num_iter = 0 | |||
| ret_data = [] | |||
| for item in dataset_f.create_dict_iterator(num_epochs=1): | |||
| for item in dataset_f.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| num_iter += 1 | |||
| ret_data.append(item) | |||
| @@ -54,7 +54,8 @@ def test_five_crop_op(plot=False): | |||
| data2 = data2.map(operations=transform_2, input_columns=["image"]) | |||
| num_iter = 0 | |||
| for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): | |||
| for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), | |||
| data2.create_dict_iterator(num_epochs=1, output_numpy=True)): | |||
| num_iter += 1 | |||
| image_1 = (item1["image"].transpose(1, 2, 0) * 255).astype(np.uint8) | |||
| image_2 = item2["image"] | |||
| @@ -34,7 +34,7 @@ def test_flat_map_1(): | |||
| data = data.flat_map(flat_map_func) | |||
| count = 0 | |||
| for d in data: | |||
| for d in data.create_tuple_iterator(output_numpy=True): | |||
| assert isinstance(d[0], np.ndarray) | |||
| count += 1 | |||
| assert count == 52 | |||
| @@ -60,7 +60,7 @@ def test_flat_map_2(): | |||
| data = data.flat_map(flat_map_func_2) | |||
| count = 0 | |||
| for d in data: | |||
| for d in data.create_tuple_iterator(output_numpy=True): | |||
| assert isinstance(d[0], np.ndarray) | |||
| count += 1 | |||
| assert count == 104 | |||
| @@ -28,7 +28,7 @@ def test_demo_basic_from_dataset(): | |||
| special_first=True) | |||
| data = data.map(operations=text.Lookup(vocab, "<unk>"), input_columns=["text"]) | |||
| res = [] | |||
| for d in data.create_dict_iterator(num_epochs=1): | |||
| for d in data.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| res.append(d["text"].item()) | |||
| assert res == [4, 5, 3, 6, 7, 2], res | |||
| @@ -41,7 +41,7 @@ def test_demo_basic_from_dataset_with_tokenizer(): | |||
| special_first=True) | |||
| data = data.map(operations=text.Lookup(vocab, "<unk>"), input_columns=["text"]) | |||
| res = [] | |||
| for d in data.create_dict_iterator(num_epochs=1): | |||
| for d in data.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| res.append(list(d["text"])) | |||
| assert res == [[13, 3, 7, 14, 9, 17, 3, 2, 19, 9, 2, 11, 3, 4, 16, 4, 8, 6, 5], [21, 20, 10, 25, 23, 26], | |||
| [24, 22, 10, 12, 8, 6, 7, 4, 18, 15, 5], [2, 2]] | |||
| @@ -62,7 +62,7 @@ def test_from_dataset(): | |||
| special_first=True) | |||
| corpus_dataset = corpus_dataset.map(operations=text.Lookup(vocab, "<unk>"), input_columns="text") | |||
| res = [] | |||
| for d in corpus_dataset.create_dict_iterator(num_epochs=1): | |||
| for d in corpus_dataset.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| res.append(list(d["text"])) | |||
| return res | |||
| @@ -110,7 +110,7 @@ def test_from_dataset_special_token(): | |||
| data = ds.GeneratorDataset(gen_input(texts), column_names=["text"]) | |||
| data = data.map(operations=text.Lookup(vocab, "<unk>"), input_columns="text") | |||
| res = [] | |||
| for d in data.create_dict_iterator(num_epochs=1): | |||
| for d in data.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| res.append(d["text"].item()) | |||
| return res | |||
| @@ -186,7 +186,7 @@ def test_graphdata_generatordataset(): | |||
| dataset = ds.GeneratorDataset(source=GNNGraphDataset(g, batch_num), column_names=out_column_names, | |||
| sampler=RandomBatchedSampler(edge_num, batch_num), num_parallel_workers=4) | |||
| dataset = dataset.repeat(2) | |||
| itr = dataset.create_dict_iterator(num_epochs=1) | |||
| itr = dataset.create_dict_iterator(num_epochs=1, output_numpy=True) | |||
| i = 0 | |||
| for data in itr: | |||
| assert data['neighbors'].shape == (2, 7) | |||
| @@ -112,7 +112,7 @@ def test_graphdata_distributed(): | |||
| sampler=RandomBatchedSampler(edge_num, batch_num), num_parallel_workers=4, | |||
| python_multiprocessing=False) | |||
| dataset = dataset.repeat(2) | |||
| itr = dataset.create_dict_iterator(num_epochs=1) | |||
| itr = dataset.create_dict_iterator(num_epochs=1, output_numpy=True) | |||
| i = 0 | |||
| for data in itr: | |||
| assert data['neighbors'].shape == (2, 7) | |||
| @@ -48,10 +48,10 @@ def test_invert_py(plot=False): | |||
| for idx, (image, _) in enumerate(ds_original): | |||
| if idx == 0: | |||
| images_original = np.transpose(image, (0, 2, 3, 1)) | |||
| images_original = np.transpose(image.asnumpy(), (0, 2, 3, 1)) | |||
| else: | |||
| images_original = np.append(images_original, | |||
| np.transpose(image, (0, 2, 3, 1)), | |||
| np.transpose(image.asnumpy(), (0, 2, 3, 1)), | |||
| axis=0) | |||
| # Color Inverted Images | |||
| @@ -68,10 +68,10 @@ def test_invert_py(plot=False): | |||
| for idx, (image, _) in enumerate(ds_invert): | |||
| if idx == 0: | |||
| images_invert = np.transpose(image, (0, 2, 3, 1)) | |||
| images_invert = np.transpose(image.asnumpy(), (0, 2, 3, 1)) | |||
| else: | |||
| images_invert = np.append(images_invert, | |||
| np.transpose(image, (0, 2, 3, 1)), | |||
| np.transpose(image.asnumpy(), (0, 2, 3, 1)), | |||
| axis=0) | |||
| num_samples = images_original.shape[0] | |||
| @@ -101,10 +101,10 @@ def test_invert_c(plot=False): | |||
| for idx, (image, _) in enumerate(ds_original): | |||
| if idx == 0: | |||
| images_original = image | |||
| images_original = image.asnumpy() | |||
| else: | |||
| images_original = np.append(images_original, | |||
| image, | |||
| image.asnumpy(), | |||
| axis=0) | |||
| # Invert Images | |||
| @@ -119,10 +119,10 @@ def test_invert_c(plot=False): | |||
| for idx, (image, _) in enumerate(ds_invert): | |||
| if idx == 0: | |||
| images_invert = image | |||
| images_invert = image.asnumpy() | |||
| else: | |||
| images_invert = np.append(images_invert, | |||
| image, | |||
| image.asnumpy(), | |||
| axis=0) | |||
| if plot: | |||
| visualize_list(images_original, images_invert) | |||
| @@ -150,10 +150,10 @@ def test_invert_py_c(plot=False): | |||
| for idx, (image, _) in enumerate(ds_c_invert): | |||
| if idx == 0: | |||
| images_c_invert = image | |||
| images_c_invert = image.asnumpy() | |||
| else: | |||
| images_c_invert = np.append(images_c_invert, | |||
| image, | |||
| image.asnumpy(), | |||
| axis=0) | |||
| # invert images in python | |||
| @@ -171,10 +171,10 @@ def test_invert_py_c(plot=False): | |||
| for idx, (image, _) in enumerate(ds_p_invert): | |||
| if idx == 0: | |||
| images_p_invert = image | |||
| images_p_invert = image.asnumpy() | |||
| else: | |||
| images_p_invert = np.append(images_p_invert, | |||
| image, | |||
| image.asnumpy(), | |||
| axis=0) | |||
| num_samples = images_c_invert.shape[0] | |||
| @@ -15,6 +15,8 @@ | |||
| import numpy as np | |||
| import pytest | |||
| import mindspore.common.dtype as mstype | |||
| from mindspore.common.tensor import Tensor | |||
| import mindspore.dataset as ds | |||
| from mindspore.dataset.engine.iterators import ITERATORS_LIST, _cleanup | |||
| @@ -28,15 +30,15 @@ def check(project_columns): | |||
| data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=COLUMNS, shuffle=False) | |||
| data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=project_columns, shuffle=False) | |||
| for data_actual, data_expected in zip(data1.create_tuple_iterator(project_columns, num_epochs=1), | |||
| data2.create_tuple_iterator(num_epochs=1)): | |||
| for data_actual, data_expected in zip(data1.create_tuple_iterator(project_columns, num_epochs=1, output_numpy=True), | |||
| data2.create_tuple_iterator(num_epochs=1, output_numpy=True)): | |||
| assert len(data_actual) == len(data_expected) | |||
| assert all([np.array_equal(d1, d2) for d1, d2 in zip(data_actual, data_expected)]) | |||
| def test_iterator_create_tuple(): | |||
| def test_iterator_create_tuple_numpy(): | |||
| """ | |||
| Test creating tuple iterator | |||
| Test creating a tuple iterator with NumPy output | |||
| """ | |||
| check(COLUMNS) | |||
| check(COLUMNS[0:1]) | |||
| @@ -45,6 +47,46 @@ def test_iterator_create_tuple(): | |||
| check(COLUMNS[7:8]) | |||
| check(COLUMNS[0:2:8]) | |||
| def test_iterator_create_dict_mstensor(): | |||
| """ | |||
| Test creating a dict iterator with MSTensor output | |||
| """ | |||
| def generator(): | |||
| for i in range(64): | |||
| yield (np.array([i], dtype=np.float32),) | |||
| # apply dataset operations | |||
| data1 = ds.GeneratorDataset(generator, ["data"]) | |||
| i = 0 | |||
| for item in data1.create_dict_iterator(num_epochs=1): | |||
| golden = np.array([i], dtype=np.float32) | |||
| np.testing.assert_array_equal(item["data"].asnumpy(), golden) | |||
| assert isinstance(item["data"], Tensor) | |||
| assert item["data"].dtype == mstype.float32 | |||
| i += 1 | |||
| assert i == 64 | |||
| def test_iterator_create_tuple_mstensor(): | |||
| """ | |||
| Test creating a tuple iterator with MSTensor output | |||
| """ | |||
| def generator(): | |||
| for i in range(64): | |||
| yield (np.array([i], dtype=np.float32),) | |||
| # apply dataset operations | |||
| data1 = ds.GeneratorDataset(generator, ["data"]) | |||
| i = 0 | |||
| for item in data1.create_tuple_iterator(num_epochs=1): | |||
| golden = np.array([i], dtype=np.float32) | |||
| np.testing.assert_array_equal(item[0].asnumpy(), golden) | |||
| assert isinstance(item[0], Tensor) | |||
| assert item[0].dtype == mstype.float32 | |||
| i += 1 | |||
| assert i == 64 | |||
| def test_iterator_weak_ref(): | |||
| ITERATORS_LIST.clear() | |||
| @@ -113,6 +155,6 @@ def test_tree_copy(): | |||
| if __name__ == '__main__': | |||
| test_iterator_create_tuple() | |||
| test_iterator_create_tuple_numpy() | |||
| test_iterator_weak_ref() | |||
| test_tree_copy() | |||
| @@ -63,7 +63,8 @@ def test_linear_transformation_op(plot=False): | |||
| image_transformed = [] | |||
| image = [] | |||
| for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): | |||
| for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), | |||
| data2.create_dict_iterator(num_epochs=1, output_numpy=True)): | |||
| image1 = (item1["image"].transpose(1, 2, 0) * 255).astype(np.uint8) | |||
| image2 = (item2["image"].transpose(1, 2, 0) * 255).astype(np.uint8) | |||
| image_transformed.append(image1) | |||
| @@ -59,7 +59,7 @@ def mask_compare(array, op, constant, dtype=mstype.bool_): | |||
| array = array.astype(dtype=mstype_to_np_type[dtype]) | |||
| np.testing.assert_array_equal(array, d[0]) | |||
| np.testing.assert_array_equal(array, d[0].asnumpy()) | |||
| def test_mask_int_comparison(): | |||
| @@ -187,7 +187,7 @@ def test_nlp_compress_data(add_and_remove_nlp_compress_file): | |||
| NLP_FILE_NAME + "0", None, num_readers, shuffle=False) | |||
| assert data_set.get_dataset_size() == 16 | |||
| num_iter = 0 | |||
| for x, item in zip(data, data_set.create_dict_iterator(num_epochs=1)): | |||
| for x, item in zip(data, data_set.create_dict_iterator(num_epochs=1, output_numpy=True)): | |||
| assert (item["array_a"] == x["array_a"]).all() | |||
| assert (item["array_b"] == x["array_b"]).all() | |||
| assert item["array_c"].tobytes() == x["array_c"] | |||
| @@ -206,7 +206,8 @@ def test_nlp_compress_data_old_version(add_and_remove_nlp_compress_file): | |||
| OLD_NLP_FILE_NAME + "0", None, num_readers, shuffle=False) | |||
| assert old_data_set.get_dataset_size() == 16 | |||
| num_iter = 0 | |||
| for x, item in zip(old_data_set.create_dict_iterator(num_epochs=1), data_set.create_dict_iterator(num_epochs=1)): | |||
| for x, item in zip(old_data_set.create_dict_iterator(num_epochs=1, output_numpy=True), | |||
| data_set.create_dict_iterator(num_epochs=1, output_numpy=True)): | |||
| assert (item["array_a"] == x["array_a"]).all() | |||
| assert (item["array_b"] == x["array_b"]).all() | |||
| assert (item["array_c"] == x["array_c"]).all() | |||
| @@ -255,7 +256,7 @@ def test_cv_minddataset_partition_tutorial(add_and_remove_cv_file): | |||
| data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, | |||
| num_shards=num_shards, shard_id=partition_id) | |||
| num_iter = 0 | |||
| for item in data_set.create_dict_iterator(num_epochs=1): | |||
| for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| logger.info("-------------- partition : {} ------------------------".format(partition_id)) | |||
| logger.info("-------------- item[file_name]: {}-----------------------".format(item["file_name"])) | |||
| logger.info("-------------- item[label]: {} -----------------------".format(item["label"])) | |||
| @@ -278,7 +279,7 @@ def test_cv_minddataset_partition_num_samples_0(add_and_remove_cv_file): | |||
| num_shards=num_shards, | |||
| shard_id=partition_id, num_samples=1) | |||
| num_iter = 0 | |||
| for item in data_set.create_dict_iterator(num_epochs=1): | |||
| for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| logger.info("-------------- partition : {} ------------------------".format(partition_id)) | |||
| logger.info("-------------- item[file_name]: {}-----------------------".format(item["file_name"])) | |||
| logger.info("-------------- item[label]: {} -----------------------".format(item["label"])) | |||
| @@ -301,7 +302,7 @@ def test_cv_minddataset_partition_num_samples_1(add_and_remove_cv_file): | |||
| num_shards=num_shards, | |||
| shard_id=partition_id, num_samples=2) | |||
| num_iter = 0 | |||
| for item in data_set.create_dict_iterator(num_epochs=1): | |||
| for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| logger.info("-------------- partition : {} ------------------------".format(partition_id)) | |||
| logger.info("-------------- item[file_name]: {}-----------------------".format(item["file_name"])) | |||
| logger.info("-------------- item[label]: {} -----------------------".format(item["label"])) | |||
| @@ -324,7 +325,7 @@ def test_cv_minddataset_partition_num_samples_2(add_and_remove_cv_file): | |||
| num_shards=num_shards, | |||
| shard_id=partition_id, num_samples=3) | |||
| num_iter = 0 | |||
| for item in data_set.create_dict_iterator(num_epochs=1): | |||
| for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| logger.info("-------------- partition : {} ------------------------".format(partition_id)) | |||
| logger.info("-------------- item[file_name]: {}-----------------------".format(item["file_name"])) | |||
| logger.info("-------------- item[label]: {} -----------------------".format(item["label"])) | |||
| @@ -352,7 +353,7 @@ def test_cv_minddataset_partition_tutorial_check_shuffle_result(add_and_remove_c | |||
| data_set = data_set.repeat(3) | |||
| num_iter = 0 | |||
| for item in data_set.create_dict_iterator(num_epochs=1): | |||
| for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| logger.info("-------------- partition : {} ------------------------".format(partition_id)) | |||
| logger.info("-------------- item[file_name]: {}-----------------------".format(item["file_name"])) | |||
| logger.info("-------------- item[label]: {} -----------------------".format(item["label"])) | |||
| @@ -391,7 +392,7 @@ def test_cv_minddataset_partition_tutorial_check_whole_reshuffle_result_per_epoc | |||
| data_set = data_set.repeat(3) | |||
| num_iter = 0 | |||
| for item in data_set.create_dict_iterator(num_epochs=1): | |||
| for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| logger.info("-------------- partition : {} ------------------------".format(partition_id)) | |||
| logger.info("-------------- item[file_name]: {}-----------------------".format(item["file_name"])) | |||
| logger.info("-------------- item[label]: {} -----------------------".format(item["label"])) | |||
| @@ -424,7 +425,7 @@ def test_cv_minddataset_check_shuffle_result(add_and_remove_cv_file): | |||
| data_set = data_set.repeat(3) | |||
| num_iter = 0 | |||
| for item in data_set.create_dict_iterator(num_epochs=1): | |||
| for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| logger.info("-------------- item[file_name]: {}-----------------------".format(item["file_name"])) | |||
| logger.info("-------------- item[label]: {} -----------------------".format(item["label"])) | |||
| num_iter += 1 | |||
| @@ -450,7 +451,7 @@ def test_cv_minddataset_check_shuffle_result(add_and_remove_cv_file): | |||
| data_set2 = data_set2.repeat(3) | |||
| num_iter = 0 | |||
| for item in data_set2.create_dict_iterator(num_epochs=1): | |||
| for item in data_set2.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| logger.info("-------------- item[file_name]: {}-----------------------".format(item["file_name"])) | |||
| logger.info("-------------- item[label]: {} -----------------------".format(item["label"])) | |||
| num_iter += 1 | |||
| @@ -481,7 +482,7 @@ def test_cv_minddataset_check_shuffle_result(add_and_remove_cv_file): | |||
| data_set3 = data_set3.repeat(3) | |||
| num_iter = 0 | |||
| for item in data_set3.create_dict_iterator(num_epochs=1): | |||
| for item in data_set3.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| logger.info("-------------- item[file_name]: {}-----------------------".format(item["file_name"])) | |||
| logger.info("-------------- item[label]: {} -----------------------".format(item["label"])) | |||
| num_iter += 1 | |||
| @@ -513,7 +514,7 @@ def test_cv_minddataset_dataset_size(add_and_remove_cv_file): | |||
| repeat_num = 2 | |||
| data_set = data_set.repeat(repeat_num) | |||
| num_iter = 0 | |||
| for item in data_set.create_dict_iterator(num_epochs=1): | |||
| for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| logger.info( | |||
| "-------------- get dataset size {} -----------------".format(num_iter)) | |||
| logger.info( | |||
| @@ -542,7 +543,7 @@ def test_cv_minddataset_repeat_reshuffle(add_and_remove_cv_file): | |||
| data_set = data_set.repeat(2) | |||
| num_iter = 0 | |||
| labels = [] | |||
| for item in data_set.create_dict_iterator(num_epochs=1): | |||
| for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| logger.info( | |||
| "-------------- get dataset size {} -----------------".format(num_iter)) | |||
| logger.info( | |||
| @@ -571,7 +572,7 @@ def test_cv_minddataset_batch_size_larger_than_records(add_and_remove_cv_file): | |||
| num_parallel_workers=2) | |||
| data_set = data_set.batch(32, drop_remainder=True) | |||
| num_iter = 0 | |||
| for item in data_set.create_dict_iterator(num_epochs=1): | |||
| for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| logger.info( | |||
| "-------------- get dataset size {} -----------------".format(num_iter)) | |||
| logger.info( | |||
| @@ -603,7 +604,7 @@ def test_cv_minddataset_reader_file_list(add_and_remove_cv_file): | |||
| for x in range(FILES_NUM)], columns_list, num_readers) | |||
| assert data_set.get_dataset_size() == 10 | |||
| num_iter = 0 | |||
| for item in data_set.create_dict_iterator(num_epochs=1): | |||
| for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| logger.info( | |||
| "-------------- cv reader basic: {} ------------------------".format(num_iter)) | |||
| logger.info( | |||
| @@ -625,7 +626,7 @@ def test_cv_minddataset_reader_one_partition(add_and_remove_cv_file): | |||
| data_set = ds.MindDataset([CV_FILE_NAME + "0"], columns_list, num_readers) | |||
| assert data_set.get_dataset_size() < 10 | |||
| num_iter = 0 | |||
| for item in data_set.create_dict_iterator(num_epochs=1): | |||
| for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| logger.info( | |||
| "-------------- cv reader basic: {} ------------------------".format(num_iter)) | |||
| logger.info( | |||
| @@ -678,7 +679,7 @@ def test_cv_minddataset_reader_two_dataset(add_and_remove_cv_file): | |||
| columns_list, num_readers) | |||
| assert data_set.get_dataset_size() == 30 | |||
| num_iter = 0 | |||
| for item in data_set.create_dict_iterator(num_epochs=1): | |||
| for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| logger.info( | |||
| "-------------- cv reader basic: {} ------------------------".format(num_iter)) | |||
| logger.info( | |||
| @@ -739,7 +740,7 @@ def test_cv_minddataset_reader_two_dataset_partition(add_and_remove_cv_file): | |||
| columns_list, num_readers) | |||
| assert data_set.get_dataset_size() < 20 | |||
| num_iter = 0 | |||
| for item in data_set.create_dict_iterator(num_epochs=1): | |||
| for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| logger.info( | |||
| "-------------- cv reader basic: {} ------------------------".format(num_iter)) | |||
| logger.info( | |||
| @@ -770,7 +771,7 @@ def test_cv_minddataset_reader_basic_tutorial(add_and_remove_cv_file): | |||
| data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers) | |||
| assert data_set.get_dataset_size() == 10 | |||
| num_iter = 0 | |||
| for item in data_set.create_dict_iterator(num_epochs=1): | |||
| for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| logger.info( | |||
| "-------------- cv reader basic: {} ------------------------".format(num_iter)) | |||
| logger.info( | |||
| @@ -791,7 +792,7 @@ def test_nlp_minddataset_reader_basic_tutorial(add_and_remove_nlp_file): | |||
| data_set = ds.MindDataset(NLP_FILE_NAME + "0", None, num_readers) | |||
| assert data_set.get_dataset_size() == 10 | |||
| num_iter = 0 | |||
| for item in data_set.create_dict_iterator(num_epochs=1): | |||
| for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| logger.info( | |||
| "-------------- cv reader basic: {} ------------------------".format(num_iter)) | |||
| logger.info( | |||
| @@ -821,7 +822,7 @@ def test_cv_minddataset_reader_basic_tutorial_5_epoch(add_and_remove_cv_file): | |||
| assert data_set.get_dataset_size() == 10 | |||
| for _ in range(5): | |||
| num_iter = 0 | |||
| for data in data_set: | |||
| for data in data_set.create_tuple_iterator(output_numpy=True): | |||
| logger.info("data is {}".format(data)) | |||
| num_iter += 1 | |||
| assert num_iter == 10 | |||
| @@ -852,7 +853,7 @@ def test_cv_minddataset_reader_basic_tutorial_5_epoch_with_batch(add_and_remove_ | |||
| assert data_set.get_dataset_size() == 5 | |||
| for _ in range(5): | |||
| num_iter = 0 | |||
| for data in data_set: | |||
| for data in data_set.create_tuple_iterator(output_numpy=True): | |||
| logger.info("data is {}".format(data)) | |||
| num_iter += 1 | |||
| assert num_iter == 5 | |||
| @@ -865,7 +866,7 @@ def test_cv_minddataset_reader_no_columns(add_and_remove_cv_file): | |||
| data_set = ds.MindDataset(CV_FILE_NAME + "0") | |||
| assert data_set.get_dataset_size() == 10 | |||
| num_iter = 0 | |||
| for item in data_set.create_dict_iterator(num_epochs=1): | |||
| for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| logger.info( | |||
| "-------------- cv reader basic: {} ------------------------".format(num_iter)) | |||
| logger.info( | |||
| @@ -888,7 +889,7 @@ def test_cv_minddataset_reader_repeat_tutorial(add_and_remove_cv_file): | |||
| repeat_num = 2 | |||
| data_set = data_set.repeat(repeat_num) | |||
| num_iter = 0 | |||
| for item in data_set.create_dict_iterator(num_epochs=1): | |||
| for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| logger.info( | |||
| "-------------- repeat two test {} ------------------------".format(num_iter)) | |||
| logger.info( | |||
| @@ -1217,7 +1218,7 @@ def test_write_with_multi_bytes_and_array_and_read_by_MindDataset(): | |||
| shuffle=False) | |||
| assert data_set.get_dataset_size() == 6 | |||
| num_iter = 0 | |||
| for item in data_set.create_dict_iterator(num_epochs=1): | |||
| for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| assert len(item) == 13 | |||
| for field in item: | |||
| if isinstance(item[field], np.ndarray): | |||
| @@ -1236,7 +1237,7 @@ def test_write_with_multi_bytes_and_array_and_read_by_MindDataset(): | |||
| shuffle=False) | |||
| assert data_set.get_dataset_size() == 6 | |||
| num_iter = 0 | |||
| for item in data_set.create_dict_iterator(num_epochs=1): | |||
| for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| assert len(item) == 3 | |||
| for field in item: | |||
| if isinstance(item[field], np.ndarray): | |||
| @@ -1253,7 +1254,7 @@ def test_write_with_multi_bytes_and_array_and_read_by_MindDataset(): | |||
| shuffle=False) | |||
| assert data_set.get_dataset_size() == 6 | |||
| num_iter = 0 | |||
| for item in data_set.create_dict_iterator(num_epochs=1): | |||
| for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| assert len(item) == 4 | |||
| for field in item: | |||
| if isinstance(item[field], np.ndarray): | |||
| @@ -1272,7 +1273,7 @@ def test_write_with_multi_bytes_and_array_and_read_by_MindDataset(): | |||
| shuffle=False) | |||
| assert data_set.get_dataset_size() == 6 | |||
| num_iter = 0 | |||
| for item in data_set.create_dict_iterator(num_epochs=1): | |||
| for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| assert len(item) == 3 | |||
| for field in item: | |||
| if isinstance(item[field], np.ndarray): | |||
| @@ -1291,7 +1292,7 @@ def test_write_with_multi_bytes_and_array_and_read_by_MindDataset(): | |||
| shuffle=False) | |||
| assert data_set.get_dataset_size() == 6 | |||
| num_iter = 0 | |||
| for item in data_set.create_dict_iterator(num_epochs=1): | |||
| for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| assert len(item) == 5 | |||
| for field in item: | |||
| if isinstance(item[field], np.ndarray): | |||
| @@ -1310,7 +1311,7 @@ def test_write_with_multi_bytes_and_array_and_read_by_MindDataset(): | |||
| shuffle=False) | |||
| assert data_set.get_dataset_size() == 6 | |||
| num_iter = 0 | |||
| for item in data_set.create_dict_iterator(num_epochs=1): | |||
| for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| assert len(item) == 5 | |||
| for field in item: | |||
| if isinstance(item[field], np.ndarray): | |||
| @@ -1330,7 +1331,7 @@ def test_write_with_multi_bytes_and_array_and_read_by_MindDataset(): | |||
| shuffle=False) | |||
| assert data_set.get_dataset_size() == 6 | |||
| num_iter = 0 | |||
| for item in data_set.create_dict_iterator(num_epochs=1): | |||
| for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| assert len(item) == 11 | |||
| for field in item: | |||
| if isinstance(item[field], np.ndarray): | |||
| @@ -1420,7 +1421,7 @@ def test_write_with_multi_bytes_and_MindDataset(): | |||
| shuffle=False) | |||
| assert data_set.get_dataset_size() == 6 | |||
| num_iter = 0 | |||
| for item in data_set.create_dict_iterator(num_epochs=1): | |||
| for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| assert len(item) == 7 | |||
| for field in item: | |||
| if isinstance(item[field], np.ndarray): | |||
| @@ -1438,7 +1439,7 @@ def test_write_with_multi_bytes_and_MindDataset(): | |||
| shuffle=False) | |||
| assert data_set.get_dataset_size() == 6 | |||
| num_iter = 0 | |||
| for item in data_set.create_dict_iterator(num_epochs=1): | |||
| for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| assert len(item) == 3 | |||
| for field in item: | |||
| if isinstance(item[field], np.ndarray): | |||
| @@ -1456,7 +1457,7 @@ def test_write_with_multi_bytes_and_MindDataset(): | |||
| shuffle=False) | |||
| assert data_set.get_dataset_size() == 6 | |||
| num_iter = 0 | |||
| for item in data_set.create_dict_iterator(num_epochs=1): | |||
| for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| assert len(item) == 2 | |||
| for field in item: | |||
| if isinstance(item[field], np.ndarray): | |||
| @@ -1474,7 +1475,7 @@ def test_write_with_multi_bytes_and_MindDataset(): | |||
| shuffle=False) | |||
| assert data_set.get_dataset_size() == 6 | |||
| num_iter = 0 | |||
| for item in data_set.create_dict_iterator(num_epochs=1): | |||
| for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| assert len(item) == 2 | |||
| for field in item: | |||
| if isinstance(item[field], np.ndarray): | |||
| @@ -1492,7 +1493,7 @@ def test_write_with_multi_bytes_and_MindDataset(): | |||
| shuffle=False) | |||
| assert data_set.get_dataset_size() == 6 | |||
| num_iter = 0 | |||
| for item in data_set.create_dict_iterator(num_epochs=1): | |||
| for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| assert len(item) == 3 | |||
| for field in item: | |||
| if isinstance(item[field], np.ndarray): | |||
| @@ -1511,7 +1512,7 @@ def test_write_with_multi_bytes_and_MindDataset(): | |||
| shuffle=False) | |||
| assert data_set.get_dataset_size() == 6 | |||
| num_iter = 0 | |||
| for item in data_set.create_dict_iterator(num_epochs=1): | |||
| for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| assert len(item) == 5 | |||
| for field in item: | |||
| if isinstance(item[field], np.ndarray): | |||
| @@ -1615,7 +1616,7 @@ def test_write_with_multi_array_and_MindDataset(): | |||
| shuffle=False) | |||
| assert data_set.get_dataset_size() == 6 | |||
| num_iter = 0 | |||
| for item in data_set.create_dict_iterator(num_epochs=1): | |||
| for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| assert len(item) == 8 | |||
| for field in item: | |||
| if isinstance(item[field], np.ndarray): | |||
| @@ -1635,7 +1636,7 @@ def test_write_with_multi_array_and_MindDataset(): | |||
| shuffle=False) | |||
| assert data_set.get_dataset_size() == 6 | |||
| num_iter = 0 | |||
| for item in data_set.create_dict_iterator(num_epochs=1): | |||
| for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| assert len(item) == 6 | |||
| for field in item: | |||
| if isinstance(item[field], np.ndarray): | |||
| @@ -1655,7 +1656,7 @@ def test_write_with_multi_array_and_MindDataset(): | |||
| shuffle=False) | |||
| assert data_set.get_dataset_size() == 6 | |||
| num_iter = 0 | |||
| for item in data_set.create_dict_iterator(num_epochs=1): | |||
| for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| assert len(item) == 3 | |||
| for field in item: | |||
| if isinstance(item[field], np.ndarray): | |||
| @@ -1675,7 +1676,7 @@ def test_write_with_multi_array_and_MindDataset(): | |||
| shuffle=False) | |||
| assert data_set.get_dataset_size() == 6 | |||
| num_iter = 0 | |||
| for item in data_set.create_dict_iterator(num_epochs=1): | |||
| for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| assert len(item) == 3 | |||
| for field in item: | |||
| if isinstance(item[field], np.ndarray): | |||
| @@ -1693,7 +1694,7 @@ def test_write_with_multi_array_and_MindDataset(): | |||
| shuffle=False) | |||
| assert data_set.get_dataset_size() == 6 | |||
| num_iter = 0 | |||
| for item in data_set.create_dict_iterator(num_epochs=1): | |||
| for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| assert len(item) == 1 | |||
| for field in item: | |||
| if isinstance(item[field], np.ndarray): | |||
| @@ -1714,7 +1715,7 @@ def test_write_with_multi_array_and_MindDataset(): | |||
| shuffle=False) | |||
| assert data_set.get_dataset_size() == 6 | |||
| num_iter = 0 | |||
| for item in data_set.create_dict_iterator(num_epochs=1): | |||
| for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| assert len(item) == 8 | |||
| for field in item: | |||
| if isinstance(item[field], np.ndarray): | |||
| @@ -1761,7 +1762,7 @@ def test_numpy_generic(): | |||
| data_set = ds.MindDataset(CV_FILE_NAME + "0", None, num_readers, shuffle=False) | |||
| assert data_set.get_dataset_size() == 10 | |||
| idx = 0 | |||
| for item in data_set.create_dict_iterator(num_epochs=1): | |||
| for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| assert item['label1'] == item['label1'] | |||
| assert item['label2'] == item['label2'] | |||
| assert item['label3'] == item['label3'] | |||
| @@ -1861,7 +1862,7 @@ def test_write_with_float32_float64_float32_array_float64_array_and_MindDataset( | |||
| shuffle=False) | |||
| assert data_set.get_dataset_size() == 5 | |||
| num_iter = 0 | |||
| for item in data_set.create_dict_iterator(num_epochs=1): | |||
| for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| assert len(item) == 8 | |||
| for field in item: | |||
| if isinstance(item[field], np.ndarray): | |||
| @@ -1883,7 +1884,7 @@ def test_write_with_float32_float64_float32_array_float64_array_and_MindDataset( | |||
| shuffle=False) | |||
| assert data_set.get_dataset_size() == 5 | |||
| num_iter = 0 | |||
| for item in data_set.create_dict_iterator(num_epochs=1): | |||
| for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| assert len(item) == 2 | |||
| for field in item: | |||
| if isinstance(item[field], np.ndarray): | |||
| @@ -1905,7 +1906,7 @@ def test_write_with_float32_float64_float32_array_float64_array_and_MindDataset( | |||
| shuffle=False) | |||
| assert data_set.get_dataset_size() == 5 | |||
| num_iter = 0 | |||
| for item in data_set.create_dict_iterator(num_epochs=1): | |||
| for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| assert len(item) == 2 | |||
| for field in item: | |||
| if isinstance(item[field], np.ndarray): | |||
| @@ -97,7 +97,7 @@ def test_invalid_mindrecord(): | |||
| with pytest.raises(Exception, match="MindRecordOp init failed"): | |||
| data_set = ds.MindDataset('dummy.mindrecord', columns_list, num_readers) | |||
| num_iter = 0 | |||
| for _ in data_set.create_dict_iterator(num_epochs=1): | |||
| for _ in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| num_iter += 1 | |||
| try: | |||
| assert num_iter == 0 | |||
| @@ -116,7 +116,7 @@ def test_minddataset_lack_db(): | |||
| with pytest.raises(Exception, match="MindRecordOp init failed"): | |||
| data_set = ds.MindDataset(CV_FILE_NAME, columns_list, num_readers) | |||
| num_iter = 0 | |||
| for _ in data_set.create_dict_iterator(num_epochs=1): | |||
| for _ in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| num_iter += 1 | |||
| try: | |||
| assert num_iter == 0 | |||
| @@ -135,7 +135,7 @@ def test_cv_minddataset_pk_sample_error_class_column(): | |||
| with pytest.raises(Exception, match="MindRecordOp launch failed"): | |||
| data_set = ds.MindDataset(CV_FILE_NAME, columns_list, num_readers, sampler=sampler) | |||
| num_iter = 0 | |||
| for _ in data_set.create_dict_iterator(num_epochs=1): | |||
| for _ in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| num_iter += 1 | |||
| os.remove(CV_FILE_NAME) | |||
| os.remove("{}.db".format(CV_FILE_NAME)) | |||
| @@ -150,7 +150,7 @@ def test_cv_minddataset_pk_sample_exclusive_shuffle(): | |||
| data_set = ds.MindDataset(CV_FILE_NAME, columns_list, num_readers, | |||
| sampler=sampler, shuffle=False) | |||
| num_iter = 0 | |||
| for _ in data_set.create_dict_iterator(num_epochs=1): | |||
| for _ in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| num_iter += 1 | |||
| os.remove(CV_FILE_NAME) | |||
| os.remove("{}.db".format(CV_FILE_NAME)) | |||
| @@ -29,7 +29,7 @@ def test_cv_minddataset_reader_two_png_tutorial(): | |||
| data_set = ds.MindDataset(CV_FILE_NAME, columns_list, num_readers) | |||
| assert data_set.get_dataset_size() == 5 | |||
| num_iter = 0 | |||
| for item in data_set.create_dict_iterator(num_epochs=1): | |||
| for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| assert len(item) == 5 | |||
| logger.info("-------------- cv reader basic is {} -----------------".format(num_iter)) | |||
| logger.info("-------------- item[id] is {} ------------------------".format(item["id"])) | |||
| @@ -50,7 +50,7 @@ def test_cv_minddataset_reader_two_png_tutorial_just_image2(): | |||
| data_set = ds.MindDataset(CV_FILE_NAME, columns_list, num_readers) | |||
| assert data_set.get_dataset_size() == 5 | |||
| num_iter = 0 | |||
| for item in data_set.create_dict_iterator(num_epochs=1): | |||
| for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| assert len(item) == 2 | |||
| logger.info("-------------- cv reader basic is {} -----------------".format(num_iter)) | |||
| logger.info("-------------- item[img_data] is {} ------------------".format(item["img_data"])) | |||
| @@ -57,7 +57,7 @@ def test_cv_minddataset_reader_multi_image_and_ndarray_tutorial(): | |||
| data_set = ds.MindDataset(CV_FILE_NAME, columns_list, num_readers) | |||
| assert data_set.get_dataset_size() == 5 | |||
| num_iter = 0 | |||
| for item in data_set.create_dict_iterator(num_epochs=1): | |||
| for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| assert len(item) == 7 | |||
| logger.info("item: {}".format(item)) | |||
| assert item["image_0"].dtype == np.uint8 | |||
| @@ -122,7 +122,7 @@ def test_cv_minddataset_reader_basic_padded_samples(add_and_remove_cv_file): | |||
| assert data_set.get_dataset_size() == 15 | |||
| num_iter = 0 | |||
| num_padded_iter = 0 | |||
| for item in data_set.create_dict_iterator(num_epochs=1): | |||
| for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| logger.info("-------------- cv reader basic: {} ------------------------".format(num_iter)) | |||
| logger.info("-------------- item[file_name]: {} ------------------------".format(item["file_name"])) | |||
| logger.info("-------------- item[label]: {} ----------------------------".format(item["label"])) | |||
| @@ -157,7 +157,7 @@ def test_cv_minddataset_partition_padded_samples(add_and_remove_cv_file): | |||
| padded_sample=padded_sample, | |||
| num_padded=num_padded) | |||
| assert data_set.get_dataset_size() == dataset_size | |||
| for item in data_set.create_dict_iterator(num_epochs=1): | |||
| for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| logger.info("-------------- partition : {} ------------------------".format(partition_id)) | |||
| logger.info("-------------- len(item[data]): {} ------------------------".format(len(item["data"]))) | |||
| logger.info("-------------- item[data]: {} -----------------------------".format(item["data"])) | |||
| @@ -205,7 +205,7 @@ def test_cv_minddataset_partition_padded_samples_multi_epoch(add_and_remove_cv_f | |||
| assert data_set.get_dataset_size() == dataset_size | |||
| data_set = data_set.repeat(repeat_size) | |||
| local_index = 0 | |||
| for item in data_set.create_dict_iterator(num_epochs=1): | |||
| for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| logger.info("-------------- partition : {} ------------------------".format(partition_id)) | |||
| logger.info("-------------- len(item[data]): {} ------------------------".format(len(item["data"]))) | |||
| logger.info("-------------- item[data]: {} -----------------------------".format(item["data"])) | |||
| @@ -267,7 +267,7 @@ def test_cv_minddataset_partition_padded_samples_no_dividsible(add_and_remove_cv | |||
| padded_sample=padded_sample, | |||
| num_padded=num_padded) | |||
| num_iter = 0 | |||
| for item in data_set.create_dict_iterator(num_epochs=1): | |||
| for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| num_iter += 1 | |||
| return num_iter | |||
| @@ -313,7 +313,7 @@ def test_cv_minddataset_partition_padded_samples_no_equal_column_list(add_and_re | |||
| shard_id=partition_id, | |||
| padded_sample=padded_sample, | |||
| num_padded=num_padded) | |||
| for item in data_set.create_dict_iterator(num_epochs=1): | |||
| for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| logger.info("-------------- partition : {} ------------------------".format(partition_id)) | |||
| logger.info("-------------- len(item[data]): {} ------------------------".format(len(item["data"]))) | |||
| logger.info("-------------- item[data]: {} -----------------------------".format(item["data"])) | |||
| @@ -337,7 +337,7 @@ def test_cv_minddataset_partition_padded_samples_no_column_list(add_and_remove_c | |||
| shard_id=partition_id, | |||
| padded_sample=padded_sample, | |||
| num_padded=num_padded) | |||
| for item in data_set.create_dict_iterator(num_epochs=1): | |||
| for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| logger.info("-------------- partition : {} ------------------------".format(partition_id)) | |||
| logger.info("-------------- len(item[data]): {} ------------------------".format(len(item["data"]))) | |||
| logger.info("-------------- item[data]: {} -----------------------------".format(item["data"])) | |||
| @@ -360,7 +360,7 @@ def test_cv_minddataset_partition_padded_samples_no_num_padded(add_and_remove_cv | |||
| num_shards=num_shards, | |||
| shard_id=partition_id, | |||
| padded_sample=padded_sample) | |||
| for item in data_set.create_dict_iterator(num_epochs=1): | |||
| for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| logger.info("-------------- partition : {} ------------------------".format(partition_id)) | |||
| logger.info("-------------- len(item[data]): {} ------------------------".format(len(item["data"]))) | |||
| logger.info("-------------- item[data]: {} -----------------------------".format(item["data"])) | |||
| @@ -383,7 +383,7 @@ def test_cv_minddataset_partition_padded_samples_no_padded_samples(add_and_remov | |||
| num_shards=num_shards, | |||
| shard_id=partition_id, | |||
| num_padded=num_padded) | |||
| for item in data_set.create_dict_iterator(num_epochs=1): | |||
| for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| logger.info("-------------- partition : {} ------------------------".format(partition_id)) | |||
| logger.info("-------------- len(item[data]): {} ------------------------".format(len(item["data"]))) | |||
| logger.info("-------------- item[data]: {} -----------------------------".format(item["data"])) | |||
| @@ -413,7 +413,7 @@ def test_nlp_minddataset_reader_basic_padded_samples(add_and_remove_nlp_file): | |||
| padded_sample=padded_sample, | |||
| num_padded=num_padded) | |||
| assert data_set.get_dataset_size() == dataset_size | |||
| for item in data_set.create_dict_iterator(num_epochs=1): | |||
| for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| logger.info("-------------- item[id]: {} ------------------------".format(item["id"])) | |||
| logger.info("-------------- item[rating]: {} --------------------".format(item["rating"])) | |||
| logger.info("-------------- item[input_ids]: {}, shape: {} -----------------".format( | |||
| @@ -461,7 +461,7 @@ def test_nlp_minddataset_reader_basic_padded_samples_multi_epoch(add_and_remove_ | |||
| data_set = data_set.repeat(repeat_size) | |||
| local_index = 0 | |||
| for item in data_set.create_dict_iterator(num_epochs=1): | |||
| for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| logger.info("-------------- item[id]: {} ------------------------".format(item["id"])) | |||
| logger.info("-------------- item[rating]: {} --------------------".format(item["rating"])) | |||
| logger.info("-------------- item[input_ids]: {}, shape: {} -----------------".format( | |||
| @@ -523,7 +523,7 @@ def test_nlp_minddataset_reader_basic_padded_samples_check_whole_reshuffle_resul | |||
| assert data_set.get_dataset_size() == dataset_size | |||
| data_set = data_set.repeat(repeat_size) | |||
| inner_num_iter = 0 | |||
| for item in data_set.create_dict_iterator(num_epochs=1): | |||
| for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| logger.info("-------------- item[id]: {} ------------------------".format(item["id"])) | |||
| logger.info("-------------- item[rating]: {} --------------------".format(item["rating"])) | |||
| logger.info("-------------- item[input_ids]: {}, shape: {} -----------------" | |||
| @@ -70,7 +70,7 @@ def test_cv_minddataset_pk_sample_no_column(add_and_remove_cv_file): | |||
| assert data_set.get_dataset_size() == 6 | |||
| num_iter = 0 | |||
| for item in data_set.create_dict_iterator(num_epochs=1): | |||
| for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| logger.info( | |||
| "-------------- cv reader basic: {} ------------------------".format(num_iter)) | |||
| logger.info("-------------- item[file_name]: \ | |||
| @@ -90,7 +90,7 @@ def test_cv_minddataset_pk_sample_basic(add_and_remove_cv_file): | |||
| assert data_set.get_dataset_size() == 6 | |||
| num_iter = 0 | |||
| for item in data_set.create_dict_iterator(num_epochs=1): | |||
| for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| logger.info( | |||
| "-------------- cv reader basic: {} ------------------------".format(num_iter)) | |||
| logger.info("-------------- item[data]: \ | |||
| @@ -111,7 +111,7 @@ def test_cv_minddataset_pk_sample_shuffle(add_and_remove_cv_file): | |||
| assert data_set.get_dataset_size() == 9 | |||
| num_iter = 0 | |||
| for item in data_set.create_dict_iterator(num_epochs=1): | |||
| for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| logger.info( | |||
| "-------------- cv reader basic: {} ------------------------".format(num_iter)) | |||
| logger.info("-------------- item[file_name]: \ | |||
| @@ -132,7 +132,7 @@ def test_cv_minddataset_pk_sample_shuffle_1(add_and_remove_cv_file): | |||
| assert data_set.get_dataset_size() == 5 | |||
| num_iter = 0 | |||
| for item in data_set.create_dict_iterator(num_epochs=1): | |||
| for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| logger.info( | |||
| "-------------- cv reader basic: {} ------------------------".format(num_iter)) | |||
| logger.info("-------------- item[file_name]: \ | |||
| @@ -152,7 +152,7 @@ def test_cv_minddataset_pk_sample_shuffle_2(add_and_remove_cv_file): | |||
| assert data_set.get_dataset_size() == 9 | |||
| num_iter = 0 | |||
| for item in data_set.create_dict_iterator(num_epochs=1): | |||
| for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| logger.info( | |||
| "-------------- cv reader basic: {} ------------------------".format(num_iter)) | |||
| logger.info("-------------- item[file_name]: \ | |||
| @@ -172,7 +172,7 @@ def test_cv_minddataset_pk_sample_out_of_range_0(add_and_remove_cv_file): | |||
| sampler=sampler) | |||
| assert data_set.get_dataset_size() == 15 | |||
| num_iter = 0 | |||
| for item in data_set.create_dict_iterator(num_epochs=1): | |||
| for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| logger.info( | |||
| "-------------- cv reader basic: {} ------------------------".format(num_iter)) | |||
| logger.info("-------------- item[file_name]: \ | |||
| @@ -191,7 +191,7 @@ def test_cv_minddataset_pk_sample_out_of_range_1(add_and_remove_cv_file): | |||
| sampler=sampler) | |||
| assert data_set.get_dataset_size() == 15 | |||
| num_iter = 0 | |||
| for item in data_set.create_dict_iterator(num_epochs=1): | |||
| for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| logger.info( | |||
| "-------------- cv reader basic: {} ------------------------".format(num_iter)) | |||
| logger.info("-------------- item[file_name]: \ | |||
| @@ -210,7 +210,7 @@ def test_cv_minddataset_pk_sample_out_of_range_2(add_and_remove_cv_file): | |||
| sampler=sampler) | |||
| assert data_set.get_dataset_size() == 10 | |||
| num_iter = 0 | |||
| for item in data_set.create_dict_iterator(num_epochs=1): | |||
| for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| logger.info( | |||
| "-------------- cv reader basic: {} ------------------------".format(num_iter)) | |||
| logger.info("-------------- item[file_name]: \ | |||
| @@ -231,7 +231,7 @@ def test_cv_minddataset_subset_random_sample_basic(add_and_remove_cv_file): | |||
| sampler=sampler) | |||
| assert data_set.get_dataset_size() == 5 | |||
| num_iter = 0 | |||
| for item in data_set.create_dict_iterator(num_epochs=1): | |||
| for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| logger.info( | |||
| "-------------- cv reader basic: {} ------------------------".format(num_iter)) | |||
| logger.info( | |||
| @@ -254,7 +254,7 @@ def test_cv_minddataset_subset_random_sample_replica(add_and_remove_cv_file): | |||
| sampler=sampler) | |||
| assert data_set.get_dataset_size() == 6 | |||
| num_iter = 0 | |||
| for item in data_set.create_dict_iterator(num_epochs=1): | |||
| for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| logger.info( | |||
| "-------------- cv reader basic: {} ------------------------".format(num_iter)) | |||
| logger.info( | |||
| @@ -277,7 +277,7 @@ def test_cv_minddataset_subset_random_sample_empty(add_and_remove_cv_file): | |||
| sampler=sampler) | |||
| assert data_set.get_dataset_size() == 0 | |||
| num_iter = 0 | |||
| for item in data_set.create_dict_iterator(num_epochs=1): | |||
| for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| logger.info( | |||
| "-------------- cv reader basic: {} ------------------------".format(num_iter)) | |||
| logger.info( | |||
| @@ -300,7 +300,7 @@ def test_cv_minddataset_subset_random_sample_out_of_range(add_and_remove_cv_file | |||
| sampler=sampler) | |||
| assert data_set.get_dataset_size() == 5 | |||
| num_iter = 0 | |||
| for item in data_set.create_dict_iterator(num_epochs=1): | |||
| for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| logger.info( | |||
| "-------------- cv reader basic: {} ------------------------".format(num_iter)) | |||
| logger.info( | |||
| @@ -322,7 +322,7 @@ def test_cv_minddataset_subset_random_sample_negative(add_and_remove_cv_file): | |||
| sampler=sampler) | |||
| assert data_set.get_dataset_size() == 5 | |||
| num_iter = 0 | |||
| for item in data_set.create_dict_iterator(num_epochs=1): | |||
| for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| logger.info( | |||
| "-------------- cv reader basic: {} ------------------------".format(num_iter)) | |||
| logger.info( | |||
| @@ -345,7 +345,7 @@ def test_cv_minddataset_random_sampler_basic(add_and_remove_cv_file): | |||
| assert data_set.get_dataset_size() == 10 | |||
| num_iter = 0 | |||
| new_dataset = [] | |||
| for item in data_set.create_dict_iterator(num_epochs=1): | |||
| for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| logger.info( | |||
| "-------------- cv reader basic: {} ------------------------".format(num_iter)) | |||
| logger.info( | |||
| @@ -371,7 +371,7 @@ def test_cv_minddataset_random_sampler_repeat(add_and_remove_cv_file): | |||
| epoch1_dataset = [] | |||
| epoch2_dataset = [] | |||
| epoch3_dataset = [] | |||
| for item in ds1.create_dict_iterator(num_epochs=1): | |||
| for item in ds1.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| logger.info( | |||
| "-------------- cv reader basic: {} ------------------------".format(num_iter)) | |||
| logger.info( | |||
| @@ -400,7 +400,7 @@ def test_cv_minddataset_random_sampler_replacement(add_and_remove_cv_file): | |||
| sampler=sampler) | |||
| assert data_set.get_dataset_size() == 5 | |||
| num_iter = 0 | |||
| for item in data_set.create_dict_iterator(num_epochs=1): | |||
| for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| logger.info( | |||
| "-------------- cv reader basic: {} ------------------------".format(num_iter)) | |||
| logger.info( | |||
| @@ -422,7 +422,7 @@ def test_cv_minddataset_sequential_sampler_basic(add_and_remove_cv_file): | |||
| sampler=sampler) | |||
| assert data_set.get_dataset_size() == 4 | |||
| num_iter = 0 | |||
| for item in data_set.create_dict_iterator(num_epochs=1): | |||
| for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| logger.info( | |||
| "-------------- cv reader basic: {} ------------------------".format(num_iter)) | |||
| logger.info( | |||
| @@ -447,7 +447,7 @@ def test_cv_minddataset_sequential_sampler_exceed_size(add_and_remove_cv_file): | |||
| dataset_size = data_set.get_dataset_size() | |||
| assert dataset_size == 10 | |||
| num_iter = 0 | |||
| for item in data_set.create_dict_iterator(num_epochs=1): | |||
| for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| logger.info( | |||
| "-------------- cv reader basic: {} ------------------------".format(num_iter)) | |||
| logger.info( | |||
| @@ -473,7 +473,7 @@ def test_cv_minddataset_split_basic(add_and_remove_cv_file): | |||
| assert d1.get_dataset_size() == 8 | |||
| assert d2.get_dataset_size() == 2 | |||
| num_iter = 0 | |||
| for item in d1.create_dict_iterator(num_epochs=1): | |||
| for item in d1.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| logger.info( | |||
| "-------------- item[data]: {} -----------------------------".format(item["data"])) | |||
| logger.info( | |||
| @@ -485,7 +485,7 @@ def test_cv_minddataset_split_basic(add_and_remove_cv_file): | |||
| num_iter += 1 | |||
| assert num_iter == 8 | |||
| num_iter = 0 | |||
| for item in d2.create_dict_iterator(num_epochs=1): | |||
| for item in d2.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| logger.info( | |||
| "-------------- item[data]: {} -----------------------------".format(item["data"])) | |||
| logger.info( | |||
| @@ -509,7 +509,7 @@ def test_cv_minddataset_split_exact_percent(add_and_remove_cv_file): | |||
| assert d1.get_dataset_size() == 8 | |||
| assert d2.get_dataset_size() == 2 | |||
| num_iter = 0 | |||
| for item in d1.create_dict_iterator(num_epochs=1): | |||
| for item in d1.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| logger.info( | |||
| "-------------- item[data]: {} -----------------------------".format(item["data"])) | |||
| logger.info( | |||
| @@ -521,7 +521,7 @@ def test_cv_minddataset_split_exact_percent(add_and_remove_cv_file): | |||
| num_iter += 1 | |||
| assert num_iter == 8 | |||
| num_iter = 0 | |||
| for item in d2.create_dict_iterator(num_epochs=1): | |||
| for item in d2.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| logger.info( | |||
| "-------------- item[data]: {} -----------------------------".format(item["data"])) | |||
| logger.info( | |||
| @@ -545,7 +545,7 @@ def test_cv_minddataset_split_fuzzy_percent(add_and_remove_cv_file): | |||
| assert d1.get_dataset_size() == 4 | |||
| assert d2.get_dataset_size() == 6 | |||
| num_iter = 0 | |||
| for item in d1.create_dict_iterator(num_epochs=1): | |||
| for item in d1.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| logger.info( | |||
| "-------------- item[data]: {} -----------------------------".format(item["data"])) | |||
| logger.info( | |||
| @@ -557,7 +557,7 @@ def test_cv_minddataset_split_fuzzy_percent(add_and_remove_cv_file): | |||
| num_iter += 1 | |||
| assert num_iter == 4 | |||
| num_iter = 0 | |||
| for item in d2.create_dict_iterator(num_epochs=1): | |||
| for item in d2.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| logger.info( | |||
| "-------------- item[data]: {} -----------------------------".format(item["data"])) | |||
| logger.info( | |||
| @@ -585,7 +585,7 @@ def test_cv_minddataset_split_deterministic(add_and_remove_cv_file): | |||
| d1_dataset = [] | |||
| d2_dataset = [] | |||
| num_iter = 0 | |||
| for item in d1.create_dict_iterator(num_epochs=1): | |||
| for item in d1.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| logger.info( | |||
| "-------------- item[data]: {} -----------------------------".format(item["data"])) | |||
| logger.info( | |||
| @@ -596,7 +596,7 @@ def test_cv_minddataset_split_deterministic(add_and_remove_cv_file): | |||
| num_iter += 1 | |||
| assert num_iter == 8 | |||
| num_iter = 0 | |||
| for item in d2.create_dict_iterator(num_epochs=1): | |||
| for item in d2.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| logger.info( | |||
| "-------------- item[data]: {} -----------------------------".format(item["data"])) | |||
| logger.info( | |||
| @@ -628,7 +628,7 @@ def test_cv_minddataset_split_sharding(add_and_remove_cv_file): | |||
| num_iter = 0 | |||
| d1_shard1 = [] | |||
| for item in d1.create_dict_iterator(num_epochs=1): | |||
| for item in d1.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| logger.info( | |||
| "-------------- item[data]: {} -----------------------------".format(item["data"])) | |||
| logger.info( | |||
| @@ -649,7 +649,7 @@ def test_cv_minddataset_split_sharding(add_and_remove_cv_file): | |||
| epoch2_dataset = [] | |||
| epoch3_dataset = [] | |||
| num_iter = 0 | |||
| for item in d1s.create_dict_iterator(num_epochs=1): | |||
| for item in d1s.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| logger.info( | |||
| "-------------- item[data]: {} -----------------------------".format(item["data"])) | |||
| logger.info( | |||
| @@ -44,7 +44,7 @@ def test_one_hot_op(): | |||
| golden_label = np.ones(num_classes) * epsilon_para / num_classes | |||
| golden_label[1] = 1 - epsilon_para / num_classes | |||
| for data in dataset.create_dict_iterator(num_epochs=1): | |||
| for data in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| label = data["label"] | |||
| logger.info("label is {}".format(label)) | |||
| logger.info("golden_label is {}".format(golden_label)) | |||
| @@ -83,7 +83,8 @@ def test_mix_up_single(): | |||
| ] | |||
| ds1 = ds1.map(operations=transforms, input_columns=["image", "label"]) | |||
| for data1, data2 in zip(ds1.create_dict_iterator(num_epochs=1), ds2.create_dict_iterator(num_epochs=1)): | |||
| for data1, data2 in zip(ds1.create_dict_iterator(num_epochs=1, output_numpy=True), | |||
| ds2.create_dict_iterator(num_epochs=1, output_numpy=True)): | |||
| image1 = data1["image"] | |||
| label = data1["label"] | |||
| logger.info("label is {}".format(label)) | |||
| @@ -133,7 +134,8 @@ def test_mix_up_multi(): | |||
| ds1 = ds1.map(operations=transforms, input_columns=["image", "label"]) | |||
| num_iter = 0 | |||
| batch1_image1 = 0 | |||
| for data1, data2 in zip(ds1.create_dict_iterator(num_epochs=1), ds2.create_dict_iterator(num_epochs=1)): | |||
| for data1, data2 in zip(ds1.create_dict_iterator(num_epochs=1, output_numpy=True), | |||
| ds2.create_dict_iterator(num_epochs=1, output_numpy=True)): | |||
| image1 = data1["image"] | |||
| label1 = data1["label"] | |||
| logger.info("label: {}".format(label1)) | |||
| @@ -44,9 +44,9 @@ def test_mixup_batch_success1(plot=False): | |||
| images_original = None | |||
| for idx, (image, _) in enumerate(ds_original): | |||
| if idx == 0: | |||
| images_original = image | |||
| images_original = image.asnumpy() | |||
| else: | |||
| images_original = np.append(images_original, image, axis=0) | |||
| images_original = np.append(images_original, image.asnumpy(), axis=0) | |||
| # MixUp Images | |||
| data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False) | |||
| @@ -60,9 +60,9 @@ def test_mixup_batch_success1(plot=False): | |||
| images_mixup = None | |||
| for idx, (image, _) in enumerate(data1): | |||
| if idx == 0: | |||
| images_mixup = image | |||
| images_mixup = image.asnumpy() | |||
| else: | |||
| images_mixup = np.append(images_mixup, image, axis=0) | |||
| images_mixup = np.append(images_mixup, image.asnumpy(), axis=0) | |||
| if plot: | |||
| visualize_list(images_original, images_mixup) | |||
| @@ -88,9 +88,9 @@ def test_mixup_batch_success2(plot=False): | |||
| images_original = None | |||
| for idx, (image, _) in enumerate(ds_original): | |||
| if idx == 0: | |||
| images_original = image | |||
| images_original = image.asnumpy() | |||
| else: | |||
| images_original = np.append(images_original, image, axis=0) | |||
| images_original = np.append(images_original, image.asnumpy(), axis=0) | |||
| # MixUp Images | |||
| data1 = ds.ImageFolderDataset(dataset_dir=DATA_DIR2, shuffle=False) | |||
| @@ -108,9 +108,9 @@ def test_mixup_batch_success2(plot=False): | |||
| images_mixup = None | |||
| for idx, (image, _) in enumerate(data1): | |||
| if idx == 0: | |||
| images_mixup = image | |||
| images_mixup = image.asnumpy() | |||
| else: | |||
| images_mixup = np.append(images_mixup, image, axis=0) | |||
| images_mixup = np.append(images_mixup, image.asnumpy(), axis=0) | |||
| if plot: | |||
| visualize_list(images_original, images_mixup) | |||
| @@ -135,9 +135,9 @@ def test_mixup_batch_success3(plot=False): | |||
| images_original = None | |||
| for idx, (image, _) in enumerate(ds_original): | |||
| if idx == 0: | |||
| images_original = image | |||
| images_original = image.asnumpy() | |||
| else: | |||
| images_original = np.append(images_original, image, axis=0) | |||
| images_original = np.append(images_original, image.asnumpy(), axis=0) | |||
| # MixUp Images | |||
| data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False) | |||
| @@ -151,9 +151,9 @@ def test_mixup_batch_success3(plot=False): | |||
| images_mixup = np.array([]) | |||
| for idx, (image, _) in enumerate(data1): | |||
| if idx == 0: | |||
| images_mixup = image | |||
| images_mixup = image.asnumpy() | |||
| else: | |||
| images_mixup = np.append(images_mixup, image, axis=0) | |||
| images_mixup = np.append(images_mixup, image.asnumpy(), axis=0) | |||
| if plot: | |||
| visualize_list(images_original, images_mixup) | |||
| @@ -180,9 +180,9 @@ def test_mixup_batch_success4(plot=False): | |||
| images_original = None | |||
| for idx, (image, _) in enumerate(ds_original): | |||
| if idx == 0: | |||
| images_original = image | |||
| images_original = image.asnumpy() | |||
| else: | |||
| images_original = np.append(images_original, image, axis=0) | |||
| images_original = np.append(images_original, image.asnumpy(), axis=0) | |||
| # MixUp Images | |||
| data1 = ds.CelebADataset(DATA_DIR3, shuffle=False) | |||
| @@ -200,9 +200,9 @@ def test_mixup_batch_success4(plot=False): | |||
| images_mixup = np.array([]) | |||
| for idx, (image, _) in enumerate(data1): | |||
| if idx == 0: | |||
| images_mixup = image | |||
| images_mixup = image.asnumpy() | |||
| else: | |||
| images_mixup = np.append(images_mixup, image, axis=0) | |||
| images_mixup = np.append(images_mixup, image.asnumpy(), axis=0) | |||
| if plot: | |||
| visualize_list(images_original, images_mixup) | |||
| @@ -252,9 +252,9 @@ def test_mixup_batch_fail1(): | |||
| images_original = np.array([]) | |||
| for idx, (image, _) in enumerate(ds_original): | |||
| if idx == 0: | |||
| images_original = image | |||
| images_original = image.asnumpy() | |||
| else: | |||
| images_original = np.append(images_original, image, axis=0) | |||
| images_original = np.append(images_original, image.asnumpy(), axis=0) | |||
| # MixUp Images | |||
| data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False) | |||
| @@ -266,9 +266,9 @@ def test_mixup_batch_fail1(): | |||
| data1 = data1.map(operations=mixup_batch_op, input_columns=["image", "label"]) | |||
| for idx, (image, _) in enumerate(data1): | |||
| if idx == 0: | |||
| images_mixup = image | |||
| images_mixup = image.asnumpy() | |||
| else: | |||
| images_mixup = np.append(images_mixup, image, axis=0) | |||
| images_mixup = np.append(images_mixup, image.asnumpy(), axis=0) | |||
| error_message = "You must make sure images are HWC or CHW and batched" | |||
| assert error_message in str(error.value) | |||
| @@ -287,9 +287,9 @@ def test_mixup_batch_fail2(): | |||
| images_original = np.array([]) | |||
| for idx, (image, _) in enumerate(ds_original): | |||
| if idx == 0: | |||
| images_original = image | |||
| images_original = image.asnumpy() | |||
| else: | |||
| images_original = np.append(images_original, image, axis=0) | |||
| images_original = np.append(images_original, image.asnumpy(), axis=0) | |||
| # MixUp Images | |||
| data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False) | |||
| @@ -315,9 +315,9 @@ def test_mixup_batch_fail3(): | |||
| images_original = None | |||
| for idx, (image, _) in enumerate(ds_original): | |||
| if idx == 0: | |||
| images_original = image | |||
| images_original = image.asnumpy() | |||
| else: | |||
| images_original = np.append(images_original, image, axis=0) | |||
| images_original = np.append(images_original, image.asnumpy(), axis=0) | |||
| # MixUp Images | |||
| data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False) | |||
| @@ -332,9 +332,9 @@ def test_mixup_batch_fail3(): | |||
| images_mixup = np.array([]) | |||
| for idx, (image, _) in enumerate(data1): | |||
| if idx == 0: | |||
| images_mixup = image | |||
| images_mixup = image.asnumpy() | |||
| else: | |||
| images_mixup = np.append(images_mixup, image, axis=0) | |||
| images_mixup = np.append(images_mixup, image.asnumpy(), axis=0) | |||
| error_message = "Both images and labels columns are required" | |||
| assert error_message in str(error.value) | |||
| @@ -353,9 +353,9 @@ def test_mixup_batch_fail4(): | |||
| images_original = np.array([]) | |||
| for idx, (image, _) in enumerate(ds_original): | |||
| if idx == 0: | |||
| images_original = image | |||
| images_original = image.asnumpy() | |||
| else: | |||
| images_original = np.append(images_original, image, axis=0) | |||
| images_original = np.append(images_original, image.asnumpy(), axis=0) | |||
| # MixUp Images | |||
| data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False) | |||
| @@ -382,9 +382,9 @@ def test_mixup_batch_fail5(): | |||
| images_original = np.array([]) | |||
| for idx, (image, _) in enumerate(ds_original): | |||
| if idx == 0: | |||
| images_original = image | |||
| images_original = image.asnumpy() | |||
| else: | |||
| images_original = np.append(images_original, image, axis=0) | |||
| images_original = np.append(images_original, image.asnumpy(), axis=0) | |||
| # MixUp Images | |||
| data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False) | |||
| @@ -397,9 +397,9 @@ def test_mixup_batch_fail5(): | |||
| images_mixup = np.array([]) | |||
| for idx, (image, _) in enumerate(data1): | |||
| if idx == 0: | |||
| images_mixup = image | |||
| images_mixup = image.asnumpy() | |||
| else: | |||
| images_mixup = np.append(images_mixup, image, axis=0) | |||
| images_mixup = np.append(images_mixup, image.asnumpy(), axis=0) | |||
| error_message = "MixUpBatch: Wrong labels shape. The second column (labels) must have a shape of NC or NLC" | |||
| assert error_message in str(error.value) | |||
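The mixup hunks show the other side of the change: these tests iterate the dataset object directly, which now yields `MSTensor` values, so each batch is converted with `.asnumpy()` before being accumulated with NumPy. A hedged sketch of that pattern, using a tiny `NumpySlicesDataset` in place of the Cifar10/ImageFolder pipelines:

```python
import numpy as np
import mindspore.dataset as ds

# Hypothetical stand-in for the image pipelines used in the mixup tests.
data = ds.NumpySlicesDataset({"image": np.ones((4, 2, 2, 3), dtype=np.uint8),
                              "label": np.arange(4, dtype=np.int32)}, shuffle=False)
data = data.batch(2)

images = None
# Direct iteration yields MSTensor values, so each batch is converted
# with .asnumpy() before the NumPy append.
for idx, (image, _) in enumerate(data):
    if idx == 0:
        images = image.asnumpy()
    else:
        images = np.append(images, image.asnumpy(), axis=0)
assert images.shape == (4, 2, 2, 3)
```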
| @@ -42,7 +42,7 @@ def test_multiple_ngrams(): | |||
| dataset = dataset.map(operations=text.Ngram([1, 2, 3], ("_", 2), ("_", 2), " "), input_columns="text") | |||
| i = 0 | |||
| for data in dataset.create_dict_iterator(num_epochs=1): | |||
| for data in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| assert [d.decode("utf8") for d in data["text"]] == n_gram_mottos[i] | |||
| i += 1 | |||
| @@ -64,7 +64,7 @@ def test_simple_ngram(): | |||
| dataset = dataset.map(operations=text.Ngram(3, separator=" "), input_columns="text") | |||
| i = 0 | |||
| for data in dataset.create_dict_iterator(num_epochs=1): | |||
| for data in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| assert [d.decode("utf8") for d in data["text"]] == n_gram_mottos[i], i | |||
| i += 1 | |||
| @@ -79,7 +79,7 @@ def test_corner_cases(): | |||
| try: | |||
| dataset = ds.GeneratorDataset(gen(input_line), column_names=["text"]) | |||
| dataset = dataset.map(operations=text.Ngram(n, l_pad, r_pad, separator=sep), input_columns=["text"]) | |||
| for data in dataset.create_dict_iterator(num_epochs=1): | |||
| for data in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| return [d.decode("utf8") for d in data["text"]] | |||
| except (ValueError, TypeError) as e: | |||
| return str(e) | |||
| @@ -38,7 +38,7 @@ def test_on_tokenized_line(): | |||
| data = data.map(operations=lookup, input_columns=["text"]) | |||
| res = np.array([[10, 1, 11, 1, 12, 1, 15, 1, 13, 1, 14], | |||
| [11, 1, 12, 1, 10, 1, 14, 1, 13, 1, 15]], dtype=np.int32) | |||
| for i, d in enumerate(data.create_dict_iterator(num_epochs=1)): | |||
| for i, d in enumerate(data.create_dict_iterator(num_epochs=1, output_numpy=True)): | |||
| np.testing.assert_array_equal(d["text"], res[i]) | |||
| @@ -56,7 +56,7 @@ def test_on_tokenized_line_with_no_special_tokens(): | |||
| data = data.map(operations=lookup, input_columns=["text"]) | |||
| res = np.array([[8, 0, 9, 0, 10, 0, 13, 0, 11, 0, 12], | |||
| [9, 0, 10, 0, 8, 0, 12, 0, 11, 0, 13]], dtype=np.int32) | |||
| for i, d in enumerate(data.create_dict_iterator(num_epochs=1)): | |||
| for i, d in enumerate(data.create_dict_iterator(num_epochs=1, output_numpy=True)): | |||
| np.testing.assert_array_equal(d["text"], res[i]) | |||
| @@ -107,7 +107,8 @@ def test_normalize_op_c(plot=False): | |||
| data2 = data2.map(operations=decode_op, input_columns=["image"]) | |||
| num_iter = 0 | |||
| for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): | |||
| for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), | |||
| data2.create_dict_iterator(num_epochs=1, output_numpy=True)): | |||
| image_de_normalized = item1["image"] | |||
| image_original = item2["image"] | |||
| image_np_normalized = normalize_np(image_original, mean, std) | |||
| @@ -144,7 +145,8 @@ def test_normalize_op_py(plot=False): | |||
| data2 = data2.map(operations=transform, input_columns=["image"]) | |||
| num_iter = 0 | |||
| for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): | |||
| for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), | |||
| data2.create_dict_iterator(num_epochs=1, output_numpy=True)): | |||
| image_de_normalized = (item1["image"].transpose(1, 2, 0) * 255).astype(np.uint8) | |||
| image_np_normalized = (normalize_np(item2["image"].transpose(1, 2, 0), mean, std) * 255).astype(np.uint8) | |||
| image_original = (item2["image"].transpose(1, 2, 0) * 255).astype(np.uint8) | |||
| @@ -39,7 +39,7 @@ def test_case_0(): | |||
| data1 = data1.batch(2) | |||
| expected_data = np.array([[[1], [2]], [[3], [0]]]) | |||
| for i, data_row in enumerate(data1): | |||
| for i, data_row in enumerate(data1.create_tuple_iterator(output_numpy=True)): | |||
| np.testing.assert_array_equal(data_row[0], expected_data[i]) | |||
| # Restore configuration | |||
| @@ -31,7 +31,7 @@ def test_map_reorder0(): | |||
| data0 = data0.map(operations=(lambda x: x), input_columns="col0", output_columns="out", | |||
| column_order=["col1", "out"]) | |||
| for item in data0.create_tuple_iterator(num_epochs=1): # each data is a dictionary | |||
| for item in data0.create_tuple_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary | |||
| assert item == [np.array(1), np.array(0)] | |||
| @@ -51,7 +51,7 @@ def test_map_reorder1(): | |||
| data2 = ds.zip((data0, data1)) | |||
| data2 = data2.map(operations=(lambda x: x), input_columns="a0", column_order=["b2", "a2", "b1", "a1", "b0", "a0"]) | |||
| for item in data2.create_tuple_iterator(num_epochs=1): | |||
| for item in data2.create_tuple_iterator(num_epochs=1, output_numpy=True): | |||
| assert item == [np.array(2), np.array(2), np.array(1), np.array(1), np.array(0), np.array(0)] | |||
| @@ -67,7 +67,7 @@ def test_shuffle(): | |||
| data2 = ds.TFRecordDataset(FILES, schema=SCHEMA_FILE, shuffle=ds.Shuffle.FILES) | |||
| data2 = data2.shuffle(10000) | |||
| for d1, d2 in zip(data1, data2): | |||
| for d1, d2 in zip(data1.create_tuple_iterator(output_numpy=True), data2.create_tuple_iterator(output_numpy=True)): | |||
| for t1, t2 in zip(d1, d2): | |||
| np.testing.assert_array_equal(t1, t2) | |||
| @@ -77,7 +77,7 @@ def test_shuffle(): | |||
| data2 = ds.TextFileDataset(DATA_ALL_FILE, shuffle=ds.Shuffle.FILES) | |||
| data2 = data2.shuffle(10000) | |||
| for d1, d2 in zip(data1, data2): | |||
| for d1, d2 in zip(data1.create_tuple_iterator(output_numpy=True), data2.create_tuple_iterator(output_numpy=True)): | |||
| for t1, t2 in zip(d1, d2): | |||
| np.testing.assert_array_equal(t1, t2) | |||
| @@ -87,7 +87,7 @@ def test_shuffle(): | |||
| data2 = ds.CLUEDataset(TRAIN_FILE, task='AFQMC', usage='train', shuffle=ds.Shuffle.FILES) | |||
| data2 = data2.shuffle(10000) | |||
| for d1, d2 in zip(data1, data2): | |||
| for d1, d2 in zip(data1.create_tuple_iterator(output_numpy=True), data2.create_tuple_iterator(output_numpy=True)): | |||
| for t1, t2 in zip(d1, d2): | |||
| np.testing.assert_array_equal(t1, t2) | |||
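For the tuple-iterator comparisons above, `output_numpy=True` plays the same role: both iterators yield lists of `np.ndarray`, so element-wise checks need no tensor conversion. A minimal sketch with two hypothetical pipelines built from the same data, rather than the TFRecord/TextFile/CLUE readers the tests actually compare:

```python
import numpy as np
import mindspore.dataset as ds

# Two identical hypothetical pipelines.
data1 = ds.NumpySlicesDataset({"col": np.arange(6, dtype=np.int32)}, shuffle=False)
data2 = ds.NumpySlicesDataset({"col": np.arange(6, dtype=np.int32)}, shuffle=False)

# output_numpy=True makes both tuple iterators yield lists of np.ndarray,
# so the element-wise comparison below works directly.
for d1, d2 in zip(data1.create_tuple_iterator(output_numpy=True),
                  data2.create_tuple_iterator(output_numpy=True)):
    for t1, t2 in zip(d1, d2):
        np.testing.assert_array_equal(t1, t2)
```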
| @@ -56,7 +56,8 @@ def test_pad_op(): | |||
| data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) | |||
| data2 = data2.map(operations=transform, input_columns=["image"]) | |||
| for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): | |||
| for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), | |||
| data2.create_dict_iterator(num_epochs=1, output_numpy=True)): | |||
| c_image = item1["image"] | |||
| py_image = (item2["image"].transpose(1, 2, 0) * 255).astype(np.uint8) | |||
| @@ -94,7 +95,7 @@ def test_pad_grayscale(): | |||
| pad_gray = c_vision.Pad(100, fill_value=(20, 20, 20)) | |||
| data1 = data1.map(operations=pad_gray, input_columns=["image"]) | |||
| dataset_shape_1 = [] | |||
| for item1 in data1.create_dict_iterator(num_epochs=1): | |||
| for item1 in data1.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| c_image = item1["image"] | |||
| dataset_shape_1.append(c_image.shape) | |||
| @@ -108,7 +109,7 @@ def test_pad_grayscale(): | |||
| data2 = data2.map(operations=ctrans, input_columns=["image"]) | |||
| for item2 in data2.create_dict_iterator(num_epochs=1): | |||
| for item2 in data2.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| c_image = item2["image"] | |||
| dataset_shape_2.append(c_image.shape) | |||
| @@ -62,7 +62,7 @@ def test_batch_padding_01(): | |||
| data1 = ds.GeneratorDataset((lambda: gen_2cols(2)), ["col1d", "col2d"]) | |||
| data1 = data1.batch(batch_size=2, drop_remainder=False, pad_info={"col2d": ([2, 2], -2), "col1d": ([2], -1)}) | |||
| data1 = data1.repeat(2) | |||
| for data in data1.create_dict_iterator(num_epochs=1): | |||
| for data in data1.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| np.testing.assert_array_equal([[0, -1], [1, -1]], data["col1d"]) | |||
| np.testing.assert_array_equal([[[100, -2], [200, -2]], [[101, -2], [201, -2]]], data["col2d"]) | |||
| @@ -71,7 +71,7 @@ def test_batch_padding_02(): | |||
| data1 = ds.GeneratorDataset((lambda: gen_2cols(2)), ["col1d", "col2d"]) | |||
| data1 = data1.batch(batch_size=2, drop_remainder=False, pad_info={"col2d": ([1, 2], -2)}) | |||
| data1 = data1.repeat(2) | |||
| for data in data1.create_dict_iterator(num_epochs=1): | |||
| for data in data1.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| np.testing.assert_array_equal([[0], [1]], data["col1d"]) | |||
| np.testing.assert_array_equal([[[100, -2]], [[101, -2]]], data["col2d"]) | |||
| @@ -81,7 +81,7 @@ def test_batch_padding_03(): | |||
| data1 = data1.batch(batch_size=2, drop_remainder=False, pad_info={"col": (None, -1)}) # pad automatically | |||
| data1 = data1.repeat(2) | |||
| res = dict() | |||
| for ind, data in enumerate(data1.create_dict_iterator(num_epochs=1)): | |||
| for ind, data in enumerate(data1.create_dict_iterator(num_epochs=1, output_numpy=True)): | |||
| res[ind] = data["col"].copy() | |||
| np.testing.assert_array_equal(res[0], [[0, -1], [0, 1]]) | |||
| np.testing.assert_array_equal(res[1], [[0, 1, 2, -1], [0, 1, 2, 3]]) | |||
| @@ -93,7 +93,7 @@ def test_batch_padding_04(): | |||
| data1 = ds.GeneratorDataset((lambda: gen_var_cols(2)), ["col1", "col2"]) | |||
| data1 = data1.batch(batch_size=2, drop_remainder=False, pad_info={}) # pad automatically | |||
| data1 = data1.repeat(2) | |||
| for data in data1.create_dict_iterator(num_epochs=1): | |||
| for data in data1.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| np.testing.assert_array_equal(data["col1"], [[0, 0], [0, 1]]) | |||
| np.testing.assert_array_equal(data["col2"], [[100, 0], [100, 101]]) | |||
| @@ -102,7 +102,7 @@ def test_batch_padding_05(): | |||
| data1 = ds.GeneratorDataset((lambda: gen_var_cols_2d(3)), ["col1", "col2"]) | |||
| data1 = data1.batch(batch_size=3, drop_remainder=False, | |||
| pad_info={"col2": ([2, None], -2), "col1": (None, -1)}) # pad automatically | |||
| for data in data1.create_dict_iterator(num_epochs=1): | |||
| for data in data1.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| np.testing.assert_array_equal(data["col1"], [[[0, -1, -1]], [[0, 1, -1]], [[0, 1, 2]]]) | |||
| np.testing.assert_array_equal(data["col2"], [[[100, -2, -2], [-2, -2, -2]], [[100, 101, -2], [-2, -2, -2]], | |||
| [[100, 101, 102], [-2, -2, -2]]]) | |||
| @@ -180,7 +180,7 @@ def test_pad_via_map(): | |||
| data1 = data1.map(operations=(lambda x: np.pad(x, (0, 816))), input_columns="image") | |||
| data1 = data1.batch(batch_size=25, drop_remainder=True) | |||
| res = [] | |||
| for data in data1.create_dict_iterator(num_epochs=1): | |||
| for data in data1.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| res.append(data["image"]) | |||
| return res | |||
| @@ -189,7 +189,7 @@ def test_pad_via_map(): | |||
| data2 = data2.map(operations=(lambda x: x.reshape(-1)), input_columns="image") # reshape to 1d | |||
| data2 = data2.batch(batch_size=25, drop_remainder=True, pad_info={"image": ([3888], 0)}) | |||
| res = [] | |||
| for data in data2.create_dict_iterator(num_epochs=1): | |||
| for data in data2.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| res.append(data["image"]) | |||
| return res | |||
| @@ -30,7 +30,7 @@ def pad_compare(array, pad_shape, pad_value, res): | |||
| data = data.map(operations=ops.PadEnd(pad_shape, pad_value)) | |||
| else: | |||
| data = data.map(operations=ops.PadEnd(pad_shape)) | |||
| for d in data: | |||
| for d in data.create_tuple_iterator(output_numpy=True): | |||
| np.testing.assert_array_equal(res, d[0]) | |||
| @@ -57,7 +57,7 @@ def test_TFRecord_Padded(): | |||
| testsampler = ds.DistributedSampler(num_shards=shard_num, shard_id=i, shuffle=False, num_samples=None) | |||
| concat_ds.use_sampler(testsampler) | |||
| shard_list = [] | |||
| for item in concat_ds.create_dict_iterator(num_epochs=1): | |||
| for item in concat_ds.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| shard_list.append(len(item['image'])) | |||
| verify_list.append(shard_list) | |||
| assert verify_list == result_list | |||
| @@ -80,7 +80,7 @@ def test_GeneratorDataSet_Padded(): | |||
| distributed_sampler = ds.DistributedSampler(num_shards=shard_num, shard_id=i, shuffle=False, num_samples=None) | |||
| data3.use_sampler(distributed_sampler) | |||
| tem_list = [] | |||
| for ele in data3.create_dict_iterator(num_epochs=1): | |||
| for ele in data3.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| tem_list.append(ele['col1'][0]) | |||
| verify_list.append(tem_list) | |||
| @@ -105,7 +105,7 @@ def test_Reapeat_afterPadded(): | |||
| ds3.use_sampler(testsampler) | |||
| repeat_num = 2 | |||
| ds3 = ds3.repeat(repeat_num) | |||
| for item in ds3.create_dict_iterator(num_epochs=1): | |||
| for item in ds3.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| verify_list.append(len(item['image'])) | |||
| assert verify_list == result_list * repeat_num | |||
| @@ -149,7 +149,7 @@ def test_Unevenly_distributed(): | |||
| tem_list = [] | |||
| testsampler = ds.DistributedSampler(num_shards=numShard, shard_id=i, shuffle=False, num_samples=None) | |||
| ds3.use_sampler(testsampler) | |||
| for item in ds3.create_dict_iterator(num_epochs=1): | |||
| for item in ds3.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| tem_list.append(len(item['image'])) | |||
| verify_list.append(tem_list) | |||
| assert verify_list == result_list | |||
| @@ -174,7 +174,7 @@ def test_three_datasets_connected(): | |||
| distributed_sampler = ds.DistributedSampler(num_shards=shard_num, shard_id=i, shuffle=False, num_samples=None) | |||
| data4.use_sampler(distributed_sampler) | |||
| tem_list = [] | |||
| for ele in data4.create_dict_iterator(num_epochs=1): | |||
| for ele in data4.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| tem_list.append(ele['col1'][0]) | |||
| verify_list.append(tem_list) | |||
| @@ -232,7 +232,7 @@ def test_imagefolder_padded(): | |||
| assert sum([1 for _ in data3]) == 10 | |||
| verify_list = [] | |||
| for ele in data3.create_dict_iterator(num_epochs=1): | |||
| for ele in data3.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| verify_list.append(len(ele['image'])) | |||
| assert verify_list[8] == 1 | |||
| assert verify_list[9] == 6 | |||
| @@ -259,7 +259,7 @@ def test_imagefolder_padded_with_decode(): | |||
| data3.use_sampler(testsampler) | |||
| data3 = data3.map(operations=V_C.Decode(), input_columns="image") | |||
| shard_sample_count = 0 | |||
| for ele in data3.create_dict_iterator(num_epochs=1): | |||
| for ele in data3.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| print("label: {}".format(ele['label'])) | |||
| count += 1 | |||
| shard_sample_count += 1 | |||
| @@ -289,7 +289,7 @@ def test_imagefolder_padded_with_decode_and_get_dataset_size(): | |||
| shard_dataset_size = data3.get_dataset_size() | |||
| data3 = data3.map(operations=V_C.Decode(), input_columns="image") | |||
| shard_sample_count = 0 | |||
| for ele in data3.create_dict_iterator(num_epochs=1): | |||
| for ele in data3.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| print("label: {}".format(ele['label'])) | |||
| count += 1 | |||
| shard_sample_count += 1 | |||
| @@ -313,7 +313,7 @@ def test_more_shard_padded(): | |||
| tem_list = [] | |||
| testsampler = ds.DistributedSampler(num_shards=numShard, shard_id=i, shuffle=False, num_samples=None) | |||
| data3.use_sampler(testsampler) | |||
| for item in data3.create_dict_iterator(num_epochs=1): | |||
| for item in data3.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| tem_list.append(item['col1']) | |||
| vertifyList.append(tem_list) | |||
| @@ -339,7 +339,7 @@ def test_more_shard_padded(): | |||
| tem_list = [] | |||
| testsampler = ds.DistributedSampler(num_shards=numShard, shard_id=i, shuffle=False, num_samples=None) | |||
| ds3.use_sampler(testsampler) | |||
| for item in ds3.create_dict_iterator(num_epochs=1): | |||
| for item in ds3.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| tem_list.append(len(item['image'])) | |||
| vertifyList1.append(tem_list) | |||
| @@ -426,7 +426,7 @@ def test_Mindrecord_Padded(remove_mindrecord_file): | |||
| testsampler = ds.DistributedSampler(num_shards=shard_num, shard_id=i, shuffle=False, num_samples=None) | |||
| ds2.use_sampler(testsampler) | |||
| tem_list = [] | |||
| for ele in ds2.create_dict_iterator(num_epochs=1): | |||
| for ele in ds2.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| tem_list.append(int(ele['file_name'].tostring().decode().lstrip('image_').rstrip('.jpg'))) | |||
| result_list.append(tem_list) | |||
| assert result_list == verify_list | |||
| @@ -440,7 +440,7 @@ def test_clue_padded_and_skip_with_0_samples(): | |||
| data = ds.CLUEDataset(TRAIN_FILE, task='AFQMC', usage='train') | |||
| count = 0 | |||
| for _ in data.create_dict_iterator(num_epochs=1): | |||
| for _ in data.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| count += 1 | |||
| assert count == 3 | |||
| @@ -456,20 +456,20 @@ def test_clue_padded_and_skip_with_0_samples(): | |||
| dataset.use_sampler(testsampler) | |||
| assert dataset.get_dataset_size() == 2 | |||
| count = 0 | |||
| for data in dataset.create_dict_iterator(num_epochs=1): | |||
| for data in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| count += 1 | |||
| assert count == 2 | |||
| dataset = dataset.skip(count=2) # dataset2 has none samples | |||
| count = 0 | |||
| for data in dataset.create_dict_iterator(num_epochs=1): | |||
| for data in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| count += 1 | |||
| assert count == 0 | |||
| with pytest.raises(ValueError, match="There is no samples in the "): | |||
| dataset = dataset.concat(data_copy1) | |||
| count = 0 | |||
| for data in dataset.create_dict_iterator(num_epochs=1): | |||
| for data in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| count += 1 | |||
| assert count == 2 | |||
| @@ -24,7 +24,8 @@ import mindspore.dataset.text as text | |||
| def compare(in1, in2, length, out1, out2): | |||
| data = ds.NumpySlicesDataset({"s1": [in1], "s2": [in2]}) | |||
| data = data.map(operations=text.TruncateSequencePair(length), input_columns=["s1", "s2"]) | |||
| for d in data.create_dict_iterator(num_epochs=1): | |||
| data = data.map(input_columns=["s1", "s2"], operations=text.TruncateSequencePair(length)) | |||
| for d in data.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| np.testing.assert_array_equal(out1, d["s1"]) | |||
| np.testing.assert_array_equal(out2, d["s2"]) | |||
| @@ -36,7 +36,7 @@ def test_case_0(): | |||
| data1 = data1.map(operations=(lambda x: x + x), input_columns="col0", output_columns="out") | |||
| i = 0 | |||
| for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary | |||
| for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary | |||
| # In this test, the dataset is 2x2 sequential tensors | |||
| golden = np.array([[i * 2, (i + 1) * 2], [(i + 2) * 2, (i + 3) * 2]]) | |||
| np.testing.assert_array_equal(item["out"], golden) | |||
| @@ -57,7 +57,7 @@ def test_case_1(): | |||
| column_order=["out0", "out1"]) | |||
| i = 0 | |||
| for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary | |||
| for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary | |||
| # In this test, the dataset is 2x2 sequential tensors | |||
| golden = np.array([[i, i + 1], [i + 2, i + 3]]) | |||
| np.testing.assert_array_equal(item["out0"], golden) | |||
| @@ -81,7 +81,7 @@ def test_case_2(): | |||
| column_order=["out"]) | |||
| i = 0 | |||
| for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary | |||
| for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary | |||
| # In this test, the dataset is 2x2 sequential tensors | |||
| golden = np.array([[i * 2, (i + 1) * 2], [(i + 2) * 2, (i + 3) * 2]]) | |||
| np.testing.assert_array_equal(item["out"], golden) | |||
| @@ -103,7 +103,7 @@ def test_case_3(): | |||
| output_columns=["out0", "out1", "out2"], column_order=["out0", "out1", "out2"]) | |||
| i = 0 | |||
| for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary | |||
| for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary | |||
| # In this test, the dataset is 2x2 sequential tensors | |||
| golden = np.array([[i, i + 1], [i + 2, i + 3]]) | |||
| np.testing.assert_array_equal(item["out0"], golden) | |||
| @@ -130,7 +130,7 @@ def test_case_4(): | |||
| column_order=["out0", "out1", "out2"]) | |||
| i = 0 | |||
| for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary | |||
| for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary | |||
| # In this test, the dataset is 2x2 sequential tensors | |||
| golden = np.array([[i, i + 1], [i + 2, i + 3]]) | |||
| np.testing.assert_array_equal(item["out0"], golden) | |||
| @@ -157,7 +157,7 @@ def test_case_5(): | |||
| data1 = data1.map(operations=func_5, input_columns="col0", output_columns="out") | |||
| for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary | |||
| for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary | |||
| # In this test, the dataset is 2x2 sequential tensors | |||
| golden = np.array([[1, 1], [1, 1]]) | |||
| np.testing.assert_array_equal(item["out"], golden) | |||
| @@ -175,7 +175,7 @@ def test_case_6(): | |||
| data1 = data1.map(operations=[(lambda x: x + x), (lambda x: x + x)], input_columns="col0", output_columns="out") | |||
| i = 0 | |||
| for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary | |||
| for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary | |||
| # In this test, the dataset is 2x2 sequential tensors | |||
| golden = np.array([[i * 4, (i + 1) * 4], [(i + 2) * 4, (i + 3) * 4]]) | |||
| np.testing.assert_array_equal(item["out"], golden) | |||
| @@ -195,7 +195,7 @@ def test_case_7(): | |||
| num_parallel_workers=4, python_multiprocessing=True) | |||
| i = 0 | |||
| for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary | |||
| for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary | |||
| # In this test, the dataset is 2x2 sequential tensors | |||
| golden = np.array([[i * 2, (i + 1) * 2], [(i + 2) * 2, (i + 3) * 2]]) | |||
| np.testing.assert_array_equal(item["out"], golden) | |||
| @@ -219,7 +219,7 @@ def test_case_8(): | |||
| python_multiprocessing=True) | |||
| i = 0 | |||
| for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary | |||
| for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary | |||
| # In this test, the dataset is 2x2 sequential tensors | |||
| golden = np.array([[i, i + 1], [i + 2, i + 3]]) | |||
| np.testing.assert_array_equal(item["out0"], golden) | |||
| @@ -243,7 +243,7 @@ def test_case_9(): | |||
| output_columns="out", num_parallel_workers=4, python_multiprocessing=True) | |||
| i = 0 | |||
| for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary | |||
| for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary | |||
| # In this test, the dataset is 2x2 sequential tensors | |||
| golden = np.array([[i * 2 + 3, (i + 1) * 2 + 3], [(i + 2) * 2 + 3, (i + 3) * 2 + 3]]) | |||
| np.testing.assert_array_equal(item["out"], golden) | |||
| @@ -41,7 +41,7 @@ def test_whitespace_tokenizer_ch(): | |||
| tokenizer = text.PythonTokenizer(my_tokenizer) | |||
| dataset = dataset.map(operations=tokenizer, num_parallel_workers=1) | |||
| tokens = [] | |||
| for i in dataset.create_dict_iterator(num_epochs=1): | |||
| for i in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): | |||
| s = text.to_str(i['text']).tolist() | |||
| tokens.append(s) | |||
| logger.info("The out tokens is : {}".format(tokens)) | |||
| @@ -59,7 +59,8 @@ def test_random_affine_op(plot=False): | |||
| image_affine = [] | |||
| image_original = [] | |||
| for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): | |||
| for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), | |||
| data2.create_dict_iterator(num_epochs=1, output_numpy=True)): | |||
| image1 = (item1["image"].transpose(1, 2, 0) * 255).astype(np.uint8) | |||
| image2 = (item2["image"].transpose(1, 2, 0) * 255).astype(np.uint8) | |||
| image_affine.append(image1) | |||
| @@ -92,7 +93,8 @@ def test_random_affine_op_c(plot=False): | |||
| image_affine = [] | |||
| image_original = [] | |||
| for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): | |||
| for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), | |||
| data2.create_dict_iterator(num_epochs=1, output_numpy=True)): | |||
| image1 = item1["image"] | |||
| image2 = item2["image"] | |||
| image_affine.append(image1) | |||
| @@ -58,7 +58,8 @@ def test_random_apply_op(plot=False): | |||
| image_apply = [] | |||
| image_original = [] | |||
| for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): | |||
| for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), | |||
| data2.create_dict_iterator(num_epochs=1, output_numpy=True)): | |||
| image1 = (item1["image"].transpose(1, 2, 0) * 255).astype(np.uint8) | |||
| image2 = (item2["image"].transpose(1, 2, 0) * 255).astype(np.uint8) | |||
| image_apply.append(image1) | |||
| @@ -55,7 +55,8 @@ def test_random_choice_op(plot=False): | |||
| image_choice = [] | |||
| image_original = [] | |||
| for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): | |||
| for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), | |||
| data2.create_dict_iterator(num_epochs=1, output_numpy=True)): | |||
| image1 = (item1["image"].transpose(1, 2, 0) * 255).astype(np.uint8) | |||
| image2 = (item2["image"].transpose(1, 2, 0) * 255).astype(np.uint8) | |||
| image_choice.append(image1) | |||
| @@ -94,7 +95,8 @@ def test_random_choice_comp(plot=False): | |||
| image_choice = [] | |||
| image_original = [] | |||
| for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): | |||
| for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), | |||
| data2.create_dict_iterator(num_epochs=1, output_numpy=True)): | |||
| image1 = (item1["image"].transpose(1, 2, 0) * 255).astype(np.uint8) | |||
| image2 = (item2["image"].transpose(1, 2, 0) * 255).astype(np.uint8) | |||
| image_choice.append(image1) | |||