
!5801 [MD][Feature] minddata iterator output ms_tensor

Merge pull request !5801 from xiefangqi/md_modify_output_tensor
Tag: v1.0.0
mindspore-ci-bot (Gitee), 5 years ago
Commit: d8c09067ce
100 changed files with 681 additions and 592 deletions
1. +18 -13  mindspore/dataset/engine/datasets.py
2. +12 -7  mindspore/dataset/engine/iterators.py
3. +2 -3  mindspore/train/dataset_helper.py
4. +2 -2  model_zoo/official/cv/faster_rcnn/eval.py
5. +1 -1  model_zoo/official/cv/maskrcnn/eval.py
6. +1 -1  model_zoo/official/cv/mobilenetv2/src/dataset.py
7. +1 -1  model_zoo/official/cv/resnext50/eval.py
8. +1 -1  model_zoo/official/cv/ssd/eval.py
9. +1 -1  model_zoo/official/cv/vgg16/eval.py
10. +3 -3  model_zoo/official/cv/yolov3_darknet53/eval.py
11. +1 -1  model_zoo/official/cv/yolov3_darknet53/train.py
12. +3 -3  model_zoo/official/cv/yolov3_darknet53_quant/eval.py
13. +1 -1  model_zoo/official/cv/yolov3_darknet53_quant/train.py
14. +1 -1  model_zoo/official/cv/yolov3_resnet18/eval.py
15. +1 -1  model_zoo/official/gnn/bgcf/train.py
16. +1 -2  model_zoo/official/nlp/bert/run_classifier.py
17. +1 -2  model_zoo/official/nlp/bert/run_ner.py
18. +1 -1  model_zoo/official/nlp/bert/run_squad.py
19. +2 -2  model_zoo/official/nlp/mass/src/transformer/infer_mass.py
20. +1 -2  model_zoo/official/nlp/tinybert/run_task_distill.py
21. +1 -1  model_zoo/official/nlp/tinybert/src/utils.py
22. +1 -1  model_zoo/official/nlp/transformer/eval.py
23. +1 -1  model_zoo/utils/cv_to_mindrecord/Caltech-UCSD-Birds-200-2011/create_dataset.py
24. +1 -1  model_zoo/utils/graph_to_mindrecord/reader.py
25. +1 -1  model_zoo/utils/nlp_to_mindrecord/aclImdb/create_dataset.py
26. +1 -1  model_zoo/utils/nlp_to_mindrecord/aclImdb_preprocess/create_dataset.py
27. +1 -1  tests/st/ops/ascend/test_tdt_data_ms.py
28. +2 -2  tests/st/probability/test_bnn_layer.py
29. +1 -1  tests/st/probability/test_gpu_svi_cvae.py
30. +1 -1  tests/st/probability/test_gpu_svi_vae.py
31. +2 -2  tests/st/probability/test_transform_bnn_layer.py
32. +2 -2  tests/st/probability/test_transform_bnn_model.py
33. +1 -1  tests/st/probability/test_uncertainty.py
34. +2 -2  tests/st/pynative/test_pynative_resnet50.py
35. +4 -2  tests/ut/python/dataset/test_HWC2CHW.py
36. +4 -2  tests/ut/python/dataset/test_apply.py
37. +15 -15  tests/ut/python/dataset/test_autocontrast.py
38. +10 -5  tests/ut/python/dataset/test_bounding_box_augment.py
39. +10 -10  tests/ut/python/dataset/test_bucket_batch_by_length.py
40. +1 -1  tests/ut/python/dataset/test_c_compose.py
41. +1 -1  tests/ut/python/dataset/test_c_random_apply.py
42. +1 -1  tests/ut/python/dataset/test_c_random_choice.py
43. +5 -3  tests/ut/python/dataset/test_center_crop.py
44. +43 -33  tests/ut/python/dataset/test_concat.py
45. +6 -6  tests/ut/python/dataset/test_concatenate_op.py
46. +4 -4  tests/ut/python/dataset/test_config.py
47. +6 -3  tests/ut/python/dataset/test_cut_out.py
48. +24 -24  tests/ut/python/dataset/test_cutmix_batch_op.py
49. +17 -17  tests/ut/python/dataset/test_dataset_numpy_slices.py
50. +2 -2  tests/ut/python/dataset/test_datasets_celeba.py
51. +8 -7  tests/ut/python/dataset/test_datasets_cifarop.py
52. +21 -21  tests/ut/python/dataset/test_datasets_clue.py
53. +4 -4  tests/ut/python/dataset/test_datasets_coco.py
54. +13 -13  tests/ut/python/dataset/test_datasets_csv.py
55. +27 -27  tests/ut/python/dataset/test_datasets_generator.py
56. +3 -3  tests/ut/python/dataset/test_datasets_imagefolder.py
57. +5 -5  tests/ut/python/dataset/test_datasets_manifestop.py
58. +5 -5  tests/ut/python/dataset/test_datasets_mnist.py
59. +8 -8  tests/ut/python/dataset/test_datasets_sharding.py
60. +12 -12  tests/ut/python/dataset/test_datasets_textfileop.py
61. +8 -8  tests/ut/python/dataset/test_datasets_tfrecord.py
62. +4 -4  tests/ut/python/dataset/test_datasets_voc.py
63. +4 -2  tests/ut/python/dataset/test_decode.py
64. +1 -1  tests/ut/python/dataset/test_duplicate_op.py
65. +27 -27  tests/ut/python/dataset/test_epoch_ctrl.py
66. +15 -15  tests/ut/python/dataset/test_equalize.py
67. +4 -4  tests/ut/python/dataset/test_fill_op.py
68. +18 -18  tests/ut/python/dataset/test_filterop.py
69. +2 -1  tests/ut/python/dataset/test_five_crop.py
70. +2 -2  tests/ut/python/dataset/test_flat_map.py
71. +4 -4  tests/ut/python/dataset/test_from_dataset.py
72. +1 -1  tests/ut/python/dataset/test_graphdata.py
73. +1 -1  tests/ut/python/dataset/test_graphdata_distributed.py
74. +12 -12  tests/ut/python/dataset/test_invert.py
75. +47 -5  tests/ut/python/dataset/test_iterator.py
76. +2 -1  tests/ut/python/dataset/test_linear_transformation.py
77. +1 -1  tests/ut/python/dataset/test_mask_op.py
78. +48 -47  tests/ut/python/dataset/test_minddataset.py
79. +4 -4  tests/ut/python/dataset/test_minddataset_exception.py
80. +2 -2  tests/ut/python/dataset/test_minddataset_multi_images.py
81. +1 -1  tests/ut/python/dataset/test_minddataset_multi_images_and_ndarray.py
82. +11 -11  tests/ut/python/dataset/test_minddataset_padded.py
83. +28 -28  tests/ut/python/dataset/test_minddataset_sampler.py
84. +5 -3  tests/ut/python/dataset/test_mixup_label_smoothing.py
85. +32 -32  tests/ut/python/dataset/test_mixup_op.py
86. +3 -3  tests/ut/python/dataset/test_ngram_op.py
87. +2 -2  tests/ut/python/dataset/test_nlp.py
88. +4 -2  tests/ut/python/dataset/test_normalizeOp.py
89. +1 -1  tests/ut/python/dataset/test_opt.py
90. +5 -5  tests/ut/python/dataset/test_opt_pass.py
91. +4 -3  tests/ut/python/dataset/test_pad.py
92. +7 -7  tests/ut/python/dataset/test_pad_batch.py
93. +1 -1  tests/ut/python/dataset/test_pad_end_op.py
94. +15 -15  tests/ut/python/dataset/test_paddeddataset.py
95. +2 -1  tests/ut/python/dataset/test_pair_truncate.py
96. +10 -10  tests/ut/python/dataset/test_pyfunc.py
97. +1 -1  tests/ut/python/dataset/test_python_tokenizer.py
98. +4 -2  tests/ut/python/dataset/test_random_affine.py
99. +2 -1  tests/ut/python/dataset/test_random_apply.py
100. +4 -2  tests/ut/python/dataset/test_random_choice.py

mindspore/dataset/engine/datasets.py  (+18 -13)

@@ -394,7 +394,7 @@ class Dataset:
 logger.error("func must be a function.")
 raise TypeError("func must be a function.")

-for row_data in self:
+for row_data in self.create_tuple_iterator(output_numpy=True):
 if dataset is None:
 dataset = func(row_data)
 else:
@@ -1133,7 +1133,7 @@ class Dataset:

 return SaveOp(self).save(file_names, file_type)

-def create_tuple_iterator(self, columns=None, num_epochs=-1):
+def create_tuple_iterator(self, columns=None, num_epochs=-1, output_numpy=False):
 """
 Create an Iterator over the dataset. The data retrieved will be a list of ndarray of data.

@@ -1143,8 +1143,11 @@
 Args:
 columns (list[str], optional): List of columns to be used to specify the order of columns
 (default=None, means all columns).
-num_epochs (int, optional): max epochs that iterator can be iteratered,
-if num_epochs = -1, iterator can be iteratered infinit epochs (default=-1)
+num_epochs (int, optional): maximum epochs that iterator can be iteratered,
+if num_epochs = -1, iterator can be iteratered infinite epochs (default=-1)
+output_numpy (bool, optional): Whether or not to output NumPy datatype,
+if output_numpy=False, iterator will output MSTensor (default=False).

 Returns:
 Iterator, list of ndarray.
@@ -1161,9 +1164,9 @@
 """
 if self._noop_mode():
 return DummyIterator(self, 'tuple')
-return TupleIterator(self, columns, num_epochs)
+return TupleIterator(self, columns, num_epochs, output_numpy)

-def create_dict_iterator(self, num_epochs=-1):
+def create_dict_iterator(self, num_epochs=-1, output_numpy=False):
 """
 Create an Iterator over the dataset.

@@ -1171,8 +1174,10 @@
 of the columns in the dictionary may not be the same as the original order.

 Args:
-num_epochs (int, optional): max epochs that iterator can be iteratered,
-if num_epochs = -1, iterator can be iteratered infinit epochs (default=-1)
+num_epochs (int, optional): maximum epochs that iterator can be iteratered,
+if num_epochs = -1, iterator can be iteratered infinite epochs (default=-1)
+output_numpy (bool, optional): Whether or not to output NumPy datatype,
+if output_numpy=False, iterator will output MSTensor (default=False).

 Returns:
 Iterator, dictionary of column_name-ndarray pair.
@@ -1190,7 +1195,7 @@
 """
 if self._noop_mode():
 return DummyIterator(self, 'dict')
-return DictIterator(self, num_epochs)
+return DictIterator(self, num_epochs, output_numpy)

 def __iter__(self):
 """Create an Iterator over the dataset."""
@@ -1617,7 +1622,7 @@ class BucketBatchByLengthDataset(DatasetOp):
 """
 if self.dataset_size is None:
 num_rows = 0
-for _ in self.create_dict_iterator(num_epochs=1):
+for _ in self.create_dict_iterator(num_epochs=1, output_numpy=True):
 num_rows += 1
 self.dataset_size = num_rows
 return self.dataset_size
@@ -2163,7 +2168,7 @@ class FilterDataset(DatasetOp):
 """
 if self.dataset_size is None:
 num_rows = 0
-for _ in self.create_dict_iterator(num_epochs=1):
+for _ in self.create_dict_iterator(num_epochs=1, output_numpy=True):
 num_rows += 1
 self.dataset_size = num_rows
 return self.dataset_size
@@ -2400,7 +2405,7 @@ class ConcatDataset(DatasetOp):
 """
 if self.dataset_size is None:
 num_rows = 0
-for _ in self.create_dict_iterator(num_epochs=1):
+for _ in self.create_dict_iterator(num_epochs=1, output_numpy=True):
 num_rows += 1
 self.dataset_size = num_rows
 return self.dataset_size
@@ -3495,7 +3500,7 @@ class GeneratorDataset(MappableDataset):
 self.dataset_size = rows_from_sampler
 else:
 num_rows = 0
-for _ in self.create_dict_iterator(num_epochs=1):
+for _ in self.create_dict_iterator(num_epochs=1, output_numpy=True):
 num_rows += 1
 self.dataset_size = num_rows
 return self.dataset_size
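
To illustrate the new default behaviour of both iterators, a minimal sketch (assuming MindSpore with this change applied; the throwaway NumpySlicesDataset and column name are made up for illustration and do not appear in the PR):

    import numpy as np
    import mindspore.dataset as ds

    # Toy in-memory pipeline, purely for illustration.
    data = ds.NumpySlicesDataset({"col1": np.arange(5, dtype=np.int32)}, shuffle=False)

    # Default after this change: each value is a MindSpore Tensor (MSTensor).
    for row in data.create_dict_iterator(num_epochs=1):
        print(type(row["col1"]))   # mindspore Tensor

    # Opting back into NumPy output, e.g. for np.testing comparisons:
    for row in data.create_dict_iterator(num_epochs=1, output_numpy=True):
        print(type(row["col1"]))   # numpy.ndarray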


mindspore/dataset/engine/iterators.py  (+12 -7)

@@ -67,8 +67,9 @@ class Iterator:
 dataset: Dataset to be iterated over
 """

-def __init__(self, dataset, num_epochs=-1):
+def __init__(self, dataset, num_epochs=-1, output_numpy=False):
 self.num_epochs = num_epochs
+self.output_numpy = output_numpy
 ITERATORS_LIST.append(weakref.ref(self))
 # create a copy of tree and work on it.
 self.dataset = copy.deepcopy(dataset)
@@ -305,8 +306,8 @@ class DictIterator(Iterator):
 """
 The derived class of Iterator with dict type.
 """
-def __init__(self, dataset, num_epochs=-1):
-    super().__init__(dataset, num_epochs)
+def __init__(self, dataset, num_epochs=-1, output_numpy=False):
+    super().__init__(dataset, num_epochs, output_numpy)
 self.depipeline.LaunchTreeExec()

 def check_node_type(self, node):
@@ -323,7 +324,9 @@ class DictIterator(Iterator):
 Dict, the next record in the dataset.
 """

-return {k: v.as_array() for k, v in self.depipeline.GetNextAsMap().items()}
+if self.output_numpy:
+    return {k: v.as_array() for k, v in self.depipeline.GetNextAsMap().items()}
+return {k: Tensor(v.as_array()) for k, v in self.depipeline.GetNextAsMap().items()}


class TupleIterator(Iterator):
@@ -333,12 +336,12 @@ class TupleIterator(Iterator):
 def check_node_type(self, node):
 pass

-def __init__(self, dataset, columns=None, num_epochs=-1):
+def __init__(self, dataset, columns=None, num_epochs=-1, output_numpy=False):
 if columns is not None:
 if not isinstance(columns, list):
 columns = [columns]
 dataset = dataset.project(columns)
-super().__init__(dataset, num_epochs)
+super().__init__(dataset, num_epochs, output_numpy)
 self.depipeline.LaunchTreeExec()

 def __iter__(self):
@@ -352,7 +355,9 @@ class TupleIterator(Iterator):
 List, the next record in the dataset.
 """

-return [t.as_array() for t in self.depipeline.GetNextAsList()]
+if self.output_numpy:
+    return [t.as_array() for t in self.depipeline.GetNextAsList()]
+return [Tensor(t.as_array()) for t in self.depipeline.GetNextAsList()]


class DummyIterator():
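
For code that needs ndarrays only in a few places, the pattern the updated tests use is to keep the default Tensor output and call .asnumpy() at the point of use. A small sketch (the pipeline and column name here are hypothetical):

    import numpy as np
    import mindspore.dataset as ds

    data = ds.NumpySlicesDataset({"image": np.ones((4, 3), dtype=np.float32)}, shuffle=False)

    for row in data.create_dict_iterator(num_epochs=1):
        img = row["image"].asnumpy()   # Tensor -> numpy.ndarray for NumPy post-processing
        assert isinstance(img, np.ndarray)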


mindspore/train/dataset_helper.py  (+2 -3)

@@ -18,8 +18,7 @@ import os

 from mindspore._checkparam import check_bool, check_int
 from .. import context, nn
-from ._utils import _exec_datagraph, _get_types_and_shapes, _to_tensor, \
-    _construct_tensor_list
+from ._utils import _exec_datagraph, _get_types_and_shapes, _construct_tensor_list
 from ..nn.wrap import GetNextSingleOp
 from ..parallel._utils import _get_device_num, _get_global_rank, _need_to_full, _to_full_shapes
 from ..ops import operations as P
@@ -297,4 +296,4 @@ class _DatasetIterNormal:

 def __next__(self):
 data = self.iter.__next__()
-return _to_tensor(data)
+return data

model_zoo/official/cv/faster_rcnn/eval.py  (+2 -2)

@@ -19,7 +19,7 @@ import argparse
 import time
 import numpy as np
 from pycocotools.coco import COCO
-from mindspore import context, Tensor
+from mindspore import context
 from mindspore.train.serialization import load_checkpoint, load_param_into_net
 from mindspore.common import set_seed

@@ -68,7 +68,7 @@ def FasterRcnn_eval(dataset_path, ckpt_path, ann_file):

 start = time.time()
 # run net
-output = net(Tensor(img_data), Tensor(img_metas), Tensor(gt_bboxes), Tensor(gt_labels), Tensor(gt_num))
+output = net(img_data, img_metas, gt_bboxes, gt_labels, gt_num)
 end = time.time()
 print("Iter {} cost time {}".format(eval_iter, end - start))




model_zoo/official/cv/maskrcnn/eval.py  (+1 -1)

@@ -57,7 +57,7 @@ def MaskRcnn_eval(dataset_path, ckpt_path, ann_file):
 print("total images num: ", total)
 print("Processing, please wait a moment.")
 max_num = 128
-for data in ds.create_dict_iterator():
+for data in ds.create_dict_iterator(output_numpy=True):
 eval_iter = eval_iter + 1

 img_data = data['image']


model_zoo/official/cv/mobilenetv2/src/dataset.py  (+1 -1)

@@ -109,7 +109,7 @@ def extract_features(net, dataset_path, config):
 config=config,
 repeat_num=1)
 step_size = dataset.get_dataset_size()
-pbar = tqdm(list(dataset.create_dict_iterator()))
+pbar = tqdm(list(dataset.create_dict_iterator(output_numpy=True)))
 model = Model(net)
 i = 0
 for data in pbar:


model_zoo/official/cv/resnext50/eval.py  (+1 -1)

@@ -146,7 +146,7 @@ def test(cloud_args=None):
 per_batch_size=args.per_batch_size,
 max_epoch=1, rank=args.rank, group_size=args.group_size,
 mode='eval')
-eval_dataloader = de_dataset.create_tuple_iterator()
+eval_dataloader = de_dataset.create_tuple_iterator(output_numpy=True)
 network = get_network(args.backbone, args.num_classes, platform=args.platform)
 if network is None:
 raise NotImplementedError('not implement {}'.format(args.backbone))


model_zoo/official/cv/ssd/eval.py  (+1 -1)

@@ -44,7 +44,7 @@ def ssd_eval(dataset_path, ckpt_path):
 print("\n========================================\n")
 print("total images num: ", total)
 print("Processing, please wait a moment.")
-for data in ds.create_dict_iterator():
+for data in ds.create_dict_iterator(output_numpy=True):
 img_id = data['img_id']
 img_np = data['image']
 image_shape = data['image_shape']


model_zoo/official/cv/vgg16/eval.py  (+1 -1)

@@ -159,7 +159,7 @@ def test(cloud_args=None):

 for model in args.models:
 dataset = classification_dataset(args.data_path, args.image_size, args.per_batch_size, mode='eval')
-eval_dataloader = dataset.create_tuple_iterator()
+eval_dataloader = dataset.create_tuple_iterator(output_numpy=True)
 network = vgg16(args.num_classes, args, phase="test")

 # pre_trained


model_zoo/official/cv/yolov3_darknet53/eval.py  (+3 -3)

@@ -300,10 +300,10 @@ def test():
 input_shape = Tensor(tuple(config.test_img_shape), ms.float32)
 args.logger.info('Start inference....')
 for i, data in enumerate(ds.create_dict_iterator()):
-image = Tensor(data["image"])
+image = data["image"]

-image_shape = Tensor(data["image_shape"])
-image_id = Tensor(data["img_id"])
+image_shape = data["image_shape"]
+image_id = data["img_id"]

 prediction = network(image, input_shape)
 output_big, output_me, output_small = prediction


model_zoo/official/cv/yolov3_darknet53/train.py  (+1 -1)

@@ -299,7 +299,7 @@ def train():

 old_progress = -1
 t_end = time.time()
-data_loader = ds.create_dict_iterator()
+data_loader = ds.create_dict_iterator(output_numpy=True)

 for i, data in enumerate(data_loader):
 images = data["image"]


model_zoo/official/cv/yolov3_darknet53_quant/eval.py  (+3 -3)

@@ -306,10 +306,10 @@ def test():
 input_shape = Tensor(tuple(config.test_img_shape), ms.float32)
 args.logger.info('Start inference....')
 for i, data in enumerate(ds.create_dict_iterator()):
-image = Tensor(data["image"])
+image = data["image"]

-image_shape = Tensor(data["image_shape"])
-image_id = Tensor(data["img_id"])
+image_shape = data["image_shape"]
+image_id = data["img_id"]

 prediction = network(image, input_shape)
 output_big, output_me, output_small = prediction


model_zoo/official/cv/yolov3_darknet53_quant/train.py  (+1 -1)

@@ -303,7 +303,7 @@ def train():

 old_progress = -1
 t_end = time.time()
-data_loader = ds.create_dict_iterator()
+data_loader = ds.create_dict_iterator(output_numpy=True)

 shape_record = ShapeRecord()
 for i, data in enumerate(data_loader):


model_zoo/official/cv/yolov3_resnet18/eval.py  (+1 -1)

@@ -44,7 +44,7 @@ def yolo_eval(dataset_path, ckpt_path):
 print("\n========================================\n")
 print("total images num: ", total)
 print("Processing, please wait a moment.")
-for data in ds.create_dict_iterator():
+for data in ds.create_dict_iterator(output_numpy=True):
 img_np = data['image']
 image_shape = data['image_shape']
 annotation = data['annotation']


model_zoo/official/gnn/bgcf/train.py  (+1 -1)

@@ -52,7 +52,7 @@ def train_and_eval():

 eval_class = BGCFEvaluate(parser, train_graph, test_graph, parser.Ks)

-itr = train_ds.create_dict_iterator(parser.num_epoch)
+itr = train_ds.create_dict_iterator(parser.num_epoch, output_numpy=True)
 num_iter = int(num_pairs / parser.batch_pairs)

 for _epoch in range(1, parser.num_epoch + 1):


model_zoo/official/nlp/bert/run_classifier.py  (+1 -2)

@@ -29,7 +29,6 @@ from mindspore import context
 from mindspore import log as logger
 from mindspore.nn.wrap.loss_scale import DynamicLossScaleUpdateCell
 from mindspore.nn.optim import AdamWeightDecay, Lamb, Momentum
-from mindspore.common.tensor import Tensor
 from mindspore.train.model import Model
 from mindspore.train.callback import CheckpointConfig, ModelCheckpoint, TimeMonitor
 from mindspore.train.serialization import load_checkpoint, load_param_into_net
@@ -123,7 +122,7 @@ def do_eval(dataset=None, network=None, num_class=2, assessment_method="accuracy
 for data in dataset.create_dict_iterator():
 input_data = []
 for i in columns_list:
-input_data.append(Tensor(data[i]))
+input_data.append(data[i])
 input_ids, input_mask, token_type_id, label_ids = input_data
 logits = model.predict(input_ids, input_mask, token_type_id, label_ids)
 callback.update(logits, label_ids)


model_zoo/official/nlp/bert/run_ner.py  (+1 -2)

@@ -30,7 +30,6 @@ from mindspore import context
 from mindspore import log as logger
 from mindspore.nn.wrap.loss_scale import DynamicLossScaleUpdateCell
 from mindspore.nn.optim import AdamWeightDecay, Lamb, Momentum
-from mindspore.common.tensor import Tensor
 from mindspore.train.model import Model
 from mindspore.train.callback import CheckpointConfig, ModelCheckpoint, TimeMonitor
 from mindspore.train.serialization import load_checkpoint, load_param_into_net
@@ -132,7 +131,7 @@ def do_eval(dataset=None, network=None, use_crf="", num_class=2, assessment_meth
 for data in dataset.create_dict_iterator():
 input_data = []
 for i in columns_list:
-input_data.append(Tensor(data[i]))
+input_data.append(data[i])
 input_ids, input_mask, token_type_id, label_ids = input_data
 logits = model.predict(input_ids, input_mask, token_type_id, label_ids)
 callback.update(logits, label_ids)


model_zoo/official/nlp/bert/run_squad.py  (+1 -1)

@@ -112,7 +112,7 @@ def do_eval(dataset=None, vocab_file="", eval_json="", load_checkpoint_path="",
 for data in dataset.create_dict_iterator():
 input_data = []
 for i in columns_list:
-input_data.append(Tensor(data[i]))
+input_data.append(data[i])
 input_ids, input_mask, segment_ids, unique_ids = input_data
 start_positions = Tensor([1], mstype.float32)
 end_positions = Tensor([1], mstype.float32)


model_zoo/official/nlp/mass/src/transformer/infer_mass.py  (+2 -2)

@@ -107,7 +107,7 @@ def transformer_infer(config, dataset):
 probs = []
 source_sentences = []
 target_sentences = []
-for batch in dataset.create_dict_iterator():
+for batch in dataset.create_dict_iterator(output_numpy=True):
 source_sentences.append(batch["source_eos_ids"])
 target_sentences.append(batch["target_eos_ids"])

@@ -232,7 +232,7 @@ def transformer_infer_ppl(config, dataset):
 lengths = []
 source_sentences = []
 target_sentences = []
-for batch in dataset.create_dict_iterator():
+for batch in dataset.create_dict_iterator(output_numpy=True):
 source_sentences.append(batch["source_eos_ids"])
 target_sentences.append(batch["target_eos_ids"])




model_zoo/official/nlp/tinybert/run_task_distill.py  (+1 -2)

@@ -19,7 +19,6 @@ import os
 import re
 import argparse
 import mindspore.common.dtype as mstype
-from mindspore import Tensor
 from mindspore import context
 from mindspore.train.model import Model
 from mindspore.train.callback import TimeMonitor
@@ -282,7 +281,7 @@ def do_eval_standalone():
 for data in eval_dataset.create_dict_iterator():
 input_data = []
 for i in columns_list:
-input_data.append(Tensor(data[i]))
+input_data.append(data[i])
 input_ids, input_mask, token_type_id, label_ids = input_data
 logits = eval_model(input_ids, token_type_id, input_mask)
 callback.update(logits[3], label_ids)


model_zoo/official/nlp/tinybert/src/utils.py  (+1 -1)

@@ -96,7 +96,7 @@ class EvalCallBack(Callback):
 for data in self.dataset.create_dict_iterator():
 input_data = []
 for i in columns_list:
-input_data.append(Tensor(data[i]))
+input_data.append(data[i])
 input_ids, input_mask, token_type_id, label_ids = input_data
 self.network.set_train(False)
 logits = self.network(input_ids, token_type_id, input_mask)


model_zoo/official/nlp/transformer/eval.py  (+1 -1)

@@ -113,7 +113,7 @@ def run_transformer_eval():
 predictions = []
 source_sents = []
 target_sents = []
-for batch in dataset.create_dict_iterator():
+for batch in dataset.create_dict_iterator(output_numpy=True):
 source_sents.append(batch["source_eos_ids"])
 target_sents.append(batch["target_eos_ids"])
 source_ids = Tensor(batch["source_eos_ids"], mstype.int32)


model_zoo/utils/cv_to_mindrecord/Caltech-UCSD-Birds-200-2011/create_dataset.py  (+1 -1)

@@ -22,7 +22,7 @@ def create_dataset(data_file):
 num_parallel_workers=num_readers,
 shuffle=True)
 index = 0
-for item in data_set.create_dict_iterator():
+for item in data_set.create_dict_iterator(output_numpy=True):
 print("example {}: {}".format(index, item))
 index += 1
 if index % 1000 == 0:


model_zoo/utils/graph_to_mindrecord/reader.py  (+1 -1)

@@ -28,7 +28,7 @@ args = parser.parse_args()

 data_set = ds.MindDataset(args.path)
 num_iter = 0
-for item in data_set.create_dict_iterator():
+for item in data_set.create_dict_iterator(output_numpy=True):
 print(item)
 num_iter += 1
 print("Total items # is {}".format(num_iter))

model_zoo/utils/nlp_to_mindrecord/aclImdb/create_dataset.py  (+1 -1)

@@ -22,7 +22,7 @@ def create_dataset(data_file):
 num_parallel_workers=num_readers,
 shuffle=True)
 index = 0
-for item in data_set.create_dict_iterator():
+for item in data_set.create_dict_iterator(output_numpy=True):
 print("example {}: {}".format(index, item))
 index += 1
 if index % 1000 == 0:


model_zoo/utils/nlp_to_mindrecord/aclImdb_preprocess/create_dataset.py  (+1 -1)

@@ -22,7 +22,7 @@ def create_dataset(data_file):
 num_parallel_workers=num_readers,
 shuffle=True)
 index = 0
-for item in data_set.create_dict_iterator():
+for item in data_set.create_dict_iterator(output_numpy=True):
 print("example {}: {}".format(index, item))
 index += 1
 if index % 1000 == 0:


tests/st/ops/ascend/test_tdt_data_ms.py  (+1 -1)

@@ -96,7 +96,7 @@ if __name__ == '__main__':
 dataset_types, dataset_shapes, (), 'dataset')
 ds1.send()

-for data in data_set.create_tuple_iterator():
+for data in data_set.create_tuple_iterator(output_numpy=True):
 output = net()
 print(data[0].any())
 print(


tests/st/probability/test_bnn_layer.py  (+2 -2)

@@ -92,7 +92,7 @@ class BNNLeNet5(nn.Cell):
 def train_model(train_net, net, dataset):
 accs = []
 loss_sum = 0
-for _, data in enumerate(dataset.create_dict_iterator()):
+for _, data in enumerate(dataset.create_dict_iterator(output_numpy=True)):
 train_x = Tensor(data['image'].astype(np.float32))
 label = Tensor(data['label'].astype(np.int32))
 loss = train_net(train_x, label)
@@ -109,7 +109,7 @@ def train_model(train_net, net, dataset):

 def validate_model(net, dataset):
 accs = []
-for _, data in enumerate(dataset.create_dict_iterator()):
+for _, data in enumerate(dataset.create_dict_iterator(output_numpy=True)):
 train_x = Tensor(data['image'].astype(np.float32))
 label = Tensor(data['label'].astype(np.int32))
 output = net(train_x)


tests/st/probability/test_gpu_svi_cvae.py  (+1 -1)

@@ -122,7 +122,7 @@ def test_svi_cvae():
 sample_label = Tensor([i for i in range(0, 8)] * 8, dtype=mstype.int32)
 generated_sample = cvae.generate_sample(sample_label, 64, IMAGE_SHAPE)
 # test function: reconstruct_sample
-for sample in ds_train.create_dict_iterator():
+for sample in ds_train.create_dict_iterator(output_numpy=True):
 sample_x = Tensor(sample['image'], dtype=mstype.float32)
 sample_y = Tensor(sample['label'], dtype=mstype.int32)
 reconstructed_sample = cvae.reconstruct_sample(sample_x, sample_y)


tests/st/probability/test_gpu_svi_vae.py  (+1 -1)

@@ -110,7 +110,7 @@ def test_svi_vae():
 # test function: generate_sample
 generated_sample = vae.generate_sample(64, IMAGE_SHAPE)
 # test function: reconstruct_sample
-for sample in ds_train.create_dict_iterator():
+for sample in ds_train.create_dict_iterator(output_numpy=True):
 sample_x = Tensor(sample['image'], dtype=mstype.float32)
 reconstructed_sample = vae.reconstruct_sample(sample_x)
 print('The loss of the trained network is ', trained_loss)


tests/st/probability/test_transform_bnn_layer.py  (+2 -2)

@@ -93,7 +93,7 @@ class LeNet5(nn.Cell):
 def train_model(train_net, net, dataset):
 accs = []
 loss_sum = 0
-for _, data in enumerate(dataset.create_dict_iterator()):
+for _, data in enumerate(dataset.create_dict_iterator(output_numpy=True)):
 train_x = Tensor(data['image'].astype(np.float32))
 label = Tensor(data['label'].astype(np.int32))
 loss = train_net(train_x, label)
@@ -110,7 +110,7 @@ def train_model(train_net, net, dataset):

 def validate_model(net, dataset):
 accs = []
-for _, data in enumerate(dataset.create_dict_iterator()):
+for _, data in enumerate(dataset.create_dict_iterator(output_numpy=True)):
 train_x = Tensor(data['image'].astype(np.float32))
 label = Tensor(data['label'].astype(np.int32))
 output = net(train_x)


tests/st/probability/test_transform_bnn_model.py  (+2 -2)

@@ -92,7 +92,7 @@ class LeNet5(nn.Cell):
 def train_model(train_net, net, dataset):
 accs = []
 loss_sum = 0
-for _, data in enumerate(dataset.create_dict_iterator()):
+for _, data in enumerate(dataset.create_dict_iterator(output_numpy=True)):
 train_x = Tensor(data['image'].astype(np.float32))
 label = Tensor(data['label'].astype(np.int32))
 loss = train_net(train_x, label)
@@ -109,7 +109,7 @@ def train_model(train_net, net, dataset):

 def validate_model(net, dataset):
 accs = []
-for _, data in enumerate(dataset.create_dict_iterator()):
+for _, data in enumerate(dataset.create_dict_iterator(output_numpy=True)):
 train_x = Tensor(data['image'].astype(np.float32))
 label = Tensor(data['label'].astype(np.int32))
 output = net(train_x)


tests/st/probability/test_uncertainty.py  (+1 -1)

@@ -129,7 +129,7 @@ if __name__ == '__main__':
 epi_uncer_model_path=None,
 ale_uncer_model_path=None,
 save_model=False)
-for eval_data in ds_eval.create_dict_iterator():
+for eval_data in ds_eval.create_dict_iterator(output_numpy=True):
 eval_data = Tensor(eval_data['image'], mstype.float32)
 epistemic_uncertainty = evaluation.eval_epistemic_uncertainty(eval_data)
 aleatoric_uncertainty = evaluation.eval_aleatoric_uncertainty(eval_data)

tests/st/pynative/test_pynative_resnet50.py  (+2 -2)

@@ -423,8 +423,8 @@ def test_pynative_resnet50():
 if step > max_step:
 break
 start_time = time.time()
-input_data = Tensor(element["image"])
-input_label = Tensor(element["label"])
+input_data = element["image"]
+input_label = element["label"]
 loss_output = net_with_criterion(input_data, input_label)
 grads = train_network(input_data, input_label)
 optimizer(grads)


tests/ut/python/dataset/test_HWC2CHW.py  (+4 -2)

@@ -48,7 +48,8 @@ def test_HWC2CHW(plot=False):

 image_transposed = []
 image = []
-for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)):
+for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True),
+                        data2.create_dict_iterator(num_epochs=1, output_numpy=True)):
 transposed_item = item1["image"].copy()
 original_item = item2["image"].copy()
 image_transposed.append(transposed_item.transpose(1, 2, 0))
@@ -105,7 +106,8 @@ def test_HWC2CHW_comp(plot=False):

 image_c_transposed = []
 image_py_transposed = []
-for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)):
+for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True),
+                        data2.create_dict_iterator(num_epochs=1, output_numpy=True)):
 c_image = item1["image"]
 py_image = (item2["image"].transpose(1, 2, 0) * 255).astype(np.uint8)




tests/ut/python/dataset/test_apply.py  (+4 -2)

@@ -40,7 +40,8 @@ def test_apply_generator_case():
 data2 = data2.repeat(2)
 data2 = data2.batch(4)

-for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)):
+for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True),
+                        data2.create_dict_iterator(num_epochs=1, output_numpy=True)):
 np.testing.assert_array_equal(item1["data"], item2["data"])

@@ -63,7 +64,8 @@ def test_apply_imagefolder_case():
 data2 = data2.map(operations=normalize_op)
 data2 = data2.repeat(2)

-for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)):
+for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True),
+                        data2.create_dict_iterator(num_epochs=1, output_numpy=True)):
 np.testing.assert_array_equal(item1["image"], item2["image"])






tests/ut/python/dataset/test_autocontrast.py  (+15 -15)

@@ -48,10 +48,10 @@ def test_auto_contrast_py(plot=False):

 for idx, (image, _) in enumerate(ds_original):
 if idx == 0:
-images_original = np.transpose(image, (0, 2, 3, 1))
+images_original = np.transpose(image.asnumpy(), (0, 2, 3, 1))
 else:
 images_original = np.append(images_original,
-np.transpose(image, (0, 2, 3, 1)),
+np.transpose(image.asnumpy(), (0, 2, 3, 1)),
 axis=0)

 # AutoContrast Images
@@ -69,10 +69,10 @@ def test_auto_contrast_py(plot=False):

 for idx, (image, _) in enumerate(ds_auto_contrast):
 if idx == 0:
-images_auto_contrast = np.transpose(image, (0, 2, 3, 1))
+images_auto_contrast = np.transpose(image.asnumpy(), (0, 2, 3, 1))
 else:
 images_auto_contrast = np.append(images_auto_contrast,
-np.transpose(image, (0, 2, 3, 1)),
+np.transpose(image.asnumpy(), (0, 2, 3, 1)),
 axis=0)

 num_samples = images_original.shape[0]
@@ -110,10 +110,10 @@ def test_auto_contrast_c(plot=False):

 for idx, (image, _) in enumerate(ds_auto_contrast_py):
 if idx == 0:
-images_auto_contrast_py = image
+images_auto_contrast_py = image.asnumpy()
 else:
 images_auto_contrast_py = np.append(images_auto_contrast_py,
-image,
+image.asnumpy(),
 axis=0)

 ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
@@ -125,10 +125,10 @@ def test_auto_contrast_c(plot=False):

 for idx, (image, _) in enumerate(ds_auto_contrast_c):
 if idx == 0:
-images_auto_contrast_c = image
+images_auto_contrast_c = image.asnumpy()
 else:
 images_auto_contrast_c = np.append(images_auto_contrast_c,
-image,
+image.asnumpy(),
 axis=0)

 num_samples = images_auto_contrast_c.shape[0]
@@ -170,10 +170,10 @@ def test_auto_contrast_one_channel_c(plot=False):

 for idx, (image, _) in enumerate(ds_auto_contrast_py):
 if idx == 0:
-images_auto_contrast_py = image
+images_auto_contrast_py = image.asnumpy()
 else:
 images_auto_contrast_py = np.append(images_auto_contrast_py,
-image,
+image.asnumpy(),
 axis=0)

 ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
@@ -186,10 +186,10 @@ def test_auto_contrast_one_channel_c(plot=False):

 for idx, (image, _) in enumerate(ds_auto_contrast_c):
 if idx == 0:
-images_auto_contrast_c = image
+images_auto_contrast_c = image.asnumpy()
 else:
 images_auto_contrast_c = np.append(images_auto_contrast_c,
-image,
+image.asnumpy(),
 axis=0)

 num_samples = images_auto_contrast_c.shape[0]
@@ -218,9 +218,9 @@ def test_auto_contrast_mnist_c(plot=False):
 for _, (data_orig, data_trans) in enumerate(zip(ds_orig, ds_auto_contrast_c)):
 image_orig, label_orig = data_orig
 image_trans, _ = data_trans
-images.append(image_orig)
-labels.append(label_orig)
-images_trans.append(image_trans)
+images.append(image_orig.asnumpy())
+labels.append(label_orig.asnumpy())
+images_trans.append(image_trans.asnumpy())

 # Compare with expected md5 from images
 filename = "autocontrast_mnist_result_c.npz"


tests/ut/python/dataset/test_bounding_box_augment.py  (+10 -5)

@@ -58,7 +58,8 @@ def test_bounding_box_augment_with_rotation_op(plot_vis=False):

 unaugSamp, augSamp = [], []

-for unAug, Aug in zip(dataVoc1.create_dict_iterator(num_epochs=1), dataVoc2.create_dict_iterator(num_epochs=1)):
+for unAug, Aug in zip(dataVoc1.create_dict_iterator(num_epochs=1, output_numpy=True),
+                      dataVoc2.create_dict_iterator(num_epochs=1, output_numpy=True)):
 unaugSamp.append(unAug)
 augSamp.append(Aug)

@@ -96,7 +97,8 @@ def test_bounding_box_augment_with_crop_op(plot_vis=False):

 unaugSamp, augSamp = [], []

-for unAug, Aug in zip(dataVoc1.create_dict_iterator(num_epochs=1), dataVoc2.create_dict_iterator(num_epochs=1)):
+for unAug, Aug in zip(dataVoc1.create_dict_iterator(num_epochs=1, output_numpy=True),
+                      dataVoc2.create_dict_iterator(num_epochs=1, output_numpy=True)):
 unaugSamp.append(unAug)
 augSamp.append(Aug)

@@ -133,7 +135,8 @@ def test_bounding_box_augment_valid_ratio_c(plot_vis=False):

 unaugSamp, augSamp = [], []

-for unAug, Aug in zip(dataVoc1.create_dict_iterator(num_epochs=1), dataVoc2.create_dict_iterator(num_epochs=1)):
+for unAug, Aug in zip(dataVoc1.create_dict_iterator(num_epochs=1, output_numpy=True),
+                      dataVoc2.create_dict_iterator(num_epochs=1, output_numpy=True)):
 unaugSamp.append(unAug)
 augSamp.append(Aug)

@@ -166,7 +169,8 @@ def test_bounding_box_augment_op_coco_c(plot_vis=False):

 unaugSamp, augSamp = [], []

-for unAug, Aug in zip(dataCoco1.create_dict_iterator(num_epochs=1), dataCoco2.create_dict_iterator(num_epochs=1)):
+for unAug, Aug in zip(dataCoco1.create_dict_iterator(num_epochs=1, output_numpy=True),
+                      dataCoco2.create_dict_iterator(num_epochs=1, output_numpy=True)):
 unaugSamp.append(unAug)
 augSamp.append(Aug)

@@ -209,7 +213,8 @@ def test_bounding_box_augment_valid_edge_c(plot_vis=False):

 unaugSamp, augSamp = [], []

-for unAug, Aug in zip(dataVoc1.create_dict_iterator(num_epochs=1), dataVoc2.create_dict_iterator(num_epochs=1)):
+for unAug, Aug in zip(dataVoc1.create_dict_iterator(num_epochs=1, output_numpy=True),
+                      dataVoc2.create_dict_iterator(num_epochs=1, output_numpy=True)):
 unaugSamp.append(unAug)
 augSamp.append(Aug)




tests/ut/python/dataset/test_bucket_batch_by_length.py  (+10 -10)

@@ -135,7 +135,7 @@ def test_bucket_batch_multi_bucket_no_padding():
 [[1], [5], [9]]]

 output = []
-for data in dataset.create_dict_iterator(num_epochs=1):
+for data in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
 output.append(data["col1"].tolist())

 assert output == expected_output
@@ -166,7 +166,7 @@ def test_bucket_batch_multi_bucket_with_padding():
 [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]]]

 output = []
-for data in dataset.create_dict_iterator(num_epochs=1):
+for data in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
 output.append(data["col1"].tolist())

 assert output == expected_output
@@ -187,7 +187,7 @@ def test_bucket_batch_single_bucket_no_padding():
 [[5], [6], [7], [8], [9]]]

 output = []
-for data in dataset.create_dict_iterator(num_epochs=1):
+for data in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
 output.append(data["col1"].tolist())

 assert output == expected_output
@@ -217,7 +217,7 @@ def test_bucket_batch_single_bucket_with_padding():
 [0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0]]]

 output = []
-for data in dataset.create_dict_iterator(num_epochs=1):
+for data in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
 output.append(data["col1"].tolist())

 assert output == expected_output
@@ -248,7 +248,7 @@ def test_bucket_batch_pad_to_bucket_boundary():
 [0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0]]]

 output = []
-for data in dataset.create_dict_iterator(num_epochs=1):
+for data in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
 output.append(data["col1"].tolist())

 assert output == expected_output
@@ -284,7 +284,7 @@ def test_bucket_batch_default_pad():
 [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]]]

 output = []
-for data in dataset.create_dict_iterator(num_epochs=1):
+for data in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
 output.append(data["col1"].tolist())

 assert output == expected_output
@@ -315,7 +315,7 @@ def test_bucket_batch_drop_remainder():
 [[19], [22], [25]]]

 output = []
-for data in dataset.create_dict_iterator(num_epochs=1):
+for data in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
 output.append(data["col1"].tolist())

 assert output == expected_output
@@ -345,7 +345,7 @@ def test_bucket_batch_default_length_function():
 [0, 1, 2, 3, 4, 5, 6, 7, 8]]]

 output = []
-for data in dataset.create_dict_iterator(num_epochs=1):
+for data in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
 output.append(data["col1"].tolist())

 assert output == expected_output
@@ -380,7 +380,7 @@ def test_bucket_batch_multi_column():

 same_shape_output = []
 variable_shape_output = []
-for data in dataset.create_dict_iterator(num_epochs=1):
+for data in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
 same_shape_output.append(data["same_shape"].tolist())
 variable_shape_output.append(data["variable_shape"].tolist())

@@ -419,7 +419,7 @@ def test_bucket_batch_three_columns():
 same_shape_output = []
 same_shape2_output = []
 variable_shape_output = []
-for data in dataset.create_dict_iterator(num_epochs=1):
+for data in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
 same_shape_output.append(data["same_shape"].tolist())
 same_shape2_output.append(data["same_shape2"].tolist())
 variable_shape_output.append(data["variable_shape"].tolist())


tests/ut/python/dataset/test_c_compose.py  (+1 -1)

@@ -27,7 +27,7 @@ def test_compose():
 data = ds.NumpySlicesDataset(arr, column_names="col", shuffle=False)
 data = data.map(operations=ops.Compose(op_list), input_columns=["col"])
 res = []
-for i in data.create_dict_iterator(num_epochs=1):
+for i in data.create_dict_iterator(num_epochs=1, output_numpy=True):
 res.append(i["col"].tolist())
 return res
 except (TypeError, ValueError) as e:


tests/ut/python/dataset/test_c_random_apply.py  (+1 -1)

@@ -26,7 +26,7 @@ def test_random_apply():
 data = ds.NumpySlicesDataset(arr, column_names="col", shuffle=False)
 data = data.map(operations=ops.RandomApply(op_list, prob), input_columns=["col"])
 res = []
-for i in data.create_dict_iterator(num_epochs=1):
+for i in data.create_dict_iterator(num_epochs=1, output_numpy=True):
 res.append(i["col"].tolist())
 return res
 except (TypeError, ValueError) as e:


tests/ut/python/dataset/test_c_random_choice.py  (+1 -1)

@@ -29,7 +29,7 @@ def test_random_choice():
 data = ds.NumpySlicesDataset(arr, column_names="col", shuffle=False)
 data = data.map(operations=ops.RandomChoice(op_list), input_columns=["col"])
 res = []
-for i in data.create_dict_iterator(num_epochs=1):
+for i in data.create_dict_iterator(num_epochs=1, output_numpy=True):
 res.append(i["col"].tolist())
 return res
 except (TypeError, ValueError) as e:


+ 5
- 3
tests/ut/python/dataset/test_center_crop.py

@@ -49,7 +49,8 @@ def test_center_crop_op(height=375, width=375, plot=False):

image_cropped = []
image = []
for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)):
for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True),
data2.create_dict_iterator(num_epochs=1, output_numpy=True)):
image_cropped.append(item1["image"].copy())
image.append(item2["image"].copy())
if plot:
@@ -99,7 +100,8 @@ def test_center_crop_comp(height=375, width=375, plot=False):

image_c_cropped = []
image_py_cropped = []
for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)):
for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True),
data2.create_dict_iterator(num_epochs=1, output_numpy=True)):
c_image = item1["image"]
py_image = (item2["image"].transpose(1, 2, 0) * 255).astype(np.uint8)
# Note: The images aren't exactly the same due to rounding error
@@ -132,7 +134,7 @@ def test_crop_grayscale(height=375, width=375):
crop_gray = vision.CenterCrop([height, width])
data1 = data1.map(operations=crop_gray, input_columns=["image"])

for item1 in data1.create_dict_iterator(num_epochs=1):
for item1 in data1.create_dict_iterator(num_epochs=1, output_numpy=True):
c_image = item1["image"]

# Check that the image is grayscale
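
The same flag is needed when two pipelines are compared element by element, as in the CenterCrop tests above: both iterators must request NumPy output so the array comparisons still apply. A self-contained sketch with placeholder data, not the test images:

import numpy as np
import mindspore.dataset as ds

source = np.ones((4, 8, 8, 3), dtype=np.uint8)
data1 = ds.NumpySlicesDataset({"image": source}, shuffle=False)
data2 = ds.NumpySlicesDataset({"image": source}, shuffle=False)

# Both sides yield numpy.ndarray, so np.testing works without conversion.
for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True),
                        data2.create_dict_iterator(num_epochs=1, output_numpy=True)):
    np.testing.assert_array_equal(item1["image"], item2["image"])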


+ 43
- 33
tests/ut/python/dataset/test_concat.py

@@ -50,9 +50,10 @@ def test_concat_01():
data3 = data1 + data2

# Here i refers to index, d refers to data element
for i, d in enumerate(data3):
logger.info("data: %i", d[0][0])
assert i == d[0][0]
for i, d in enumerate(data3.create_tuple_iterator(output_numpy=True)):
t = d
logger.info("data: %i", t[0][0])
assert i == t[0][0]

assert sum([1 for _ in data3]) == 10

@@ -68,9 +69,10 @@ def test_concat_02():
data3 = data1.concat(data2)

# Here i refers to index, d refers to data element
for i, d in enumerate(data3):
logger.info("data: %i", d[0][0])
assert i == d[0][0]
for i, d in enumerate(data3.create_tuple_iterator(output_numpy=True)):
t = d
logger.info("data: %i", t[0][0])
assert i == t[0][0]

assert sum([1 for _ in data3]) == 10

@@ -145,9 +147,10 @@ def test_concat_06():
dataset = data1 + data2 + data3

# Here i refers to index, d refers to data element
for i, d in enumerate(dataset):
logger.info("data: %i", d[0][0])
assert i == d[0][0]
for i, d in enumerate(dataset.create_tuple_iterator(output_numpy=True)):
t = d
logger.info("data: %i", t[0][0])
assert i == t[0][0]

assert sum([1 for _ in dataset]) == 20

@@ -165,9 +168,10 @@ def test_concat_07():
data4 = data1 + dataset

# Here i refers to index, d refers to data element
for i, d in enumerate(data4):
logger.info("data: %i", d[0][0])
assert i == d[0][0]
for i, d in enumerate(data4.create_tuple_iterator(output_numpy=True)):
t = d
logger.info("data: %i", t[0][0])
assert i == t[0][0]

assert sum([1 for _ in data4]) == 20

@@ -184,9 +188,10 @@ def test_concat_08():
data3 = data3.repeat(2)

# Here i refers to index, d refers to data element
for i, d in enumerate(data3):
logger.info("data: %i", d[0][0])
assert i % 10 == d[0][0]
for i, d in enumerate(data3.create_tuple_iterator(output_numpy=True)):
t = d
logger.info("data: %i", t[0][0])
assert i % 10 == t[0][0]

assert sum([1 for _ in data3]) == 20

@@ -205,9 +210,10 @@ def test_concat_09():

res = [0, 1, 2, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 3, 4, 5, 6, 7, 8, 9]
# Here i refers to index, d refers to data element
for i, d in enumerate(data3):
logger.info("data: %i", d[0][0])
assert res[i] == d[0][0]
for i, d in enumerate(data3.create_tuple_iterator(output_numpy=True)):
t = d
logger.info("data: %i", t[0][0])
assert res[i] == t[0][0]

assert sum([1 for _ in data3]) == 20

@@ -225,9 +231,10 @@ def test_concat_10():

res = [0, 1, 2, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
# Here i refers to index, d refers to data element
for i, d in enumerate(data3):
logger.info("data: %i", d[0][0])
assert res[i] == d[0][0]
for i, d in enumerate(data3.create_tuple_iterator(output_numpy=True)):
t = d
logger.info("data: %i", t[0][0])
assert res[i] == t[0][0]

assert sum([1 for _ in data3]) == 13

@@ -247,9 +254,10 @@ def test_concat_11():
res = [0, 10, 15, 20]

# Here i refers to index, d refers to data element
for i, d in enumerate(data3):
logger.info("data: %i", d[0][0])
assert res[i] == d[0][0]
for i, d in enumerate(data3.create_tuple_iterator(output_numpy=True)):
t = d
logger.info("data: %i", t[0][0])
assert res[i] == t[0][0]

assert sum([1 for _ in data3]) == 3

@@ -270,9 +278,10 @@ def test_concat_12():
data3 = data3.shuffle(buffer_size=10)

# Here i refers to index, d refers to data element
for i, d in enumerate(data3):
logger.info("data: %i", d[0][0])
assert res[i] == d[0][0]
for i, d in enumerate(data3.create_tuple_iterator(output_numpy=True)):
t = d
logger.info("data: %i", t[0][0])
assert res[i] == t[0][0]

assert sum([1 for _ in data3]) == 10

@@ -297,9 +306,10 @@ def test_concat_13():
data3 = data3.shuffle(buffer_size=int(data3.get_dataset_size()))

# Here i refers to index, d refers to data element
for i, d in enumerate(data3):
logger.info("data: %i", d[0][0])
assert res[i] == d[0][0]
for i, d in enumerate(data3.create_tuple_iterator(output_numpy=True)):
t = d
logger.info("data: %i", t[0][0])
assert res[i] == t[0][0]

assert sum([1 for _ in data3]) == 3

@@ -324,11 +334,11 @@ def test_concat_14():
data3 = data1 + data2

expected, output = [], []
for d in data1:
for d in data1.create_tuple_iterator(output_numpy=True):
expected.append(d[0])
for d in data2:
for d in data2.create_tuple_iterator(output_numpy=True):
expected.append(d[0])
for d in data3:
for d in data3.create_tuple_iterator(output_numpy=True):
output.append(d[0])

assert len(expected) == len(output)
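
Where a test walks a dataset positionally, the concat cases above switch from iterating the dataset object (which now yields mindspore Tensors) to create_tuple_iterator(output_numpy=True), which returns each row as a tuple of NumPy arrays. A reduced sketch of the same idea; small NumpySlicesDataset objects stand in for the generator datasets the tests actually build:

import numpy as np
import mindspore.dataset as ds

data1 = ds.NumpySlicesDataset(np.array([0, 1, 2]), shuffle=False)
data2 = ds.NumpySlicesDataset(np.array([3, 4, 5]), shuffle=False)
data3 = data1 + data2  # concatenation, as in test_concat_01

# Each d is a tuple of numpy.ndarray, so index/value checks stay NumPy-only.
for i, d in enumerate(data3.create_tuple_iterator(output_numpy=True)):
    assert i == int(d[0])

assert sum([1 for _ in data3]) == 6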


+ 6
- 6
tests/ut/python/dataset/test_concatenate_op.py

@@ -34,7 +34,7 @@ def test_concatenate_op_all():
data = data.map(operations=concatenate_op, input_columns=["col"])
expected = np.array([1.4, 2., 3., 4., 4.5, 5., 6., 7., 8., 9., 10.3,
11., 12.])
for data_row in data:
for data_row in data.create_tuple_iterator(output_numpy=True):
np.testing.assert_array_equal(data_row[0], expected)

@@ -46,7 +46,7 @@ def test_concatenate_op_none():
concatenate_op = data_trans.Concatenate()

data = data.map(operations=concatenate_op, input_columns=["col"])
for data_row in data:
for data_row in data.create_tuple_iterator(output_numpy=True):
np.testing.assert_array_equal(data_row[0], np.array([5., 6., 7., 8.], dtype=np.float))

@@ -61,7 +61,7 @@ def test_concatenate_op_string():

data = data.map(operations=concatenate_op, input_columns=["col"])
expected = np.array(["dw", "df", "ss", "ad", "dwsdf", "df"], dtype='S')
for data_row in data:
for data_row in data.create_tuple_iterator(output_numpy=True):
np.testing.assert_array_equal(data_row[0], expected)

@@ -77,7 +77,7 @@ def test_concatenate_op_multi_input_string():
data = data.map(operations=concatenate_op, input_columns=["col1", "col2"], column_order=["out1"],
output_columns=["out1"])
expected = np.array(["dw", "df", "1", "2", "d", "3", "4", "e", "dwsdf", "df"], dtype='S')
for data_row in data:
for data_row in data.create_tuple_iterator(output_numpy=True):
np.testing.assert_array_equal(data_row[0], expected)

@@ -92,7 +92,7 @@ def test_concatenate_op_multi_input_numeric():
data = data.map(operations=concatenate_op, input_columns=["col1", "col2"], column_order=["out1"],
output_columns=["out1"])
expected = np.array([3, 5, 1, 2, 3, 4])
for data_row in data:
for data_row in data.create_tuple_iterator(output_numpy=True):
np.testing.assert_array_equal(data_row[0], expected)

@@ -158,7 +158,7 @@ def test_concatenate_op_negative_axis():
data = data.map(operations=concatenate_op, input_columns=["col"])
expected = np.array([1.4, 2., 3., 4., 4.5, 5., 6., 7., 8., 9., 10.3,
11., 12.])
for data_row in data:
for data_row in data.create_tuple_iterator(output_numpy=True):
np.testing.assert_array_equal(data_row[0], expected)






+ 4
- 4
tests/ut/python/dataset/test_config.py

@@ -288,7 +288,7 @@ def test_deterministic_python_seed():
data1 = data1.map(operations=transform, input_columns=["image"])
data1_output = []
# config.set_seed() calls random.seed()
for data_one in data1.create_dict_iterator(num_epochs=1):
for data_one in data1.create_dict_iterator(num_epochs=1, output_numpy=True):
data1_output.append(data_one["image"])

# Second dataset
@@ -298,7 +298,7 @@ def test_deterministic_python_seed():
ds.config.set_seed(0)

data2_output = []
for data_two in data2.create_dict_iterator(num_epochs=1):
for data_two in data2.create_dict_iterator(num_epochs=1, output_numpy=True):
data2_output.append(data_two["image"])

np.testing.assert_equal(data1_output, data2_output)
@@ -331,7 +331,7 @@ def test_deterministic_python_seed_multi_thread():
data1 = data1.map(operations=transform, input_columns=["image"], python_multiprocessing=True)
data1_output = []
# config.set_seed() calls random.seed()
for data_one in data1.create_dict_iterator(num_epochs=1):
for data_one in data1.create_dict_iterator(num_epochs=1, output_numpy=True):
data1_output.append(data_one["image"])

# Second dataset
@@ -342,7 +342,7 @@ def test_deterministic_python_seed_multi_thread():
ds.config.set_seed(0)

data2_output = []
for data_two in data2.create_dict_iterator(num_epochs=1):
for data_two in data2.create_dict_iterator(num_epochs=1, output_numpy=True):
data2_output.append(data_two["image"])

try:


+ 6
- 3
tests/ut/python/dataset/test_cut_out.py

@@ -61,7 +61,8 @@ def test_cut_out_op(plot=False):
data2 = data2.map(operations=transforms_2, input_columns=["image"])

num_iter = 0
for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)):
for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True),
data2.create_dict_iterator(num_epochs=1, output_numpy=True)):
num_iter += 1
image_1 = (item1["image"].transpose(1, 2, 0) * 255).astype(np.uint8)
# C image doesn't require transpose
@@ -108,7 +109,8 @@ def test_cut_out_op_multicut(plot=False):

num_iter = 0
image_list_1, image_list_2 = [], []
for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)):
for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True),
data2.create_dict_iterator(num_epochs=1, output_numpy=True)):
num_iter += 1
image_1 = (item1["image"].transpose(1, 2, 0) * 255).astype(np.uint8)
# C image doesn't require transpose
@@ -189,7 +191,8 @@ def test_cut_out_comp(plot=False):

num_iter = 0
image_list_1, image_list_2 = [], []
for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)):
for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True),
data2.create_dict_iterator(num_epochs=1, output_numpy=True)):
num_iter += 1
image_1 = (item1["image"].transpose(1, 2, 0) * 255).astype(np.uint8)
# C image doesn't require transpose


+ 24
- 24
tests/ut/python/dataset/test_cutmix_batch_op.py

@@ -44,9 +44,9 @@ def test_cutmix_batch_success1(plot=False):
images_original = None
for idx, (image, _) in enumerate(ds_original):
if idx == 0:
images_original = image
images_original = image.asnumpy()
else:
images_original = np.append(images_original, image, axis=0)
images_original = np.append(images_original, image.asnumpy(), axis=0)

# CutMix Images
data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
@@ -61,9 +61,9 @@ def test_cutmix_batch_success1(plot=False):
images_cutmix = None
for idx, (image, _) in enumerate(data1):
if idx == 0:
images_cutmix = image.transpose(0, 2, 3, 1)
images_cutmix = image.asnumpy().transpose(0, 2, 3, 1)
else:
images_cutmix = np.append(images_cutmix, image.transpose(0, 2, 3, 1), axis=0)
images_cutmix = np.append(images_cutmix, image.asnumpy().transpose(0, 2, 3, 1), axis=0)
if plot:
visualize_list(images_original, images_cutmix)

@@ -87,9 +87,9 @@ def test_cutmix_batch_success2(plot=False):
images_original = None
for idx, (image, _) in enumerate(ds_original):
if idx == 0:
images_original = image
images_original = image.asnumpy()
else:
images_original = np.append(images_original, image, axis=0)
images_original = np.append(images_original, image.asnumpy(), axis=0)

# CutMix Images
data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
@@ -104,9 +104,9 @@ def test_cutmix_batch_success2(plot=False):
images_cutmix = None
for idx, (image, _) in enumerate(data1):
if idx == 0:
images_cutmix = image
images_cutmix = image.asnumpy()
else:
images_cutmix = np.append(images_cutmix, image, axis=0)
images_cutmix = np.append(images_cutmix, image.asnumpy(), axis=0)
if plot:
visualize_list(images_original, images_cutmix)

@@ -131,9 +131,9 @@ def test_cutmix_batch_success3(plot=False):
images_original = None
for idx, (image, _) in enumerate(ds_original):
if idx == 0:
images_original = image
images_original = image.asnumpy()
else:
images_original = np.append(images_original, image, axis=0)
images_original = np.append(images_original, image.asnumpy(), axis=0)

# CutMix Images
data1 = ds.ImageFolderDataset(dataset_dir=DATA_DIR2, shuffle=False)
@@ -151,9 +151,9 @@ def test_cutmix_batch_success3(plot=False):
images_cutmix = None
for idx, (image, _) in enumerate(data1):
if idx == 0:
images_cutmix = image
images_cutmix = image.asnumpy()
else:
images_cutmix = np.append(images_cutmix, image, axis=0)
images_cutmix = np.append(images_cutmix, image.asnumpy(), axis=0)
if plot:
visualize_list(images_original, images_cutmix)

@@ -178,9 +178,9 @@ def test_cutmix_batch_success4(plot=False):
images_original = None
for idx, (image, _) in enumerate(ds_original):
if idx == 0:
images_original = image
images_original = image.asnumpy()
else:
images_original = np.append(images_original, image, axis=0)
images_original = np.append(images_original, image.asnumpy(), axis=0)

# CutMix Images
data1 = ds.CelebADataset(dataset_dir=DATA_DIR3, shuffle=False)
@@ -198,9 +198,9 @@ def test_cutmix_batch_success4(plot=False):
images_cutmix = None
for idx, (image, _) in enumerate(data1):
if idx == 0:
images_cutmix = image
images_cutmix = image.asnumpy()
else:
images_cutmix = np.append(images_cutmix, image, axis=0)
images_cutmix = np.append(images_cutmix, image.asnumpy(), axis=0)
if plot:
visualize_list(images_original, images_cutmix)

@@ -279,9 +279,9 @@ def test_cutmix_batch_fail1():
data1 = data1.map(operations=cutmix_batch_op, input_columns=["image", "label"])
for idx, (image, _) in enumerate(data1):
if idx == 0:
images_cutmix = image
images_cutmix = image.asnumpy()
else:
images_cutmix = np.append(images_cutmix, image, axis=0)
images_cutmix = np.append(images_cutmix, image.asnumpy(), axis=0)
error_message = "You must make sure images are HWC or CHW and batch "
assert error_message in str(error.value)

@@ -360,9 +360,9 @@ def test_cutmix_batch_fail5():
images_cutmix = np.array([])
for idx, (image, _) in enumerate(data1):
if idx == 0:
images_cutmix = image
images_cutmix = image.asnumpy()
else:
images_cutmix = np.append(images_cutmix, image, axis=0)
images_cutmix = np.append(images_cutmix, image.asnumpy(), axis=0)
error_message = "Both images and labels columns are required"
assert error_message in str(error.value)

@@ -387,9 +387,9 @@ def test_cutmix_batch_fail6():
images_cutmix = np.array([])
for idx, (image, _) in enumerate(data1):
if idx == 0:
images_cutmix = image
images_cutmix = image.asnumpy()
else:
images_cutmix = np.append(images_cutmix, image, axis=0)
images_cutmix = np.append(images_cutmix, image.asnumpy(), axis=0)
error_message = "CutMixBatch: Image doesn't match the given image format."
assert error_message in str(error.value)

@@ -412,9 +412,9 @@ def test_cutmix_batch_fail7():
images_cutmix = np.array([])
for idx, (image, _) in enumerate(data1):
if idx == 0:
images_cutmix = image
images_cutmix = image.asnumpy()
else:
images_cutmix = np.append(images_cutmix, image, axis=0)
images_cutmix = np.append(images_cutmix, image.asnumpy(), axis=0)
error_message = "CutMixBatch: Wrong labels shape. The second column (labels) must have a shape of NC or NLC"
assert error_message in str(error.value)
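
For tests that iterate the dataset object directly, as the CutMixBatch cases above do, every column now arrives as a mindspore Tensor, so anything handed to NumPy needs an explicit .asnumpy(). A minimal sketch; the placeholder arrays and batch size below stand in for the CIFAR/ImageFolder data used by the real tests:

import numpy as np
import mindspore.dataset as ds

data = ds.NumpySlicesDataset({"image": np.zeros((8, 4, 4, 3), dtype=np.uint8),
                              "label": np.arange(8, dtype=np.int32)},
                             shuffle=False)
data = data.batch(4)

images = None
for idx, (image, _) in enumerate(data):  # image is a mindspore Tensor
    if idx == 0:
        images = image.asnumpy()
    else:
        images = np.append(images, image.asnumpy(), axis=0)

assert images.shape == (8, 4, 4, 3)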




+ 17
- 17
tests/ut/python/dataset/test_dataset_numpy_slices.py

@@ -28,7 +28,7 @@ def test_numpy_slices_list_1():
ds = de.NumpySlicesDataset(np_data, shuffle=False)

for i, data in enumerate(ds):
assert data[0] == np_data[i]
assert data[0].asnumpy() == np_data[i]

def test_numpy_slices_list_2():
@@ -38,7 +38,7 @@ def test_numpy_slices_list_2():
ds = de.NumpySlicesDataset(np_data, column_names=["col1"], shuffle=False)

for i, data in enumerate(ds):
assert np.equal(data[0], np_data[i]).all()
assert np.equal(data[0].asnumpy(), np_data[i]).all()

def test_numpy_slices_list_3():
@@ -48,7 +48,7 @@ def test_numpy_slices_list_3():
ds = de.NumpySlicesDataset(np_data, column_names=["col1"], shuffle=False)

for i, data in enumerate(ds):
assert np.equal(data[0], np_data[i]).all()
assert np.equal(data[0].asnumpy(), np_data[i]).all()

def test_numpy_slices_list_append():
@@ -62,12 +62,12 @@ def test_numpy_slices_list_append():
data1 = data1.map(operations=[vision.Decode(True), resize_op], input_columns=["image"])

res = []
for data in data1.create_dict_iterator(num_epochs=1):
for data in data1.create_dict_iterator(num_epochs=1, output_numpy=True):
res.append(data["image"])

ds = de.NumpySlicesDataset(res, column_names=["col1"], shuffle=False)

for i, data in enumerate(ds):
for i, data in enumerate(ds.create_tuple_iterator(output_numpy=True)):
assert np.equal(data, res[i]).all()

@@ -79,8 +79,8 @@ def test_numpy_slices_dict_1():
res = [[1, 3], [2, 4]]

for i, data in enumerate(ds):
assert data[0] == res[i][0]
assert data[1] == res[i][1]
assert data[0].asnumpy() == res[i][0]
assert data[1].asnumpy() == res[i][1]

def test_numpy_slices_tuple_1():
@@ -89,7 +89,7 @@ def test_numpy_slices_tuple_1():
np_data = [([1, 2], [3, 4]), ([11, 12], [13, 14]), ([21, 22], [23, 24])]
ds = de.NumpySlicesDataset(np_data, shuffle=False)

for i, data in enumerate(ds):
for i, data in enumerate(ds.create_tuple_iterator(output_numpy=True)):
assert np.equal(data, np_data[i]).all()

assert sum([1 for _ in ds]) == 3
@@ -102,7 +102,7 @@ def test_numpy_slices_tuple_2():
expected = [[1, 3, 5], [2, 4, 6]]
ds = de.NumpySlicesDataset(np_data, shuffle=False)

for i, data in enumerate(ds):
for i, data in enumerate(ds.create_tuple_iterator(output_numpy=True)):
assert np.equal(data, expected[i]).all()

assert sum([1 for _ in ds]) == 2
@@ -116,8 +116,8 @@ def test_numpy_slices_tuple_3():
ds = de.NumpySlicesDataset(data, column_names=["col1", "col2"], shuffle=False)

for i, data in enumerate(ds):
assert np.equal(data[0], features[i]).all()
assert data[1] == labels[i]
assert np.equal(data[0].asnumpy(), features[i]).all()
assert data[1].asnumpy() == labels[i]

def test_numpy_slices_csv_value():
@@ -132,8 +132,8 @@ def test_numpy_slices_csv_value():
ds = de.NumpySlicesDataset(np_data, column_names=["col1", "col2"], shuffle=False)

for i, data in enumerate(ds):
assert np.equal(np_data[0][i], data[0]).all()
assert np.equal(np_data[1][i], data[1]).all()
assert np.equal(np_data[0][i], data[0].asnumpy()).all()
assert np.equal(np_data[1][i], data[1].asnumpy()).all()

def test_numpy_slices_csv_dict():
@@ -146,7 +146,7 @@ def test_numpy_slices_csv_dict():

ds = de.NumpySlicesDataset(dict(df), shuffle=False)

for i, data in enumerate(ds):
for i, data in enumerate(ds.create_tuple_iterator(output_numpy=True)):
assert np.equal(data, res[i]).all()

@@ -157,7 +157,7 @@ def test_numpy_slices_num_samplers():
ds = de.NumpySlicesDataset(np_data, shuffle=False, num_samples=2)

for i, data in enumerate(ds):
assert np.equal(data[0], np_data[i]).all()
assert np.equal(data[0].asnumpy(), np_data[i]).all()

assert sum([1 for _ in ds]) == 2

@@ -169,7 +169,7 @@ def test_numpy_slices_distributed_sampler():
ds = de.NumpySlicesDataset(np_data, shuffle=False, shard_id=0, num_shards=4)

for i, data in enumerate(ds):
assert np.equal(data[0], np_data[i * 4]).all()
assert np.equal(data[0].asnumpy(), np_data[i * 4]).all()

assert sum([1 for _ in ds]) == 2

@@ -200,7 +200,7 @@ def test_numpy_slices_sequential_sampler():
ds = de.NumpySlicesDataset(np_data, sampler=de.SequentialSampler()).repeat(2)

for i, data in enumerate(ds):
assert np.equal(data[0], np_data[i % 8]).all()
assert np.equal(data[0].asnumpy(), np_data[i % 8]).all()

def test_numpy_slices_invalid_column_names_type():


+ 2
- 2
tests/ut/python/dataset/test_datasets_celeba.py

@@ -27,7 +27,7 @@ def test_celeba_dataset_label():
[0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
0, 0, 1]]
count = 0
for item in data.create_dict_iterator(num_epochs=1):
for item in data.create_dict_iterator(num_epochs=1, output_numpy=True):
logger.info("----------image--------")
logger.info(item["image"])
logger.info("----------attr--------")
@@ -63,7 +63,7 @@ def test_celeba_dataset_ext():
expect_labels = [0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1,
0, 1, 0, 1, 0, 0, 1],
count = 0
for item in data.create_dict_iterator(num_epochs=1):
for item in data.create_dict_iterator(num_epochs=1, output_numpy=True):
logger.info("----------image--------")
logger.info(item["image"])
logger.info("----------attr--------")


+ 8
- 7
tests/ut/python/dataset/test_datasets_cifarop.py

@@ -75,7 +75,7 @@ def test_cifar10_content_check():
images, labels = load_cifar(DATA_DIR_10)
num_iter = 0
# in this example, each dictionary has keys "image" and "label"
for i, d in enumerate(data1.create_dict_iterator(num_epochs=1)):
for i, d in enumerate(data1.create_dict_iterator(num_epochs=1, output_numpy=True)):
np.testing.assert_array_equal(d["image"], images[i])
np.testing.assert_array_equal(d["label"], labels[i])
num_iter += 1
@@ -153,7 +153,7 @@ def test_cifar10_pk_sampler():
data = ds.Cifar10Dataset(DATA_DIR_10, sampler=sampler)
num_iter = 0
label_list = []
for item in data.create_dict_iterator(num_epochs=1):
for item in data.create_dict_iterator(num_epochs=1, output_numpy=True):
label_list.append(item["label"])
num_iter += 1
np.testing.assert_array_equal(golden, label_list)
@@ -170,7 +170,8 @@ def test_cifar10_sequential_sampler():
data1 = ds.Cifar10Dataset(DATA_DIR_10, sampler=sampler)
data2 = ds.Cifar10Dataset(DATA_DIR_10, shuffle=False, num_samples=num_samples)
num_iter = 0
for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)):
for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True),
data2.create_dict_iterator(num_epochs=1, output_numpy=True)):
np.testing.assert_equal(item1["label"], item2["label"])
num_iter += 1
assert num_iter == num_samples
@@ -225,7 +226,7 @@ def test_cifar10_visualize(plot=False):
data1 = ds.Cifar10Dataset(DATA_DIR_10, num_samples=10, shuffle=False)
num_iter = 0
image_list, label_list = [], []
for item in data1.create_dict_iterator(num_epochs=1):
for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True):
image = item["image"]
label = item["label"]
image_list.append(image)
@@ -251,7 +252,7 @@ def test_cifar100_content_check():
images, labels = load_cifar(DATA_DIR_100, kind="cifar100")
num_iter = 0
# in this example, each dictionary has keys "image", "coarse_label" and "fine_image"
for i, d in enumerate(data1.create_dict_iterator(num_epochs=1)):
for i, d in enumerate(data1.create_dict_iterator(num_epochs=1, output_numpy=True)):
np.testing.assert_array_equal(d["image"], images[i])
np.testing.assert_array_equal(d["coarse_label"], labels[i][0])
np.testing.assert_array_equal(d["fine_label"], labels[i][1])
@@ -319,7 +320,7 @@ def test_cifar100_pk_sampler():
data = ds.Cifar100Dataset(DATA_DIR_100, sampler=sampler)
num_iter = 0
label_list = []
for item in data.create_dict_iterator(num_epochs=1):
for item in data.create_dict_iterator(num_epochs=1, output_numpy=True):
label_list.append(item["coarse_label"])
num_iter += 1
np.testing.assert_array_equal(golden, label_list)
@@ -375,7 +376,7 @@ def test_cifar100_visualize(plot=False):
data1 = ds.Cifar100Dataset(DATA_DIR_100, num_samples=10, shuffle=False)
num_iter = 0
image_list, label_list = [], []
for item in data1.create_dict_iterator(num_epochs=1):
for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True):
image = item["image"]
coarse_label = item["coarse_label"]
fine_label = item["fine_label"]


+ 21
- 21
tests/ut/python/dataset/test_datasets_clue.py

@@ -26,7 +26,7 @@ def test_clue():
data = ds.CLUEDataset(TRAIN_FILE, task='AFQMC', usage='train', shuffle=False)
data = data.repeat(2)
data = data.skip(3)
for d in data.create_dict_iterator(num_epochs=1):
for d in data.create_dict_iterator(num_epochs=1, output_numpy=True):
buffer.append({
'label': d['label'].item().decode("utf8"),
'sentence1': d['sentence1'].item().decode("utf8"),
@@ -43,7 +43,7 @@ def test_clue_num_shards():

buffer = []
data = ds.CLUEDataset(TRAIN_FILE, task='AFQMC', usage='train', num_shards=3, shard_id=1)
for d in data.create_dict_iterator(num_epochs=1):
for d in data.create_dict_iterator(num_epochs=1, output_numpy=True):
buffer.append({
'label': d['label'].item().decode("utf8"),
'sentence1': d['sentence1'].item().decode("utf8"),
@@ -60,7 +60,7 @@ def test_clue_num_samples():

data = ds.CLUEDataset(TRAIN_FILE, task='AFQMC', usage='train', num_samples=2)
count = 0
for _ in data.create_dict_iterator(num_epochs=1):
for _ in data.create_dict_iterator(num_epochs=1, output_numpy=True):
count += 1
assert count == 2

@@ -87,7 +87,7 @@ def test_clue_afqmc():
# train
buffer = []
data = ds.CLUEDataset(TRAIN_FILE, task='AFQMC', usage='train', shuffle=False)
for d in data.create_dict_iterator(num_epochs=1):
for d in data.create_dict_iterator(num_epochs=1, output_numpy=True):
buffer.append({
'label': d['label'].item().decode("utf8"),
'sentence1': d['sentence1'].item().decode("utf8"),
@@ -98,7 +98,7 @@ def test_clue_afqmc():
# test
buffer = []
data = ds.CLUEDataset(TEST_FILE, task='AFQMC', usage='test', shuffle=False)
for d in data.create_dict_iterator(num_epochs=1):
for d in data.create_dict_iterator(num_epochs=1, output_numpy=True):
buffer.append({
'id': d['id'],
'sentence1': d['sentence1'].item().decode("utf8"),
@@ -109,7 +109,7 @@ def test_clue_afqmc():
# evaluation
buffer = []
data = ds.CLUEDataset(EVAL_FILE, task='AFQMC', usage='eval', shuffle=False)
for d in data.create_dict_iterator(num_epochs=1):
for d in data.create_dict_iterator(num_epochs=1, output_numpy=True):
buffer.append({
'label': d['label'].item().decode("utf8"),
'sentence1': d['sentence1'].item().decode("utf8"),
@@ -129,7 +129,7 @@ def test_clue_cmnli():
# train
buffer = []
data = ds.CLUEDataset(TRAIN_FILE, task='CMNLI', usage='train', shuffle=False)
for d in data.create_dict_iterator(num_epochs=1):
for d in data.create_dict_iterator(num_epochs=1, output_numpy=True):
buffer.append({
'label': d['label'].item().decode("utf8"),
'sentence1': d['sentence1'].item().decode("utf8"),
@@ -140,7 +140,7 @@ def test_clue_cmnli():
# test
buffer = []
data = ds.CLUEDataset(TEST_FILE, task='CMNLI', usage='test', shuffle=False)
for d in data.create_dict_iterator(num_epochs=1):
for d in data.create_dict_iterator(num_epochs=1, output_numpy=True):
buffer.append({
'id': d['id'],
'sentence1': d['sentence1'],
@@ -151,7 +151,7 @@ def test_clue_cmnli():
# eval
buffer = []
data = ds.CLUEDataset(EVAL_FILE, task='CMNLI', usage='eval', shuffle=False)
for d in data.create_dict_iterator(num_epochs=1):
for d in data.create_dict_iterator(num_epochs=1, output_numpy=True):
buffer.append({
'label': d['label'],
'sentence1': d['sentence1'],
@@ -171,7 +171,7 @@ def test_clue_csl():
# train
buffer = []
data = ds.CLUEDataset(TRAIN_FILE, task='CSL', usage='train', shuffle=False)
for d in data.create_dict_iterator(num_epochs=1):
for d in data.create_dict_iterator(num_epochs=1, output_numpy=True):
buffer.append({
'id': d['id'],
'abst': d['abst'].item().decode("utf8"),
@@ -183,7 +183,7 @@ def test_clue_csl():
# test
buffer = []
data = ds.CLUEDataset(TEST_FILE, task='CSL', usage='test', shuffle=False)
for d in data.create_dict_iterator(num_epochs=1):
for d in data.create_dict_iterator(num_epochs=1, output_numpy=True):
buffer.append({
'id': d['id'],
'abst': d['abst'].item().decode("utf8"),
@@ -194,7 +194,7 @@ def test_clue_csl():
# eval
buffer = []
data = ds.CLUEDataset(EVAL_FILE, task='CSL', usage='eval', shuffle=False)
for d in data.create_dict_iterator(num_epochs=1):
for d in data.create_dict_iterator(num_epochs=1, output_numpy=True):
buffer.append({
'id': d['id'],
'abst': d['abst'].item().decode("utf8"),
@@ -215,7 +215,7 @@ def test_clue_iflytek():
# train
buffer = []
data = ds.CLUEDataset(TRAIN_FILE, task='IFLYTEK', usage='train', shuffle=False)
for d in data.create_dict_iterator(num_epochs=1):
for d in data.create_dict_iterator(num_epochs=1, output_numpy=True):
buffer.append({
'label': d['label'].item().decode("utf8"),
'label_des': d['label_des'].item().decode("utf8"),
@@ -226,7 +226,7 @@ def test_clue_iflytek():
# test
buffer = []
data = ds.CLUEDataset(TEST_FILE, task='IFLYTEK', usage='test', shuffle=False)
for d in data.create_dict_iterator(num_epochs=1):
for d in data.create_dict_iterator(num_epochs=1, output_numpy=True):
buffer.append({
'id': d['id'],
'sentence': d['sentence'].item().decode("utf8")
@@ -236,7 +236,7 @@ def test_clue_iflytek():
# eval
buffer = []
data = ds.CLUEDataset(EVAL_FILE, task='IFLYTEK', usage='eval', shuffle=False)
for d in data.create_dict_iterator(num_epochs=1):
for d in data.create_dict_iterator(num_epochs=1, output_numpy=True):
buffer.append({
'label': d['label'].item().decode("utf8"),
'label_des': d['label_des'].item().decode("utf8"),
@@ -256,7 +256,7 @@ def test_clue_tnews():
# train
buffer = []
data = ds.CLUEDataset(TRAIN_FILE, task='TNEWS', usage='train', shuffle=False)
for d in data.create_dict_iterator(num_epochs=1):
for d in data.create_dict_iterator(num_epochs=1, output_numpy=True):
buffer.append({
'label': d['label'].item().decode("utf8"),
'label_desc': d['label_desc'].item().decode("utf8"),
@@ -269,7 +269,7 @@ def test_clue_tnews():
# test
buffer = []
data = ds.CLUEDataset(TEST_FILE, task='TNEWS', usage='test', shuffle=False)
for d in data.create_dict_iterator(num_epochs=1):
for d in data.create_dict_iterator(num_epochs=1, output_numpy=True):
buffer.append({
'id': d['id'],
'sentence': d['sentence'].item().decode("utf8"),
@@ -281,7 +281,7 @@ def test_clue_tnews():
# eval
buffer = []
data = ds.CLUEDataset(EVAL_FILE, task='TNEWS', usage='eval', shuffle=False)
for d in data.create_dict_iterator(num_epochs=1):
for d in data.create_dict_iterator(num_epochs=1, output_numpy=True):
buffer.append({
'label': d['label'].item().decode("utf8"),
'label_desc': d['label_desc'].item().decode("utf8"),
@@ -303,7 +303,7 @@ def test_clue_wsc():
# train
buffer = []
data = ds.CLUEDataset(TRAIN_FILE, task='WSC', usage='train')
for d in data.create_dict_iterator(num_epochs=1):
for d in data.create_dict_iterator(num_epochs=1, output_numpy=True):
buffer.append({
'span1_index': d['span1_index'],
'span2_index': d['span2_index'],
@@ -318,7 +318,7 @@ def test_clue_wsc():
# test
buffer = []
data = ds.CLUEDataset(TEST_FILE, task='WSC', usage='test')
for d in data.create_dict_iterator(num_epochs=1):
for d in data.create_dict_iterator(num_epochs=1, output_numpy=True):
buffer.append({
'span1_index': d['span1_index'],
'span2_index': d['span2_index'],
@@ -332,7 +332,7 @@ def test_clue_wsc():
# eval
buffer = []
data = ds.CLUEDataset(EVAL_FILE, task='WSC', usage='eval')
for d in data.create_dict_iterator(num_epochs=1):
for d in data.create_dict_iterator(num_epochs=1, output_numpy=True):
buffer.append({
'span1_index': d['span1_index'],
'span2_index': d['span2_index'],


+ 4
- 4
tests/ut/python/dataset/test_datasets_coco.py

@@ -33,7 +33,7 @@ def test_coco_detection():
image_shape = []
bbox = []
category_id = []
for data in data1.create_dict_iterator(num_epochs=1):
for data in data1.create_dict_iterator(num_epochs=1, output_numpy=True):
image_shape.append(data["image"].shape)
bbox.append(data["bbox"])
category_id.append(data["category_id"])
@@ -66,7 +66,7 @@ def test_coco_stuff():
image_shape = []
segmentation = []
iscrowd = []
for data in data1.create_dict_iterator(num_epochs=1):
for data in data1.create_dict_iterator(num_epochs=1, output_numpy=True):
image_shape.append(data["image"].shape)
segmentation.append(data["segmentation"])
iscrowd.append(data["iscrowd"])
@@ -107,7 +107,7 @@ def test_coco_keypoint():
image_shape = []
keypoints = []
num_keypoints = []
for data in data1.create_dict_iterator(num_epochs=1):
for data in data1.create_dict_iterator(num_epochs=1, output_numpy=True):
image_shape.append(data["image"].shape)
keypoints.append(data["keypoints"])
num_keypoints.append(data["num_keypoints"])
@@ -136,7 +136,7 @@ def test_coco_panoptic():
category_id = []
iscrowd = []
area = []
for data in data1.create_dict_iterator(num_epochs=1):
for data in data1.create_dict_iterator(num_epochs=1, output_numpy=True):
image_shape.append(data["image"].shape)
bbox.append(data["bbox"])
category_id.append(data["category_id"])


+ 13
- 13
tests/ut/python/dataset/test_datasets_csv.py

@@ -33,7 +33,7 @@ def test_csv_dataset_basic():
shuffle=False)
data = data.repeat(2)
data = data.skip(2)
for d in data.create_dict_iterator(num_epochs=1):
for d in data.create_dict_iterator(num_epochs=1, output_numpy=True):
buffer.append(d)
assert len(buffer) == 4

@@ -45,7 +45,7 @@ def test_csv_dataset_one_file():
column_names=['col1', 'col2', 'col3', 'col4'],
shuffle=False)
buffer = []
for d in data.create_dict_iterator(num_epochs=1):
for d in data.create_dict_iterator(num_epochs=1, output_numpy=True):
buffer.append(d)
assert len(buffer) == 3

@@ -58,7 +58,7 @@ def test_csv_dataset_all_file():
column_names=['col1', 'col2', 'col3', 'col4'],
shuffle=False)
buffer = []
for d in data.create_dict_iterator(num_epochs=1):
for d in data.create_dict_iterator(num_epochs=1, output_numpy=True):
buffer.append(d)
assert len(buffer) == 10

@@ -70,7 +70,7 @@ def test_csv_dataset_num_samples():
column_names=['col1', 'col2', 'col3', 'col4'],
shuffle=False, num_samples=2)
count = 0
for _ in data.create_dict_iterator(num_epochs=1):
for _ in data.create_dict_iterator(num_epochs=1, output_numpy=True):
count += 1
assert count == 2

@@ -83,7 +83,7 @@ def test_csv_dataset_distribution():
column_names=['col1', 'col2', 'col3', 'col4'],
shuffle=False, num_shards=2, shard_id=0)
count = 0
for _ in data.create_dict_iterator(num_epochs=1):
for _ in data.create_dict_iterator(num_epochs=1, output_numpy=True):
count += 1
assert count == 2

@@ -96,7 +96,7 @@ def test_csv_dataset_quoted():
column_names=['col1', 'col2', 'col3', 'col4'],
shuffle=False)
buffer = []
for d in data.create_dict_iterator(num_epochs=1):
for d in data.create_dict_iterator(num_epochs=1, output_numpy=True):
buffer.extend([d['col1'].item().decode("utf8"),
d['col2'].item().decode("utf8"),
d['col3'].item().decode("utf8"),
@@ -113,7 +113,7 @@ def test_csv_dataset_separated():
column_names=['col1', 'col2', 'col3', 'col4'],
shuffle=False)
buffer = []
for d in data.create_dict_iterator(num_epochs=1):
for d in data.create_dict_iterator(num_epochs=1, output_numpy=True):
buffer.extend([d['col1'].item().decode("utf8"),
d['col2'].item().decode("utf8"),
d['col3'].item().decode("utf8"),
@@ -129,7 +129,7 @@ def test_csv_dataset_embedded():
column_names=['col1', 'col2', 'col3', 'col4'],
shuffle=False)
buffer = []
for d in data.create_dict_iterator(num_epochs=1):
for d in data.create_dict_iterator(num_epochs=1, output_numpy=True):
buffer.extend([d['col1'].item().decode("utf8"),
d['col2'].item().decode("utf8"),
d['col3'].item().decode("utf8"),
@@ -145,7 +145,7 @@ def test_csv_dataset_chinese():
column_names=['col1', 'col2', 'col3', 'col4', 'col5'],
shuffle=False)
buffer = []
for d in data.create_dict_iterator(num_epochs=1):
for d in data.create_dict_iterator(num_epochs=1, output_numpy=True):
buffer.extend([d['col1'].item().decode("utf8"),
d['col2'].item().decode("utf8"),
d['col3'].item().decode("utf8"),
@@ -161,7 +161,7 @@ def test_csv_dataset_header():
column_defaults=["", "", "", ""],
shuffle=False)
buffer = []
for d in data.create_dict_iterator(num_epochs=1):
for d in data.create_dict_iterator(num_epochs=1, output_numpy=True):
buffer.extend([d['col1'].item().decode("utf8"),
d['col2'].item().decode("utf8"),
d['col3'].item().decode("utf8"),
@@ -177,7 +177,7 @@ def test_csv_dataset_number():
column_names=['col1', 'col2', 'col3', 'col4'],
shuffle=False)
buffer = []
for d in data.create_dict_iterator(num_epochs=1):
for d in data.create_dict_iterator(num_epochs=1, output_numpy=True):
buffer.extend([d['col1'].item(),
d['col2'].item(),
d['col3'].item(),
@@ -203,7 +203,7 @@ def test_csv_dataset_exception():
column_names=['col1', 'col2', 'col3', 'col4'],
shuffle=False)
with pytest.raises(Exception) as err:
for _ in data.create_dict_iterator(num_epochs=1):
for _ in data.create_dict_iterator(num_epochs=1, output_numpy=True):
pass
assert "Failed to parse file" in str(err.value)

@@ -216,7 +216,7 @@ def test_csv_dataset_type_error():
column_names=['col1', 'col2', 'col3', 'col4'],
shuffle=False)
with pytest.raises(Exception) as err:
for _ in data.create_dict_iterator(num_epochs=1):
for _ in data.create_dict_iterator(num_epochs=1, output_numpy=True):
pass
assert "type does not match" in str(err.value)
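
String columns follow the same rule: with output_numpy=True each cell comes back as a NumPy scalar holding bytes, which is why the CSV and CLUE tests above keep their .item().decode("utf8") calls. Sketch only; the file name and column names below are placeholders, not files from the repository:

import mindspore.dataset as ds

data = ds.CSVDataset("example.csv",
                     column_names=['col1', 'col2', 'col3', 'col4'],
                     shuffle=False)

for d in data.create_dict_iterator(num_epochs=1, output_numpy=True):
    text = d['col1'].item().decode("utf8")  # bytes to str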




+ 27
- 27
tests/ut/python/dataset/test_datasets_generator.py View File

@@ -47,7 +47,7 @@ def test_generator_0():
data1 = ds.GeneratorDataset(generator_1d, ["data"]) data1 = ds.GeneratorDataset(generator_1d, ["data"])


i = 0 i = 0
for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary
for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary
        golden = np.array([i])
        np.testing.assert_array_equal(item["data"], golden)
        i = i + 1
@@ -69,7 +69,7 @@ def test_generator_1():
    data1 = ds.GeneratorDataset(generator_md, ["data"])

    i = 0
-    for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary
+    for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary
        golden = np.array([[i, i + 1], [i + 2, i + 3]])
        np.testing.assert_array_equal(item["data"], golden)
        i = i + 1
@@ -91,7 +91,7 @@ def test_generator_2():
    data1 = ds.GeneratorDataset(generator_mc, ["col0", "col1"])

    i = 0
-    for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary
+    for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary
        golden = np.array([i])
        np.testing.assert_array_equal(item["col0"], golden)
        golden = np.array([[i, i + 1], [i + 2, i + 3]])
@@ -111,7 +111,7 @@ def test_generator_3():
    data1 = data1.repeat(4)

    i = 0
-    for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary
+    for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary
        golden = np.array([i])
        np.testing.assert_array_equal(item["data"], golden)
        i = i + 1
@@ -131,7 +131,7 @@ def test_generator_4():
    data1 = data1.batch(4)

    i = 0
-    for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary
+    for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary
        golden = np.array([[i], [i + 1], [i + 2], [i + 3]])
        np.testing.assert_array_equal(item["data"], golden)
        i = i + 4
@@ -151,7 +151,7 @@ def type_tester(t):
    data1 = data1.batch(4)

    i = 0
-    for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary
+    for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary
        golden = np.array([[i], [i + 1], [i + 2], [i + 3]], dtype=t)
        np.testing.assert_array_equal(item["data"], golden)
        i = i + 4
@@ -178,7 +178,7 @@ def type_tester_with_type_check(t, c):
    data1 = data1.batch(4)

    i = 0
-    for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary
+    for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary
        golden = np.array([[i], [i + 1], [i + 2], [i + 3]], dtype=t)
        np.testing.assert_array_equal(item["data"], golden)
        i = i + 4
@@ -213,7 +213,7 @@ def type_tester_with_type_check_2c(t, c):
    data1 = data1.batch(4)

    i = 0
-    for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary
+    for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary
        golden = np.array([[i], [i + 1], [i + 2], [i + 3]], dtype=t)
        np.testing.assert_array_equal(item["data0"], golden)
        i = i + 4
@@ -250,7 +250,7 @@ def test_generator_8():
                            num_parallel_workers=2)

    i = 0
-    for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary
+    for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary
        golden = np.array([i * 3])
        np.testing.assert_array_equal(item["out0"], golden)
        golden = np.array([[i * 7, (i + 1) * 7], [(i + 2) * 7, (i + 3) * 7]])
@@ -280,14 +280,14 @@ def test_generator_9():
    i = 0
    for data1, data2 in zip(data1, data2): # each data is a dictionary
        golden = np.array([i])
-        np.testing.assert_array_equal(data1[0], golden)
+        np.testing.assert_array_equal(data1[0].asnumpy(), golden)
        golden = np.array([[i * 3, (i + 1) * 3], [(i + 2) * 3, (i + 3) * 3]])
-        np.testing.assert_array_equal(data1[1], golden)
+        np.testing.assert_array_equal(data1[1].asnumpy(), golden)

        golden = np.array([i * 3])
-        np.testing.assert_array_equal(data2[0], golden)
+        np.testing.assert_array_equal(data2[0].asnumpy(), golden)
        golden = np.array([[i, i + 1], [i + 2, i + 3]])
-        np.testing.assert_array_equal(data2[1], golden)
+        np.testing.assert_array_equal(data2[1].asnumpy(), golden)
        i = i + 1

@@ -304,7 +304,7 @@ def test_generator_10():

    # Expected column order is |col0|out1|out2|
    i = 0
-    for item in data1.create_tuple_iterator(num_epochs=1):
+    for item in data1.create_tuple_iterator(num_epochs=1, output_numpy=True):
        golden = np.array([i])
        np.testing.assert_array_equal(item[0], golden)
        golden = np.array([[i, i + 1], [i + 2, i + 3]])
@@ -328,7 +328,7 @@ def test_generator_11():

    # Expected column order is |out1|out2|
    i = 0
-    for item in data1.create_tuple_iterator(num_epochs=1):
+    for item in data1.create_tuple_iterator(num_epochs=1, output_numpy=True):
        # len should be 2 because col0 is dropped (not included in column_order)
        assert len(item) == 2
        golden = np.array([[i, i + 1], [i + 2, i + 3]])
@@ -350,7 +350,7 @@ def test_generator_12():

    # Expected column order is |col0|col1|
    i = 0
-    for item in data1.create_tuple_iterator(num_epochs=1):
+    for item in data1.create_tuple_iterator(num_epochs=1, output_numpy=True):
        assert len(item) == 2
        golden = np.array([i * 5])
        np.testing.assert_array_equal(item[0], golden)
@@ -363,7 +363,7 @@ def test_generator_12():

    # Expected column order is |col0|col1|
    i = 0
-    for item in data1.create_tuple_iterator(num_epochs=1):
+    for item in data1.create_tuple_iterator(num_epochs=1, output_numpy=True):
        assert len(item) == 2
        golden = np.array([i * 5])
        np.testing.assert_array_equal(item[1], golden)
@@ -384,7 +384,7 @@ def test_generator_13():

    # Expected column order is |out0|col1|
    i = 0
-    for item in data1.create_tuple_iterator(num_epochs=1):
+    for item in data1.create_tuple_iterator(num_epochs=1, output_numpy=True):
        assert len(item) == 2
        golden = np.array([i * 5])
        np.testing.assert_array_equal(item[0], golden)
@@ -392,7 +392,7 @@ def test_generator_13():
        np.testing.assert_array_equal(item[1], golden)
        i = i + 1

-    for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary
+    for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary
        # len should be 2 because col0 is dropped (not included in column_order)
        assert len(item) == 2
        golden = np.array([i * 5])
@@ -411,7 +411,7 @@ def test_generator_14():
    source = [(np.array([x]),) for x in range(256)]
    ds1 = ds.GeneratorDataset(source, ["data"], sampler=ds.SequentialSampler(), num_parallel_workers=4).repeat(2)
    i = 0
-    for data in ds1.create_dict_iterator(num_epochs=1): # each data is a dictionary
+    for data in ds1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary
        golden = np.array([i])
        np.testing.assert_array_equal(data["data"], golden)
        i = i + 1
@@ -429,7 +429,7 @@ def test_generator_15():
    source = [(np.array([x]),) for x in range(256)]
    ds1 = ds.GeneratorDataset(source, ["data"], sampler=sampler, num_parallel_workers=4).repeat(2)
    i = 0
-    for data in ds1.create_dict_iterator(num_epochs=1): # each data is a dictionary
+    for data in ds1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary
        golden = np.array([i])
        np.testing.assert_array_equal(data["data"], golden)
        i = i + 1
@@ -448,7 +448,7 @@ def test_generator_16():
    data1 = ds.GeneratorDataset(source, ["col0", "col1"], sampler=ds.SequentialSampler())

    i = 0
-    for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary
+    for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary
        golden = np.array([i])
        np.testing.assert_array_equal(item["col0"], golden)
        golden = np.array([i + 1])
@@ -468,7 +468,7 @@ def test_generator_17():
    data1 = ds.GeneratorDataset(source, ["col0", "col1"], sampler=sampler)

    i = 0
-    for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary
+    for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary
        golden = np.array([i])
        np.testing.assert_array_equal(item["col0"], golden)
        golden = np.array([i + 1])
@@ -528,7 +528,7 @@ def test_generator_sequential_sampler():
    source = [(np.array([x]),) for x in range(64)]
    ds1 = ds.GeneratorDataset(source, ["data"], sampler=ds.SequentialSampler())
    i = 0
-    for data in ds1.create_dict_iterator(num_epochs=1): # each data is a dictionary
+    for data in ds1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary
        golden = np.array([i])
        np.testing.assert_array_equal(data["data"], golden)
        i = i + 1
@@ -546,7 +546,7 @@ def test_generator_distributed_sampler():
    for sid in range(8):
        ds1 = ds.GeneratorDataset(source, ["data"], shuffle=False, num_shards=8, shard_id=sid)
        i = sid
-        for data in ds1.create_dict_iterator(num_epochs=1): # each data is a dictionary
+        for data in ds1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary
            golden = np.array([i])
            np.testing.assert_array_equal(data["data"], golden)
            i = i + 8
@@ -605,7 +605,7 @@ def type_tester_with_type_check_2c_schema(t, c):
    data1 = data1.batch(4)

    i = 0
-    for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary
+    for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary
        golden = np.array([[i], [i + 1], [i + 2], [i + 3]], dtype=t)
        np.testing.assert_array_equal(item["data0"], golden)
        i = i + 4
@@ -636,7 +636,7 @@ def test_generator_dataset_size_0():
    data_size = data1.get_dataset_size()

    num_rows = 0
-    for _ in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary
+    for _ in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary
        num_rows = num_rows + 1
    assert data_size == num_rows
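
The pattern throughout these test changes is the same: the iterators now yield mindspore.Tensor by default, so a test either asks for NumPy output up front or converts per value. A minimal sketch of the two modes (not part of the patch; it assumes only a trivial generator):

import numpy as np
import mindspore.dataset as ds

def gen():
    for k in range(3):
        yield (np.array([k]),)

data = ds.GeneratorDataset(gen, ["data"])

# Default mode: each column is a mindspore.Tensor, so convert before NumPy asserts.
for k, row in enumerate(data.create_dict_iterator(num_epochs=1)):
    np.testing.assert_array_equal(row["data"].asnumpy(), np.array([k]))

# output_numpy=True: each column is already a numpy.ndarray.
for k, row in enumerate(data.create_dict_iterator(num_epochs=1, output_numpy=True)):
    np.testing.assert_array_equal(row["data"], np.array([k]))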




+3 -3  tests/ut/python/dataset/test_datasets_imagefolder.py

@@ -171,7 +171,7 @@ def test_imagefolder_classindex():
               333, 333, 333, 333, 333, 333, 333, 333, 333, 333, 333]

    num_iter = 0
-    for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary
+    for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary
        # in this example, each dictionary has keys "image" and "label"
        logger.info("image is {}".format(item["image"]))
        logger.info("label is {}".format(item["label"]))
@@ -196,7 +196,7 @@ def test_imagefolder_negative_classindex():
               -333, -333, -333, -333, -333, -333, -333, -333, -333, -333, -333]

    num_iter = 0
-    for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary
+    for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary
        # in this example, each dictionary has keys "image" and "label"
        logger.info("image is {}".format(item["image"]))
        logger.info("label is {}".format(item["label"]))
@@ -267,7 +267,7 @@ def test_sequential_sampler():

    result = []
    num_iter = 0
-    for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary
+    for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary
        # in this example, each dictionary has keys "image" and "label"
        result.append(item["label"])
        num_iter += 1


+5 -5  tests/ut/python/dataset/test_datasets_manifestop.py

@@ -26,7 +26,7 @@ def test_manifest_dataset_train():
    count = 0
    cat_count = 0
    dog_count = 0
-    for item in data.create_dict_iterator(num_epochs=1):
+    for item in data.create_dict_iterator(num_epochs=1, output_numpy=True):
        logger.info("item[image] is {}".format(item["image"]))
        count = count + 1
        if item["label"].size == 1 and item["label"] == 0:
@@ -41,7 +41,7 @@ def test_manifest_dataset_train():
def test_manifest_dataset_eval():
    data = ds.ManifestDataset(DATA_FILE, "eval", decode=True)
    count = 0
-    for item in data.create_dict_iterator(num_epochs=1):
+    for item in data.create_dict_iterator(num_epochs=1, output_numpy=True):
        logger.info("item[image] is {}".format(item["image"]))
        count = count + 1
        if item["label"] != 0 and item["label"] != 1:
@@ -55,7 +55,7 @@ def test_manifest_dataset_class_index():
    out_class_indexing = data.get_class_indexing()
    assert out_class_indexing == {"dog": 11}
    count = 0
-    for item in data.create_dict_iterator(num_epochs=1):
+    for item in data.create_dict_iterator(num_epochs=1, output_numpy=True):
        logger.info("item[image] is {}".format(item["image"]))
        count = count + 1
        if item["label"] != 11:
@@ -81,7 +81,7 @@ def test_manifest_dataset_multi_label():
    data = ds.ManifestDataset(DATA_FILE, decode=True, shuffle=False)
    count = 0
    expect_label = [1, 0, 0, [0, 2]]
-    for item in data.create_dict_iterator(num_epochs=1):
+    for item in data.create_dict_iterator(num_epochs=1, output_numpy=True):
        assert item["label"].tolist() == expect_label[count]
        logger.info("item[image] is {}".format(item["image"]))
        count = count + 1
@@ -107,7 +107,7 @@ def test_manifest_dataset_multi_label_onehot():
    data = data.map(operations=multi_label_hot, input_columns=["label"])
    data = data.batch(2)
    count = 0
-    for item in data.create_dict_iterator(num_epochs=1):
+    for item in data.create_dict_iterator(num_epochs=1, output_numpy=True):
        assert item["label"].tolist() == expect_label[count]
        logger.info("item[image] is {}".format(item["image"]))
        count = count + 1


+5 -5  tests/ut/python/dataset/test_datasets_mnist.py

@@ -64,7 +64,7 @@ def test_mnist_content_check():
    num_iter = 0
    # in this example, each dictionary has keys "image" and "label"
    image_list, label_list = [], []
-    for i, data in enumerate(data1.create_dict_iterator(num_epochs=1)):
+    for i, data in enumerate(data1.create_dict_iterator(num_epochs=1, output_numpy=True)):
        image_list.append(data["image"])
        label_list.append("label {}".format(data["label"]))
        np.testing.assert_array_equal(data["image"], images[i])
@@ -137,7 +137,7 @@ def test_mnist_pk_sampler():
    data = ds.MnistDataset(DATA_DIR, sampler=sampler)
    num_iter = 0
    label_list = []
-    for item in data.create_dict_iterator(num_epochs=1):
+    for item in data.create_dict_iterator(num_epochs=1, output_numpy=True):
        label_list.append(item["label"])
        num_iter += 1
    np.testing.assert_array_equal(golden, label_list)
@@ -156,8 +156,8 @@ def test_mnist_sequential_sampler():
    label_list1, label_list2 = [], []
    num_iter = 0
    for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)):
-        label_list1.append(item1["label"])
-        label_list2.append(item2["label"])
+        label_list1.append(item1["label"].asnumpy())
+        label_list2.append(item2["label"].asnumpy())
        num_iter += 1
    np.testing.assert_array_equal(label_list1, label_list2)
    assert num_iter == num_samples
@@ -214,7 +214,7 @@ def test_mnist_visualize(plot=False):
    data1 = ds.MnistDataset(DATA_DIR, num_samples=10, shuffle=False)
    num_iter = 0
    image_list, label_list = [], []
-    for item in data1.create_dict_iterator(num_epochs=1):
+    for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True):
        image = item["image"]
        label = item["label"]
        image_list.append(image)
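
Where a test zips two pipelines and keeps the default Tensor iterators (as in test_mnist_sequential_sampler above), only the values that feed NumPy comparisons need converting. A hedged sketch of that pattern, assuming data1 and data2 are two equivalent MnistDataset pipelines:

label_list1, label_list2 = [], []
for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1),
                        data2.create_dict_iterator(num_epochs=1)):
    # each column is a mindspore.Tensor here; convert before storing for the NumPy check
    label_list1.append(item1["label"].asnumpy())
    label_list2.append(item2["label"].asnumpy())
np.testing.assert_array_equal(label_list1, label_list2)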


+8 -8  tests/ut/python/dataset/test_datasets_sharding.py

@@ -25,7 +25,7 @@ def test_imagefolder_shardings(print_res=False):
                                       shuffle=shuffle, class_indexing=class_index, decode=True)
        data1 = data1.repeat(repeat_cnt)
        res = []
-        for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary
+        for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary
            res.append(item["label"].item())
        if print_res:
            logger.info("labels of dataset: {}".format(res))
@@ -59,7 +59,7 @@ def test_tfrecord_shardings1(print_res=False):
                                      shuffle=ds.Shuffle.FILES, num_parallel_workers=1)
        data1 = data1.repeat(repeat_cnt)
        res = []
-        for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary
+        for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary
            res.append(item["scalars"][0])
        if print_res:
            logger.info("scalars of dataset: {}".format(res))
@@ -97,7 +97,7 @@ def test_tfrecord_shardings4(print_res=False):
                                      shuffle=ds.Shuffle.FILES, num_parallel_workers=4)
        data1 = data1.repeat(repeat_cnt)
        res = []
-        for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary
+        for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary
            res.append(item["scalars"][0])
        if print_res:
            logger.info("scalars of dataset: {}".format(res))
@@ -141,7 +141,7 @@ def test_manifest_shardings(print_res=False):
                                       shuffle=shuffle, decode=True)
        data1 = data1.repeat(repeat_cnt)
        res = []
-        for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary
+        for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary
            res.append(item["label"].item())
        if print_res:
            logger.info("labels of dataset: {}".format(res))
@@ -166,7 +166,7 @@ def test_voc_shardings(print_res=False):
        data1 = ds.VOCDataset(voc_dir, decode=True, sampler=sampler)
        data1 = data1.repeat(repeat_cnt)
        res = []
-        for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary
+        for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary
            res.append(item["image"].shape[0])
        if print_res:
            logger.info("labels of dataset: {}".format(res))
@@ -194,7 +194,7 @@ def test_cifar10_shardings(print_res=False):
                                    shuffle=shuffle)
        data1 = data1.repeat(repeat_cnt)
        res = []
-        for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary
+        for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary
            res.append(item["label"].item())
        if print_res:
            logger.info("labels of dataset: {}".format(res))
@@ -214,7 +214,7 @@ def test_cifar100_shardings(print_res=False):
                                     shuffle=shuffle)
        data1 = data1.repeat(repeat_cnt)
        res = []
-        for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary
+        for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary
            res.append(item["coarse_label"].item())
        if print_res:
            logger.info("labels of dataset: {}".format(res))
@@ -233,7 +233,7 @@ def test_mnist_shardings(print_res=False):
                                   shuffle=shuffle)
        data1 = data1.repeat(repeat_cnt)
        res = []
-        for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary
+        for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary
            res.append(item["label"].item())
        if print_res:
            logger.info("labels of dataset: {}".format(res))


+12 -12  tests/ut/python/dataset/test_datasets_textfileop.py

@@ -25,7 +25,7 @@ DATA_ALL_FILE = "../data/dataset/testTextFileDataset/*"
def test_textline_dataset_one_file():
    data = ds.TextFileDataset(DATA_FILE)
    count = 0
-    for i in data.create_dict_iterator(num_epochs=1):
+    for i in data.create_dict_iterator(num_epochs=1, output_numpy=True):
        logger.info("{}".format(i["text"]))
        count += 1
    assert count == 3
@@ -34,7 +34,7 @@ def test_textline_dataset_one_file():
def test_textline_dataset_all_file():
    data = ds.TextFileDataset(DATA_ALL_FILE)
    count = 0
-    for i in data.create_dict_iterator(num_epochs=1):
+    for i in data.create_dict_iterator(num_epochs=1, output_numpy=True):
        logger.info("{}".format(i["text"]))
        count += 1
    assert count == 5
@@ -43,7 +43,7 @@ def test_textline_dataset_all_file():
def test_textline_dataset_num_samples_zero():
    data = ds.TextFileDataset(DATA_FILE, num_samples=0)
    count = 0
-    for i in data.create_dict_iterator(num_epochs=1):
+    for i in data.create_dict_iterator(num_epochs=1, output_numpy=True):
        logger.info("{}".format(i["text"]))
        count += 1
    assert count == 3
@@ -56,7 +56,7 @@ def test_textline_dataset_shuffle_false4():
    count = 0
    line = ["This is a text file.", "Another file.",
            "Be happy every day.", "End of file.", "Good luck to everyone."]
-    for i in data.create_dict_iterator(num_epochs=1):
+    for i in data.create_dict_iterator(num_epochs=1, output_numpy=True):
        strs = i["text"].item().decode("utf8")
        assert strs == line[count]
        count += 1
@@ -73,7 +73,7 @@ def test_textline_dataset_shuffle_false1():
    count = 0
    line = ["This is a text file.", "Be happy every day.", "Good luck to everyone.",
            "Another file.", "End of file."]
-    for i in data.create_dict_iterator(num_epochs=1):
+    for i in data.create_dict_iterator(num_epochs=1, output_numpy=True):
        strs = i["text"].item().decode("utf8")
        assert strs == line[count]
        count += 1
@@ -90,7 +90,7 @@ def test_textline_dataset_shuffle_files4():
    count = 0
    line = ["This is a text file.", "Another file.",
            "Be happy every day.", "End of file.", "Good luck to everyone."]
-    for i in data.create_dict_iterator(num_epochs=1):
+    for i in data.create_dict_iterator(num_epochs=1, output_numpy=True):
        strs = i["text"].item().decode("utf8")
        assert strs == line[count]
        count += 1
@@ -107,7 +107,7 @@ def test_textline_dataset_shuffle_files1():
    count = 0
    line = ["This is a text file.", "Be happy every day.", "Good luck to everyone.",
            "Another file.", "End of file."]
-    for i in data.create_dict_iterator(num_epochs=1):
+    for i in data.create_dict_iterator(num_epochs=1, output_numpy=True):
        strs = i["text"].item().decode("utf8")
        assert strs == line[count]
        count += 1
@@ -124,7 +124,7 @@ def test_textline_dataset_shuffle_global4():
    count = 0
    line = ["Another file.", "Good luck to everyone.", "End of file.",
            "This is a text file.", "Be happy every day."]
-    for i in data.create_dict_iterator(num_epochs=1):
+    for i in data.create_dict_iterator(num_epochs=1, output_numpy=True):
        strs = i["text"].item().decode("utf8")
        assert strs == line[count]
        count += 1
@@ -141,7 +141,7 @@ def test_textline_dataset_shuffle_global1():
    count = 0
    line = ["Another file.", "Good luck to everyone.", "This is a text file.",
            "End of file.", "Be happy every day."]
-    for i in data.create_dict_iterator(num_epochs=1):
+    for i in data.create_dict_iterator(num_epochs=1, output_numpy=True):
        strs = i["text"].item().decode("utf8")
        assert strs == line[count]
        count += 1
@@ -154,7 +154,7 @@ def test_textline_dataset_shuffle_global1():
def test_textline_dataset_num_samples():
    data = ds.TextFileDataset(DATA_FILE, num_samples=2)
    count = 0
-    for _ in data.create_dict_iterator(num_epochs=1):
+    for _ in data.create_dict_iterator(num_epochs=1, output_numpy=True):
        count += 1
    assert count == 2

@@ -162,7 +162,7 @@ def test_textline_dataset_num_samples():
def test_textline_dataset_distribution():
    data = ds.TextFileDataset(DATA_ALL_FILE, num_shards=2, shard_id=1)
    count = 0
-    for _ in data.create_dict_iterator(num_epochs=1):
+    for _ in data.create_dict_iterator(num_epochs=1, output_numpy=True):
        count += 1
    assert count == 3

@@ -174,7 +174,7 @@ def test_textline_dataset_repeat():
    line = ["This is a text file.", "Be happy every day.", "Good luck to everyone.",
            "This is a text file.", "Be happy every day.", "Good luck to everyone.",
            "This is a text file.", "Be happy every day.", "Good luck to everyone."]
-    for i in data.create_dict_iterator(num_epochs=1):
+    for i in data.create_dict_iterator(num_epochs=1, output_numpy=True):
        strs = i["text"].item().decode("utf8")
        assert strs == line[count]
        count += 1
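
With output_numpy=True the "text" column arrives as a NumPy bytes scalar, so .item().decode("utf8") recovers the Python string being asserted above. A small sketch, assuming DATA_FILE points at the same test text file:

import mindspore.dataset as ds

data = ds.TextFileDataset(DATA_FILE, shuffle=False)
for row in data.create_dict_iterator(num_epochs=1, output_numpy=True):
    # row["text"] is a numpy array of bytes; unwrap the scalar and decode it
    print(row["text"].item().decode("utf8"))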


+8 -8  tests/ut/python/dataset/test_datasets_tfrecord.py

@@ -39,7 +39,7 @@ def test_tfrecord_shape():
    schema_file = "../data/dataset/testTFTestAllTypes/datasetSchemaRank0.json"
    ds1 = ds.TFRecordDataset(FILES, schema_file)
    ds1 = ds1.batch(2)
-    for data in ds1.create_dict_iterator(num_epochs=1):
+    for data in ds1.create_dict_iterator(num_epochs=1, output_numpy=True):
        logger.info(data)
    output_shape = ds1.output_shapes()
    assert len(output_shape[-1]) == 1
@@ -162,7 +162,7 @@ def test_tfrecord_schema():

    for d1, d2 in zip(data1, data2):
        for t1, t2 in zip(d1, d2):
-            np.testing.assert_array_equal(t1, t2)
+            np.testing.assert_array_equal(t1.asnumpy(), t2.asnumpy())


def test_tfrecord_shuffle():
@@ -174,7 +174,7 @@ def test_tfrecord_shuffle():

    for d1, d2 in zip(data1, data2):
        for t1, t2 in zip(d1, d2):
-            np.testing.assert_array_equal(t1, t2)
+            np.testing.assert_array_equal(t1.asnumpy(), t2.asnumpy())


def test_tfrecord_shard():
@@ -187,7 +187,7 @@ def test_tfrecord_shard():
                                  shuffle=ds.Shuffle.FILES)
        data1 = data1.repeat(num_repeats)
        res = list()
-        for item in data1.create_dict_iterator(num_epochs=1):
+        for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True):
            res.append(item["scalars"][0])
        return res

@@ -215,7 +215,7 @@ def test_tfrecord_shard_equal_rows():
        ds1 = ds.TFRecordDataset(tf_files, num_shards=num_shards, shard_id=shard_id, shard_equal_rows=True)
        ds1 = ds1.repeat(num_repeats)
        res = list()
-        for data in ds1.create_dict_iterator(num_epochs=1):
+        for data in ds1.create_dict_iterator(num_epochs=1, output_numpy=True):
            res.append(data["scalars"][0])
        return res

@@ -238,7 +238,7 @@ def test_tfrecord_shard_equal_rows():
def test_tfrecord_no_schema_columns_list():
    logger.info("test_tfrecord_no_schema_columns_list")
    data = ds.TFRecordDataset(FILES, shuffle=False, columns_list=["col_sint16"])
-    row = data.create_dict_iterator(num_epochs=1).__next__()
+    row = data.create_dict_iterator(num_epochs=1, output_numpy=True).__next__()
    assert row["col_sint16"] == [-32768]

    with pytest.raises(KeyError) as info:
@@ -258,7 +258,7 @@ def test_tfrecord_schema_columns_list():
    schema.add_column('col_sint32', de_type=mstype.int64, shape=[1])
    schema.add_column('col_sint64', de_type=mstype.int64, shape=[1])
    data = ds.TFRecordDataset(FILES, schema=schema, shuffle=False, columns_list=["col_sint16"])
-    row = data.create_dict_iterator(num_epochs=1).__next__()
+    row = data.create_dict_iterator(num_epochs=1, output_numpy=True).__next__()
    assert row["col_sint16"] == [-32768]

    with pytest.raises(KeyError) as info:
@@ -275,7 +275,7 @@ def test_tfrecord_invalid_files():
    data = ds.TFRecordDataset(files, SCHEMA_FILE, shuffle=ds.Shuffle.FILES)

    with pytest.raises(RuntimeError) as info:
-        _ = data.create_dict_iterator(num_epochs=1).get_next()
+        _ = data.create_dict_iterator(num_epochs=1, output_numpy=True).get_next()
    assert "cannot be opened" in str(info.value)
    assert "not valid tfrecord files" in str(info.value)
    assert valid_file not in str(info.value)
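
The columns_list tests above pull a single row by calling __next__() directly on the iterator; with output_numpy=True that row is a plain dict of NumPy arrays. A hedged sketch of the same one-row fetch, assuming FILES and the column name from those tests:

import mindspore.dataset as ds

data = ds.TFRecordDataset(FILES, shuffle=False, columns_list=["col_sint16"])
# Grab one row without a for loop; the value is a numpy array, so the comparison is element-wise.
row = data.create_dict_iterator(num_epochs=1, output_numpy=True).__next__()
assert row["col_sint16"] == [-32768]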


+4 -4  tests/ut/python/dataset/test_datasets_voc.py

@@ -23,7 +23,7 @@ TARGET_SHAPE = [680, 680, 680, 680, 642, 607, 561, 596, 612, 680]
def test_voc_segmentation():
    data1 = ds.VOCDataset(DATA_DIR, task="Segmentation", usage="train", shuffle=False, decode=True)
    num = 0
-    for item in data1.create_dict_iterator(num_epochs=1):
+    for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True):
        assert item["image"].shape[0] == IMAGE_SHAPE[num]
        assert item["target"].shape[0] == TARGET_SHAPE[num]
        num += 1
@@ -34,7 +34,7 @@ def test_voc_detection():
    data1 = ds.VOCDataset(DATA_DIR, task="Detection", usage="train", shuffle=False, decode=True)
    num = 0
    count = [0, 0, 0, 0, 0, 0]
-    for item in data1.create_dict_iterator(num_epochs=1):
+    for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True):
        assert item["image"].shape[0] == IMAGE_SHAPE[num]
        for label in item["label"]:
            count[label[0]] += 1
@@ -53,7 +53,7 @@ def test_voc_class_index():
    assert (class_index2 == {'car': 0, 'cat': 1, 'train': 5})
    num = 0
    count = [0, 0, 0, 0, 0, 0]
-    for item in data1.create_dict_iterator(num_epochs=1):
+    for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True):
        for label in item["label"]:
            count[label[0]] += 1
            assert label[0] in (0, 1, 5)
@@ -71,7 +71,7 @@ def test_voc_get_class_indexing():
    assert (class_index2 == {'car': 0, 'cat': 1, 'chair': 2, 'dog': 3, 'person': 4, 'train': 5})
    num = 0
    count = [0, 0, 0, 0, 0, 0]
-    for item in data1.create_dict_iterator(num_epochs=1):
+    for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True):
        for label in item["label"]:
            count[label[0]] += 1
            assert label[0] in (0, 1, 2, 3, 4, 5)


+4 -2  tests/ut/python/dataset/test_decode.py

@@ -40,7 +40,8 @@ def test_decode_op():

    # Second dataset
    data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
-    for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)):
+    for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True),
+                            data2.create_dict_iterator(num_epochs=1, output_numpy=True)):
        actual = item1["image"]
        expected = cv2.imdecode(item2["image"], cv2.IMREAD_COLOR)
        expected = cv2.cvtColor(expected, cv2.COLOR_BGR2RGB)
@@ -65,7 +66,8 @@ def test_decode_op_tf_file_dataset():
    # Second dataset
    data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)

-    for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)):
+    for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True),
+                            data2.create_dict_iterator(num_epochs=1, output_numpy=True)):
        actual = item1["image"]
        expected = cv2.imdecode(item2["image"], cv2.IMREAD_COLOR)
        expected = cv2.cvtColor(expected, cv2.COLOR_BGR2RGB)


+1 -1  tests/ut/python/dataset/test_duplicate_op.py

@@ -26,7 +26,7 @@ def compare(array):
    array = np.array(array)
    data = data.map(operations=ops.Duplicate(), input_columns=["x"], output_columns=["x", "y"],
                    column_order=["x", "y"])
-    for d in data.create_dict_iterator(num_epochs=1):
+    for d in data.create_dict_iterator(num_epochs=1, output_numpy=True):
        np.testing.assert_array_equal(array, d["x"])
        np.testing.assert_array_equal(array, d["y"])




+27 -27  tests/ut/python/dataset/test_epoch_ctrl.py

@@ -86,9 +86,9 @@ def test_decode_op():

    num_epoch = 5
    # iter1 will always assume there is a next epoch and never shutdown.
-    iter1 = data1.create_dict_iterator()
+    iter1 = data1.create_dict_iterator(output_numpy=True)
    # iter 2 will stop and shutdown pipeline after num_epoch
-    iter2 = data2.create_dict_iterator(num_epoch)
+    iter2 = data2.create_dict_iterator(num_epoch, output_numpy=True)
    for _ in range(num_epoch):
        i = 0
        for item1, item2 in itertools.zip_longest(iter1, iter2):
@@ -135,7 +135,7 @@ def test_generator_dict_0():

    i = 0
    # create the iterator inside the loop declaration
-    for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary
+    for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary
        golden = np.array([i])
        np.testing.assert_array_equal(item["data"], golden)
        i = i + 1
@@ -154,7 +154,7 @@ def test_generator_dict_1():
        i = 0
        # BAD. Do not create iterator every time inside.
        # Create iterator outside the epoch for loop.
-        for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary
+        for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary
            golden = np.array([i])
            np.testing.assert_array_equal(item["data"], golden)
            i = i + 1
@@ -174,7 +174,7 @@ def test_generator_dict_2():
        i = 0
        for item in iter1: # each data is a dictionary
            golden = np.array([i])
-            np.testing.assert_array_equal(item["data"], golden)
+            np.testing.assert_array_equal(item["data"].asnumpy(), golden)
            i = i + 1
        assert i == 64
@@ -197,7 +197,7 @@ def test_generator_dict_3():
        i = 0
        for item in iter1: # each data is a dictionary
            golden = np.array([i])
-            np.testing.assert_array_equal(item["data"], golden)
+            np.testing.assert_array_equal(item["data"].asnumpy(), golden)
            i = i + 1
        assert i == 64
    # optional
@@ -221,7 +221,7 @@ def test_generator_dict_4():
        i = 0
        for item in iter1: # each data is a dictionary
            golden = np.array([i])
-            np.testing.assert_array_equal(item["data"], golden)
+            np.testing.assert_array_equal(item["data"].asnumpy(), golden)
            i = i + 1
        assert i == 64
@@ -240,7 +240,7 @@ def test_generator_dict_4_1():
    # apply dataset operations
    data1 = ds.GeneratorDataset(generator_1d, ["data"])
    # epoch ctrl op will not be injected if num_epochs is 1.
-    iter1 = data1.create_dict_iterator(num_epochs=1)
+    iter1 = data1.create_dict_iterator(num_epochs=1, output_numpy=True)
    for _ in range(1):
        i = 0
        for item in iter1: # each data is a dictionary
@@ -266,7 +266,7 @@ def test_generator_dict_4_2():
    # repeat will not be injected when num repeat is 1.
    data1 = data1.repeat(1)
    # epoch ctrl op will not be injected if num_epochs is 1.
-    iter1 = data1.create_dict_iterator(num_epochs=1)
+    iter1 = data1.create_dict_iterator(num_epochs=1, output_numpy=True)
    for _ in range(1):
        i = 0
        for item in iter1: # each data is a dictionary
@@ -289,7 +289,7 @@ def test_generator_dict_5():

    # apply dataset operations
    data1 = ds.GeneratorDataset(generator_1d, ["data"])
-    iter1 = data1.create_dict_iterator(num_epochs=11)
+    iter1 = data1.create_dict_iterator(num_epochs=11, output_numpy=True)
    for _ in range(10):
        i = 0
        for item in iter1: # each data is a dictionary
@@ -326,7 +326,7 @@ def test_generator_tuple_0():

    i = 0
    # create the iterator inside the loop declaration
-    for item in data1.create_tuple_iterator(num_epochs=1): # each data is a dictionary
+    for item in data1.create_tuple_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary
        golden = np.array([i])
        np.testing.assert_array_equal(item[0], golden)
        i = i + 1
@@ -345,7 +345,7 @@ def test_generator_tuple_1():
        i = 0
        # BAD. Do not create iterator every time inside.
        # Create iterator outside the epoch for loop.
-        for item in data1.create_tuple_iterator(num_epochs=1): # each data is a dictionary
+        for item in data1.create_tuple_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary
            golden = np.array([i])
            np.testing.assert_array_equal(item[0], golden)
            i = i + 1
@@ -360,7 +360,7 @@ def test_generator_tuple_2():

    # apply dataset operations
    data1 = ds.GeneratorDataset(generator_1d, ["data"])
-    iter1 = data1.create_tuple_iterator()
+    iter1 = data1.create_tuple_iterator(output_numpy=True)
    for _ in range(10):
        i = 0
        for item in iter1: # each data is a dictionary
@@ -464,7 +464,7 @@ def test_generator_tuple_repeat_1():
# apply dataset operations # apply dataset operations
data1 = ds.GeneratorDataset(generator_1d, ["data"]) data1 = ds.GeneratorDataset(generator_1d, ["data"])
data1 = data1.repeat(2) data1 = data1.repeat(2)
iter1 = data1.create_tuple_iterator(num_epochs=11)
iter1 = data1.create_tuple_iterator(num_epochs=11, output_numpy=True)
for _ in range(10): for _ in range(10):
i = 0 i = 0
for item in iter1: # each data is a dictionary for item in iter1: # each data is a dictionary
@@ -499,7 +499,7 @@ def test_generator_tuple_repeat_repeat_1():
data1 = ds.GeneratorDataset(generator_1d, ["data"]) data1 = ds.GeneratorDataset(generator_1d, ["data"])
data1 = data1.repeat(2) data1 = data1.repeat(2)
data1 = data1.repeat(3) data1 = data1.repeat(3)
iter1 = data1.create_tuple_iterator(num_epochs=11)
iter1 = data1.create_tuple_iterator(num_epochs=11, output_numpy=True)
for _ in range(10): for _ in range(10):
i = 0 i = 0
for item in iter1: # each data is a dictionary for item in iter1: # each data is a dictionary
@@ -533,7 +533,7 @@ def test_generator_tuple_repeat_repeat_2():
data1 = ds.GeneratorDataset(generator_1d, ["data"]) data1 = ds.GeneratorDataset(generator_1d, ["data"])
data1 = data1.repeat(2) data1 = data1.repeat(2)
data1 = data1.repeat(3) data1 = data1.repeat(3)
iter1 = data1.create_tuple_iterator()
iter1 = data1.create_tuple_iterator(output_numpy=True)
for _ in range(10): for _ in range(10):
i = 0 i = 0
for item in iter1: # each data is a dictionary for item in iter1: # each data is a dictionary
@@ -559,7 +559,7 @@ def test_generator_tuple_repeat_repeat_3():
data1 = ds.GeneratorDataset(generator_1d, ["data"]) data1 = ds.GeneratorDataset(generator_1d, ["data"])
data1 = data1.repeat(2) data1 = data1.repeat(2)
data1 = data1.repeat(3) data1 = data1.repeat(3)
iter1 = data1.create_tuple_iterator()
iter1 = data1.create_tuple_iterator(output_numpy=True)
for _ in range(10): for _ in range(10):
i = 0 i = 0
for item in iter1: # each data is a dictionary for item in iter1: # each data is a dictionary
@@ -589,7 +589,7 @@ def test_generator_tuple_infinite_repeat_repeat_1():
data1 = ds.GeneratorDataset(generator_1d, ["data"]) data1 = ds.GeneratorDataset(generator_1d, ["data"])
data1 = data1.repeat() data1 = data1.repeat()
data1 = data1.repeat(3) data1 = data1.repeat(3)
iter1 = data1.create_tuple_iterator(num_epochs=11)
iter1 = data1.create_tuple_iterator(num_epochs=11, output_numpy=True)


i = 0 i = 0
for item in iter1: # each data is a dictionary for item in iter1: # each data is a dictionary
@@ -612,7 +612,7 @@ def test_generator_tuple_infinite_repeat_repeat_2():
data1 = ds.GeneratorDataset(generator_1d, ["data"]) data1 = ds.GeneratorDataset(generator_1d, ["data"])
data1 = data1.repeat(3) data1 = data1.repeat(3)
data1 = data1.repeat() data1 = data1.repeat()
iter1 = data1.create_tuple_iterator(num_epochs=11)
iter1 = data1.create_tuple_iterator(num_epochs=11, output_numpy=True)


i = 0 i = 0
for item in iter1: # each data is a dictionary for item in iter1: # each data is a dictionary
@@ -635,7 +635,7 @@ def test_generator_tuple_infinite_repeat_repeat_3():
data1 = ds.GeneratorDataset(generator_1d, ["data"]) data1 = ds.GeneratorDataset(generator_1d, ["data"])
data1 = data1.repeat() data1 = data1.repeat()
data1 = data1.repeat() data1 = data1.repeat()
iter1 = data1.create_tuple_iterator(num_epochs=11)
iter1 = data1.create_tuple_iterator(num_epochs=11, output_numpy=True)


i = 0 i = 0
for item in iter1: # each data is a dictionary for item in iter1: # each data is a dictionary
@@ -658,7 +658,7 @@ def test_generator_tuple_infinite_repeat_repeat_4():
data1 = ds.GeneratorDataset(generator_1d, ["data"]) data1 = ds.GeneratorDataset(generator_1d, ["data"])
data1 = data1.repeat() data1 = data1.repeat()
data1 = data1.repeat() data1 = data1.repeat()
iter1 = data1.create_tuple_iterator()
iter1 = data1.create_tuple_iterator(output_numpy=True)


i = 0 i = 0
for item in iter1: # each data is a dictionary for item in iter1: # each data is a dictionary
@@ -680,7 +680,7 @@ def test_generator_reusedataset():
# apply dataset operations # apply dataset operations
data1 = ds.GeneratorDataset(generator_1d, ["data"]) data1 = ds.GeneratorDataset(generator_1d, ["data"])
data1 = data1.repeat(2) data1 = data1.repeat(2)
iter1 = data1.create_tuple_iterator()
iter1 = data1.create_tuple_iterator(output_numpy=True)
for _ in range(10): for _ in range(10):
i = 0 i = 0
for item in iter1: # each data is a dictionary for item in iter1: # each data is a dictionary
@@ -690,7 +690,7 @@ def test_generator_reusedataset():
assert i == 64 * 2 assert i == 64 * 2


data1 = data1.repeat(3) data1 = data1.repeat(3)
iter1 = data1.create_tuple_iterator()
iter1 = data1.create_tuple_iterator(output_numpy=True)
for _ in range(5): for _ in range(5):
i = 0 i = 0
for item in iter1: # each data is a dictionary for item in iter1: # each data is a dictionary
@@ -700,7 +700,7 @@ def test_generator_reusedataset():
assert i == 64 * 2 * 3 assert i == 64 * 2 * 3


data1 = data1.batch(2) data1 = data1.batch(2)
iter1 = data1.create_dict_iterator()
iter1 = data1.create_dict_iterator(output_numpy=True)
for _ in range(5): for _ in range(5):
i = 0 i = 0
sample = 0 sample = 0
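
The epoch-control tests reuse one iterator across several epochs; output_numpy is fixed when the iterator is created and applies to every epoch it serves. A hedged sketch of that loop, assuming generator_1d yields 64 single-element rows as in these tests and that numpy and mindspore.dataset are imported as np and ds:

data1 = ds.GeneratorDataset(generator_1d, ["data"])
num_epoch = 10
# Create the iterator once, outside the epoch loop; num_epochs lets the pipeline shut down cleanly.
iter1 = data1.create_tuple_iterator(num_epochs=num_epoch, output_numpy=True)
for _ in range(num_epoch):
    i = 0
    for item in iter1:
        np.testing.assert_array_equal(item[0], np.array([i]))
        i += 1
    assert i == 64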


+15 -15  tests/ut/python/dataset/test_equalize.py

@@ -49,10 +49,10 @@ def test_equalize_py(plot=False):

    for idx, (image, _) in enumerate(ds_original):
        if idx == 0:
-            images_original = np.transpose(image, (0, 2, 3, 1))
+            images_original = np.transpose(image.asnumpy(), (0, 2, 3, 1))
        else:
            images_original = np.append(images_original,
-                                        np.transpose(image, (0, 2, 3, 1)),
+                                        np.transpose(image.asnumpy(), (0, 2, 3, 1)),
                                        axis=0)

    # Color Equalized Images
@@ -69,10 +69,10 @@

    for idx, (image, _) in enumerate(ds_equalize):
        if idx == 0:
-            images_equalize = np.transpose(image, (0, 2, 3, 1))
+            images_equalize = np.transpose(image.asnumpy(), (0, 2, 3, 1))
        else:
            images_equalize = np.append(images_equalize,
-                                        np.transpose(image, (0, 2, 3, 1)),
+                                        np.transpose(image.asnumpy(), (0, 2, 3, 1)),
                                        axis=0)

    num_samples = images_original.shape[0]
@@ -102,10 +102,10 @@ def test_equalize_c(plot=False):

    for idx, (image, _) in enumerate(ds_original):
        if idx == 0:
-            images_original = image
+            images_original = image.asnumpy()
        else:
            images_original = np.append(images_original,
-                                        image,
+                                        image.asnumpy(),
                                        axis=0)

    # Equalize Images
@@ -120,10 +120,10 @@

    for idx, (image, _) in enumerate(ds_equalize):
        if idx == 0:
-            images_equalize = image
+            images_equalize = image.asnumpy()
        else:
            images_equalize = np.append(images_equalize,
-                                        image,
+                                        image.asnumpy(),
                                        axis=0)
    if plot:
        visualize_list(images_original, images_equalize)
@@ -151,10 +151,10 @@ def test_equalize_py_c(plot=False):

    for idx, (image, _) in enumerate(ds_c_equalize):
        if idx == 0:
-            images_c_equalize = image
+            images_c_equalize = image.asnumpy()
        else:
            images_c_equalize = np.append(images_c_equalize,
-                                        image,
+                                        image.asnumpy(),
                                        axis=0)

    # Equalize images in python
@@ -172,10 +172,10 @@

    for idx, (image, _) in enumerate(ds_p_equalize):
        if idx == 0:
-            images_p_equalize = image
+            images_p_equalize = image.asnumpy()
        else:
            images_p_equalize = np.append(images_p_equalize,
-                                        image,
+                                        image.asnumpy(),
                                        axis=0)

    num_samples = images_c_equalize.shape[0]
@@ -223,9 +223,9 @@ def test_equalize_mnist_c(plot=False):
    for _, (data_orig, data_trans) in enumerate(zip(ds_orig, ds_equalize_c)):
        image_orig, label_orig = data_orig
        image_trans, _ = data_trans
-        images.append(image_orig)
-        labels.append(label_orig)
-        images_trans.append(image_trans)
+        images.append(image_orig.asnumpy())
+        labels.append(label_orig.asnumpy())
+        images_trans.append(image_trans.asnumpy())

    # Compare with expected md5 from images
    filename = "equalize_mnist_result_c.npz"
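
When an image pipeline is iterated in the default Tensor mode, each batch needs .asnumpy() before NumPy operations such as np.transpose or np.append. A minimal sketch of the accumulation pattern used in these tests, assuming ds_original yields (image, label) batches in NCHW layout:

import numpy as np

images_original = None
for idx, (image, _) in enumerate(ds_original):
    batch = image.asnumpy()  # mindspore.Tensor -> numpy.ndarray
    nhwc = np.transpose(batch, (0, 2, 3, 1))
    images_original = nhwc if idx == 0 else np.append(images_original, nhwc, axis=0)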


+4 -4  tests/ut/python/dataset/test_fill_op.py

@@ -31,7 +31,7 @@ def test_fillop_basic():
    data = data.map(operations=fill_op, input_columns=["col"])
    expected = np.array([3, 3, 3, 3], dtype=np.uint8)
    for data_row in data:
-       np.testing.assert_array_equal(data_row[0], expected)
+       np.testing.assert_array_equal(data_row[0].asnumpy(), expected)


def test_fillop_down_type_cast():
@@ -44,7 +44,7 @@ def test_fillop_down_type_cast():
    data = data.map(operations=fill_op, input_columns=["col"])
    expected = np.array([253, 253, 253, 253], dtype=np.uint8)
    for data_row in data:
-       np.testing.assert_array_equal(data_row[0], expected)
+       np.testing.assert_array_equal(data_row[0].asnumpy(), expected)


def test_fillop_up_type_cast():
@@ -57,7 +57,7 @@ def test_fillop_up_type_cast():
    data = data.map(operations=fill_op, input_columns=["col"])
    expected = np.array([3., 3., 3., 3.], dtype=np.float)
    for data_row in data:
-       np.testing.assert_array_equal(data_row[0], expected)
+       np.testing.assert_array_equal(data_row[0].asnumpy(), expected)


def test_fillop_string():
@@ -69,7 +69,7 @@ def test_fillop_string():

    data = data.map(operations=fill_op, input_columns=["col"])
    expected = np.array(['error', 'error'], dtype='S')
-   for data_row in data:
+   for data_row in data.create_tuple_iterator(output_numpy=True):
        np.testing.assert_array_equal(data_row[0], expected)
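Note: where a test wants raw NumPy rows rather than Tensors, the diffs above take the second route and ask the iterator for NumPy output directly. A small sketch under the same toy-dataset assumption (the generator and column name are illustrative only):

import numpy as np
import mindspore.dataset as ds

def gen():
    for i in range(3):
        yield (np.array([i, i, i, i], dtype=np.uint8),)

data = ds.GeneratorDataset(gen, ["col"])

# output_numpy=True keeps the pre-change behaviour: plain NumPy arrays,
# which drop straight into numpy.testing assertions.
for row in data.create_tuple_iterator(num_epochs=1, output_numpy=True):
    assert isinstance(row[0], np.ndarray)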






+18 -18  tests/ut/python/dataset/test_filterop.py

@@ -35,7 +35,7 @@ def test_diff_predicate_func():


num_iter = 0 num_iter = 0
label_list = [] label_list = []
for data in dataset.create_dict_iterator(num_epochs=1):
for data in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
num_iter += 1 num_iter += 1
label = data["label"] label = data["label"]
label_list.append(label) label_list.append(label)
@@ -64,7 +64,7 @@ def test_filter_by_generator_with_no():
dataset_f = dataset.filter(predicate=lambda data: data < 11, num_parallel_workers=4) dataset_f = dataset.filter(predicate=lambda data: data < 11, num_parallel_workers=4)
num_iter = 0 num_iter = 0
expected_rs = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] expected_rs = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
for item in dataset_f.create_dict_iterator(num_epochs=1):
for item in dataset_f.create_dict_iterator(num_epochs=1, output_numpy=True):
assert item["data"] == expected_rs[num_iter] assert item["data"] == expected_rs[num_iter]
num_iter += 1 num_iter += 1


@@ -77,7 +77,7 @@ def test_filter_by_generator_with_repeat():
num_iter = 0 num_iter = 0
ret_data = [] ret_data = []
expected_rs = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] expected_rs = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
for item in dataset_f.create_dict_iterator(num_epochs=1):
for item in dataset_f.create_dict_iterator(num_epochs=1, output_numpy=True):
num_iter += 1 num_iter += 1
ret_data.append(item["data"]) ret_data.append(item["data"])
assert num_iter == 44 assert num_iter == 44
@@ -95,7 +95,7 @@ def test_filter_by_generator_with_repeat_after():
num_iter = 0 num_iter = 0
ret_data = [] ret_data = []
expected_rs = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] expected_rs = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
for item in dataset_r.create_dict_iterator(num_epochs=1):
for item in dataset_r.create_dict_iterator(num_epochs=1, output_numpy=True):
num_iter += 1 num_iter += 1
ret_data.append(item["data"]) ret_data.append(item["data"])
assert num_iter == 44 assert num_iter == 44
@@ -120,7 +120,7 @@ def test_filter_by_generator_with_batch():
dataset_f = dataset_b.filter(predicate=filter_func_batch, num_parallel_workers=4) dataset_f = dataset_b.filter(predicate=filter_func_batch, num_parallel_workers=4)
num_iter = 0 num_iter = 0
ret_data = [] ret_data = []
for item in dataset_f.create_dict_iterator(num_epochs=1):
for item in dataset_f.create_dict_iterator(num_epochs=1, output_numpy=True):
num_iter += 1 num_iter += 1
ret_data.append(item["data"]) ret_data.append(item["data"])
assert num_iter == 3 assert num_iter == 3
@@ -136,7 +136,7 @@ def test_filter_by_generator_with_batch_after():
dataset_b = dataset_f.batch(4) dataset_b = dataset_f.batch(4)
num_iter = 0 num_iter = 0
ret_data = [] ret_data = []
for item in dataset_b.create_dict_iterator(num_epochs=1):
for item in dataset_b.create_dict_iterator(num_epochs=1, output_numpy=True):
num_iter += 1 num_iter += 1
ret_data.append(item["data"]) ret_data.append(item["data"])
assert num_iter == 6 assert num_iter == 6
@@ -202,7 +202,7 @@ def test_filter_by_generator_with_zip():
dataset_f = dataz.filter(predicate=filter_func_zip, num_parallel_workers=1) dataset_f = dataz.filter(predicate=filter_func_zip, num_parallel_workers=1)
num_iter = 0 num_iter = 0
ret_data = [] ret_data = []
for item in dataset_f.create_dict_iterator(num_epochs=1):
for item in dataset_f.create_dict_iterator(num_epochs=1, output_numpy=True):
num_iter += 1 num_iter += 1
ret_data.append({"data1": item["data1"], "data2": item["data2"]}) ret_data.append({"data1": item["data1"], "data2": item["data2"]})
assert num_iter == 21 assert num_iter == 21
@@ -221,7 +221,7 @@ def test_filter_by_generator_with_zip_after():
dataz = ds.zip((dt1, dt2)) dataz = ds.zip((dt1, dt2))
num_iter = 0 num_iter = 0
ret_data = [] ret_data = []
for item in dataz.create_dict_iterator(num_epochs=1):
for item in dataz.create_dict_iterator(num_epochs=1, output_numpy=True):
num_iter += 1 num_iter += 1
ret_data.append({"data1": item["data1"], "data2": item["data2"]}) ret_data.append({"data1": item["data1"], "data2": item["data2"]})
assert num_iter == 21 assert num_iter == 21
@@ -266,7 +266,7 @@ def test_filter_by_generator_with_map_all_col():
dataset_f = dataset_map.filter(input_columns=["col1"], predicate=filter_func_map_part, num_parallel_workers=1) dataset_f = dataset_map.filter(input_columns=["col1"], predicate=filter_func_map_part, num_parallel_workers=1)
num_iter = 0 num_iter = 0
ret_data = [] ret_data = []
for item in dataset_f.create_dict_iterator(num_epochs=1):
for item in dataset_f.create_dict_iterator(num_epochs=1, output_numpy=True):
num_iter += 1 num_iter += 1
ret_data.append(item["col1"]) ret_data.append(item["col1"])
assert num_iter == 3 assert num_iter == 3
@@ -282,7 +282,7 @@ def test_filter_by_generator_with_map_part_col():
dataset_f = dataset_map.filter(input_columns=["out1", "col2"], predicate=filter_func_map, num_parallel_workers=4) dataset_f = dataset_map.filter(input_columns=["out1", "col2"], predicate=filter_func_map, num_parallel_workers=4)
num_iter = 0 num_iter = 0
ret_data = [] ret_data = []
for item in dataset_f.create_dict_iterator(num_epochs=1):
for item in dataset_f.create_dict_iterator(num_epochs=1, output_numpy=True):
num_iter += 1 num_iter += 1
print(item) print(item)
ret_data.append(item["out1"]) ret_data.append(item["out1"])
@@ -302,7 +302,7 @@ def test_filter_by_generator_with_rename():
dataset_f = dataset_b.filter(predicate=filter_func_rename, num_parallel_workers=4) dataset_f = dataset_b.filter(predicate=filter_func_rename, num_parallel_workers=4)
num_iter = 0 num_iter = 0
ret_data = [] ret_data = []
for item in dataset_f.create_dict_iterator(num_epochs=1):
for item in dataset_f.create_dict_iterator(num_epochs=1, output_numpy=True):
num_iter += 1 num_iter += 1
ret_data.append(item["col1"]) ret_data.append(item["col1"])
assert num_iter == 55 assert num_iter == 55
@@ -336,7 +336,7 @@ def test_filter_by_generator_with_input_column():
dataset_f4 = dataset_f3.filter(predicate=filter_func_input_column1, num_parallel_workers=4) dataset_f4 = dataset_f3.filter(predicate=filter_func_input_column1, num_parallel_workers=4)
num_iter = 0 num_iter = 0
ret_data = [] ret_data = []
for item in dataset_f4.create_dict_iterator(num_epochs=1):
for item in dataset_f4.create_dict_iterator(num_epochs=1, output_numpy=True):
num_iter += 1 num_iter += 1
ret_data.append(item["out1"]) ret_data.append(item["out1"])
assert num_iter == 8 assert num_iter == 8
@@ -370,7 +370,7 @@ def test_filter_by_generator_Partial0():
dataset_zip = ds.zip((dataset1, dataset2)) dataset_zip = ds.zip((dataset1, dataset2))
dataset_f1 = dataset_zip.filter(predicate=filter_func_Partial_0, num_parallel_workers=2) dataset_f1 = dataset_zip.filter(predicate=filter_func_Partial_0, num_parallel_workers=2)
ret = [] ret = []
for item in dataset_f1.create_dict_iterator(num_epochs=1):
for item in dataset_f1.create_dict_iterator(num_epochs=1, output_numpy=True):
ret.append(item["col1"]) ret.append(item["col1"])
assert ret[0] == 5 assert ret[0] == 5
assert ret[6] == 12 assert ret[6] == 12
@@ -384,7 +384,7 @@ def test_filter_by_generator_Partial1():
dataset_f1 = dataset_zip.filter(predicate=filter_func_Partial_0, num_parallel_workers=2) dataset_f1 = dataset_zip.filter(predicate=filter_func_Partial_0, num_parallel_workers=2)
dataset_map = dataset_f1.map(operations=lambda x1: x1 + 400, input_columns=["col1"], output_columns=["out1"]) dataset_map = dataset_f1.map(operations=lambda x1: x1 + 400, input_columns=["col1"], output_columns=["out1"])
ret = [] ret = []
for item in dataset_map.create_dict_iterator(num_epochs=1):
for item in dataset_map.create_dict_iterator(num_epochs=1, output_numpy=True):
ret.append(item["out1"]) ret.append(item["out1"])
assert ret[0] == 405 assert ret[0] == 405
assert ret[6] == 412 assert ret[6] == 412
@@ -403,7 +403,7 @@ def test_filter_by_generator_Partial2():
output_columns=["out1", "out3"]) output_columns=["out1", "out3"])
ret1 = [] ret1 = []
ret3 = [] ret3 = []
for item in dataset_map.create_dict_iterator(num_epochs=1):
for item in dataset_map.create_dict_iterator(num_epochs=1, output_numpy=True):
ret1.append(item["out1"]) ret1.append(item["out1"])
ret3.append(item["out3"]) ret3.append(item["out3"])
assert ret1[0] == 400 assert ret1[0] == 400
@@ -428,7 +428,7 @@ def test_filter_by_generator_Partial():
dataset_s = dataset.shuffle(4) dataset_s = dataset.shuffle(4)
dataset_f1 = dataset_s.filter(input_columns=["col1", "col2"], predicate=filter_func_Partial, num_parallel_workers=1) dataset_f1 = dataset_s.filter(input_columns=["col1", "col2"], predicate=filter_func_Partial, num_parallel_workers=1)


for item in dataset_f1.create_dict_iterator(num_epochs=1):
for item in dataset_f1.create_dict_iterator(num_epochs=1, output_numpy=True):
assert item["col1"] % 3 == 0 assert item["col1"] % 3 == 0




@@ -442,7 +442,7 @@ def test_filte_case_dataset_cifar10():
DATA_DIR_10 = "../data/dataset/testCifar10Data" DATA_DIR_10 = "../data/dataset/testCifar10Data"
dataset_c = ds.Cifar10Dataset(dataset_dir=DATA_DIR_10, num_samples=100000, shuffle=False) dataset_c = ds.Cifar10Dataset(dataset_dir=DATA_DIR_10, num_samples=100000, shuffle=False)
dataset_f1 = dataset_c.filter(input_columns=["image", "label"], predicate=filter_func_cifar, num_parallel_workers=1) dataset_f1 = dataset_c.filter(input_columns=["image", "label"], predicate=filter_func_cifar, num_parallel_workers=1)
for item in dataset_f1.create_dict_iterator(num_epochs=1):
for item in dataset_f1.create_dict_iterator(num_epochs=1, output_numpy=True):
# in this example, each dictionary has keys "image" and "label" # in this example, each dictionary has keys "image" and "label"
assert item["label"] % 3 == 0 assert item["label"] % 3 == 0


@@ -476,7 +476,7 @@ def test_filter_by_generator_with_map_all_sort():
dataset_f = dataz.filter(predicate=filter_func_part_sort, num_parallel_workers=1) dataset_f = dataz.filter(predicate=filter_func_part_sort, num_parallel_workers=1)
num_iter = 0 num_iter = 0
ret_data = [] ret_data = []
for item in dataset_f.create_dict_iterator(num_epochs=1):
for item in dataset_f.create_dict_iterator(num_epochs=1, output_numpy=True):
num_iter += 1 num_iter += 1
ret_data.append(item) ret_data.append(item)




+2 -1  tests/ut/python/dataset/test_five_crop.py

@@ -54,7 +54,8 @@ def test_five_crop_op(plot=False):
    data2 = data2.map(operations=transform_2, input_columns=["image"])

    num_iter = 0
-   for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)):
+   for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True),
+                           data2.create_dict_iterator(num_epochs=1, output_numpy=True)):
        num_iter += 1
        image_1 = (item1["image"].transpose(1, 2, 0) * 255).astype(np.uint8)
        image_2 = item2["image"]


+2 -2  tests/ut/python/dataset/test_flat_map.py

@@ -34,7 +34,7 @@ def test_flat_map_1():
    data = data.flat_map(flat_map_func)

    count = 0
-   for d in data:
+   for d in data.create_tuple_iterator(output_numpy=True):
        assert isinstance(d[0], np.ndarray)
        count += 1
    assert count == 52
@@ -60,7 +60,7 @@ def test_flat_map_2():
    data = data.flat_map(flat_map_func_2)

    count = 0
-   for d in data:
+   for d in data.create_tuple_iterator(output_numpy=True):
        assert isinstance(d[0], np.ndarray)
        count += 1
    assert count == 104


+4 -4  tests/ut/python/dataset/test_from_dataset.py

@@ -28,7 +28,7 @@ def test_demo_basic_from_dataset():
                                    special_first=True)
    data = data.map(operations=text.Lookup(vocab, "<unk>"), input_columns=["text"])
    res = []
-   for d in data.create_dict_iterator(num_epochs=1):
+   for d in data.create_dict_iterator(num_epochs=1, output_numpy=True):
        res.append(d["text"].item())
    assert res == [4, 5, 3, 6, 7, 2], res

@@ -41,7 +41,7 @@ def test_demo_basic_from_dataset_with_tokenizer():
                                    special_first=True)
    data = data.map(operations=text.Lookup(vocab, "<unk>"), input_columns=["text"])
    res = []
-   for d in data.create_dict_iterator(num_epochs=1):
+   for d in data.create_dict_iterator(num_epochs=1, output_numpy=True):
        res.append(list(d["text"]))
    assert res == [[13, 3, 7, 14, 9, 17, 3, 2, 19, 9, 2, 11, 3, 4, 16, 4, 8, 6, 5], [21, 20, 10, 25, 23, 26],
                   [24, 22, 10, 12, 8, 6, 7, 4, 18, 15, 5], [2, 2]]
@@ -62,7 +62,7 @@ def test_from_dataset():
                                            special_first=True)
        corpus_dataset = corpus_dataset.map(operations=text.Lookup(vocab, "<unk>"), input_columns="text")
        res = []
-       for d in corpus_dataset.create_dict_iterator(num_epochs=1):
+       for d in corpus_dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
            res.append(list(d["text"]))
        return res

@@ -110,7 +110,7 @@ def test_from_dataset_special_token():
        data = ds.GeneratorDataset(gen_input(texts), column_names=["text"])
        data = data.map(operations=text.Lookup(vocab, "<unk>"), input_columns="text")
        res = []
-       for d in data.create_dict_iterator(num_epochs=1):
+       for d in data.create_dict_iterator(num_epochs=1, output_numpy=True):
            res.append(d["text"].item())
        return res




+1 -1  tests/ut/python/dataset/test_graphdata.py

@@ -186,7 +186,7 @@ def test_graphdata_generatordataset():
    dataset = ds.GeneratorDataset(source=GNNGraphDataset(g, batch_num), column_names=out_column_names,
                                  sampler=RandomBatchedSampler(edge_num, batch_num), num_parallel_workers=4)
    dataset = dataset.repeat(2)
-   itr = dataset.create_dict_iterator(num_epochs=1)
+   itr = dataset.create_dict_iterator(num_epochs=1, output_numpy=True)
    i = 0
    for data in itr:
        assert data['neighbors'].shape == (2, 7)


+1 -1  tests/ut/python/dataset/test_graphdata_distributed.py

@@ -112,7 +112,7 @@ def test_graphdata_distributed():
                                  sampler=RandomBatchedSampler(edge_num, batch_num), num_parallel_workers=4,
                                  python_multiprocessing=False)
    dataset = dataset.repeat(2)
-   itr = dataset.create_dict_iterator(num_epochs=1)
+   itr = dataset.create_dict_iterator(num_epochs=1, output_numpy=True)
    i = 0
    for data in itr:
        assert data['neighbors'].shape == (2, 7)


+12 -12  tests/ut/python/dataset/test_invert.py

@@ -48,10 +48,10 @@ def test_invert_py(plot=False):


for idx, (image, _) in enumerate(ds_original): for idx, (image, _) in enumerate(ds_original):
if idx == 0: if idx == 0:
images_original = np.transpose(image, (0, 2, 3, 1))
images_original = np.transpose(image.asnumpy(), (0, 2, 3, 1))
else: else:
images_original = np.append(images_original, images_original = np.append(images_original,
np.transpose(image, (0, 2, 3, 1)),
np.transpose(image.asnumpy(), (0, 2, 3, 1)),
axis=0) axis=0)


# Color Inverted Images # Color Inverted Images
@@ -68,10 +68,10 @@ def test_invert_py(plot=False):


for idx, (image, _) in enumerate(ds_invert): for idx, (image, _) in enumerate(ds_invert):
if idx == 0: if idx == 0:
images_invert = np.transpose(image, (0, 2, 3, 1))
images_invert = np.transpose(image.asnumpy(), (0, 2, 3, 1))
else: else:
images_invert = np.append(images_invert, images_invert = np.append(images_invert,
np.transpose(image, (0, 2, 3, 1)),
np.transpose(image.asnumpy(), (0, 2, 3, 1)),
axis=0) axis=0)


num_samples = images_original.shape[0] num_samples = images_original.shape[0]
@@ -101,10 +101,10 @@ def test_invert_c(plot=False):


for idx, (image, _) in enumerate(ds_original): for idx, (image, _) in enumerate(ds_original):
if idx == 0: if idx == 0:
images_original = image
images_original = image.asnumpy()
else: else:
images_original = np.append(images_original, images_original = np.append(images_original,
image,
image.asnumpy(),
axis=0) axis=0)


# Invert Images # Invert Images
@@ -119,10 +119,10 @@ def test_invert_c(plot=False):


for idx, (image, _) in enumerate(ds_invert): for idx, (image, _) in enumerate(ds_invert):
if idx == 0: if idx == 0:
images_invert = image
images_invert = image.asnumpy()
else: else:
images_invert = np.append(images_invert, images_invert = np.append(images_invert,
image,
image.asnumpy(),
axis=0) axis=0)
if plot: if plot:
visualize_list(images_original, images_invert) visualize_list(images_original, images_invert)
@@ -150,10 +150,10 @@ def test_invert_py_c(plot=False):


for idx, (image, _) in enumerate(ds_c_invert): for idx, (image, _) in enumerate(ds_c_invert):
if idx == 0: if idx == 0:
images_c_invert = image
images_c_invert = image.asnumpy()
else: else:
images_c_invert = np.append(images_c_invert, images_c_invert = np.append(images_c_invert,
image,
image.asnumpy(),
axis=0) axis=0)


# invert images in python # invert images in python
@@ -171,10 +171,10 @@ def test_invert_py_c(plot=False):


for idx, (image, _) in enumerate(ds_p_invert): for idx, (image, _) in enumerate(ds_p_invert):
if idx == 0: if idx == 0:
images_p_invert = image
images_p_invert = image.asnumpy()
else: else:
images_p_invert = np.append(images_p_invert, images_p_invert = np.append(images_p_invert,
image,
image.asnumpy(),
axis=0) axis=0)


num_samples = images_c_invert.shape[0] num_samples = images_c_invert.shape[0]


+47 -5  tests/ut/python/dataset/test_iterator.py

@@ -15,6 +15,8 @@
import numpy as np
import pytest

+import mindspore.common.dtype as mstype
+from mindspore.common.tensor import Tensor
import mindspore.dataset as ds
from mindspore.dataset.engine.iterators import ITERATORS_LIST, _cleanup

@@ -28,15 +30,15 @@ def check(project_columns):
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=COLUMNS, shuffle=False)
    data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=project_columns, shuffle=False)

-   for data_actual, data_expected in zip(data1.create_tuple_iterator(project_columns, num_epochs=1),
-                                         data2.create_tuple_iterator(num_epochs=1)):
+   for data_actual, data_expected in zip(data1.create_tuple_iterator(project_columns, num_epochs=1, output_numpy=True),
+                                         data2.create_tuple_iterator(num_epochs=1, output_numpy=True)):
        assert len(data_actual) == len(data_expected)
        assert all([np.array_equal(d1, d2) for d1, d2 in zip(data_actual, data_expected)])


-def test_iterator_create_tuple():
+def test_iterator_create_tuple_numpy():
    """
-   Test creating tuple iterator
+   Test creating tuple iterator with output NumPy
    """
    check(COLUMNS)
    check(COLUMNS[0:1])
@@ -45,6 +47,46 @@ def test_iterator_create_tuple():
    check(COLUMNS[7:8])
    check(COLUMNS[0:2:8])

+def test_iterator_create_dict_mstensor():
+    """
+    Test creating dict iterator with output MSTensor
+    """
+    def generator():
+        for i in range(64):
+            yield (np.array([i], dtype=np.float32),)
+
+    # apply dataset operations
+    data1 = ds.GeneratorDataset(generator, ["data"])
+
+    i = 0
+    for item in data1.create_dict_iterator(num_epochs=1):
+        golden = np.array([i], dtype=np.float32)
+        np.testing.assert_array_equal(item["data"].asnumpy(), golden)
+        assert isinstance(item["data"], Tensor)
+        assert item["data"].dtype == mstype.float32
+        i += 1
+    assert i == 64
+
+def test_iterator_create_tuple_mstensor():
+    """
+    Test creating tuple iterator with output MSTensor
+    """
+    def generator():
+        for i in range(64):
+            yield (np.array([i], dtype=np.float32),)
+
+    # apply dataset operations
+    data1 = ds.GeneratorDataset(generator, ["data"])
+
+    i = 0
+    for item in data1.create_tuple_iterator(num_epochs=1):
+        golden = np.array([i], dtype=np.float32)
+        np.testing.assert_array_equal(item[0].asnumpy(), golden)
+        assert isinstance(item[0], Tensor)
+        assert item[0].dtype == mstype.float32
+        i += 1
+    assert i == 64
+

def test_iterator_weak_ref():
    ITERATORS_LIST.clear()
@@ -113,6 +155,6 @@ def test_tree_copy():


if __name__ == '__main__':
-   test_iterator_create_tuple()
+   test_iterator_create_tuple_numpy()
    test_iterator_weak_ref()
    test_tree_copy()
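Note: the two tests added above pin down the new default behaviour: without output_numpy=True, both iterator flavours hand back mindspore Tensors whose dtype tracks the source column. A compact usage sketch of that default (toy generator and names are illustrative only):

import numpy as np
import mindspore.common.dtype as mstype
from mindspore.common.tensor import Tensor
import mindspore.dataset as ds

def gen():
    for i in range(4):
        yield (np.array([i], dtype=np.float32),)

data = ds.GeneratorDataset(gen, ["data"])

for item in data.create_dict_iterator(num_epochs=1):
    # Default output is a MindSpore Tensor; .asnumpy() is only needed when
    # the value has to interoperate with NumPy code.
    assert isinstance(item["data"], Tensor)
    assert item["data"].dtype == mstype.float32
    _ = item["data"].asnumpy()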

+2 -1  tests/ut/python/dataset/test_linear_transformation.py

@@ -63,7 +63,8 @@ def test_linear_transformation_op(plot=False):

    image_transformed = []
    image = []
-   for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)):
+   for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True),
+                           data2.create_dict_iterator(num_epochs=1, output_numpy=True)):
        image1 = (item1["image"].transpose(1, 2, 0) * 255).astype(np.uint8)
        image2 = (item2["image"].transpose(1, 2, 0) * 255).astype(np.uint8)
        image_transformed.append(image1)


+1 -1  tests/ut/python/dataset/test_mask_op.py

@@ -59,7 +59,7 @@ def mask_compare(array, op, constant, dtype=mstype.bool_):

    array = array.astype(dtype=mstype_to_np_type[dtype])

-   np.testing.assert_array_equal(array, d[0])
+   np.testing.assert_array_equal(array, d[0].asnumpy())


def test_mask_int_comparison():


+48 -47  tests/ut/python/dataset/test_minddataset.py

@@ -187,7 +187,7 @@ def test_nlp_compress_data(add_and_remove_nlp_compress_file):
NLP_FILE_NAME + "0", None, num_readers, shuffle=False) NLP_FILE_NAME + "0", None, num_readers, shuffle=False)
assert data_set.get_dataset_size() == 16 assert data_set.get_dataset_size() == 16
num_iter = 0 num_iter = 0
for x, item in zip(data, data_set.create_dict_iterator(num_epochs=1)):
for x, item in zip(data, data_set.create_dict_iterator(num_epochs=1, output_numpy=True)):
assert (item["array_a"] == x["array_a"]).all() assert (item["array_a"] == x["array_a"]).all()
assert (item["array_b"] == x["array_b"]).all() assert (item["array_b"] == x["array_b"]).all()
assert item["array_c"].tobytes() == x["array_c"] assert item["array_c"].tobytes() == x["array_c"]
@@ -206,7 +206,8 @@ def test_nlp_compress_data_old_version(add_and_remove_nlp_compress_file):
OLD_NLP_FILE_NAME + "0", None, num_readers, shuffle=False) OLD_NLP_FILE_NAME + "0", None, num_readers, shuffle=False)
assert old_data_set.get_dataset_size() == 16 assert old_data_set.get_dataset_size() == 16
num_iter = 0 num_iter = 0
for x, item in zip(old_data_set.create_dict_iterator(num_epochs=1), data_set.create_dict_iterator(num_epochs=1)):
for x, item in zip(old_data_set.create_dict_iterator(num_epochs=1, output_numpy=True),
data_set.create_dict_iterator(num_epochs=1, output_numpy=True)):
assert (item["array_a"] == x["array_a"]).all() assert (item["array_a"] == x["array_a"]).all()
assert (item["array_b"] == x["array_b"]).all() assert (item["array_b"] == x["array_b"]).all()
assert (item["array_c"] == x["array_c"]).all() assert (item["array_c"] == x["array_c"]).all()
@@ -255,7 +256,7 @@ def test_cv_minddataset_partition_tutorial(add_and_remove_cv_file):
data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers,
num_shards=num_shards, shard_id=partition_id) num_shards=num_shards, shard_id=partition_id)
num_iter = 0 num_iter = 0
for item in data_set.create_dict_iterator(num_epochs=1):
for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
logger.info("-------------- partition : {} ------------------------".format(partition_id)) logger.info("-------------- partition : {} ------------------------".format(partition_id))
logger.info("-------------- item[file_name]: {}-----------------------".format(item["file_name"])) logger.info("-------------- item[file_name]: {}-----------------------".format(item["file_name"]))
logger.info("-------------- item[label]: {} -----------------------".format(item["label"])) logger.info("-------------- item[label]: {} -----------------------".format(item["label"]))
@@ -278,7 +279,7 @@ def test_cv_minddataset_partition_num_samples_0(add_and_remove_cv_file):
num_shards=num_shards, num_shards=num_shards,
shard_id=partition_id, num_samples=1) shard_id=partition_id, num_samples=1)
num_iter = 0 num_iter = 0
for item in data_set.create_dict_iterator(num_epochs=1):
for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
logger.info("-------------- partition : {} ------------------------".format(partition_id)) logger.info("-------------- partition : {} ------------------------".format(partition_id))
logger.info("-------------- item[file_name]: {}-----------------------".format(item["file_name"])) logger.info("-------------- item[file_name]: {}-----------------------".format(item["file_name"]))
logger.info("-------------- item[label]: {} -----------------------".format(item["label"])) logger.info("-------------- item[label]: {} -----------------------".format(item["label"]))
@@ -301,7 +302,7 @@ def test_cv_minddataset_partition_num_samples_1(add_and_remove_cv_file):
num_shards=num_shards, num_shards=num_shards,
shard_id=partition_id, num_samples=2) shard_id=partition_id, num_samples=2)
num_iter = 0 num_iter = 0
for item in data_set.create_dict_iterator(num_epochs=1):
for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
logger.info("-------------- partition : {} ------------------------".format(partition_id)) logger.info("-------------- partition : {} ------------------------".format(partition_id))
logger.info("-------------- item[file_name]: {}-----------------------".format(item["file_name"])) logger.info("-------------- item[file_name]: {}-----------------------".format(item["file_name"]))
logger.info("-------------- item[label]: {} -----------------------".format(item["label"])) logger.info("-------------- item[label]: {} -----------------------".format(item["label"]))
@@ -324,7 +325,7 @@ def test_cv_minddataset_partition_num_samples_2(add_and_remove_cv_file):
num_shards=num_shards, num_shards=num_shards,
shard_id=partition_id, num_samples=3) shard_id=partition_id, num_samples=3)
num_iter = 0 num_iter = 0
for item in data_set.create_dict_iterator(num_epochs=1):
for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
logger.info("-------------- partition : {} ------------------------".format(partition_id)) logger.info("-------------- partition : {} ------------------------".format(partition_id))
logger.info("-------------- item[file_name]: {}-----------------------".format(item["file_name"])) logger.info("-------------- item[file_name]: {}-----------------------".format(item["file_name"]))
logger.info("-------------- item[label]: {} -----------------------".format(item["label"])) logger.info("-------------- item[label]: {} -----------------------".format(item["label"]))
@@ -352,7 +353,7 @@ def test_cv_minddataset_partition_tutorial_check_shuffle_result(add_and_remove_c
data_set = data_set.repeat(3) data_set = data_set.repeat(3)


num_iter = 0 num_iter = 0
for item in data_set.create_dict_iterator(num_epochs=1):
for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
logger.info("-------------- partition : {} ------------------------".format(partition_id)) logger.info("-------------- partition : {} ------------------------".format(partition_id))
logger.info("-------------- item[file_name]: {}-----------------------".format(item["file_name"])) logger.info("-------------- item[file_name]: {}-----------------------".format(item["file_name"]))
logger.info("-------------- item[label]: {} -----------------------".format(item["label"])) logger.info("-------------- item[label]: {} -----------------------".format(item["label"]))
@@ -391,7 +392,7 @@ def test_cv_minddataset_partition_tutorial_check_whole_reshuffle_result_per_epoc
data_set = data_set.repeat(3) data_set = data_set.repeat(3)


num_iter = 0 num_iter = 0
for item in data_set.create_dict_iterator(num_epochs=1):
for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
logger.info("-------------- partition : {} ------------------------".format(partition_id)) logger.info("-------------- partition : {} ------------------------".format(partition_id))
logger.info("-------------- item[file_name]: {}-----------------------".format(item["file_name"])) logger.info("-------------- item[file_name]: {}-----------------------".format(item["file_name"]))
logger.info("-------------- item[label]: {} -----------------------".format(item["label"])) logger.info("-------------- item[label]: {} -----------------------".format(item["label"]))
@@ -424,7 +425,7 @@ def test_cv_minddataset_check_shuffle_result(add_and_remove_cv_file):
data_set = data_set.repeat(3) data_set = data_set.repeat(3)


num_iter = 0 num_iter = 0
for item in data_set.create_dict_iterator(num_epochs=1):
for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
logger.info("-------------- item[file_name]: {}-----------------------".format(item["file_name"])) logger.info("-------------- item[file_name]: {}-----------------------".format(item["file_name"]))
logger.info("-------------- item[label]: {} -----------------------".format(item["label"])) logger.info("-------------- item[label]: {} -----------------------".format(item["label"]))
num_iter += 1 num_iter += 1
@@ -450,7 +451,7 @@ def test_cv_minddataset_check_shuffle_result(add_and_remove_cv_file):
data_set2 = data_set2.repeat(3) data_set2 = data_set2.repeat(3)


num_iter = 0 num_iter = 0
for item in data_set2.create_dict_iterator(num_epochs=1):
for item in data_set2.create_dict_iterator(num_epochs=1, output_numpy=True):
logger.info("-------------- item[file_name]: {}-----------------------".format(item["file_name"])) logger.info("-------------- item[file_name]: {}-----------------------".format(item["file_name"]))
logger.info("-------------- item[label]: {} -----------------------".format(item["label"])) logger.info("-------------- item[label]: {} -----------------------".format(item["label"]))
num_iter += 1 num_iter += 1
@@ -481,7 +482,7 @@ def test_cv_minddataset_check_shuffle_result(add_and_remove_cv_file):
data_set3 = data_set3.repeat(3) data_set3 = data_set3.repeat(3)


num_iter = 0 num_iter = 0
for item in data_set3.create_dict_iterator(num_epochs=1):
for item in data_set3.create_dict_iterator(num_epochs=1, output_numpy=True):
logger.info("-------------- item[file_name]: {}-----------------------".format(item["file_name"])) logger.info("-------------- item[file_name]: {}-----------------------".format(item["file_name"]))
logger.info("-------------- item[label]: {} -----------------------".format(item["label"])) logger.info("-------------- item[label]: {} -----------------------".format(item["label"]))
num_iter += 1 num_iter += 1
@@ -513,7 +514,7 @@ def test_cv_minddataset_dataset_size(add_and_remove_cv_file):
repeat_num = 2 repeat_num = 2
data_set = data_set.repeat(repeat_num) data_set = data_set.repeat(repeat_num)
num_iter = 0 num_iter = 0
for item in data_set.create_dict_iterator(num_epochs=1):
for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
logger.info( logger.info(
"-------------- get dataset size {} -----------------".format(num_iter)) "-------------- get dataset size {} -----------------".format(num_iter))
logger.info( logger.info(
@@ -542,7 +543,7 @@ def test_cv_minddataset_repeat_reshuffle(add_and_remove_cv_file):
data_set = data_set.repeat(2) data_set = data_set.repeat(2)
num_iter = 0 num_iter = 0
labels = [] labels = []
for item in data_set.create_dict_iterator(num_epochs=1):
for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
logger.info( logger.info(
"-------------- get dataset size {} -----------------".format(num_iter)) "-------------- get dataset size {} -----------------".format(num_iter))
logger.info( logger.info(
@@ -571,7 +572,7 @@ def test_cv_minddataset_batch_size_larger_than_records(add_and_remove_cv_file):
num_parallel_workers=2) num_parallel_workers=2)
data_set = data_set.batch(32, drop_remainder=True) data_set = data_set.batch(32, drop_remainder=True)
num_iter = 0 num_iter = 0
for item in data_set.create_dict_iterator(num_epochs=1):
for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
logger.info( logger.info(
"-------------- get dataset size {} -----------------".format(num_iter)) "-------------- get dataset size {} -----------------".format(num_iter))
logger.info( logger.info(
@@ -603,7 +604,7 @@ def test_cv_minddataset_reader_file_list(add_and_remove_cv_file):
for x in range(FILES_NUM)], columns_list, num_readers) for x in range(FILES_NUM)], columns_list, num_readers)
assert data_set.get_dataset_size() == 10 assert data_set.get_dataset_size() == 10
num_iter = 0 num_iter = 0
for item in data_set.create_dict_iterator(num_epochs=1):
for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
logger.info( logger.info(
"-------------- cv reader basic: {} ------------------------".format(num_iter)) "-------------- cv reader basic: {} ------------------------".format(num_iter))
logger.info( logger.info(
@@ -625,7 +626,7 @@ def test_cv_minddataset_reader_one_partition(add_and_remove_cv_file):
data_set = ds.MindDataset([CV_FILE_NAME + "0"], columns_list, num_readers) data_set = ds.MindDataset([CV_FILE_NAME + "0"], columns_list, num_readers)
assert data_set.get_dataset_size() < 10 assert data_set.get_dataset_size() < 10
num_iter = 0 num_iter = 0
for item in data_set.create_dict_iterator(num_epochs=1):
for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
logger.info( logger.info(
"-------------- cv reader basic: {} ------------------------".format(num_iter)) "-------------- cv reader basic: {} ------------------------".format(num_iter))
logger.info( logger.info(
@@ -678,7 +679,7 @@ def test_cv_minddataset_reader_two_dataset(add_and_remove_cv_file):
columns_list, num_readers) columns_list, num_readers)
assert data_set.get_dataset_size() == 30 assert data_set.get_dataset_size() == 30
num_iter = 0 num_iter = 0
for item in data_set.create_dict_iterator(num_epochs=1):
for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
logger.info( logger.info(
"-------------- cv reader basic: {} ------------------------".format(num_iter)) "-------------- cv reader basic: {} ------------------------".format(num_iter))
logger.info( logger.info(
@@ -739,7 +740,7 @@ def test_cv_minddataset_reader_two_dataset_partition(add_and_remove_cv_file):
columns_list, num_readers) columns_list, num_readers)
assert data_set.get_dataset_size() < 20 assert data_set.get_dataset_size() < 20
num_iter = 0 num_iter = 0
for item in data_set.create_dict_iterator(num_epochs=1):
for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
logger.info( logger.info(
"-------------- cv reader basic: {} ------------------------".format(num_iter)) "-------------- cv reader basic: {} ------------------------".format(num_iter))
logger.info( logger.info(
@@ -770,7 +771,7 @@ def test_cv_minddataset_reader_basic_tutorial(add_and_remove_cv_file):
data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers) data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers)
assert data_set.get_dataset_size() == 10 assert data_set.get_dataset_size() == 10
num_iter = 0 num_iter = 0
for item in data_set.create_dict_iterator(num_epochs=1):
for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
logger.info( logger.info(
"-------------- cv reader basic: {} ------------------------".format(num_iter)) "-------------- cv reader basic: {} ------------------------".format(num_iter))
logger.info( logger.info(
@@ -791,7 +792,7 @@ def test_nlp_minddataset_reader_basic_tutorial(add_and_remove_nlp_file):
data_set = ds.MindDataset(NLP_FILE_NAME + "0", None, num_readers) data_set = ds.MindDataset(NLP_FILE_NAME + "0", None, num_readers)
assert data_set.get_dataset_size() == 10 assert data_set.get_dataset_size() == 10
num_iter = 0 num_iter = 0
for item in data_set.create_dict_iterator(num_epochs=1):
for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
logger.info( logger.info(
"-------------- cv reader basic: {} ------------------------".format(num_iter)) "-------------- cv reader basic: {} ------------------------".format(num_iter))
logger.info( logger.info(
@@ -821,7 +822,7 @@ def test_cv_minddataset_reader_basic_tutorial_5_epoch(add_and_remove_cv_file):
assert data_set.get_dataset_size() == 10 assert data_set.get_dataset_size() == 10
for _ in range(5): for _ in range(5):
num_iter = 0 num_iter = 0
for data in data_set:
for data in data_set.create_tuple_iterator(output_numpy=True):
logger.info("data is {}".format(data)) logger.info("data is {}".format(data))
num_iter += 1 num_iter += 1
assert num_iter == 10 assert num_iter == 10
@@ -852,7 +853,7 @@ def test_cv_minddataset_reader_basic_tutorial_5_epoch_with_batch(add_and_remove_
assert data_set.get_dataset_size() == 5 assert data_set.get_dataset_size() == 5
for _ in range(5): for _ in range(5):
num_iter = 0 num_iter = 0
for data in data_set:
for data in data_set.create_tuple_iterator(output_numpy=True):
logger.info("data is {}".format(data)) logger.info("data is {}".format(data))
num_iter += 1 num_iter += 1
assert num_iter == 5 assert num_iter == 5
@@ -865,7 +866,7 @@ def test_cv_minddataset_reader_no_columns(add_and_remove_cv_file):
data_set = ds.MindDataset(CV_FILE_NAME + "0") data_set = ds.MindDataset(CV_FILE_NAME + "0")
assert data_set.get_dataset_size() == 10 assert data_set.get_dataset_size() == 10
num_iter = 0 num_iter = 0
for item in data_set.create_dict_iterator(num_epochs=1):
for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
logger.info( logger.info(
"-------------- cv reader basic: {} ------------------------".format(num_iter)) "-------------- cv reader basic: {} ------------------------".format(num_iter))
logger.info( logger.info(
@@ -888,7 +889,7 @@ def test_cv_minddataset_reader_repeat_tutorial(add_and_remove_cv_file):
repeat_num = 2 repeat_num = 2
data_set = data_set.repeat(repeat_num) data_set = data_set.repeat(repeat_num)
num_iter = 0 num_iter = 0
for item in data_set.create_dict_iterator(num_epochs=1):
for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
logger.info( logger.info(
"-------------- repeat two test {} ------------------------".format(num_iter)) "-------------- repeat two test {} ------------------------".format(num_iter))
logger.info( logger.info(
@@ -1217,7 +1218,7 @@ def test_write_with_multi_bytes_and_array_and_read_by_MindDataset():
shuffle=False) shuffle=False)
assert data_set.get_dataset_size() == 6 assert data_set.get_dataset_size() == 6
num_iter = 0 num_iter = 0
for item in data_set.create_dict_iterator(num_epochs=1):
for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
assert len(item) == 13 assert len(item) == 13
for field in item: for field in item:
if isinstance(item[field], np.ndarray): if isinstance(item[field], np.ndarray):
@@ -1236,7 +1237,7 @@ def test_write_with_multi_bytes_and_array_and_read_by_MindDataset():
shuffle=False) shuffle=False)
assert data_set.get_dataset_size() == 6 assert data_set.get_dataset_size() == 6
num_iter = 0 num_iter = 0
for item in data_set.create_dict_iterator(num_epochs=1):
for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
assert len(item) == 3 assert len(item) == 3
for field in item: for field in item:
if isinstance(item[field], np.ndarray): if isinstance(item[field], np.ndarray):
@@ -1253,7 +1254,7 @@ def test_write_with_multi_bytes_and_array_and_read_by_MindDataset():
shuffle=False) shuffle=False)
assert data_set.get_dataset_size() == 6 assert data_set.get_dataset_size() == 6
num_iter = 0 num_iter = 0
for item in data_set.create_dict_iterator(num_epochs=1):
for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
assert len(item) == 4 assert len(item) == 4
for field in item: for field in item:
if isinstance(item[field], np.ndarray): if isinstance(item[field], np.ndarray):
@@ -1272,7 +1273,7 @@ def test_write_with_multi_bytes_and_array_and_read_by_MindDataset():
shuffle=False) shuffle=False)
assert data_set.get_dataset_size() == 6 assert data_set.get_dataset_size() == 6
num_iter = 0 num_iter = 0
for item in data_set.create_dict_iterator(num_epochs=1):
for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
assert len(item) == 3 assert len(item) == 3
for field in item: for field in item:
if isinstance(item[field], np.ndarray): if isinstance(item[field], np.ndarray):
@@ -1291,7 +1292,7 @@ def test_write_with_multi_bytes_and_array_and_read_by_MindDataset():
shuffle=False) shuffle=False)
assert data_set.get_dataset_size() == 6 assert data_set.get_dataset_size() == 6
num_iter = 0 num_iter = 0
for item in data_set.create_dict_iterator(num_epochs=1):
for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
assert len(item) == 5 assert len(item) == 5
for field in item: for field in item:
if isinstance(item[field], np.ndarray): if isinstance(item[field], np.ndarray):
@@ -1310,7 +1311,7 @@ def test_write_with_multi_bytes_and_array_and_read_by_MindDataset():
shuffle=False) shuffle=False)
assert data_set.get_dataset_size() == 6 assert data_set.get_dataset_size() == 6
num_iter = 0 num_iter = 0
for item in data_set.create_dict_iterator(num_epochs=1):
for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
assert len(item) == 5 assert len(item) == 5
for field in item: for field in item:
if isinstance(item[field], np.ndarray): if isinstance(item[field], np.ndarray):
@@ -1330,7 +1331,7 @@ def test_write_with_multi_bytes_and_array_and_read_by_MindDataset():
shuffle=False) shuffle=False)
assert data_set.get_dataset_size() == 6 assert data_set.get_dataset_size() == 6
num_iter = 0 num_iter = 0
for item in data_set.create_dict_iterator(num_epochs=1):
for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
assert len(item) == 11 assert len(item) == 11
for field in item: for field in item:
if isinstance(item[field], np.ndarray): if isinstance(item[field], np.ndarray):
@@ -1420,7 +1421,7 @@ def test_write_with_multi_bytes_and_MindDataset():
shuffle=False) shuffle=False)
assert data_set.get_dataset_size() == 6 assert data_set.get_dataset_size() == 6
num_iter = 0 num_iter = 0
for item in data_set.create_dict_iterator(num_epochs=1):
for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
assert len(item) == 7 assert len(item) == 7
for field in item: for field in item:
if isinstance(item[field], np.ndarray): if isinstance(item[field], np.ndarray):
@@ -1438,7 +1439,7 @@ def test_write_with_multi_bytes_and_MindDataset():
shuffle=False) shuffle=False)
assert data_set.get_dataset_size() == 6 assert data_set.get_dataset_size() == 6
num_iter = 0 num_iter = 0
for item in data_set.create_dict_iterator(num_epochs=1):
for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
assert len(item) == 3 assert len(item) == 3
for field in item: for field in item:
if isinstance(item[field], np.ndarray): if isinstance(item[field], np.ndarray):
@@ -1456,7 +1457,7 @@ def test_write_with_multi_bytes_and_MindDataset():
shuffle=False) shuffle=False)
assert data_set.get_dataset_size() == 6 assert data_set.get_dataset_size() == 6
num_iter = 0 num_iter = 0
for item in data_set.create_dict_iterator(num_epochs=1):
for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
assert len(item) == 2 assert len(item) == 2
for field in item: for field in item:
if isinstance(item[field], np.ndarray): if isinstance(item[field], np.ndarray):
@@ -1474,7 +1475,7 @@ def test_write_with_multi_bytes_and_MindDataset():
shuffle=False) shuffle=False)
assert data_set.get_dataset_size() == 6 assert data_set.get_dataset_size() == 6
num_iter = 0 num_iter = 0
for item in data_set.create_dict_iterator(num_epochs=1):
for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
assert len(item) == 2 assert len(item) == 2
for field in item: for field in item:
if isinstance(item[field], np.ndarray): if isinstance(item[field], np.ndarray):
@@ -1492,7 +1493,7 @@ def test_write_with_multi_bytes_and_MindDataset():
shuffle=False) shuffle=False)
assert data_set.get_dataset_size() == 6 assert data_set.get_dataset_size() == 6
num_iter = 0 num_iter = 0
for item in data_set.create_dict_iterator(num_epochs=1):
for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
assert len(item) == 3 assert len(item) == 3
for field in item: for field in item:
if isinstance(item[field], np.ndarray): if isinstance(item[field], np.ndarray):
@@ -1511,7 +1512,7 @@ def test_write_with_multi_bytes_and_MindDataset():
shuffle=False) shuffle=False)
assert data_set.get_dataset_size() == 6 assert data_set.get_dataset_size() == 6
num_iter = 0 num_iter = 0
for item in data_set.create_dict_iterator(num_epochs=1):
for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
assert len(item) == 5 assert len(item) == 5
for field in item: for field in item:
if isinstance(item[field], np.ndarray): if isinstance(item[field], np.ndarray):
@@ -1615,7 +1616,7 @@ def test_write_with_multi_array_and_MindDataset():
shuffle=False) shuffle=False)
assert data_set.get_dataset_size() == 6 assert data_set.get_dataset_size() == 6
num_iter = 0 num_iter = 0
for item in data_set.create_dict_iterator(num_epochs=1):
for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
assert len(item) == 8 assert len(item) == 8
for field in item: for field in item:
if isinstance(item[field], np.ndarray): if isinstance(item[field], np.ndarray):
@@ -1635,7 +1636,7 @@ def test_write_with_multi_array_and_MindDataset():
shuffle=False) shuffle=False)
assert data_set.get_dataset_size() == 6 assert data_set.get_dataset_size() == 6
num_iter = 0 num_iter = 0
for item in data_set.create_dict_iterator(num_epochs=1):
for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
assert len(item) == 6 assert len(item) == 6
for field in item: for field in item:
if isinstance(item[field], np.ndarray): if isinstance(item[field], np.ndarray):
@@ -1655,7 +1656,7 @@ def test_write_with_multi_array_and_MindDataset():
shuffle=False) shuffle=False)
assert data_set.get_dataset_size() == 6 assert data_set.get_dataset_size() == 6
num_iter = 0 num_iter = 0
for item in data_set.create_dict_iterator(num_epochs=1):
for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
assert len(item) == 3 assert len(item) == 3
for field in item: for field in item:
if isinstance(item[field], np.ndarray): if isinstance(item[field], np.ndarray):
@@ -1675,7 +1676,7 @@ def test_write_with_multi_array_and_MindDataset():
shuffle=False) shuffle=False)
assert data_set.get_dataset_size() == 6 assert data_set.get_dataset_size() == 6
num_iter = 0 num_iter = 0
for item in data_set.create_dict_iterator(num_epochs=1):
for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
assert len(item) == 3 assert len(item) == 3
for field in item: for field in item:
if isinstance(item[field], np.ndarray): if isinstance(item[field], np.ndarray):
@@ -1693,7 +1694,7 @@ def test_write_with_multi_array_and_MindDataset():
shuffle=False) shuffle=False)
assert data_set.get_dataset_size() == 6 assert data_set.get_dataset_size() == 6
num_iter = 0 num_iter = 0
for item in data_set.create_dict_iterator(num_epochs=1):
for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
assert len(item) == 1 assert len(item) == 1
for field in item: for field in item:
if isinstance(item[field], np.ndarray): if isinstance(item[field], np.ndarray):
@@ -1714,7 +1715,7 @@ def test_write_with_multi_array_and_MindDataset():
shuffle=False) shuffle=False)
assert data_set.get_dataset_size() == 6 assert data_set.get_dataset_size() == 6
num_iter = 0 num_iter = 0
for item in data_set.create_dict_iterator(num_epochs=1):
for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
assert len(item) == 8 assert len(item) == 8
for field in item: for field in item:
if isinstance(item[field], np.ndarray): if isinstance(item[field], np.ndarray):
@@ -1761,7 +1762,7 @@ def test_numpy_generic():
data_set = ds.MindDataset(CV_FILE_NAME + "0", None, num_readers, shuffle=False) data_set = ds.MindDataset(CV_FILE_NAME + "0", None, num_readers, shuffle=False)
assert data_set.get_dataset_size() == 10 assert data_set.get_dataset_size() == 10
idx = 0 idx = 0
for item in data_set.create_dict_iterator(num_epochs=1):
for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
assert item['label1'] == item['label1'] assert item['label1'] == item['label1']
assert item['label2'] == item['label2'] assert item['label2'] == item['label2']
assert item['label3'] == item['label3'] assert item['label3'] == item['label3']
@@ -1861,7 +1862,7 @@ def test_write_with_float32_float64_float32_array_float64_array_and_MindDataset(
shuffle=False) shuffle=False)
assert data_set.get_dataset_size() == 5 assert data_set.get_dataset_size() == 5
num_iter = 0 num_iter = 0
for item in data_set.create_dict_iterator(num_epochs=1):
for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
assert len(item) == 8 assert len(item) == 8
for field in item: for field in item:
if isinstance(item[field], np.ndarray): if isinstance(item[field], np.ndarray):
@@ -1883,7 +1884,7 @@ def test_write_with_float32_float64_float32_array_float64_array_and_MindDataset(
shuffle=False) shuffle=False)
assert data_set.get_dataset_size() == 5 assert data_set.get_dataset_size() == 5
num_iter = 0 num_iter = 0
for item in data_set.create_dict_iterator(num_epochs=1):
for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
assert len(item) == 2 assert len(item) == 2
for field in item: for field in item:
if isinstance(item[field], np.ndarray): if isinstance(item[field], np.ndarray):
@@ -1905,7 +1906,7 @@ def test_write_with_float32_float64_float32_array_float64_array_and_MindDataset(
shuffle=False) shuffle=False)
assert data_set.get_dataset_size() == 5 assert data_set.get_dataset_size() == 5
num_iter = 0 num_iter = 0
for item in data_set.create_dict_iterator(num_epochs=1):
for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
assert len(item) == 2 assert len(item) == 2
for field in item: for field in item:
if isinstance(item[field], np.ndarray): if isinstance(item[field], np.ndarray):


+4 -4  tests/ut/python/dataset/test_minddataset_exception.py

@@ -97,7 +97,7 @@ def test_invalid_mindrecord():
    with pytest.raises(Exception, match="MindRecordOp init failed"):
        data_set = ds.MindDataset('dummy.mindrecord', columns_list, num_readers)
        num_iter = 0
-       for _ in data_set.create_dict_iterator(num_epochs=1):
+       for _ in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
            num_iter += 1
    try:
        assert num_iter == 0
@@ -116,7 +116,7 @@ def test_minddataset_lack_db():
    with pytest.raises(Exception, match="MindRecordOp init failed"):
        data_set = ds.MindDataset(CV_FILE_NAME, columns_list, num_readers)
        num_iter = 0
-       for _ in data_set.create_dict_iterator(num_epochs=1):
+       for _ in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
            num_iter += 1
    try:
        assert num_iter == 0
@@ -135,7 +135,7 @@ def test_cv_minddataset_pk_sample_error_class_column():
    with pytest.raises(Exception, match="MindRecordOp launch failed"):
        data_set = ds.MindDataset(CV_FILE_NAME, columns_list, num_readers, sampler=sampler)
        num_iter = 0
-       for _ in data_set.create_dict_iterator(num_epochs=1):
+       for _ in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
            num_iter += 1
    os.remove(CV_FILE_NAME)
    os.remove("{}.db".format(CV_FILE_NAME))
@@ -150,7 +150,7 @@ def test_cv_minddataset_pk_sample_exclusive_shuffle():
        data_set = ds.MindDataset(CV_FILE_NAME, columns_list, num_readers,
                                  sampler=sampler, shuffle=False)
        num_iter = 0
-       for _ in data_set.create_dict_iterator(num_epochs=1):
+       for _ in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
            num_iter += 1
    os.remove(CV_FILE_NAME)
    os.remove("{}.db".format(CV_FILE_NAME))


+2 -2  tests/ut/python/dataset/test_minddataset_multi_images.py

@@ -29,7 +29,7 @@ def test_cv_minddataset_reader_two_png_tutorial():
    data_set = ds.MindDataset(CV_FILE_NAME, columns_list, num_readers)
    assert data_set.get_dataset_size() == 5
    num_iter = 0
-   for item in data_set.create_dict_iterator(num_epochs=1):
+   for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
        assert len(item) == 5
        logger.info("-------------- cv reader basic is {} -----------------".format(num_iter))
        logger.info("-------------- item[id] is {} ------------------------".format(item["id"]))
@@ -50,7 +50,7 @@ def test_cv_minddataset_reader_two_png_tutorial_just_image2():
    data_set = ds.MindDataset(CV_FILE_NAME, columns_list, num_readers)
    assert data_set.get_dataset_size() == 5
    num_iter = 0
-   for item in data_set.create_dict_iterator(num_epochs=1):
+   for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
        assert len(item) == 2
        logger.info("-------------- cv reader basic is {} -----------------".format(num_iter))
        logger.info("-------------- item[img_data] is {} ------------------".format(item["img_data"]))


+1 -1  tests/ut/python/dataset/test_minddataset_multi_images_and_ndarray.py

@@ -57,7 +57,7 @@ def test_cv_minddataset_reader_multi_image_and_ndarray_tutorial():
data_set = ds.MindDataset(CV_FILE_NAME, columns_list, num_readers) data_set = ds.MindDataset(CV_FILE_NAME, columns_list, num_readers)
assert data_set.get_dataset_size() == 5 assert data_set.get_dataset_size() == 5
num_iter = 0 num_iter = 0
for item in data_set.create_dict_iterator(num_epochs=1):
for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
assert len(item) == 7 assert len(item) == 7
logger.info("item: {}".format(item)) logger.info("item: {}".format(item))
assert item["image_0"].dtype == np.uint8 assert item["image_0"].dtype == np.uint8


+ 11
- 11
tests/ut/python/dataset/test_minddataset_padded.py

@@ -122,7 +122,7 @@ def test_cv_minddataset_reader_basic_padded_samples(add_and_remove_cv_file):
assert data_set.get_dataset_size() == 15 assert data_set.get_dataset_size() == 15
num_iter = 0 num_iter = 0
num_padded_iter = 0 num_padded_iter = 0
for item in data_set.create_dict_iterator(num_epochs=1):
for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
logger.info("-------------- cv reader basic: {} ------------------------".format(num_iter)) logger.info("-------------- cv reader basic: {} ------------------------".format(num_iter))
logger.info("-------------- item[file_name]: {} ------------------------".format(item["file_name"])) logger.info("-------------- item[file_name]: {} ------------------------".format(item["file_name"]))
logger.info("-------------- item[label]: {} ----------------------------".format(item["label"])) logger.info("-------------- item[label]: {} ----------------------------".format(item["label"]))
@@ -157,7 +157,7 @@ def test_cv_minddataset_partition_padded_samples(add_and_remove_cv_file):
padded_sample=padded_sample, padded_sample=padded_sample,
num_padded=num_padded) num_padded=num_padded)
assert data_set.get_dataset_size() == dataset_size assert data_set.get_dataset_size() == dataset_size
for item in data_set.create_dict_iterator(num_epochs=1):
for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
logger.info("-------------- partition : {} ------------------------".format(partition_id)) logger.info("-------------- partition : {} ------------------------".format(partition_id))
logger.info("-------------- len(item[data]): {} ------------------------".format(len(item["data"]))) logger.info("-------------- len(item[data]): {} ------------------------".format(len(item["data"])))
logger.info("-------------- item[data]: {} -----------------------------".format(item["data"])) logger.info("-------------- item[data]: {} -----------------------------".format(item["data"]))
@@ -205,7 +205,7 @@ def test_cv_minddataset_partition_padded_samples_multi_epoch(add_and_remove_cv_f
assert data_set.get_dataset_size() == dataset_size assert data_set.get_dataset_size() == dataset_size
data_set = data_set.repeat(repeat_size) data_set = data_set.repeat(repeat_size)
local_index = 0 local_index = 0
for item in data_set.create_dict_iterator(num_epochs=1):
for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
logger.info("-------------- partition : {} ------------------------".format(partition_id)) logger.info("-------------- partition : {} ------------------------".format(partition_id))
logger.info("-------------- len(item[data]): {} ------------------------".format(len(item["data"]))) logger.info("-------------- len(item[data]): {} ------------------------".format(len(item["data"])))
logger.info("-------------- item[data]: {} -----------------------------".format(item["data"])) logger.info("-------------- item[data]: {} -----------------------------".format(item["data"]))
@@ -267,7 +267,7 @@ def test_cv_minddataset_partition_padded_samples_no_dividsible(add_and_remove_cv
padded_sample=padded_sample, padded_sample=padded_sample,
num_padded=num_padded) num_padded=num_padded)
num_iter = 0 num_iter = 0
for item in data_set.create_dict_iterator(num_epochs=1):
for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
num_iter += 1 num_iter += 1
return num_iter return num_iter


@@ -313,7 +313,7 @@ def test_cv_minddataset_partition_padded_samples_no_equal_column_list(add_and_re
shard_id=partition_id, shard_id=partition_id,
padded_sample=padded_sample, padded_sample=padded_sample,
num_padded=num_padded) num_padded=num_padded)
for item in data_set.create_dict_iterator(num_epochs=1):
for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
logger.info("-------------- partition : {} ------------------------".format(partition_id)) logger.info("-------------- partition : {} ------------------------".format(partition_id))
logger.info("-------------- len(item[data]): {} ------------------------".format(len(item["data"]))) logger.info("-------------- len(item[data]): {} ------------------------".format(len(item["data"])))
logger.info("-------------- item[data]: {} -----------------------------".format(item["data"])) logger.info("-------------- item[data]: {} -----------------------------".format(item["data"]))
@@ -337,7 +337,7 @@ def test_cv_minddataset_partition_padded_samples_no_column_list(add_and_remove_c
shard_id=partition_id, shard_id=partition_id,
padded_sample=padded_sample, padded_sample=padded_sample,
num_padded=num_padded) num_padded=num_padded)
for item in data_set.create_dict_iterator(num_epochs=1):
for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
logger.info("-------------- partition : {} ------------------------".format(partition_id)) logger.info("-------------- partition : {} ------------------------".format(partition_id))
logger.info("-------------- len(item[data]): {} ------------------------".format(len(item["data"]))) logger.info("-------------- len(item[data]): {} ------------------------".format(len(item["data"])))
logger.info("-------------- item[data]: {} -----------------------------".format(item["data"])) logger.info("-------------- item[data]: {} -----------------------------".format(item["data"]))
@@ -360,7 +360,7 @@ def test_cv_minddataset_partition_padded_samples_no_num_padded(add_and_remove_cv
num_shards=num_shards, num_shards=num_shards,
shard_id=partition_id, shard_id=partition_id,
padded_sample=padded_sample) padded_sample=padded_sample)
for item in data_set.create_dict_iterator(num_epochs=1):
for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
logger.info("-------------- partition : {} ------------------------".format(partition_id)) logger.info("-------------- partition : {} ------------------------".format(partition_id))
logger.info("-------------- len(item[data]): {} ------------------------".format(len(item["data"]))) logger.info("-------------- len(item[data]): {} ------------------------".format(len(item["data"])))
logger.info("-------------- item[data]: {} -----------------------------".format(item["data"])) logger.info("-------------- item[data]: {} -----------------------------".format(item["data"]))
@@ -383,7 +383,7 @@ def test_cv_minddataset_partition_padded_samples_no_padded_samples(add_and_remov
num_shards=num_shards, num_shards=num_shards,
shard_id=partition_id, shard_id=partition_id,
num_padded=num_padded) num_padded=num_padded)
for item in data_set.create_dict_iterator(num_epochs=1):
for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
logger.info("-------------- partition : {} ------------------------".format(partition_id)) logger.info("-------------- partition : {} ------------------------".format(partition_id))
logger.info("-------------- len(item[data]): {} ------------------------".format(len(item["data"]))) logger.info("-------------- len(item[data]): {} ------------------------".format(len(item["data"])))
logger.info("-------------- item[data]: {} -----------------------------".format(item["data"])) logger.info("-------------- item[data]: {} -----------------------------".format(item["data"]))
@@ -413,7 +413,7 @@ def test_nlp_minddataset_reader_basic_padded_samples(add_and_remove_nlp_file):
padded_sample=padded_sample, padded_sample=padded_sample,
num_padded=num_padded) num_padded=num_padded)
assert data_set.get_dataset_size() == dataset_size assert data_set.get_dataset_size() == dataset_size
for item in data_set.create_dict_iterator(num_epochs=1):
for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
logger.info("-------------- item[id]: {} ------------------------".format(item["id"])) logger.info("-------------- item[id]: {} ------------------------".format(item["id"]))
logger.info("-------------- item[rating]: {} --------------------".format(item["rating"])) logger.info("-------------- item[rating]: {} --------------------".format(item["rating"]))
logger.info("-------------- item[input_ids]: {}, shape: {} -----------------".format( logger.info("-------------- item[input_ids]: {}, shape: {} -----------------".format(
@@ -461,7 +461,7 @@ def test_nlp_minddataset_reader_basic_padded_samples_multi_epoch(add_and_remove_
data_set = data_set.repeat(repeat_size) data_set = data_set.repeat(repeat_size)


local_index = 0 local_index = 0
for item in data_set.create_dict_iterator(num_epochs=1):
for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
logger.info("-------------- item[id]: {} ------------------------".format(item["id"])) logger.info("-------------- item[id]: {} ------------------------".format(item["id"]))
logger.info("-------------- item[rating]: {} --------------------".format(item["rating"])) logger.info("-------------- item[rating]: {} --------------------".format(item["rating"]))
logger.info("-------------- item[input_ids]: {}, shape: {} -----------------".format( logger.info("-------------- item[input_ids]: {}, shape: {} -----------------".format(
@@ -523,7 +523,7 @@ def test_nlp_minddataset_reader_basic_padded_samples_check_whole_reshuffle_resul
assert data_set.get_dataset_size() == dataset_size assert data_set.get_dataset_size() == dataset_size
data_set = data_set.repeat(repeat_size) data_set = data_set.repeat(repeat_size)
inner_num_iter = 0 inner_num_iter = 0
for item in data_set.create_dict_iterator(num_epochs=1):
for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
logger.info("-------------- item[id]: {} ------------------------".format(item["id"])) logger.info("-------------- item[id]: {} ------------------------".format(item["id"]))
logger.info("-------------- item[rating]: {} --------------------".format(item["rating"])) logger.info("-------------- item[rating]: {} --------------------".format(item["rating"]))
logger.info("-------------- item[input_ids]: {}, shape: {} -----------------" logger.info("-------------- item[input_ids]: {}, shape: {} -----------------"


+ 28
- 28
tests/ut/python/dataset/test_minddataset_sampler.py

@@ -70,7 +70,7 @@ def test_cv_minddataset_pk_sample_no_column(add_and_remove_cv_file):


assert data_set.get_dataset_size() == 6 assert data_set.get_dataset_size() == 6
num_iter = 0 num_iter = 0
for item in data_set.create_dict_iterator(num_epochs=1):
for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
logger.info( logger.info(
"-------------- cv reader basic: {} ------------------------".format(num_iter)) "-------------- cv reader basic: {} ------------------------".format(num_iter))
logger.info("-------------- item[file_name]: \ logger.info("-------------- item[file_name]: \
@@ -90,7 +90,7 @@ def test_cv_minddataset_pk_sample_basic(add_and_remove_cv_file):


assert data_set.get_dataset_size() == 6 assert data_set.get_dataset_size() == 6
num_iter = 0 num_iter = 0
for item in data_set.create_dict_iterator(num_epochs=1):
for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
logger.info( logger.info(
"-------------- cv reader basic: {} ------------------------".format(num_iter)) "-------------- cv reader basic: {} ------------------------".format(num_iter))
logger.info("-------------- item[data]: \ logger.info("-------------- item[data]: \
@@ -111,7 +111,7 @@ def test_cv_minddataset_pk_sample_shuffle(add_and_remove_cv_file):


assert data_set.get_dataset_size() == 9 assert data_set.get_dataset_size() == 9
num_iter = 0 num_iter = 0
for item in data_set.create_dict_iterator(num_epochs=1):
for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
logger.info( logger.info(
"-------------- cv reader basic: {} ------------------------".format(num_iter)) "-------------- cv reader basic: {} ------------------------".format(num_iter))
logger.info("-------------- item[file_name]: \ logger.info("-------------- item[file_name]: \
@@ -132,7 +132,7 @@ def test_cv_minddataset_pk_sample_shuffle_1(add_and_remove_cv_file):


assert data_set.get_dataset_size() == 5 assert data_set.get_dataset_size() == 5
num_iter = 0 num_iter = 0
for item in data_set.create_dict_iterator(num_epochs=1):
for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
logger.info( logger.info(
"-------------- cv reader basic: {} ------------------------".format(num_iter)) "-------------- cv reader basic: {} ------------------------".format(num_iter))
logger.info("-------------- item[file_name]: \ logger.info("-------------- item[file_name]: \
@@ -152,7 +152,7 @@ def test_cv_minddataset_pk_sample_shuffle_2(add_and_remove_cv_file):


assert data_set.get_dataset_size() == 9 assert data_set.get_dataset_size() == 9
num_iter = 0 num_iter = 0
for item in data_set.create_dict_iterator(num_epochs=1):
for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
logger.info( logger.info(
"-------------- cv reader basic: {} ------------------------".format(num_iter)) "-------------- cv reader basic: {} ------------------------".format(num_iter))
logger.info("-------------- item[file_name]: \ logger.info("-------------- item[file_name]: \
@@ -172,7 +172,7 @@ def test_cv_minddataset_pk_sample_out_of_range_0(add_and_remove_cv_file):
sampler=sampler) sampler=sampler)
assert data_set.get_dataset_size() == 15 assert data_set.get_dataset_size() == 15
num_iter = 0 num_iter = 0
for item in data_set.create_dict_iterator(num_epochs=1):
for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
logger.info( logger.info(
"-------------- cv reader basic: {} ------------------------".format(num_iter)) "-------------- cv reader basic: {} ------------------------".format(num_iter))
logger.info("-------------- item[file_name]: \ logger.info("-------------- item[file_name]: \
@@ -191,7 +191,7 @@ def test_cv_minddataset_pk_sample_out_of_range_1(add_and_remove_cv_file):
sampler=sampler) sampler=sampler)
assert data_set.get_dataset_size() == 15 assert data_set.get_dataset_size() == 15
num_iter = 0 num_iter = 0
for item in data_set.create_dict_iterator(num_epochs=1):
for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
logger.info( logger.info(
"-------------- cv reader basic: {} ------------------------".format(num_iter)) "-------------- cv reader basic: {} ------------------------".format(num_iter))
logger.info("-------------- item[file_name]: \ logger.info("-------------- item[file_name]: \
@@ -210,7 +210,7 @@ def test_cv_minddataset_pk_sample_out_of_range_2(add_and_remove_cv_file):
sampler=sampler) sampler=sampler)
assert data_set.get_dataset_size() == 10 assert data_set.get_dataset_size() == 10
num_iter = 0 num_iter = 0
for item in data_set.create_dict_iterator(num_epochs=1):
for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
logger.info( logger.info(
"-------------- cv reader basic: {} ------------------------".format(num_iter)) "-------------- cv reader basic: {} ------------------------".format(num_iter))
logger.info("-------------- item[file_name]: \ logger.info("-------------- item[file_name]: \
@@ -231,7 +231,7 @@ def test_cv_minddataset_subset_random_sample_basic(add_and_remove_cv_file):
sampler=sampler) sampler=sampler)
assert data_set.get_dataset_size() == 5 assert data_set.get_dataset_size() == 5
num_iter = 0 num_iter = 0
for item in data_set.create_dict_iterator(num_epochs=1):
for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
logger.info( logger.info(
"-------------- cv reader basic: {} ------------------------".format(num_iter)) "-------------- cv reader basic: {} ------------------------".format(num_iter))
logger.info( logger.info(
@@ -254,7 +254,7 @@ def test_cv_minddataset_subset_random_sample_replica(add_and_remove_cv_file):
sampler=sampler) sampler=sampler)
assert data_set.get_dataset_size() == 6 assert data_set.get_dataset_size() == 6
num_iter = 0 num_iter = 0
for item in data_set.create_dict_iterator(num_epochs=1):
for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
logger.info( logger.info(
"-------------- cv reader basic: {} ------------------------".format(num_iter)) "-------------- cv reader basic: {} ------------------------".format(num_iter))
logger.info( logger.info(
@@ -277,7 +277,7 @@ def test_cv_minddataset_subset_random_sample_empty(add_and_remove_cv_file):
sampler=sampler) sampler=sampler)
assert data_set.get_dataset_size() == 0 assert data_set.get_dataset_size() == 0
num_iter = 0 num_iter = 0
for item in data_set.create_dict_iterator(num_epochs=1):
for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
logger.info( logger.info(
"-------------- cv reader basic: {} ------------------------".format(num_iter)) "-------------- cv reader basic: {} ------------------------".format(num_iter))
logger.info( logger.info(
@@ -300,7 +300,7 @@ def test_cv_minddataset_subset_random_sample_out_of_range(add_and_remove_cv_file
sampler=sampler) sampler=sampler)
assert data_set.get_dataset_size() == 5 assert data_set.get_dataset_size() == 5
num_iter = 0 num_iter = 0
for item in data_set.create_dict_iterator(num_epochs=1):
for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
logger.info( logger.info(
"-------------- cv reader basic: {} ------------------------".format(num_iter)) "-------------- cv reader basic: {} ------------------------".format(num_iter))
logger.info( logger.info(
@@ -322,7 +322,7 @@ def test_cv_minddataset_subset_random_sample_negative(add_and_remove_cv_file):
sampler=sampler) sampler=sampler)
assert data_set.get_dataset_size() == 5 assert data_set.get_dataset_size() == 5
num_iter = 0 num_iter = 0
for item in data_set.create_dict_iterator(num_epochs=1):
for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
logger.info( logger.info(
"-------------- cv reader basic: {} ------------------------".format(num_iter)) "-------------- cv reader basic: {} ------------------------".format(num_iter))
logger.info( logger.info(
@@ -345,7 +345,7 @@ def test_cv_minddataset_random_sampler_basic(add_and_remove_cv_file):
assert data_set.get_dataset_size() == 10 assert data_set.get_dataset_size() == 10
num_iter = 0 num_iter = 0
new_dataset = [] new_dataset = []
for item in data_set.create_dict_iterator(num_epochs=1):
for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
logger.info( logger.info(
"-------------- cv reader basic: {} ------------------------".format(num_iter)) "-------------- cv reader basic: {} ------------------------".format(num_iter))
logger.info( logger.info(
@@ -371,7 +371,7 @@ def test_cv_minddataset_random_sampler_repeat(add_and_remove_cv_file):
epoch1_dataset = [] epoch1_dataset = []
epoch2_dataset = [] epoch2_dataset = []
epoch3_dataset = [] epoch3_dataset = []
for item in ds1.create_dict_iterator(num_epochs=1):
for item in ds1.create_dict_iterator(num_epochs=1, output_numpy=True):
logger.info( logger.info(
"-------------- cv reader basic: {} ------------------------".format(num_iter)) "-------------- cv reader basic: {} ------------------------".format(num_iter))
logger.info( logger.info(
@@ -400,7 +400,7 @@ def test_cv_minddataset_random_sampler_replacement(add_and_remove_cv_file):
sampler=sampler) sampler=sampler)
assert data_set.get_dataset_size() == 5 assert data_set.get_dataset_size() == 5
num_iter = 0 num_iter = 0
for item in data_set.create_dict_iterator(num_epochs=1):
for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
logger.info( logger.info(
"-------------- cv reader basic: {} ------------------------".format(num_iter)) "-------------- cv reader basic: {} ------------------------".format(num_iter))
logger.info( logger.info(
@@ -422,7 +422,7 @@ def test_cv_minddataset_sequential_sampler_basic(add_and_remove_cv_file):
sampler=sampler) sampler=sampler)
assert data_set.get_dataset_size() == 4 assert data_set.get_dataset_size() == 4
num_iter = 0 num_iter = 0
for item in data_set.create_dict_iterator(num_epochs=1):
for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
logger.info( logger.info(
"-------------- cv reader basic: {} ------------------------".format(num_iter)) "-------------- cv reader basic: {} ------------------------".format(num_iter))
logger.info( logger.info(
@@ -447,7 +447,7 @@ def test_cv_minddataset_sequential_sampler_exceed_size(add_and_remove_cv_file):
dataset_size = data_set.get_dataset_size() dataset_size = data_set.get_dataset_size()
assert dataset_size == 10 assert dataset_size == 10
num_iter = 0 num_iter = 0
for item in data_set.create_dict_iterator(num_epochs=1):
for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
logger.info( logger.info(
"-------------- cv reader basic: {} ------------------------".format(num_iter)) "-------------- cv reader basic: {} ------------------------".format(num_iter))
logger.info( logger.info(
@@ -473,7 +473,7 @@ def test_cv_minddataset_split_basic(add_and_remove_cv_file):
assert d1.get_dataset_size() == 8 assert d1.get_dataset_size() == 8
assert d2.get_dataset_size() == 2 assert d2.get_dataset_size() == 2
num_iter = 0 num_iter = 0
for item in d1.create_dict_iterator(num_epochs=1):
for item in d1.create_dict_iterator(num_epochs=1, output_numpy=True):
logger.info( logger.info(
"-------------- item[data]: {} -----------------------------".format(item["data"])) "-------------- item[data]: {} -----------------------------".format(item["data"]))
logger.info( logger.info(
@@ -485,7 +485,7 @@ def test_cv_minddataset_split_basic(add_and_remove_cv_file):
num_iter += 1 num_iter += 1
assert num_iter == 8 assert num_iter == 8
num_iter = 0 num_iter = 0
for item in d2.create_dict_iterator(num_epochs=1):
for item in d2.create_dict_iterator(num_epochs=1, output_numpy=True):
logger.info( logger.info(
"-------------- item[data]: {} -----------------------------".format(item["data"])) "-------------- item[data]: {} -----------------------------".format(item["data"]))
logger.info( logger.info(
@@ -509,7 +509,7 @@ def test_cv_minddataset_split_exact_percent(add_and_remove_cv_file):
assert d1.get_dataset_size() == 8 assert d1.get_dataset_size() == 8
assert d2.get_dataset_size() == 2 assert d2.get_dataset_size() == 2
num_iter = 0 num_iter = 0
for item in d1.create_dict_iterator(num_epochs=1):
for item in d1.create_dict_iterator(num_epochs=1, output_numpy=True):
logger.info( logger.info(
"-------------- item[data]: {} -----------------------------".format(item["data"])) "-------------- item[data]: {} -----------------------------".format(item["data"]))
logger.info( logger.info(
@@ -521,7 +521,7 @@ def test_cv_minddataset_split_exact_percent(add_and_remove_cv_file):
num_iter += 1 num_iter += 1
assert num_iter == 8 assert num_iter == 8
num_iter = 0 num_iter = 0
for item in d2.create_dict_iterator(num_epochs=1):
for item in d2.create_dict_iterator(num_epochs=1, output_numpy=True):
logger.info( logger.info(
"-------------- item[data]: {} -----------------------------".format(item["data"])) "-------------- item[data]: {} -----------------------------".format(item["data"]))
logger.info( logger.info(
@@ -545,7 +545,7 @@ def test_cv_minddataset_split_fuzzy_percent(add_and_remove_cv_file):
assert d1.get_dataset_size() == 4 assert d1.get_dataset_size() == 4
assert d2.get_dataset_size() == 6 assert d2.get_dataset_size() == 6
num_iter = 0 num_iter = 0
for item in d1.create_dict_iterator(num_epochs=1):
for item in d1.create_dict_iterator(num_epochs=1, output_numpy=True):
logger.info( logger.info(
"-------------- item[data]: {} -----------------------------".format(item["data"])) "-------------- item[data]: {} -----------------------------".format(item["data"]))
logger.info( logger.info(
@@ -557,7 +557,7 @@ def test_cv_minddataset_split_fuzzy_percent(add_and_remove_cv_file):
num_iter += 1 num_iter += 1
assert num_iter == 4 assert num_iter == 4
num_iter = 0 num_iter = 0
for item in d2.create_dict_iterator(num_epochs=1):
for item in d2.create_dict_iterator(num_epochs=1, output_numpy=True):
logger.info( logger.info(
"-------------- item[data]: {} -----------------------------".format(item["data"])) "-------------- item[data]: {} -----------------------------".format(item["data"]))
logger.info( logger.info(
@@ -585,7 +585,7 @@ def test_cv_minddataset_split_deterministic(add_and_remove_cv_file):
d1_dataset = [] d1_dataset = []
d2_dataset = [] d2_dataset = []
num_iter = 0 num_iter = 0
for item in d1.create_dict_iterator(num_epochs=1):
for item in d1.create_dict_iterator(num_epochs=1, output_numpy=True):
logger.info( logger.info(
"-------------- item[data]: {} -----------------------------".format(item["data"])) "-------------- item[data]: {} -----------------------------".format(item["data"]))
logger.info( logger.info(
@@ -596,7 +596,7 @@ def test_cv_minddataset_split_deterministic(add_and_remove_cv_file):
num_iter += 1 num_iter += 1
assert num_iter == 8 assert num_iter == 8
num_iter = 0 num_iter = 0
for item in d2.create_dict_iterator(num_epochs=1):
for item in d2.create_dict_iterator(num_epochs=1, output_numpy=True):
logger.info( logger.info(
"-------------- item[data]: {} -----------------------------".format(item["data"])) "-------------- item[data]: {} -----------------------------".format(item["data"]))
logger.info( logger.info(
@@ -628,7 +628,7 @@ def test_cv_minddataset_split_sharding(add_and_remove_cv_file):


num_iter = 0 num_iter = 0
d1_shard1 = [] d1_shard1 = []
for item in d1.create_dict_iterator(num_epochs=1):
for item in d1.create_dict_iterator(num_epochs=1, output_numpy=True):
logger.info( logger.info(
"-------------- item[data]: {} -----------------------------".format(item["data"])) "-------------- item[data]: {} -----------------------------".format(item["data"]))
logger.info( logger.info(
@@ -649,7 +649,7 @@ def test_cv_minddataset_split_sharding(add_and_remove_cv_file):
epoch2_dataset = [] epoch2_dataset = []
epoch3_dataset = [] epoch3_dataset = []
num_iter = 0 num_iter = 0
for item in d1s.create_dict_iterator(num_epochs=1):
for item in d1s.create_dict_iterator(num_epochs=1, output_numpy=True):
logger.info( logger.info(
"-------------- item[data]: {} -----------------------------".format(item["data"])) "-------------- item[data]: {} -----------------------------".format(item["data"]))
logger.info( logger.info(


+ 5
- 3
tests/ut/python/dataset/test_mixup_label_smoothing.py

@@ -44,7 +44,7 @@ def test_one_hot_op():
golden_label = np.ones(num_classes) * epsilon_para / num_classes golden_label = np.ones(num_classes) * epsilon_para / num_classes
golden_label[1] = 1 - epsilon_para / num_classes golden_label[1] = 1 - epsilon_para / num_classes


for data in dataset.create_dict_iterator(num_epochs=1):
for data in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
label = data["label"] label = data["label"]
logger.info("label is {}".format(label)) logger.info("label is {}".format(label))
logger.info("golden_label is {}".format(golden_label)) logger.info("golden_label is {}".format(golden_label))
@@ -83,7 +83,8 @@ def test_mix_up_single():
] ]
ds1 = ds1.map(operations=transforms, input_columns=["image", "label"]) ds1 = ds1.map(operations=transforms, input_columns=["image", "label"])


for data1, data2 in zip(ds1.create_dict_iterator(num_epochs=1), ds2.create_dict_iterator(num_epochs=1)):
for data1, data2 in zip(ds1.create_dict_iterator(num_epochs=1, output_numpy=True),
ds2.create_dict_iterator(num_epochs=1, output_numpy=True)):
image1 = data1["image"] image1 = data1["image"]
label = data1["label"] label = data1["label"]
logger.info("label is {}".format(label)) logger.info("label is {}".format(label))
@@ -133,7 +134,8 @@ def test_mix_up_multi():
ds1 = ds1.map(operations=transforms, input_columns=["image", "label"]) ds1 = ds1.map(operations=transforms, input_columns=["image", "label"])
num_iter = 0 num_iter = 0
batch1_image1 = 0 batch1_image1 = 0
for data1, data2 in zip(ds1.create_dict_iterator(num_epochs=1), ds2.create_dict_iterator(num_epochs=1)):
for data1, data2 in zip(ds1.create_dict_iterator(num_epochs=1, output_numpy=True),
ds2.create_dict_iterator(num_epochs=1, output_numpy=True)):
image1 = data1["image"] image1 = data1["image"]
label1 = data1["label"] label1 = data1["label"]
logger.info("label: {}".format(label1)) logger.info("label: {}".format(label1))


+ 32
- 32
tests/ut/python/dataset/test_mixup_op.py

@@ -44,9 +44,9 @@ def test_mixup_batch_success1(plot=False):
images_original = None images_original = None
for idx, (image, _) in enumerate(ds_original): for idx, (image, _) in enumerate(ds_original):
if idx == 0: if idx == 0:
images_original = image
images_original = image.asnumpy()
else: else:
images_original = np.append(images_original, image, axis=0)
images_original = np.append(images_original, image.asnumpy(), axis=0)


# MixUp Images # MixUp Images
data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False) data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
@@ -60,9 +60,9 @@ def test_mixup_batch_success1(plot=False):
images_mixup = None images_mixup = None
for idx, (image, _) in enumerate(data1): for idx, (image, _) in enumerate(data1):
if idx == 0: if idx == 0:
images_mixup = image
images_mixup = image.asnumpy()
else: else:
images_mixup = np.append(images_mixup, image, axis=0)
images_mixup = np.append(images_mixup, image.asnumpy(), axis=0)
if plot: if plot:
visualize_list(images_original, images_mixup) visualize_list(images_original, images_mixup)


@@ -88,9 +88,9 @@ def test_mixup_batch_success2(plot=False):
images_original = None images_original = None
for idx, (image, _) in enumerate(ds_original): for idx, (image, _) in enumerate(ds_original):
if idx == 0: if idx == 0:
images_original = image
images_original = image.asnumpy()
else: else:
images_original = np.append(images_original, image, axis=0)
images_original = np.append(images_original, image.asnumpy(), axis=0)


# MixUp Images # MixUp Images
data1 = ds.ImageFolderDataset(dataset_dir=DATA_DIR2, shuffle=False) data1 = ds.ImageFolderDataset(dataset_dir=DATA_DIR2, shuffle=False)
@@ -108,9 +108,9 @@ def test_mixup_batch_success2(plot=False):
images_mixup = None images_mixup = None
for idx, (image, _) in enumerate(data1): for idx, (image, _) in enumerate(data1):
if idx == 0: if idx == 0:
images_mixup = image
images_mixup = image.asnumpy()
else: else:
images_mixup = np.append(images_mixup, image, axis=0)
images_mixup = np.append(images_mixup, image.asnumpy(), axis=0)
if plot: if plot:
visualize_list(images_original, images_mixup) visualize_list(images_original, images_mixup)


@@ -135,9 +135,9 @@ def test_mixup_batch_success3(plot=False):
images_original = None images_original = None
for idx, (image, _) in enumerate(ds_original): for idx, (image, _) in enumerate(ds_original):
if idx == 0: if idx == 0:
images_original = image
images_original = image.asnumpy()
else: else:
images_original = np.append(images_original, image, axis=0)
images_original = np.append(images_original, image.asnumpy(), axis=0)


# MixUp Images # MixUp Images
data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False) data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
@@ -151,9 +151,9 @@ def test_mixup_batch_success3(plot=False):
images_mixup = np.array([]) images_mixup = np.array([])
for idx, (image, _) in enumerate(data1): for idx, (image, _) in enumerate(data1):
if idx == 0: if idx == 0:
images_mixup = image
images_mixup = image.asnumpy()
else: else:
images_mixup = np.append(images_mixup, image, axis=0)
images_mixup = np.append(images_mixup, image.asnumpy(), axis=0)
if plot: if plot:
visualize_list(images_original, images_mixup) visualize_list(images_original, images_mixup)


@@ -180,9 +180,9 @@ def test_mixup_batch_success4(plot=False):
images_original = None images_original = None
for idx, (image, _) in enumerate(ds_original): for idx, (image, _) in enumerate(ds_original):
if idx == 0: if idx == 0:
images_original = image
images_original = image.asnumpy()
else: else:
images_original = np.append(images_original, image, axis=0)
images_original = np.append(images_original, image.asnumpy(), axis=0)


# MixUp Images # MixUp Images
data1 = ds.CelebADataset(DATA_DIR3, shuffle=False) data1 = ds.CelebADataset(DATA_DIR3, shuffle=False)
@@ -200,9 +200,9 @@ def test_mixup_batch_success4(plot=False):
images_mixup = np.array([]) images_mixup = np.array([])
for idx, (image, _) in enumerate(data1): for idx, (image, _) in enumerate(data1):
if idx == 0: if idx == 0:
images_mixup = image
images_mixup = image.asnumpy()
else: else:
images_mixup = np.append(images_mixup, image, axis=0)
images_mixup = np.append(images_mixup, image.asnumpy(), axis=0)
if plot: if plot:
visualize_list(images_original, images_mixup) visualize_list(images_original, images_mixup)


@@ -252,9 +252,9 @@ def test_mixup_batch_fail1():
images_original = np.array([]) images_original = np.array([])
for idx, (image, _) in enumerate(ds_original): for idx, (image, _) in enumerate(ds_original):
if idx == 0: if idx == 0:
images_original = image
images_original = image.asnumpy()
else: else:
images_original = np.append(images_original, image, axis=0)
images_original = np.append(images_original, image.asnumpy(), axis=0)


# MixUp Images # MixUp Images
data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False) data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
@@ -266,9 +266,9 @@ def test_mixup_batch_fail1():
data1 = data1.map(operations=mixup_batch_op, input_columns=["image", "label"]) data1 = data1.map(operations=mixup_batch_op, input_columns=["image", "label"])
for idx, (image, _) in enumerate(data1): for idx, (image, _) in enumerate(data1):
if idx == 0: if idx == 0:
images_mixup = image
images_mixup = image.asnumpy()
else: else:
images_mixup = np.append(images_mixup, image, axis=0)
images_mixup = np.append(images_mixup, image.asnumpy(), axis=0)
error_message = "You must make sure images are HWC or CHW and batched" error_message = "You must make sure images are HWC or CHW and batched"
assert error_message in str(error.value) assert error_message in str(error.value)


@@ -287,9 +287,9 @@ def test_mixup_batch_fail2():
images_original = np.array([]) images_original = np.array([])
for idx, (image, _) in enumerate(ds_original): for idx, (image, _) in enumerate(ds_original):
if idx == 0: if idx == 0:
images_original = image
images_original = image.asnumpy()
else: else:
images_original = np.append(images_original, image, axis=0)
images_original = np.append(images_original, image.asnumpy(), axis=0)


# MixUp Images # MixUp Images
data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False) data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
@@ -315,9 +315,9 @@ def test_mixup_batch_fail3():
images_original = None images_original = None
for idx, (image, _) in enumerate(ds_original): for idx, (image, _) in enumerate(ds_original):
if idx == 0: if idx == 0:
images_original = image
images_original = image.asnumpy()
else: else:
images_original = np.append(images_original, image, axis=0)
images_original = np.append(images_original, image.asnumpy(), axis=0)


# MixUp Images # MixUp Images
data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False) data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
@@ -332,9 +332,9 @@ def test_mixup_batch_fail3():
images_mixup = np.array([]) images_mixup = np.array([])
for idx, (image, _) in enumerate(data1): for idx, (image, _) in enumerate(data1):
if idx == 0: if idx == 0:
images_mixup = image
images_mixup = image.asnumpy()
else: else:
images_mixup = np.append(images_mixup, image, axis=0)
images_mixup = np.append(images_mixup, image.asnumpy(), axis=0)
error_message = "Both images and labels columns are required" error_message = "Both images and labels columns are required"
assert error_message in str(error.value) assert error_message in str(error.value)


@@ -353,9 +353,9 @@ def test_mixup_batch_fail4():
images_original = np.array([]) images_original = np.array([])
for idx, (image, _) in enumerate(ds_original): for idx, (image, _) in enumerate(ds_original):
if idx == 0: if idx == 0:
images_original = image
images_original = image.asnumpy()
else: else:
images_original = np.append(images_original, image, axis=0)
images_original = np.append(images_original, image.asnumpy(), axis=0)


# MixUp Images # MixUp Images
data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False) data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
@@ -382,9 +382,9 @@ def test_mixup_batch_fail5():
images_original = np.array([]) images_original = np.array([])
for idx, (image, _) in enumerate(ds_original): for idx, (image, _) in enumerate(ds_original):
if idx == 0: if idx == 0:
images_original = image
images_original = image.asnumpy()
else: else:
images_original = np.append(images_original, image, axis=0)
images_original = np.append(images_original, image.asnumpy(), axis=0)


# MixUp Images # MixUp Images
data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False) data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
@@ -397,9 +397,9 @@ def test_mixup_batch_fail5():
images_mixup = np.array([]) images_mixup = np.array([])
for idx, (image, _) in enumerate(data1): for idx, (image, _) in enumerate(data1):
if idx == 0: if idx == 0:
images_mixup = image
images_mixup = image.asnumpy()
else: else:
images_mixup = np.append(images_mixup, image, axis=0)
images_mixup = np.append(images_mixup, image.asnumpy(), axis=0)
error_message = "MixUpBatch: Wrong labels shape. The second column (labels) must have a shape of NC or NLC" error_message = "MixUpBatch: Wrong labels shape. The second column (labels) must have a shape of NC or NLC"
assert error_message in str(error.value) assert error_message in str(error.value)
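
The mixup tests iterate the dataset object directly, so each column now arrives as a mindspore.Tensor; they therefore call .asnumpy() before accumulating batches with np.append. A small runnable sketch of the same conversion, with a hypothetical image-like generator standing in for the Cifar10/ImageFolder/CelebA data:

import numpy as np
import mindspore.dataset as ds

def gen():
    for i in range(4):
        yield (np.full((2, 2, 3), i, dtype=np.uint8), np.array(i, dtype=np.int32))

data = ds.GeneratorDataset(gen, column_names=["image", "label"])
data = data.batch(2)

images = None
for idx, (image, _) in enumerate(data):
    # Direct iteration yields a Tensor per column, so convert before using numpy ops.
    if idx == 0:
        images = image.asnumpy()
    else:
        images = np.append(images, image.asnumpy(), axis=0)

assert images.shape == (4, 2, 2, 3)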




+ 3
- 3
tests/ut/python/dataset/test_ngram_op.py

@@ -42,7 +42,7 @@ def test_multiple_ngrams():
dataset = dataset.map(operations=text.Ngram([1, 2, 3], ("_", 2), ("_", 2), " "), input_columns="text") dataset = dataset.map(operations=text.Ngram([1, 2, 3], ("_", 2), ("_", 2), " "), input_columns="text")


i = 0 i = 0
for data in dataset.create_dict_iterator(num_epochs=1):
for data in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
assert [d.decode("utf8") for d in data["text"]] == n_gram_mottos[i] assert [d.decode("utf8") for d in data["text"]] == n_gram_mottos[i]
i += 1 i += 1


@@ -64,7 +64,7 @@ def test_simple_ngram():
dataset = dataset.map(operations=text.Ngram(3, separator=" "), input_columns="text") dataset = dataset.map(operations=text.Ngram(3, separator=" "), input_columns="text")


i = 0 i = 0
for data in dataset.create_dict_iterator(num_epochs=1):
for data in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
assert [d.decode("utf8") for d in data["text"]] == n_gram_mottos[i], i assert [d.decode("utf8") for d in data["text"]] == n_gram_mottos[i], i
i += 1 i += 1


@@ -79,7 +79,7 @@ def test_corner_cases():
try: try:
dataset = ds.GeneratorDataset(gen(input_line), column_names=["text"]) dataset = ds.GeneratorDataset(gen(input_line), column_names=["text"])
dataset = dataset.map(operations=text.Ngram(n, l_pad, r_pad, separator=sep), input_columns=["text"]) dataset = dataset.map(operations=text.Ngram(n, l_pad, r_pad, separator=sep), input_columns=["text"])
for data in dataset.create_dict_iterator(num_epochs=1):
for data in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
return [d.decode("utf8") for d in data["text"]] return [d.decode("utf8") for d in data["text"]]
except (ValueError, TypeError) as e: except (ValueError, TypeError) as e:
return str(e) return str(e)


+ 2
- 2
tests/ut/python/dataset/test_nlp.py

@@ -38,7 +38,7 @@ def test_on_tokenized_line():
data = data.map(operations=lookup, input_columns=["text"]) data = data.map(operations=lookup, input_columns=["text"])
res = np.array([[10, 1, 11, 1, 12, 1, 15, 1, 13, 1, 14], res = np.array([[10, 1, 11, 1, 12, 1, 15, 1, 13, 1, 14],
[11, 1, 12, 1, 10, 1, 14, 1, 13, 1, 15]], dtype=np.int32) [11, 1, 12, 1, 10, 1, 14, 1, 13, 1, 15]], dtype=np.int32)
for i, d in enumerate(data.create_dict_iterator(num_epochs=1)):
for i, d in enumerate(data.create_dict_iterator(num_epochs=1, output_numpy=True)):
np.testing.assert_array_equal(d["text"], res[i]) np.testing.assert_array_equal(d["text"], res[i])




@@ -56,7 +56,7 @@ def test_on_tokenized_line_with_no_special_tokens():
data = data.map(operations=lookup, input_columns=["text"]) data = data.map(operations=lookup, input_columns=["text"])
res = np.array([[8, 0, 9, 0, 10, 0, 13, 0, 11, 0, 12], res = np.array([[8, 0, 9, 0, 10, 0, 13, 0, 11, 0, 12],
[9, 0, 10, 0, 8, 0, 12, 0, 11, 0, 13]], dtype=np.int32) [9, 0, 10, 0, 8, 0, 12, 0, 11, 0, 13]], dtype=np.int32)
for i, d in enumerate(data.create_dict_iterator(num_epochs=1)):
for i, d in enumerate(data.create_dict_iterator(num_epochs=1, output_numpy=True)):
np.testing.assert_array_equal(d["text"], res[i]) np.testing.assert_array_equal(d["text"], res[i])






+ 4
- 2
tests/ut/python/dataset/test_normalizeOp.py

@@ -107,7 +107,8 @@ def test_normalize_op_c(plot=False):
data2 = data2.map(operations=decode_op, input_columns=["image"]) data2 = data2.map(operations=decode_op, input_columns=["image"])


num_iter = 0 num_iter = 0
for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)):
for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True),
data2.create_dict_iterator(num_epochs=1, output_numpy=True)):
image_de_normalized = item1["image"] image_de_normalized = item1["image"]
image_original = item2["image"] image_original = item2["image"]
image_np_normalized = normalize_np(image_original, mean, std) image_np_normalized = normalize_np(image_original, mean, std)
@@ -144,7 +145,8 @@ def test_normalize_op_py(plot=False):
data2 = data2.map(operations=transform, input_columns=["image"]) data2 = data2.map(operations=transform, input_columns=["image"])


num_iter = 0 num_iter = 0
for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)):
for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True),
data2.create_dict_iterator(num_epochs=1, output_numpy=True)):
image_de_normalized = (item1["image"].transpose(1, 2, 0) * 255).astype(np.uint8) image_de_normalized = (item1["image"].transpose(1, 2, 0) * 255).astype(np.uint8)
image_np_normalized = (normalize_np(item2["image"].transpose(1, 2, 0), mean, std) * 255).astype(np.uint8) image_np_normalized = (normalize_np(item2["image"].transpose(1, 2, 0), mean, std) * 255).astype(np.uint8)
image_original = (item2["image"].transpose(1, 2, 0) * 255).astype(np.uint8) image_original = (item2["image"].transpose(1, 2, 0) * 255).astype(np.uint8)


+ 1
- 1
tests/ut/python/dataset/test_opt.py

@@ -39,7 +39,7 @@ def test_case_0():
data1 = data1.batch(2) data1 = data1.batch(2)


expected_data = np.array([[[1], [2]], [[3], [0]]]) expected_data = np.array([[[1], [2]], [[3], [0]]])
for i, data_row in enumerate(data1):
for i, data_row in enumerate(data1.create_tuple_iterator(output_numpy=True)):
np.testing.assert_array_equal(data_row[0], expected_data[i]) np.testing.assert_array_equal(data_row[0], expected_data[i])


# Restore configuration # Restore configuration
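
Where a test previously iterated the dataset object and compared rows positionally, it now asks create_tuple_iterator for numpy output instead of converting each Tensor by hand. A minimal sketch under the same assumption (hypothetical generator source, not the TFRecord data of the original test):

import numpy as np
import mindspore.dataset as ds

def gen():
    for i in range(4):
        yield (np.array([i], dtype=np.int64),)

data = ds.GeneratorDataset(gen, column_names=["col1"])
data = data.batch(2)

expected = np.array([[[0], [1]], [[2], [3]]])
# Request numpy output explicitly instead of iterating the dataset object directly.
for i, row in enumerate(data.create_tuple_iterator(output_numpy=True)):
    np.testing.assert_array_equal(row[0], expected[i])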


+ 5
- 5
tests/ut/python/dataset/test_opt_pass.py

@@ -31,7 +31,7 @@ def test_map_reorder0():
data0 = data0.map(operations=(lambda x: x), input_columns="col0", output_columns="out", data0 = data0.map(operations=(lambda x: x), input_columns="col0", output_columns="out",
column_order=["col1", "out"]) column_order=["col1", "out"])


for item in data0.create_tuple_iterator(num_epochs=1): # each data is a dictionary
for item in data0.create_tuple_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary
assert item == [np.array(1), np.array(0)] assert item == [np.array(1), np.array(0)]




@@ -51,7 +51,7 @@ def test_map_reorder1():
data2 = ds.zip((data0, data1)) data2 = ds.zip((data0, data1))
data2 = data2.map(operations=(lambda x: x), input_columns="a0", column_order=["b2", "a2", "b1", "a1", "b0", "a0"]) data2 = data2.map(operations=(lambda x: x), input_columns="a0", column_order=["b2", "a2", "b1", "a1", "b0", "a0"])


for item in data2.create_tuple_iterator(num_epochs=1):
for item in data2.create_tuple_iterator(num_epochs=1, output_numpy=True):
assert item == [np.array(2), np.array(2), np.array(1), np.array(1), np.array(0), np.array(0)] assert item == [np.array(2), np.array(2), np.array(1), np.array(1), np.array(0), np.array(0)]




@@ -67,7 +67,7 @@ def test_shuffle():
data2 = ds.TFRecordDataset(FILES, schema=SCHEMA_FILE, shuffle=ds.Shuffle.FILES) data2 = ds.TFRecordDataset(FILES, schema=SCHEMA_FILE, shuffle=ds.Shuffle.FILES)
data2 = data2.shuffle(10000) data2 = data2.shuffle(10000)


for d1, d2 in zip(data1, data2):
for d1, d2 in zip(data1.create_tuple_iterator(output_numpy=True), data2.create_tuple_iterator(output_numpy=True)):
for t1, t2 in zip(d1, d2): for t1, t2 in zip(d1, d2):
np.testing.assert_array_equal(t1, t2) np.testing.assert_array_equal(t1, t2)


@@ -77,7 +77,7 @@ def test_shuffle():
data2 = ds.TextFileDataset(DATA_ALL_FILE, shuffle=ds.Shuffle.FILES) data2 = ds.TextFileDataset(DATA_ALL_FILE, shuffle=ds.Shuffle.FILES)
data2 = data2.shuffle(10000) data2 = data2.shuffle(10000)


for d1, d2 in zip(data1, data2):
for d1, d2 in zip(data1.create_tuple_iterator(output_numpy=True), data2.create_tuple_iterator(output_numpy=True)):
for t1, t2 in zip(d1, d2): for t1, t2 in zip(d1, d2):
np.testing.assert_array_equal(t1, t2) np.testing.assert_array_equal(t1, t2)


@@ -87,7 +87,7 @@ def test_shuffle():
data2 = ds.CLUEDataset(TRAIN_FILE, task='AFQMC', usage='train', shuffle=ds.Shuffle.FILES) data2 = ds.CLUEDataset(TRAIN_FILE, task='AFQMC', usage='train', shuffle=ds.Shuffle.FILES)
data2 = data2.shuffle(10000) data2 = data2.shuffle(10000)


for d1, d2 in zip(data1, data2):
for d1, d2 in zip(data1.create_tuple_iterator(output_numpy=True), data2.create_tuple_iterator(output_numpy=True)):
for t1, t2 in zip(d1, d2): for t1, t2 in zip(d1, d2):
np.testing.assert_array_equal(t1, t2) np.testing.assert_array_equal(t1, t2)




+ 4
- 3
tests/ut/python/dataset/test_pad.py

@@ -56,7 +56,8 @@ def test_pad_op():
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
data2 = data2.map(operations=transform, input_columns=["image"]) data2 = data2.map(operations=transform, input_columns=["image"])


for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)):
for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True),
data2.create_dict_iterator(num_epochs=1, output_numpy=True)):
c_image = item1["image"] c_image = item1["image"]
py_image = (item2["image"].transpose(1, 2, 0) * 255).astype(np.uint8) py_image = (item2["image"].transpose(1, 2, 0) * 255).astype(np.uint8)


@@ -94,7 +95,7 @@ def test_pad_grayscale():
pad_gray = c_vision.Pad(100, fill_value=(20, 20, 20)) pad_gray = c_vision.Pad(100, fill_value=(20, 20, 20))
data1 = data1.map(operations=pad_gray, input_columns=["image"]) data1 = data1.map(operations=pad_gray, input_columns=["image"])
dataset_shape_1 = [] dataset_shape_1 = []
for item1 in data1.create_dict_iterator(num_epochs=1):
for item1 in data1.create_dict_iterator(num_epochs=1, output_numpy=True):
c_image = item1["image"] c_image = item1["image"]
dataset_shape_1.append(c_image.shape) dataset_shape_1.append(c_image.shape)


@@ -108,7 +109,7 @@ def test_pad_grayscale():


data2 = data2.map(operations=ctrans, input_columns=["image"]) data2 = data2.map(operations=ctrans, input_columns=["image"])


for item2 in data2.create_dict_iterator(num_epochs=1):
for item2 in data2.create_dict_iterator(num_epochs=1, output_numpy=True):
c_image = item2["image"] c_image = item2["image"]
dataset_shape_2.append(c_image.shape) dataset_shape_2.append(c_image.shape)




+ 7
- 7
tests/ut/python/dataset/test_pad_batch.py

@@ -62,7 +62,7 @@ def test_batch_padding_01():
data1 = ds.GeneratorDataset((lambda: gen_2cols(2)), ["col1d", "col2d"]) data1 = ds.GeneratorDataset((lambda: gen_2cols(2)), ["col1d", "col2d"])
data1 = data1.batch(batch_size=2, drop_remainder=False, pad_info={"col2d": ([2, 2], -2), "col1d": ([2], -1)}) data1 = data1.batch(batch_size=2, drop_remainder=False, pad_info={"col2d": ([2, 2], -2), "col1d": ([2], -1)})
data1 = data1.repeat(2) data1 = data1.repeat(2)
for data in data1.create_dict_iterator(num_epochs=1):
for data in data1.create_dict_iterator(num_epochs=1, output_numpy=True):
np.testing.assert_array_equal([[0, -1], [1, -1]], data["col1d"]) np.testing.assert_array_equal([[0, -1], [1, -1]], data["col1d"])
np.testing.assert_array_equal([[[100, -2], [200, -2]], [[101, -2], [201, -2]]], data["col2d"]) np.testing.assert_array_equal([[[100, -2], [200, -2]], [[101, -2], [201, -2]]], data["col2d"])


@@ -71,7 +71,7 @@ def test_batch_padding_02():
data1 = ds.GeneratorDataset((lambda: gen_2cols(2)), ["col1d", "col2d"]) data1 = ds.GeneratorDataset((lambda: gen_2cols(2)), ["col1d", "col2d"])
data1 = data1.batch(batch_size=2, drop_remainder=False, pad_info={"col2d": ([1, 2], -2)}) data1 = data1.batch(batch_size=2, drop_remainder=False, pad_info={"col2d": ([1, 2], -2)})
data1 = data1.repeat(2) data1 = data1.repeat(2)
for data in data1.create_dict_iterator(num_epochs=1):
for data in data1.create_dict_iterator(num_epochs=1, output_numpy=True):
np.testing.assert_array_equal([[0], [1]], data["col1d"]) np.testing.assert_array_equal([[0], [1]], data["col1d"])
np.testing.assert_array_equal([[[100, -2]], [[101, -2]]], data["col2d"]) np.testing.assert_array_equal([[[100, -2]], [[101, -2]]], data["col2d"])


@@ -81,7 +81,7 @@ def test_batch_padding_03():
data1 = data1.batch(batch_size=2, drop_remainder=False, pad_info={"col": (None, -1)}) # pad automatically data1 = data1.batch(batch_size=2, drop_remainder=False, pad_info={"col": (None, -1)}) # pad automatically
data1 = data1.repeat(2) data1 = data1.repeat(2)
res = dict() res = dict()
for ind, data in enumerate(data1.create_dict_iterator(num_epochs=1)):
for ind, data in enumerate(data1.create_dict_iterator(num_epochs=1, output_numpy=True)):
res[ind] = data["col"].copy() res[ind] = data["col"].copy()
np.testing.assert_array_equal(res[0], [[0, -1], [0, 1]]) np.testing.assert_array_equal(res[0], [[0, -1], [0, 1]])
np.testing.assert_array_equal(res[1], [[0, 1, 2, -1], [0, 1, 2, 3]]) np.testing.assert_array_equal(res[1], [[0, 1, 2, -1], [0, 1, 2, 3]])
@@ -93,7 +93,7 @@ def test_batch_padding_04():
data1 = ds.GeneratorDataset((lambda: gen_var_cols(2)), ["col1", "col2"]) data1 = ds.GeneratorDataset((lambda: gen_var_cols(2)), ["col1", "col2"])
data1 = data1.batch(batch_size=2, drop_remainder=False, pad_info={}) # pad automatically data1 = data1.batch(batch_size=2, drop_remainder=False, pad_info={}) # pad automatically
data1 = data1.repeat(2) data1 = data1.repeat(2)
for data in data1.create_dict_iterator(num_epochs=1):
for data in data1.create_dict_iterator(num_epochs=1, output_numpy=True):
np.testing.assert_array_equal(data["col1"], [[0, 0], [0, 1]]) np.testing.assert_array_equal(data["col1"], [[0, 0], [0, 1]])
np.testing.assert_array_equal(data["col2"], [[100, 0], [100, 101]]) np.testing.assert_array_equal(data["col2"], [[100, 0], [100, 101]])


@@ -102,7 +102,7 @@ def test_batch_padding_05():
data1 = ds.GeneratorDataset((lambda: gen_var_cols_2d(3)), ["col1", "col2"]) data1 = ds.GeneratorDataset((lambda: gen_var_cols_2d(3)), ["col1", "col2"])
data1 = data1.batch(batch_size=3, drop_remainder=False, data1 = data1.batch(batch_size=3, drop_remainder=False,
pad_info={"col2": ([2, None], -2), "col1": (None, -1)}) # pad automatically pad_info={"col2": ([2, None], -2), "col1": (None, -1)}) # pad automatically
for data in data1.create_dict_iterator(num_epochs=1):
for data in data1.create_dict_iterator(num_epochs=1, output_numpy=True):
np.testing.assert_array_equal(data["col1"], [[[0, -1, -1]], [[0, 1, -1]], [[0, 1, 2]]]) np.testing.assert_array_equal(data["col1"], [[[0, -1, -1]], [[0, 1, -1]], [[0, 1, 2]]])
np.testing.assert_array_equal(data["col2"], [[[100, -2, -2], [-2, -2, -2]], [[100, 101, -2], [-2, -2, -2]], np.testing.assert_array_equal(data["col2"], [[[100, -2, -2], [-2, -2, -2]], [[100, 101, -2], [-2, -2, -2]],
[[100, 101, 102], [-2, -2, -2]]]) [[100, 101, 102], [-2, -2, -2]]])
@@ -180,7 +180,7 @@ def test_pad_via_map():
data1 = data1.map(operations=(lambda x: np.pad(x, (0, 816))), input_columns="image") data1 = data1.map(operations=(lambda x: np.pad(x, (0, 816))), input_columns="image")
data1 = data1.batch(batch_size=25, drop_remainder=True) data1 = data1.batch(batch_size=25, drop_remainder=True)
res = [] res = []
for data in data1.create_dict_iterator(num_epochs=1):
for data in data1.create_dict_iterator(num_epochs=1, output_numpy=True):
res.append(data["image"]) res.append(data["image"])
return res return res


@@ -189,7 +189,7 @@ def test_pad_via_map():
data2 = data2.map(operations=(lambda x: x.reshape(-1)), input_columns="image") # reshape to 1d data2 = data2.map(operations=(lambda x: x.reshape(-1)), input_columns="image") # reshape to 1d
data2 = data2.batch(batch_size=25, drop_remainder=True, pad_info={"image": ([3888], 0)}) data2 = data2.batch(batch_size=25, drop_remainder=True, pad_info={"image": ([3888], 0)})
res = [] res = []
for data in data2.create_dict_iterator(num_epochs=1):
for data in data2.create_dict_iterator(num_epochs=1, output_numpy=True):
res.append(data["image"]) res.append(data["image"])
return res return res




+ 1
- 1
tests/ut/python/dataset/test_pad_end_op.py

@@ -30,7 +30,7 @@ def pad_compare(array, pad_shape, pad_value, res):
data = data.map(operations=ops.PadEnd(pad_shape, pad_value)) data = data.map(operations=ops.PadEnd(pad_shape, pad_value))
else: else:
data = data.map(operations=ops.PadEnd(pad_shape)) data = data.map(operations=ops.PadEnd(pad_shape))
for d in data:
for d in data.create_tuple_iterator(output_numpy=True):
np.testing.assert_array_equal(res, d[0]) np.testing.assert_array_equal(res, d[0])






+ 15
- 15
tests/ut/python/dataset/test_paddeddataset.py

@@ -57,7 +57,7 @@ def test_TFRecord_Padded():
testsampler = ds.DistributedSampler(num_shards=shard_num, shard_id=i, shuffle=False, num_samples=None) testsampler = ds.DistributedSampler(num_shards=shard_num, shard_id=i, shuffle=False, num_samples=None)
concat_ds.use_sampler(testsampler) concat_ds.use_sampler(testsampler)
shard_list = [] shard_list = []
for item in concat_ds.create_dict_iterator(num_epochs=1):
for item in concat_ds.create_dict_iterator(num_epochs=1, output_numpy=True):
shard_list.append(len(item['image'])) shard_list.append(len(item['image']))
verify_list.append(shard_list) verify_list.append(shard_list)
assert verify_list == result_list assert verify_list == result_list
@@ -80,7 +80,7 @@ def test_GeneratorDataSet_Padded():
distributed_sampler = ds.DistributedSampler(num_shards=shard_num, shard_id=i, shuffle=False, num_samples=None) distributed_sampler = ds.DistributedSampler(num_shards=shard_num, shard_id=i, shuffle=False, num_samples=None)
data3.use_sampler(distributed_sampler) data3.use_sampler(distributed_sampler)
tem_list = [] tem_list = []
for ele in data3.create_dict_iterator(num_epochs=1):
for ele in data3.create_dict_iterator(num_epochs=1, output_numpy=True):
tem_list.append(ele['col1'][0]) tem_list.append(ele['col1'][0])
verify_list.append(tem_list) verify_list.append(tem_list)


@@ -105,7 +105,7 @@ def test_Reapeat_afterPadded():
ds3.use_sampler(testsampler) ds3.use_sampler(testsampler)
repeat_num = 2 repeat_num = 2
ds3 = ds3.repeat(repeat_num) ds3 = ds3.repeat(repeat_num)
for item in ds3.create_dict_iterator(num_epochs=1):
for item in ds3.create_dict_iterator(num_epochs=1, output_numpy=True):
verify_list.append(len(item['image'])) verify_list.append(len(item['image']))


assert verify_list == result_list * repeat_num assert verify_list == result_list * repeat_num
@@ -149,7 +149,7 @@ def test_Unevenly_distributed():
tem_list = []
testsampler = ds.DistributedSampler(num_shards=numShard, shard_id=i, shuffle=False, num_samples=None)
ds3.use_sampler(testsampler)
- for item in ds3.create_dict_iterator(num_epochs=1):
+ for item in ds3.create_dict_iterator(num_epochs=1, output_numpy=True):
tem_list.append(len(item['image']))
verify_list.append(tem_list)
assert verify_list == result_list
@@ -174,7 +174,7 @@ def test_three_datasets_connected():
distributed_sampler = ds.DistributedSampler(num_shards=shard_num, shard_id=i, shuffle=False, num_samples=None)
data4.use_sampler(distributed_sampler)
tem_list = []
- for ele in data4.create_dict_iterator(num_epochs=1):
+ for ele in data4.create_dict_iterator(num_epochs=1, output_numpy=True):
tem_list.append(ele['col1'][0])
verify_list.append(tem_list)


@@ -232,7 +232,7 @@ def test_imagefolder_padded():
assert sum([1 for _ in data3]) == 10
verify_list = []


- for ele in data3.create_dict_iterator(num_epochs=1):
+ for ele in data3.create_dict_iterator(num_epochs=1, output_numpy=True):
verify_list.append(len(ele['image']))
assert verify_list[8] == 1
assert verify_list[9] == 6
@@ -259,7 +259,7 @@ def test_imagefolder_padded_with_decode():
data3.use_sampler(testsampler)
data3 = data3.map(operations=V_C.Decode(), input_columns="image")
shard_sample_count = 0
- for ele in data3.create_dict_iterator(num_epochs=1):
+ for ele in data3.create_dict_iterator(num_epochs=1, output_numpy=True):
print("label: {}".format(ele['label']))
count += 1
shard_sample_count += 1
@@ -289,7 +289,7 @@ def test_imagefolder_padded_with_decode_and_get_dataset_size():
shard_dataset_size = data3.get_dataset_size()
data3 = data3.map(operations=V_C.Decode(), input_columns="image")
shard_sample_count = 0
- for ele in data3.create_dict_iterator(num_epochs=1):
+ for ele in data3.create_dict_iterator(num_epochs=1, output_numpy=True):
print("label: {}".format(ele['label']))
count += 1
shard_sample_count += 1
@@ -313,7 +313,7 @@ def test_more_shard_padded():
tem_list = []
testsampler = ds.DistributedSampler(num_shards=numShard, shard_id=i, shuffle=False, num_samples=None)
data3.use_sampler(testsampler)
- for item in data3.create_dict_iterator(num_epochs=1):
+ for item in data3.create_dict_iterator(num_epochs=1, output_numpy=True):
tem_list.append(item['col1'])
vertifyList.append(tem_list)


@@ -339,7 +339,7 @@ def test_more_shard_padded():
tem_list = []
testsampler = ds.DistributedSampler(num_shards=numShard, shard_id=i, shuffle=False, num_samples=None)
ds3.use_sampler(testsampler)
- for item in ds3.create_dict_iterator(num_epochs=1):
+ for item in ds3.create_dict_iterator(num_epochs=1, output_numpy=True):
tem_list.append(len(item['image']))
vertifyList1.append(tem_list)


@@ -426,7 +426,7 @@ def test_Mindrecord_Padded(remove_mindrecord_file):
testsampler = ds.DistributedSampler(num_shards=shard_num, shard_id=i, shuffle=False, num_samples=None)
ds2.use_sampler(testsampler)
tem_list = []
- for ele in ds2.create_dict_iterator(num_epochs=1):
+ for ele in ds2.create_dict_iterator(num_epochs=1, output_numpy=True):
tem_list.append(int(ele['file_name'].tostring().decode().lstrip('image_').rstrip('.jpg')))
result_list.append(tem_list)
assert result_list == verify_list
@@ -440,7 +440,7 @@ def test_clue_padded_and_skip_with_0_samples():


data = ds.CLUEDataset(TRAIN_FILE, task='AFQMC', usage='train')
count = 0
- for _ in data.create_dict_iterator(num_epochs=1):
+ for _ in data.create_dict_iterator(num_epochs=1, output_numpy=True):
count += 1
assert count == 3


@@ -456,20 +456,20 @@ def test_clue_padded_and_skip_with_0_samples():
dataset.use_sampler(testsampler)
assert dataset.get_dataset_size() == 2
count = 0
- for data in dataset.create_dict_iterator(num_epochs=1):
+ for data in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
count += 1
assert count == 2


dataset = dataset.skip(count=2) # dataset2 has none samples
count = 0
- for data in dataset.create_dict_iterator(num_epochs=1):
+ for data in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
count += 1
assert count == 0


with pytest.raises(ValueError, match="There is no samples in the "):
dataset = dataset.concat(data_copy1)
count = 0
- for data in dataset.create_dict_iterator(num_epochs=1):
+ for data in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
count += 1
assert count == 2




+2 -1    tests/ut/python/dataset/test_pair_truncate.py

@@ -24,7 +24,8 @@ import mindspore.dataset.text as text
def compare(in1, in2, length, out1, out2):
data = ds.NumpySlicesDataset({"s1": [in1], "s2": [in2]})
data = data.map(operations=text.TruncateSequencePair(length), input_columns=["s1", "s2"])
- for d in data.create_dict_iterator(num_epochs=1):
+ data = data.map(input_columns=["s1", "s2"], operations=text.TruncateSequencePair(length))
+ for d in data.create_dict_iterator(num_epochs=1, output_numpy=True):
np.testing.assert_array_equal(out1, d["s1"])
np.testing.assert_array_equal(out2, d["s2"])




+10 -10    tests/ut/python/dataset/test_pyfunc.py

@@ -36,7 +36,7 @@ def test_case_0():
data1 = data1.map(operations=(lambda x: x + x), input_columns="col0", output_columns="out")


i = 0
- for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary
+ for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary
# In this test, the dataset is 2x2 sequential tensors
golden = np.array([[i * 2, (i + 1) * 2], [(i + 2) * 2, (i + 3) * 2]])
np.testing.assert_array_equal(item["out"], golden)
@@ -57,7 +57,7 @@ def test_case_1():
column_order=["out0", "out1"]) column_order=["out0", "out1"])


i = 0 i = 0
for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary
for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary
# In this test, the dataset is 2x2 sequential tensors # In this test, the dataset is 2x2 sequential tensors
golden = np.array([[i, i + 1], [i + 2, i + 3]]) golden = np.array([[i, i + 1], [i + 2, i + 3]])
np.testing.assert_array_equal(item["out0"], golden) np.testing.assert_array_equal(item["out0"], golden)
@@ -81,7 +81,7 @@ def test_case_2():
column_order=["out"]) column_order=["out"])


i = 0 i = 0
for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary
for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary
# In this test, the dataset is 2x2 sequential tensors # In this test, the dataset is 2x2 sequential tensors
golden = np.array([[i * 2, (i + 1) * 2], [(i + 2) * 2, (i + 3) * 2]]) golden = np.array([[i * 2, (i + 1) * 2], [(i + 2) * 2, (i + 3) * 2]])
np.testing.assert_array_equal(item["out"], golden) np.testing.assert_array_equal(item["out"], golden)
@@ -103,7 +103,7 @@ def test_case_3():
output_columns=["out0", "out1", "out2"], column_order=["out0", "out1", "out2"]) output_columns=["out0", "out1", "out2"], column_order=["out0", "out1", "out2"])


i = 0 i = 0
for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary
for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary
# In this test, the dataset is 2x2 sequential tensors # In this test, the dataset is 2x2 sequential tensors
golden = np.array([[i, i + 1], [i + 2, i + 3]]) golden = np.array([[i, i + 1], [i + 2, i + 3]])
np.testing.assert_array_equal(item["out0"], golden) np.testing.assert_array_equal(item["out0"], golden)
@@ -130,7 +130,7 @@ def test_case_4():
column_order=["out0", "out1", "out2"]) column_order=["out0", "out1", "out2"])


i = 0 i = 0
for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary
for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary
# In this test, the dataset is 2x2 sequential tensors # In this test, the dataset is 2x2 sequential tensors
golden = np.array([[i, i + 1], [i + 2, i + 3]]) golden = np.array([[i, i + 1], [i + 2, i + 3]])
np.testing.assert_array_equal(item["out0"], golden) np.testing.assert_array_equal(item["out0"], golden)
@@ -157,7 +157,7 @@ def test_case_5():


data1 = data1.map(operations=func_5, input_columns="col0", output_columns="out")


- for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary
+ for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary
# In this test, the dataset is 2x2 sequential tensors
golden = np.array([[1, 1], [1, 1]])
np.testing.assert_array_equal(item["out"], golden)
@@ -175,7 +175,7 @@ def test_case_6():
data1 = data1.map(operations=[(lambda x: x + x), (lambda x: x + x)], input_columns="col0", output_columns="out")


i = 0
- for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary
+ for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary
# In this test, the dataset is 2x2 sequential tensors
golden = np.array([[i * 4, (i + 1) * 4], [(i + 2) * 4, (i + 3) * 4]])
np.testing.assert_array_equal(item["out"], golden)
@@ -195,7 +195,7 @@ def test_case_7():
num_parallel_workers=4, python_multiprocessing=True)


i = 0
- for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary
+ for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary
# In this test, the dataset is 2x2 sequential tensors
golden = np.array([[i * 2, (i + 1) * 2], [(i + 2) * 2, (i + 3) * 2]])
np.testing.assert_array_equal(item["out"], golden)
@@ -219,7 +219,7 @@ def test_case_8():
python_multiprocessing=True)


i = 0
- for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary
+ for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary
# In this test, the dataset is 2x2 sequential tensors
golden = np.array([[i, i + 1], [i + 2, i + 3]])
np.testing.assert_array_equal(item["out0"], golden)
@@ -243,7 +243,7 @@ def test_case_9():
output_columns="out", num_parallel_workers=4, python_multiprocessing=True) output_columns="out", num_parallel_workers=4, python_multiprocessing=True)


i = 0 i = 0
for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary
for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary
# In this test, the dataset is 2x2 sequential tensors # In this test, the dataset is 2x2 sequential tensors
golden = np.array([[i * 2 + 3, (i + 1) * 2 + 3], [(i + 2) * 2 + 3, (i + 3) * 2 + 3]]) golden = np.array([[i * 2 + 3, (i + 1) * 2 + 3], [(i + 2) * 2 + 3, (i + 3) * 2 + 3]])
np.testing.assert_array_equal(item["out"], golden) np.testing.assert_array_equal(item["out"], golden)


+1 -1    tests/ut/python/dataset/test_python_tokenizer.py

@@ -41,7 +41,7 @@ def test_whitespace_tokenizer_ch():
tokenizer = text.PythonTokenizer(my_tokenizer)
dataset = dataset.map(operations=tokenizer, num_parallel_workers=1)
tokens = []
- for i in dataset.create_dict_iterator(num_epochs=1):
+ for i in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
s = text.to_str(i['text']).tolist()
tokens.append(s)
logger.info("The out tokens is : {}".format(tokens))


+4 -2    tests/ut/python/dataset/test_random_affine.py

@@ -59,7 +59,8 @@ def test_random_affine_op(plot=False):


image_affine = []
image_original = []
- for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)):
+ for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True),
+                         data2.create_dict_iterator(num_epochs=1, output_numpy=True)):
image1 = (item1["image"].transpose(1, 2, 0) * 255).astype(np.uint8)
image2 = (item2["image"].transpose(1, 2, 0) * 255).astype(np.uint8)
image_affine.append(image1)
@@ -92,7 +93,8 @@ def test_random_affine_op_c(plot=False):


image_affine = []
image_original = []
- for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)):
+ for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True),
+                         data2.create_dict_iterator(num_epochs=1, output_numpy=True)):
image1 = item1["image"]
image2 = item2["image"]
image_affine.append(image1)


+2 -1    tests/ut/python/dataset/test_random_apply.py

@@ -58,7 +58,8 @@ def test_random_apply_op(plot=False):


image_apply = []
image_original = []
- for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)):
+ for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True),
+                         data2.create_dict_iterator(num_epochs=1, output_numpy=True)):
image1 = (item1["image"].transpose(1, 2, 0) * 255).astype(np.uint8)
image2 = (item2["image"].transpose(1, 2, 0) * 255).astype(np.uint8)
image_apply.append(image1)


+4 -2    tests/ut/python/dataset/test_random_choice.py

@@ -55,7 +55,8 @@ def test_random_choice_op(plot=False):


image_choice = []
image_original = []
- for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)):
+ for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True),
+                         data2.create_dict_iterator(num_epochs=1, output_numpy=True)):
image1 = (item1["image"].transpose(1, 2, 0) * 255).astype(np.uint8)
image2 = (item2["image"].transpose(1, 2, 0) * 255).astype(np.uint8)
image_choice.append(image1)
@@ -94,7 +95,8 @@ def test_random_choice_comp(plot=False):


image_choice = []
image_original = []
- for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)):
+ for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True),
+                         data2.create_dict_iterator(num_epochs=1, output_numpy=True)):
image1 = (item1["image"].transpose(1, 2, 0) * 255).astype(np.uint8)
image2 = (item2["image"].transpose(1, 2, 0) * 255).astype(np.uint8)
image_choice.append(image1)


Some files were not shown because too many files changed in this diff
