Merge pull request !5968 from nhussain/api_changes2
tags/v1.0.0
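
The hunks below all apply the same mechanical change: dataset.map() is called with operations as the leading keyword argument, followed by input_columns and, where the callable produces new columns, output_columns and column_order. The following is a minimal sketch of the before/after calling convention, assuming a MindSpore 1.0-style environment; the toy generator, data shapes, and column names are illustrative and not taken from this pull request.

    import numpy as np
    import mindspore.common.dtype as mstype
    import mindspore.dataset as de
    import mindspore.dataset.transforms.c_transforms as C2

    def gen():
        # Two toy rows with an "image" and a "label" column (made-up data).
        for label in (0, 1):
            yield np.zeros((2, 2, 3), dtype=np.float32), np.array(label, dtype=np.int64)

    ds = de.GeneratorDataset(gen, column_names=["image", "label"])
    type_cast_op = C2.TypeCast(mstype.int32)

    # Old keyword order, as shown on the left-hand side of each hunk:
    #     ds = ds.map(input_columns="label", operations=type_cast_op)
    # New keyword order applied throughout this pull request:
    ds = ds.map(operations=type_cast_op, input_columns="label")
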
| @@ -282,8 +282,9 @@ class UnicodeCharTokenizer(cde.UnicodeCharTokenizerOp): | |||
| >>> # If with_offsets=True, then output three columns {["token", dtype=str], ["offsets_start", dtype=uint32], | |||
| >>> # ["offsets_limit", dtype=uint32]} | |||
| >>> tokenizer_op = text.UnicodeCharTokenizer(True) | |||
| >>> data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"], | |||
| >>> column_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op) | |||
| >>> data = data.map(operations=tokenizer_op, input_columns=["text"], | |||
| >>> output_columns=["token", "offsets_start", "offsets_limit"], | |||
| >>> column_order=["token", "offsets_start", "offsets_limit"]) | |||
| """ | |||
| @check_with_offsets | |||
| @@ -313,8 +314,9 @@ class WordpieceTokenizer(cde.WordpieceTokenizerOp): | |||
| >>> # ["offsets_limit", dtype=uint32]} | |||
| >>> tokenizer_op = text.WordpieceTokenizer(vocab=vocab, unknown_token='[UNK]', | |||
| >>> max_bytes_per_token=100, with_offsets=True) | |||
| >>> data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"], | |||
| >>> column_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op) | |||
| >>> data = data.map(operations=tokenizer_op, | |||
| >>> input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"], | |||
| >>> column_order=["token", "offsets_start", "offsets_limit"]) | |||
| """ | |||
| @check_wordpiece_tokenizer | |||
| @@ -378,8 +380,9 @@ if platform.system().lower() != 'windows': | |||
| >>> # ["offsets_start", dtype=uint32], | |||
| >>> # ["offsets_limit", dtype=uint32]} | |||
| >>> tokenizer_op = text.WhitespaceTokenizer(True) | |||
| >>> data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"], | |||
| >>> column_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op) | |||
| >>> data = data.map(operations=tokenizer_op, input_columns=["text"], | |||
| >>> output_columns=["token", "offsets_start", "offsets_limit"], | |||
| >>> column_order=["token", "offsets_start", "offsets_limit"]) | |||
| """ | |||
| @check_with_offsets | |||
| @@ -404,8 +407,9 @@ if platform.system().lower() != 'windows': | |||
| >>> # ["offsets_start", dtype=uint32], | |||
| >>> # ["offsets_limit", dtype=uint32]} | |||
| >>> tokenizer_op = text.UnicodeScriptTokenizer(keep_whitespace=True, with_offsets=True) | |||
| >>> data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"], | |||
| >>> column_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op) | |||
| >>> data = data.map(operations=tokenizer_op, input_columns=["text"], | |||
| >>> output_columns=["token", "offsets_start", "offsets_limit"], | |||
| >>> column_order=["token", "offsets_start", "offsets_limit"]) | |||
| """ | |||
| @check_unicode_script_tokenizer | |||
| @@ -497,8 +501,9 @@ if platform.system().lower() != 'windows': | |||
| >>> # ["offsets_start", dtype=uint32], | |||
| >>> # ["offsets_limit", dtype=uint32]} | |||
| >>> tokenizer_op = text.RegexTokenizer(delim_pattern, keep_delim_pattern, with_offsets=True) | |||
| >>> data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"], | |||
| >>> column_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op) | |||
| >>> data = data.map(operations=tokenizer_op, input_columns=["text"], | |||
| >>> output_columns=["token", "offsets_start", "offsets_limit"], | |||
| >>> column_order=["token", "offsets_start", "offsets_limit"]) | |||
| """ | |||
| @check_regex_tokenizer | |||
| @@ -540,8 +545,9 @@ if platform.system().lower() != 'windows': | |||
| >>> normalization_form=NormalizeForm.NONE, | |||
| >>> preserve_unused_token=True, | |||
| >>> with_offsets=True) | |||
| >>> data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"], | |||
| >>> column_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op) | |||
| >>> data = data.map(operations=tokenizer_op, input_columns=["text"], | |||
| >>> output_columns=["token", "offsets_start", "offsets_limit"], | |||
| >>> column_order=["token", "offsets_start", "offsets_limit"]) | |||
| """ | |||
| @check_basic_tokenizer | |||
| @@ -593,8 +599,9 @@ if platform.system().lower() != 'windows': | |||
| >>> unknown_token='[UNK]', lower_case=False, keep_whitespace=False, | |||
| >>> normalization_form=NormalizeForm.NONE, preserve_unused_token=True, | |||
| >>> with_offsets=True) | |||
| >>> data = data.map(input_columns=["text"], output_columns=["token", "offsets_start", "offsets_limit"], | |||
| >>> column_order=["token", "offsets_start", "offsets_limit"], operations=tokenizer_op) | |||
| >>> data = data.map(operations=tokenizer_op, input_columns=["text"], | |||
| >>> output_columns=["token", "offsets_start", "offsets_limit"], | |||
| >>> column_order=["token", "offsets_start", "offsets_limit"]) | |||
| """ | |||
| @check_bert_tokenizer | |||
| @@ -39,14 +39,14 @@ def create_dataset_cifar10(data_path, batch_size=32, repeat_size=1, status="trai | |||
| random_horizontal_op = CV.RandomHorizontalFlip() | |||
| channel_swap_op = CV.HWC2CHW() | |||
| typecast_op = C.TypeCast(mstype.int32) | |||
| cifar_ds = cifar_ds.map(input_columns="label", operations=typecast_op) | |||
| cifar_ds = cifar_ds.map(operations=typecast_op, input_columns="label") | |||
| if status == "train": | |||
| cifar_ds = cifar_ds.map(input_columns="image", operations=random_crop_op) | |||
| cifar_ds = cifar_ds.map(input_columns="image", operations=random_horizontal_op) | |||
| cifar_ds = cifar_ds.map(input_columns="image", operations=resize_op) | |||
| cifar_ds = cifar_ds.map(input_columns="image", operations=rescale_op) | |||
| cifar_ds = cifar_ds.map(input_columns="image", operations=normalize_op) | |||
| cifar_ds = cifar_ds.map(input_columns="image", operations=channel_swap_op) | |||
| cifar_ds = cifar_ds.map(operations=random_crop_op, input_columns="image") | |||
| cifar_ds = cifar_ds.map(operations=random_horizontal_op, input_columns="image") | |||
| cifar_ds = cifar_ds.map(operations=resize_op, input_columns="image") | |||
| cifar_ds = cifar_ds.map(operations=rescale_op, input_columns="image") | |||
| cifar_ds = cifar_ds.map(operations=normalize_op, input_columns="image") | |||
| cifar_ds = cifar_ds.map(operations=channel_swap_op, input_columns="image") | |||
| cifar_ds = cifar_ds.shuffle(buffer_size=cfg.buffer_size) | |||
| cifar_ds = cifar_ds.batch(batch_size, drop_remainder=True) | |||
| @@ -84,8 +84,9 @@ class SegDataset: | |||
| shuffle=True, num_parallel_workers=self.num_readers, | |||
| num_shards=self.shard_num, shard_id=self.shard_id) | |||
| transforms_list = self.preprocess_ | |||
| data_set = data_set.map(input_columns=["data", "label"], output_columns=["data", "label"], | |||
| operations=transforms_list, num_parallel_workers=self.num_parallel_calls) | |||
| data_set = data_set.map(operations=transforms_list, input_columns=["data", "label"], | |||
| output_columns=["data", "label"], | |||
| num_parallel_workers=self.num_parallel_calls) | |||
| data_set = data_set.shuffle(buffer_size=self.batch_size * 10) | |||
| data_set = data_set.batch(self.batch_size, drop_remainder=True) | |||
| data_set = data_set.repeat(repeat) | |||
| @@ -74,8 +74,10 @@ def bbox_overlaps(bboxes1, bboxes2, mode='iou'): | |||
| ious = ious.T | |||
| return ious | |||
| class PhotoMetricDistortion: | |||
| """Photo Metric Distortion""" | |||
| def __init__(self, | |||
| brightness_delta=32, | |||
| contrast_range=(0.5, 1.5), | |||
| @@ -134,8 +136,10 @@ class PhotoMetricDistortion: | |||
| return img, boxes, labels | |||
| class Expand: | |||
| """expand image""" | |||
| def __init__(self, mean=(0, 0, 0), to_rgb=True, ratio_range=(1, 4)): | |||
| if to_rgb: | |||
| self.mean = mean[::-1] | |||
| @@ -158,12 +162,13 @@ class Expand: | |||
| boxes += np.tile((left, top), 2) | |||
| return img, boxes, labels | |||
| def rescale_column(img, img_shape, gt_bboxes, gt_label, gt_num): | |||
| """rescale operation for image""" | |||
| img_data, scale_factor = mmcv.imrescale(img, (config.img_width, config.img_height), return_scale=True) | |||
| if img_data.shape[0] > config.img_height: | |||
| img_data, scale_factor2 = mmcv.imrescale(img_data, (config.img_height, config.img_width), return_scale=True) | |||
| scale_factor = scale_factor*scale_factor2 | |||
| scale_factor = scale_factor * scale_factor2 | |||
| img_shape = np.append(img_shape, scale_factor) | |||
| img_shape = np.asarray(img_shape, dtype=np.float32) | |||
| gt_bboxes = gt_bboxes * scale_factor | |||
| @@ -171,7 +176,8 @@ def rescale_column(img, img_shape, gt_bboxes, gt_label, gt_num): | |||
| gt_bboxes[:, 0::2] = np.clip(gt_bboxes[:, 0::2], 0, img_shape[1] - 1) | |||
| gt_bboxes[:, 1::2] = np.clip(gt_bboxes[:, 1::2], 0, img_shape[0] - 1) | |||
| return (img_data, img_shape, gt_bboxes, gt_label, gt_num) | |||
| return (img_data, img_shape, gt_bboxes, gt_label, gt_num) | |||
| def resize_column(img, img_shape, gt_bboxes, gt_label, gt_num): | |||
| """resize operation for image""" | |||
| @@ -188,7 +194,8 @@ def resize_column(img, img_shape, gt_bboxes, gt_label, gt_num): | |||
| gt_bboxes[:, 0::2] = np.clip(gt_bboxes[:, 0::2], 0, img_shape[1] - 1) | |||
| gt_bboxes[:, 1::2] = np.clip(gt_bboxes[:, 1::2], 0, img_shape[0] - 1) | |||
| return (img_data, img_shape, gt_bboxes, gt_label, gt_num) | |||
| return (img_data, img_shape, gt_bboxes, gt_label, gt_num) | |||
| def resize_column_test(img, img_shape, gt_bboxes, gt_label, gt_num): | |||
| """resize operation for image of eval""" | |||
| @@ -205,7 +212,8 @@ def resize_column_test(img, img_shape, gt_bboxes, gt_label, gt_num): | |||
| gt_bboxes[:, 0::2] = np.clip(gt_bboxes[:, 0::2], 0, img_shape[1] - 1) | |||
| gt_bboxes[:, 1::2] = np.clip(gt_bboxes[:, 1::2], 0, img_shape[0] - 1) | |||
| return (img_data, img_shape, gt_bboxes, gt_label, gt_num) | |||
| return (img_data, img_shape, gt_bboxes, gt_label, gt_num) | |||
| def impad_to_multiple_column(img, img_shape, gt_bboxes, gt_label, gt_num): | |||
| """impad operation for image""" | |||
| @@ -213,12 +221,14 @@ def impad_to_multiple_column(img, img_shape, gt_bboxes, gt_label, gt_num): | |||
| img_data = img_data.astype(np.float32) | |||
| return (img_data, img_shape, gt_bboxes, gt_label, gt_num) | |||
| def imnormalize_column(img, img_shape, gt_bboxes, gt_label, gt_num): | |||
| """imnormalize operation for image""" | |||
| img_data = mmcv.imnormalize(img, [123.675, 116.28, 103.53], [58.395, 57.12, 57.375], True) | |||
| img_data = img_data.astype(np.float32) | |||
| return (img_data, img_shape, gt_bboxes, gt_label, gt_num) | |||
| def flip_column(img, img_shape, gt_bboxes, gt_label, gt_num): | |||
| """flip operation for image""" | |||
| img_data = img | |||
| @@ -229,7 +239,8 @@ def flip_column(img, img_shape, gt_bboxes, gt_label, gt_num): | |||
| flipped[..., 0::4] = w - gt_bboxes[..., 2::4] - 1 | |||
| flipped[..., 2::4] = w - gt_bboxes[..., 0::4] - 1 | |||
| return (img_data, img_shape, flipped, gt_label, gt_num) | |||
| return (img_data, img_shape, flipped, gt_label, gt_num) | |||
| def flipped_generation(img, img_shape, gt_bboxes, gt_label, gt_num): | |||
| """flipped generation""" | |||
| @@ -240,11 +251,13 @@ def flipped_generation(img, img_shape, gt_bboxes, gt_label, gt_num): | |||
| flipped[..., 0::4] = w - gt_bboxes[..., 2::4] - 1 | |||
| flipped[..., 2::4] = w - gt_bboxes[..., 0::4] - 1 | |||
| return (img_data, img_shape, flipped, gt_label, gt_num) | |||
| return (img_data, img_shape, flipped, gt_label, gt_num) | |||
| def image_bgr_rgb(img, img_shape, gt_bboxes, gt_label, gt_num): | |||
| img_data = img[:, :, ::-1] | |||
| return (img_data, img_shape, gt_bboxes, gt_label, gt_num) | |||
| return (img_data, img_shape, gt_bboxes, gt_label, gt_num) | |||
| def transpose_column(img, img_shape, gt_bboxes, gt_label, gt_num): | |||
| """transpose operation for image""" | |||
| @@ -257,6 +270,7 @@ def transpose_column(img, img_shape, gt_bboxes, gt_label, gt_num): | |||
| return (img_data, img_shape, gt_bboxes, gt_label, gt_num) | |||
| def photo_crop_column(img, img_shape, gt_bboxes, gt_label, gt_num): | |||
| """photo crop operation for image""" | |||
| random_photo = PhotoMetricDistortion() | |||
| @@ -264,6 +278,7 @@ def photo_crop_column(img, img_shape, gt_bboxes, gt_label, gt_num): | |||
| return (img_data, img_shape, gt_bboxes, gt_label, gt_num) | |||
| def expand_column(img, img_shape, gt_bboxes, gt_label, gt_num): | |||
| """expand operation for image""" | |||
| expand = Expand() | |||
| @@ -271,8 +286,10 @@ def expand_column(img, img_shape, gt_bboxes, gt_label, gt_num): | |||
| return (img, img_shape, gt_bboxes, gt_label, gt_num) | |||
| def preprocess_fn(image, box, is_training): | |||
| """Preprocess function for dataset.""" | |||
| def _infer_data(image_bgr, image_shape, gt_box_new, gt_label_new, gt_iscrowd_new_revert): | |||
| image_shape = image_shape[:2] | |||
| input_data = image_bgr, image_shape, gt_box_new, gt_label_new, gt_iscrowd_new_revert | |||
| @@ -325,6 +342,7 @@ def preprocess_fn(image, box, is_training): | |||
| return _data_aug(image, box, is_training) | |||
| def create_coco_label(is_training): | |||
| """Get image path and annotation from COCO.""" | |||
| from pycocotools.coco import COCO | |||
| @@ -334,7 +352,7 @@ def create_coco_label(is_training): | |||
| if is_training: | |||
| data_type = config.train_data_type | |||
| #Classes need to train or test. | |||
| # Classes need to train or test. | |||
| train_cls = config.coco_classes | |||
| train_cls_dict = {} | |||
| for i, cls in enumerate(train_cls): | |||
| @@ -375,6 +393,7 @@ def create_coco_label(is_training): | |||
| return image_files, image_anno_dict | |||
| def anno_parser(annos_str): | |||
| """Parse annotation from string to list.""" | |||
| annos = [] | |||
| @@ -383,6 +402,7 @@ def anno_parser(annos_str): | |||
| annos.append(anno) | |||
| return annos | |||
| def filter_valid_data(image_dir, anno_path): | |||
| """Filter valid image file, which both in image_dir and anno_path.""" | |||
| image_files = [] | |||
| @@ -404,6 +424,7 @@ def filter_valid_data(image_dir, anno_path): | |||
| image_files.append(image_path) | |||
| return image_files, image_anno_dict | |||
| def data_to_mindrecord_byte_image(dataset="coco", is_training=True, prefix="fasterrcnn.mindrecord", file_num=8): | |||
| """Create MindRecord file.""" | |||
| mindrecord_dir = config.mindrecord_dir | |||
| @@ -435,7 +456,7 @@ def create_fasterrcnn_dataset(mindrecord_file, batch_size=2, repeat_num=12, devi | |||
| ds = de.MindDataset(mindrecord_file, columns_list=["image", "annotation"], num_shards=device_num, shard_id=rank_id, | |||
| num_parallel_workers=1, shuffle=is_training) | |||
| decode = C.Decode() | |||
| ds = ds.map(input_columns=["image"], operations=decode, num_parallel_workers=1) | |||
| ds = ds.map(operations=decode, input_columns=["image"], num_parallel_workers=1) | |||
| compose_map_func = (lambda image, annotation: preprocess_fn(image, annotation, is_training)) | |||
| hwc_to_chw = C.HWC2CHW() | |||
| @@ -447,38 +468,39 @@ def create_fasterrcnn_dataset(mindrecord_file, batch_size=2, repeat_num=12, devi | |||
| type_cast3 = CC.TypeCast(mstype.bool_) | |||
| if is_training: | |||
| ds = ds.map(input_columns=["image", "annotation"], | |||
| ds = ds.map(operations=compose_map_func, input_columns=["image", "annotation"], | |||
| output_columns=["image", "image_shape", "box", "label", "valid_num"], | |||
| column_order=["image", "image_shape", "box", "label", "valid_num"], | |||
| operations=compose_map_func, num_parallel_workers=num_parallel_workers) | |||
| num_parallel_workers=num_parallel_workers) | |||
| flip = (np.random.rand() < config.flip_ratio) | |||
| if flip: | |||
| ds = ds.map(input_columns=["image"], operations=[normalize_op, type_cast0, horizontally_op], | |||
| ds = ds.map(operations=[normalize_op, type_cast0, horizontally_op], input_columns=["image"], | |||
| num_parallel_workers=12) | |||
| ds = ds.map(input_columns=["image", "image_shape", "box", "label", "valid_num"], | |||
| operations=flipped_generation, num_parallel_workers=num_parallel_workers) | |||
| ds = ds.map(operations=flipped_generation, | |||
| input_columns=["image", "image_shape", "box", "label", "valid_num"], | |||
| num_parallel_workers=num_parallel_workers) | |||
| else: | |||
| ds = ds.map(input_columns=["image"], operations=[normalize_op, type_cast0], | |||
| ds = ds.map(operations=[normalize_op, type_cast0], input_columns=["image"], | |||
| num_parallel_workers=12) | |||
| ds = ds.map(input_columns=["image"], operations=[hwc_to_chw, type_cast1], | |||
| ds = ds.map(operations=[hwc_to_chw, type_cast1], input_columns=["image"], | |||
| num_parallel_workers=12) | |||
| else: | |||
| ds = ds.map(input_columns=["image", "annotation"], | |||
| ds = ds.map(operations=compose_map_func, | |||
| input_columns=["image", "annotation"], | |||
| output_columns=["image", "image_shape", "box", "label", "valid_num"], | |||
| column_order=["image", "image_shape", "box", "label", "valid_num"], | |||
| operations=compose_map_func, | |||
| num_parallel_workers=num_parallel_workers) | |||
| ds = ds.map(input_columns=["image"], operations=[normalize_op, hwc_to_chw, type_cast1], | |||
| ds = ds.map(operations=[normalize_op, hwc_to_chw, type_cast1], input_columns=["image"], | |||
| num_parallel_workers=24) | |||
| # transpose_column from python to c | |||
| ds = ds.map(input_columns=["image_shape"], operations=[type_cast1]) | |||
| ds = ds.map(input_columns=["box"], operations=[type_cast1]) | |||
| ds = ds.map(input_columns=["label"], operations=[type_cast2]) | |||
| ds = ds.map(input_columns=["valid_num"], operations=[type_cast3]) | |||
| ds = ds.map(operations=[type_cast1], input_columns=["image_shape"]) | |||
| ds = ds.map(operations=[type_cast1], input_columns=["box"]) | |||
| ds = ds.map(operations=[type_cast2], input_columns=["label"]) | |||
| ds = ds.map(operations=[type_cast3], input_columns=["valid_num"]) | |||
| ds = ds.batch(batch_size, drop_remainder=True) | |||
| ds = ds.repeat(repeat_num) | |||
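
The training and evaluation branches above use a single map() call whose callable expands the ["image", "annotation"] pair into five columns, so output_columns names the returned columns and column_order fixes their downstream order. Below is a hedged, self-contained sketch of that multi-output pattern under the same MindSpore 1.0-era API; the preprocess function, shapes, and sample values are stand-ins, not the repository's real code.

    import numpy as np
    import mindspore.dataset as de

    def toy_preprocess(image, annotation):
        # Split one annotation array into several output columns (illustrative only).
        image_shape = np.array(image.shape[:2], dtype=np.float32)
        box = annotation[:, :4].astype(np.float32)
        label = annotation[:, 4].astype(np.int32)
        valid_num = np.array([annotation.shape[0]], dtype=np.int32)
        return image, image_shape, box, label, valid_num

    def gen():
        yield (np.zeros((8, 8, 3), dtype=np.float32),
               np.array([[0, 0, 4, 4, 1]], dtype=np.float32))

    ds = de.GeneratorDataset(gen, column_names=["image", "annotation"])
    ds = ds.map(operations=toy_preprocess,
                input_columns=["image", "annotation"],
                output_columns=["image", "image_shape", "box", "label", "valid_num"],
                column_order=["image", "image_shape", "box", "label", "valid_num"])
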
| @@ -55,8 +55,8 @@ def create_dataset_cifar10(data_home, repeat_num=1, training=True): | |||
| c_trans += [resize_op, rescale_op, normalize_op, changeswap_op] | |||
| # apply map operations on images | |||
| data_set = data_set.map(input_columns="label", operations=type_cast_op) | |||
| data_set = data_set.map(input_columns="image", operations=c_trans) | |||
| data_set = data_set.map(operations=type_cast_op, input_columns="label") | |||
| data_set = data_set.map(operations=c_trans, input_columns="image") | |||
| # apply batch operations | |||
| data_set = data_set.batch(batch_size=cifar_cfg.batch_size, drop_remainder=True) | |||
| @@ -60,8 +60,8 @@ def create_dataset(dataset_path, do_train, rank, group_size, repeat_num=1): | |||
| C.HWC2CHW() | |||
| ] | |||
| type_cast_op = C2.TypeCast(mstype.int32) | |||
| ds = ds.map(input_columns="image", operations=trans, num_parallel_workers=cfg.work_nums) | |||
| ds = ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=cfg.work_nums) | |||
| ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=cfg.work_nums) | |||
| ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=cfg.work_nums) | |||
| # apply batch operations | |||
| ds = ds.batch(cfg.batch_size, drop_remainder=True) | |||
| # apply dataset repeat operation | |||
| @@ -45,11 +45,11 @@ def create_dataset(data_path, batch_size=32, repeat_size=1, | |||
| type_cast_op = C.TypeCast(mstype.int32) | |||
| # apply map operations on images | |||
| mnist_ds = mnist_ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(input_columns="image", operations=resize_op, num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_op, num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_nml_op, num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(input_columns="image", operations=hwc2chw_op, num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(operations=resize_op, input_columns="image", num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(operations=rescale_op, input_columns="image", num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(operations=rescale_nml_op, input_columns="image", num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(operations=hwc2chw_op, input_columns="image", num_parallel_workers=num_parallel_workers) | |||
| # apply DatasetOps | |||
| buffer_size = 10000 | |||
| @@ -45,11 +45,11 @@ def create_dataset(data_path, batch_size=32, repeat_size=1, | |||
| type_cast_op = C.TypeCast(mstype.int32) | |||
| # apply map operations on images | |||
| mnist_ds = mnist_ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(input_columns="image", operations=resize_op, num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_op, num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_nml_op, num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(input_columns="image", operations=hwc2chw_op, num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(operations=resize_op, input_columns="image", num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(operations=rescale_op, input_columns="image", num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(operations=rescale_nml_op, input_columns="image", num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(operations=hwc2chw_op, input_columns="image", num_parallel_workers=num_parallel_workers) | |||
| # apply DatasetOps | |||
| buffer_size = 10000 | |||
| @@ -498,24 +498,24 @@ def create_maskrcnn_dataset(mindrecord_file, batch_size=2, device_num=1, rank_id | |||
| num_parallel_workers=4, shuffle=is_training) | |||
| decode = C.Decode() | |||
| ds = ds.map(input_columns=["image"], operations=decode) | |||
| ds = ds.map(operations=decode, input_columns=["image"]) | |||
| compose_map_func = (lambda image, annotation, mask, mask_shape: | |||
| preprocess_fn(image, annotation, mask, mask_shape, is_training)) | |||
| if is_training: | |||
| ds = ds.map(input_columns=["image", "annotation", "mask", "mask_shape"], | |||
| ds = ds.map(operations=compose_map_func, | |||
| input_columns=["image", "annotation", "mask", "mask_shape"], | |||
| output_columns=["image", "image_shape", "box", "label", "valid_num", "mask"], | |||
| column_order=["image", "image_shape", "box", "label", "valid_num", "mask"], | |||
| operations=compose_map_func, | |||
| python_multiprocessing=False, | |||
| num_parallel_workers=num_parallel_workers) | |||
| ds = ds.batch(batch_size, drop_remainder=True) | |||
| else: | |||
| ds = ds.map(input_columns=["image", "annotation", "mask", "mask_shape"], | |||
| ds = ds.map(operations=compose_map_func, | |||
| input_columns=["image", "annotation", "mask", "mask_shape"], | |||
| output_columns=["image", "image_shape", "box", "label", "valid_num", "mask"], | |||
| column_order=["image", "image_shape", "box", "label", "valid_num", "mask"], | |||
| operations=compose_map_func, | |||
| num_parallel_workers=num_parallel_workers) | |||
| ds = ds.batch(batch_size, drop_remainder=True) | |||
| @@ -85,8 +85,8 @@ def create_dataset(dataset_path, do_train, config, repeat_num=1): | |||
| type_cast_op = C2.TypeCast(mstype.int32) | |||
| ds = ds.map(input_columns="image", operations=trans, num_parallel_workers=8) | |||
| ds = ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=8) | |||
| ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=8) | |||
| ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=8) | |||
| # apply shuffle operations | |||
| ds = ds.shuffle(buffer_size=buffer_size) | |||
| @@ -89,8 +89,8 @@ def create_dataset(dataset_path, do_train, config, device_target, repeat_num=1, | |||
| type_cast_op = C2.TypeCast(mstype.int32) | |||
| ds = ds.map(input_columns="image", operations=trans, num_parallel_workers=16) | |||
| ds = ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=8) | |||
| ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=16) | |||
| ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=8) | |||
| # apply batch operations | |||
| ds = ds.batch(batch_size, drop_remainder=True) | |||
| @@ -152,7 +152,7 @@ def create_dataset_py(dataset_path, do_train, config, device_target, repeat_num= | |||
| compose = P2.Compose(trans) | |||
| ds = ds.map(input_columns="image", operations=compose, num_parallel_workers=8, python_multiprocessing=True) | |||
| ds = ds.map(operations=compose, input_columns="image", num_parallel_workers=8, python_multiprocessing=True) | |||
| # apply batch operations | |||
| ds = ds.batch(batch_size, drop_remainder=True) | |||
| @@ -70,8 +70,8 @@ def create_dataset(dataset_path, do_train, config, device_target, repeat_num=1, | |||
| type_cast_op = C2.TypeCast(mstype.int32) | |||
| ds = ds.map(input_columns="image", operations=trans, num_parallel_workers=8) | |||
| ds = ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=8) | |||
| ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=8) | |||
| ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=8) | |||
| # apply shuffle operations | |||
| ds = ds.shuffle(buffer_size=buffer_size) | |||
| @@ -61,8 +61,8 @@ def create_dataset(dataset_path, config, do_train, repeat_num=1): | |||
| C.HWC2CHW() | |||
| ] | |||
| type_cast_op = C2.TypeCast(mstype.int32) | |||
| ds = ds.map(input_columns="image", operations=trans, num_parallel_workers=config.work_nums) | |||
| ds = ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=config.work_nums) | |||
| ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=config.work_nums) | |||
| ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=config.work_nums) | |||
| # apply batch operations | |||
| ds = ds.batch(config.batch_size, drop_remainder=True) | |||
| # apply dataset repeat operation | |||
| @@ -22,6 +22,7 @@ import mindspore.dataset.vision.c_transforms as C | |||
| import mindspore.dataset.transforms.c_transforms as C2 | |||
| from mindspore.communication.management import init, get_rank, get_group_size | |||
| def create_dataset1(dataset_path, do_train, repeat_num=1, batch_size=32, target="Ascend"): | |||
| """ | |||
| create a train or evaluate cifar10 dataset for resnet50 | |||
| @@ -65,8 +66,8 @@ def create_dataset1(dataset_path, do_train, repeat_num=1, batch_size=32, target= | |||
| type_cast_op = C2.TypeCast(mstype.int32) | |||
| ds = ds.map(input_columns="label", num_parallel_workers=8, operations=type_cast_op) | |||
| ds = ds.map(input_columns="image", num_parallel_workers=8, operations=trans) | |||
| ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=8) | |||
| ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=8) | |||
| # apply batch operations | |||
| ds = ds.batch(batch_size, drop_remainder=True) | |||
| @@ -126,8 +127,8 @@ def create_dataset2(dataset_path, do_train, repeat_num=1, batch_size=32, target= | |||
| type_cast_op = C2.TypeCast(mstype.int32) | |||
| ds = ds.map(input_columns="image", num_parallel_workers=8, operations=trans) | |||
| ds = ds.map(input_columns="label", num_parallel_workers=8, operations=type_cast_op) | |||
| ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=8) | |||
| ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=8) | |||
| # apply batch operations | |||
| ds = ds.batch(batch_size, drop_remainder=True) | |||
| @@ -165,7 +166,7 @@ def create_dataset3(dataset_path, do_train, repeat_num=1, batch_size=32, target= | |||
| if do_train: | |||
| trans = [ | |||
| C.RandomCropDecodeResize(image_size, scale=(0.08, 1.0), ratio=(0.75, 1.333)), | |||
| C.RandomHorizontalFlip(rank_id/ (rank_id +1)), | |||
| C.RandomHorizontalFlip(rank_id / (rank_id + 1)), | |||
| C.Normalize(mean=mean, std=std), | |||
| C.HWC2CHW() | |||
| ] | |||
| @@ -180,8 +181,8 @@ def create_dataset3(dataset_path, do_train, repeat_num=1, batch_size=32, target= | |||
| type_cast_op = C2.TypeCast(mstype.int32) | |||
| ds = ds.map(input_columns="image", operations=trans, num_parallel_workers=8) | |||
| ds = ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=8) | |||
| ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=8) | |||
| ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=8) | |||
| # apply batch operations | |||
| ds = ds.batch(batch_size, drop_remainder=True) | |||
| @@ -190,6 +191,7 @@ def create_dataset3(dataset_path, do_train, repeat_num=1, batch_size=32, target= | |||
| return ds | |||
| def create_dataset4(dataset_path, do_train, repeat_num=1, batch_size=32, target="Ascend"): | |||
| """ | |||
| create a train or eval imagenet2012 dataset for se-resnet50 | |||
| @@ -233,8 +235,8 @@ def create_dataset4(dataset_path, do_train, repeat_num=1, batch_size=32, target= | |||
| ] | |||
| type_cast_op = C2.TypeCast(mstype.int32) | |||
| ds = ds.map(input_columns="image", num_parallel_workers=12, operations=trans) | |||
| ds = ds.map(input_columns="label", num_parallel_workers=12, operations=type_cast_op) | |||
| ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=12) | |||
| ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=12) | |||
| # apply batch operations | |||
| ds = ds.batch(batch_size, drop_remainder=True) | |||
| @@ -244,6 +246,7 @@ def create_dataset4(dataset_path, do_train, repeat_num=1, batch_size=32, target= | |||
| return ds | |||
| def _get_rank_info(): | |||
| """ | |||
| get rank size and rank id | |||
| @@ -85,8 +85,8 @@ def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32, target=" | |||
| type_cast_op = C2.TypeCast(mstype.int32) | |||
| ds = ds.map(input_columns="image", num_parallel_workers=8, operations=trans) | |||
| ds = ds.map(input_columns="label", num_parallel_workers=8, operations=type_cast_op) | |||
| ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=8) | |||
| ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=8) | |||
| # apply batch operations | |||
| ds = ds.batch(batch_size, drop_remainder=True) | |||
| @@ -147,7 +147,7 @@ def create_dataset_py(dataset_path, do_train, repeat_num=1, batch_size=32, targe | |||
| trans = [decode_op, resize_op, center_crop, to_tensor, normalize_op] | |||
| compose = P2.Compose(trans) | |||
| ds = ds.map(input_columns="image", operations=compose, num_parallel_workers=8, python_multiprocessing=True) | |||
| ds = ds.map(operations=compose, input_columns="image", num_parallel_workers=8, python_multiprocessing=True) | |||
| # apply batch operations | |||
| ds = ds.batch(batch_size, drop_remainder=True) | |||
| @@ -75,8 +75,8 @@ def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32, target=" | |||
| type_cast_op = C2.TypeCast(mstype.int32) | |||
| ds = ds.map(input_columns="image", num_parallel_workers=num_parallels, operations=trans) | |||
| ds = ds.map(input_columns="label", num_parallel_workers=num_parallels, operations=type_cast_op) | |||
| ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=num_parallels) | |||
| ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=num_parallels) | |||
| # apply batch operations | |||
| ds = ds.batch(batch_size, drop_remainder=True) | |||
| @@ -25,6 +25,7 @@ from src.utils.sampler import DistributedSampler | |||
| ImageFile.LOAD_TRUNCATED_IMAGES = True | |||
| class TxtDataset(): | |||
| """ | |||
| create txt dataset. | |||
| @@ -33,6 +34,7 @@ class TxtDataset(): | |||
| Returns: | |||
| de_dataset. | |||
| """ | |||
| def __init__(self, root, txt_name): | |||
| super(TxtDataset, self).__init__() | |||
| self.imgs = [] | |||
| @@ -142,10 +144,10 @@ def classification_dataset(data_dir, image_size, per_batch_size, max_epoch, rank | |||
| sampler = DistributedSampler(dataset, rank, group_size, shuffle=shuffle) | |||
| de_dataset = de.GeneratorDataset(dataset, ["image", "label"], sampler=sampler) | |||
| de_dataset = de_dataset.map(input_columns="image", num_parallel_workers=num_parallel_workers, | |||
| operations=transform_img) | |||
| de_dataset = de_dataset.map(input_columns="label", num_parallel_workers=num_parallel_workers, | |||
| operations=transform_label) | |||
| de_dataset = de_dataset.map(operations=transform_img, input_columns="image", | |||
| num_parallel_workers=num_parallel_workers) | |||
| de_dataset = de_dataset.map(operations=transform_label, input_columns="label", | |||
| num_parallel_workers=num_parallel_workers) | |||
| columns_to_project = ["image", "label"] | |||
| de_dataset = de_dataset.project(columns=columns_to_project) | |||
| @@ -72,8 +72,8 @@ def create_dataset(dataset_path, do_train, rank, group_size, repeat_num=1): | |||
| ] | |||
| type_cast_op = C2.TypeCast(mstype.int32) | |||
| ds = ds.map(input_columns="image", operations=trans, num_parallel_workers=cfg.work_nums) | |||
| ds = ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=cfg.work_nums) | |||
| ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=cfg.work_nums) | |||
| ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=cfg.work_nums) | |||
| # apply batch operations | |||
| ds = ds.batch(cfg.batch_size, drop_remainder=True) | |||
| @@ -34,13 +34,15 @@ def _rand(a=0., b=1.): | |||
| """Generate random.""" | |||
| return np.random.rand() * (b - a) + a | |||
| def get_imageId_from_fileName(filename): | |||
| """Get imageID from fileName""" | |||
| try: | |||
| filename = os.path.splitext(filename)[0] | |||
| return int(filename) | |||
| except: | |||
| raise NotImplementedError('Filename %s is supposed to be an integer.'%(filename)) | |||
| raise NotImplementedError('Filename %s is supposed to be an integer.' % (filename)) | |||
| def random_sample_crop(image, boxes): | |||
| """Random Crop the image and boxes""" | |||
| @@ -64,7 +66,7 @@ def random_sample_crop(image, boxes): | |||
| left = _rand() * (width - w) | |||
| top = _rand() * (height - h) | |||
| rect = np.array([int(top), int(left), int(top+h), int(left+w)]) | |||
| rect = np.array([int(top), int(left), int(top + h), int(left + w)]) | |||
| overlap = jaccard_numpy(boxes, rect) | |||
| # dropout some boxes | |||
| @@ -103,13 +105,14 @@ def random_sample_crop(image, boxes): | |||
| def preprocess_fn(img_id, image, box, is_training): | |||
| """Preprocess function for dataset.""" | |||
| def _infer_data(image, input_shape): | |||
| img_h, img_w, _ = image.shape | |||
| input_h, input_w = input_shape | |||
| image = cv2.resize(image, (input_w, input_h)) | |||
| #When the channels of image is 1 | |||
| # When the channels of image is 1 | |||
| if len(image.shape) == 2: | |||
| image = np.expand_dims(image, axis=-1) | |||
| image = np.concatenate([image, image, image], axis=-1) | |||
| @@ -150,6 +153,7 @@ def preprocess_fn(img_id, image, box, is_training): | |||
| box, label, num_match = ssd_bboxes_encode(box) | |||
| return image, box, label, num_match | |||
| return _data_aug(image, box, is_training, image_size=config.img_shape) | |||
| @@ -158,7 +162,7 @@ def create_voc_label(is_training): | |||
| voc_dir = config.voc_dir | |||
| cls_map = {name: i for i, name in enumerate(config.coco_classes)} | |||
| sub_dir = 'train' if is_training else 'eval' | |||
| #sub_dir = 'train' | |||
| # sub_dir = 'train' | |||
| voc_dir = os.path.join(voc_dir, sub_dir) | |||
| if not os.path.isdir(voc_dir): | |||
| raise ValueError(f'Cannot find {sub_dir} dataset path.') | |||
| @@ -244,6 +248,7 @@ def create_voc_label(is_training): | |||
| return images, image_files_dict, image_anno_dict | |||
| def create_coco_label(is_training): | |||
| """Get image path and annotation from COCO.""" | |||
| from pycocotools.coco import COCO | |||
| @@ -253,7 +258,7 @@ def create_coco_label(is_training): | |||
| if is_training: | |||
| data_type = config.train_data_type | |||
| #Classes need to train or test. | |||
| # Classes need to train or test. | |||
| train_cls = config.coco_classes | |||
| train_cls_dict = {} | |||
| for i, cls in enumerate(train_cls): | |||
| @@ -391,9 +396,10 @@ def create_ssd_dataset(mindrecord_file, batch_size=32, repeat_num=10, device_num | |||
| ds = de.MindDataset(mindrecord_file, columns_list=["img_id", "image", "annotation"], num_shards=device_num, | |||
| shard_id=rank, num_parallel_workers=num_parallel_workers, shuffle=is_training) | |||
| decode = C.Decode() | |||
| ds = ds.map(input_columns=["image"], operations=decode) | |||
| ds = ds.map(operations=decode, input_columns=["image"]) | |||
| change_swap_op = C.HWC2CHW() | |||
| normalize_op = C.Normalize(mean=[0.485*255, 0.456*255, 0.406*255], std=[0.229*255, 0.224*255, 0.225*255]) | |||
| normalize_op = C.Normalize(mean=[0.485 * 255, 0.456 * 255, 0.406 * 255], | |||
| std=[0.229 * 255, 0.224 * 255, 0.225 * 255]) | |||
| color_adjust_op = C.RandomColorAdjust(brightness=0.4, contrast=0.4, saturation=0.4) | |||
| compose_map_func = (lambda img_id, image, annotation: preprocess_fn(img_id, image, annotation, is_training)) | |||
| if is_training: | |||
| @@ -402,11 +408,11 @@ def create_ssd_dataset(mindrecord_file, batch_size=32, repeat_num=10, device_num | |||
| else: | |||
| output_columns = ["img_id", "image", "image_shape"] | |||
| trans = [normalize_op, change_swap_op] | |||
| ds = ds.map(input_columns=["img_id", "image", "annotation"], | |||
| ds = ds.map(operations=compose_map_func, input_columns=["img_id", "image", "annotation"], | |||
| output_columns=output_columns, column_order=output_columns, | |||
| operations=compose_map_func, python_multiprocessing=is_training, | |||
| python_multiprocessing=is_training, | |||
| num_parallel_workers=num_parallel_workers) | |||
| ds = ds.map(input_columns=["image"], operations=trans, python_multiprocessing=is_training, | |||
| ds = ds.map(operations=trans, input_columns=["image"], python_multiprocessing=is_training, | |||
| num_parallel_workers=num_parallel_workers) | |||
| ds = ds.batch(batch_size, drop_remainder=True) | |||
| ds = ds.repeat(repeat_num) | |||
| @@ -54,8 +54,8 @@ def vgg_create_dataset(data_home, image_size, batch_size, rank_id=0, rank_size=1 | |||
| changeswap_op] | |||
| # apply map operations on images | |||
| data_set = data_set.map(input_columns="label", operations=type_cast_op) | |||
| data_set = data_set.map(input_columns="image", operations=c_trans) | |||
| data_set = data_set.map(operations=type_cast_op, input_columns="label") | |||
| data_set = data_set.map(operations=c_trans, input_columns="image") | |||
| # apply repeat operations | |||
| data_set = data_set.repeat(repeat_num) | |||
| @@ -157,8 +157,8 @@ def classification_dataset(data_dir, image_size, per_batch_size, rank=0, group_s | |||
| sampler = DistributedSampler(dataset, rank, group_size, shuffle=shuffle) | |||
| de_dataset = de.GeneratorDataset(dataset, ["image", "label"], sampler=sampler) | |||
| de_dataset = de_dataset.map(input_columns="image", num_parallel_workers=8, operations=transform_img) | |||
| de_dataset = de_dataset.map(input_columns="label", num_parallel_workers=8, operations=transform_label) | |||
| de_dataset = de_dataset.map(operations=transform_img, input_columns="image", num_parallel_workers=8) | |||
| de_dataset = de_dataset.map(operations=transform_label, input_columns="label", num_parallel_workers=8) | |||
| columns_to_project = ["image", "label"] | |||
| de_dataset = de_dataset.project(columns=columns_to_project) | |||
| @@ -90,8 +90,8 @@ def create_dataset(dataset_path, batch_size=1, num_shards=1, shard_id=0, device_ | |||
| label_trans = [ | |||
| c.TypeCast(mstype.int32) | |||
| ] | |||
| ds = ds.map(input_columns=["image"], num_parallel_workers=8, operations=image_trans) | |||
| ds = ds.map(input_columns=["label"], num_parallel_workers=8, operations=label_trans) | |||
| ds = ds.map(operations=image_trans, input_columns=["image"], num_parallel_workers=8) | |||
| ds = ds.map(operations=label_trans, input_columns=["label"], num_parallel_workers=8) | |||
| ds = ds.batch(batch_size, drop_remainder=True) | |||
| return ds | |||
| @@ -176,11 +176,11 @@ def create_yolo_dataset(image_dir, anno_path, batch_size, max_epoch, device_num, | |||
| ds = de.GeneratorDataset(yolo_dataset, column_names=["image", "img_id"], | |||
| sampler=distributed_sampler) | |||
| compose_map_func = (lambda image, img_id: reshape_fn(image, img_id, config)) | |||
| ds = ds.map(input_columns=["image", "img_id"], | |||
| ds = ds.map(operations=compose_map_func, input_columns=["image", "img_id"], | |||
| output_columns=["image", "image_shape", "img_id"], | |||
| column_order=["image", "image_shape", "img_id"], | |||
| operations=compose_map_func, num_parallel_workers=8) | |||
| ds = ds.map(input_columns=["image"], operations=hwc_to_chw, num_parallel_workers=8) | |||
| num_parallel_workers=8) | |||
| ds = ds.map(operations=hwc_to_chw, input_columns=["image"], num_parallel_workers=8) | |||
| ds = ds.batch(batch_size, drop_remainder=True) | |||
| ds = ds.repeat(max_epoch) | |||
| @@ -173,11 +173,11 @@ def create_yolo_dataset(image_dir, anno_path, batch_size, max_epoch, device_num, | |||
| ds = de.GeneratorDataset(yolo_dataset, column_names=["image", "img_id"], | |||
| sampler=distributed_sampler) | |||
| compose_map_func = (lambda image, img_id: reshape_fn(image, img_id, config)) | |||
| ds = ds.map(input_columns=["image", "img_id"], | |||
| ds = ds.map(operations=compose_map_func, input_columns=["image", "img_id"], | |||
| output_columns=["image", "image_shape", "img_id"], | |||
| column_order=["image", "image_shape", "img_id"], | |||
| operations=compose_map_func, num_parallel_workers=8) | |||
| ds = ds.map(input_columns=["image"], operations=hwc_to_chw, num_parallel_workers=8) | |||
| num_parallel_workers=8) | |||
| ds = ds.map(operations=hwc_to_chw, input_columns=["image"], num_parallel_workers=8) | |||
| ds = ds.batch(batch_size, drop_remainder=True) | |||
| ds = ds.repeat(max_epoch) | |||
| @@ -296,21 +296,21 @@ def create_yolo_dataset(mindrecord_dir, batch_size=32, repeat_num=1, device_num= | |||
| ds = de.MindDataset(mindrecord_dir, columns_list=["image", "annotation"], num_shards=device_num, shard_id=rank, | |||
| num_parallel_workers=num_parallel_workers, shuffle=is_training) | |||
| decode = C.Decode() | |||
| ds = ds.map(input_columns=["image"], operations=decode) | |||
| ds = ds.map(operations=decode, input_columns=["image"]) | |||
| compose_map_func = (lambda image, annotation: preprocess_fn(image, annotation, is_training)) | |||
| if is_training: | |||
| hwc_to_chw = C.HWC2CHW() | |||
| ds = ds.map(input_columns=["image", "annotation"], | |||
| ds = ds.map(operations=compose_map_func, input_columns=["image", "annotation"], | |||
| output_columns=["image", "bbox_1", "bbox_2", "bbox_3", "gt_box1", "gt_box2", "gt_box3"], | |||
| column_order=["image", "bbox_1", "bbox_2", "bbox_3", "gt_box1", "gt_box2", "gt_box3"], | |||
| operations=compose_map_func, num_parallel_workers=num_parallel_workers) | |||
| ds = ds.map(input_columns=["image"], operations=hwc_to_chw, num_parallel_workers=num_parallel_workers) | |||
| num_parallel_workers=num_parallel_workers) | |||
| ds = ds.map(operations=hwc_to_chw, input_columns=["image"], num_parallel_workers=num_parallel_workers) | |||
| ds = ds.batch(batch_size, drop_remainder=True) | |||
| ds = ds.repeat(repeat_num) | |||
| else: | |||
| ds = ds.map(input_columns=["image", "annotation"], | |||
| ds = ds.map(operations=compose_map_func, input_columns=["image", "annotation"], | |||
| output_columns=["image", "image_shape", "annotation"], | |||
| column_order=["image", "image_shape", "annotation"], | |||
| operations=compose_map_func, num_parallel_workers=num_parallel_workers) | |||
| num_parallel_workers=num_parallel_workers) | |||
| return ds | |||
| @@ -112,12 +112,12 @@ def get_enwiki_512_dataset(batch_size=1, repeat_count=1, distribute_file=''): | |||
| "masked_lm_weights", | |||
| "next_sentence_labels"]) | |||
| type_cast_op = C.TypeCast(mstype.int32) | |||
| ds = ds.map(input_columns="segment_ids", operations=type_cast_op) | |||
| ds = ds.map(input_columns="input_mask", operations=type_cast_op) | |||
| ds = ds.map(input_columns="input_ids", operations=type_cast_op) | |||
| ds = ds.map(input_columns="masked_lm_ids", operations=type_cast_op) | |||
| ds = ds.map(input_columns="masked_lm_positions", operations=type_cast_op) | |||
| ds = ds.map(input_columns="next_sentence_labels", operations=type_cast_op) | |||
| ds = ds.map(operations=type_cast_op, input_columns="segment_ids") | |||
| ds = ds.map(operations=type_cast_op, input_columns="input_mask") | |||
| ds = ds.map(operations=type_cast_op, input_columns="input_ids") | |||
| ds = ds.map(operations=type_cast_op, input_columns="masked_lm_ids") | |||
| ds = ds.map(operations=type_cast_op, input_columns="masked_lm_positions") | |||
| ds = ds.map(operations=type_cast_op, input_columns="next_sentence_labels") | |||
| ds = ds.repeat(repeat_count) | |||
| # apply batch operations | |||
| @@ -42,30 +42,31 @@ def process_tnews_clue_dataset(data_dir, label_list, bert_vocab_path, data_usage | |||
| usage=data_usage, shuffle=shuffle_dataset) | |||
| ### Processing label | |||
| if data_usage == 'test': | |||
| dataset = dataset.map(input_columns=["id"], output_columns=["id", "label_id"], | |||
| column_order=["id", "label_id", "sentence"], operations=ops.Duplicate()) | |||
| dataset = dataset.map(input_columns=["label_id"], operations=ops.Fill(0)) | |||
| dataset = dataset.map(operations=ops.Duplicate(), input_columns=["id"], output_columns=["id", "label_id"], | |||
| column_order=["id", "label_id", "sentence"]) | |||
| dataset = dataset.map(operations=ops.Fill(0), input_columns=["label_id"]) | |||
| else: | |||
| label_vocab = text.Vocab.from_list(label_list) | |||
| label_lookup = text.Lookup(label_vocab) | |||
| dataset = dataset.map(input_columns="label_desc", output_columns="label_id", operations=label_lookup) | |||
| dataset = dataset.map(operations=label_lookup, input_columns="label_desc", output_columns="label_id") | |||
| ### Processing sentence | |||
| vocab = text.Vocab.from_file(bert_vocab_path) | |||
| tokenizer = text.BertTokenizer(vocab, lower_case=True) | |||
| lookup = text.Lookup(vocab, unknown_token='[UNK]') | |||
| dataset = dataset.map(input_columns=["sentence"], operations=tokenizer) | |||
| dataset = dataset.map(input_columns=["sentence"], operations=ops.Slice(slice(0, max_seq_len))) | |||
| dataset = dataset.map(input_columns=["sentence"], | |||
| operations=ops.Concatenate(prepend=np.array(["[CLS]"], dtype='S'), | |||
| append=np.array(["[SEP]"], dtype='S'))) | |||
| dataset = dataset.map(input_columns=["sentence"], output_columns=["text_ids"], operations=lookup) | |||
| dataset = dataset.map(input_columns=["text_ids"], operations=ops.PadEnd([max_seq_len], 0)) | |||
| dataset = dataset.map(input_columns=["text_ids"], output_columns=["text_ids", "mask_ids"], | |||
| column_order=["text_ids", "mask_ids", "label_id"], operations=ops.Duplicate()) | |||
| dataset = dataset.map(input_columns=["mask_ids"], operations=ops.Mask(ops.Relational.NE, 0, mstype.int32)) | |||
| dataset = dataset.map(input_columns=["text_ids"], output_columns=["text_ids", "segment_ids"], | |||
| column_order=["text_ids", "mask_ids", "segment_ids", "label_id"], operations=ops.Duplicate()) | |||
| dataset = dataset.map(input_columns=["segment_ids"], operations=ops.Fill(0)) | |||
| dataset = dataset.map(operations=tokenizer, input_columns=["sentence"]) | |||
| dataset = dataset.map(operations=ops.Slice(slice(0, max_seq_len)), input_columns=["sentence"]) | |||
| dataset = dataset.map(operations=ops.Concatenate(prepend=np.array(["[CLS]"], dtype='S'), | |||
| append=np.array(["[SEP]"], dtype='S')), input_columns=["sentence"]) | |||
| dataset = dataset.map(operations=lookup, input_columns=["sentence"], output_columns=["text_ids"]) | |||
| dataset = dataset.map(operations=ops.PadEnd([max_seq_len], 0), input_columns=["text_ids"]) | |||
| dataset = dataset.map(operations=ops.Duplicate(), input_columns=["text_ids"], | |||
| output_columns=["text_ids", "mask_ids"], | |||
| column_order=["text_ids", "mask_ids", "label_id"]) | |||
| dataset = dataset.map(operations=ops.Mask(ops.Relational.NE, 0, mstype.int32), input_columns=["mask_ids"]) | |||
| dataset = dataset.map(operations=ops.Duplicate(), input_columns=["text_ids"], | |||
| output_columns=["text_ids", "segment_ids"], | |||
| column_order=["text_ids", "mask_ids", "segment_ids", "label_id"]) | |||
| dataset = dataset.map(operations=ops.Fill(0), input_columns=["segment_ids"]) | |||
| dataset = dataset.batch(batch_size, drop_remainder=drop_remainder) | |||
| return dataset | |||
| @@ -86,50 +87,51 @@ def process_cmnli_clue_dataset(data_dir, label_list, bert_vocab_path, data_usage | |||
| usage=data_usage, shuffle=shuffle_dataset) | |||
| ### Processing label | |||
| if data_usage == 'test': | |||
| dataset = dataset.map(input_columns=["id"], output_columns=["id", "label_id"], | |||
| column_order=["id", "label_id", "sentence1", "sentence2"], operations=ops.Duplicate()) | |||
| dataset = dataset.map(input_columns=["label_id"], operations=ops.Fill(0)) | |||
| dataset = dataset.map(operations=ops.Duplicate(), input_columns=["id"], output_columns=["id", "label_id"], | |||
| column_order=["id", "label_id", "sentence1", "sentence2"]) | |||
| dataset = dataset.map(operations=ops.Fill(0), input_columns=["label_id"]) | |||
| else: | |||
| label_vocab = text.Vocab.from_list(label_list) | |||
| label_lookup = text.Lookup(label_vocab) | |||
| dataset = dataset.map(input_columns="label", output_columns="label_id", operations=label_lookup) | |||
| dataset = dataset.map(operations=label_lookup, input_columns="label", output_columns="label_id") | |||
| ### Processing sentence pairs | |||
| vocab = text.Vocab.from_file(bert_vocab_path) | |||
| tokenizer = text.BertTokenizer(vocab, lower_case=True) | |||
| lookup = text.Lookup(vocab, unknown_token='[UNK]') | |||
| ### Tokenizing sentences and truncate sequence pair | |||
| dataset = dataset.map(input_columns=["sentence1"], operations=tokenizer) | |||
| dataset = dataset.map(input_columns=["sentence2"], operations=tokenizer) | |||
| dataset = dataset.map(input_columns=["sentence1", "sentence2"], | |||
| operations=text.TruncateSequencePair(max_seq_len-3)) | |||
| dataset = dataset.map(operations=tokenizer, input_columns=["sentence1"]) | |||
| dataset = dataset.map(operations=tokenizer, input_columns=["sentence2"]) | |||
| dataset = dataset.map(operations=text.TruncateSequencePair(max_seq_len - 3), | |||
| input_columns=["sentence1", "sentence2"]) | |||
| ### Adding special tokens | |||
| dataset = dataset.map(input_columns=["sentence1"], | |||
| operations=ops.Concatenate(prepend=np.array(["[CLS]"], dtype='S'), | |||
| append=np.array(["[SEP]"], dtype='S'))) | |||
| dataset = dataset.map(input_columns=["sentence2"], | |||
| operations=ops.Concatenate(append=np.array(["[SEP]"], dtype='S'))) | |||
| dataset = dataset.map(operations=ops.Concatenate(prepend=np.array(["[CLS]"], dtype='S'), | |||
| append=np.array(["[SEP]"], dtype='S')), | |||
| input_columns=["sentence1"]) | |||
| dataset = dataset.map(operations=ops.Concatenate(append=np.array(["[SEP]"], dtype='S')), | |||
| input_columns=["sentence2"]) | |||
| ### Generating segment_ids | |||
| dataset = dataset.map(input_columns=["sentence1"], output_columns=["sentence1", "type_sentence1"], | |||
| column_order=["sentence1", "type_sentence1", "sentence2", "label_id"], | |||
| operations=ops.Duplicate()) | |||
| dataset = dataset.map(input_columns=["sentence2"], output_columns=["sentence2", "type_sentence2"], | |||
| column_order=["sentence1", "type_sentence1", "sentence2", "type_sentence2", "label_id"], | |||
| operations=ops.Duplicate()) | |||
| dataset = dataset.map(input_columns=["type_sentence1"], operations=[lookup, ops.Fill(0)]) | |||
| dataset = dataset.map(input_columns=["type_sentence2"], operations=[lookup, ops.Fill(1)]) | |||
| dataset = dataset.map(input_columns=["type_sentence1", "type_sentence2"], output_columns=["segment_ids"], | |||
| column_order=["sentence1", "sentence2", "segment_ids", "label_id"], | |||
| operations=ops.Concatenate()) | |||
| dataset = dataset.map(input_columns=["segment_ids"], operations=ops.PadEnd([max_seq_len], 0)) | |||
| dataset = dataset.map(operations=ops.Duplicate(), input_columns=["sentence1"], | |||
| output_columns=["sentence1", "type_sentence1"], | |||
| column_order=["sentence1", "type_sentence1", "sentence2", "label_id"]) | |||
| dataset = dataset.map(operations=ops.Duplicate(), | |||
| input_columns=["sentence2"], output_columns=["sentence2", "type_sentence2"], | |||
| column_order=["sentence1", "type_sentence1", "sentence2", "type_sentence2", "label_id"]) | |||
| dataset = dataset.map(operations=[lookup, ops.Fill(0)], input_columns=["type_sentence1"]) | |||
| dataset = dataset.map(operations=[lookup, ops.Fill(1)], input_columns=["type_sentence2"]) | |||
| dataset = dataset.map(operations=ops.Concatenate(), | |||
| input_columns=["type_sentence1", "type_sentence2"], output_columns=["segment_ids"], | |||
| column_order=["sentence1", "sentence2", "segment_ids", "label_id"]) | |||
| dataset = dataset.map(operations=ops.PadEnd([max_seq_len], 0), input_columns=["segment_ids"]) | |||
| ### Generating text_ids | |||
| dataset = dataset.map(input_columns=["sentence1", "sentence2"], output_columns=["text_ids"], | |||
| column_order=["text_ids", "segment_ids", "label_id"], | |||
| operations=ops.Concatenate()) | |||
| dataset = dataset.map(input_columns=["text_ids"], operations=lookup) | |||
| dataset = dataset.map(input_columns=["text_ids"], operations=ops.PadEnd([max_seq_len], 0)) | |||
| dataset = dataset.map(operations=ops.Concatenate(), | |||
| input_columns=["sentence1", "sentence2"], output_columns=["text_ids"], | |||
| column_order=["text_ids", "segment_ids", "label_id"]) | |||
| dataset = dataset.map(operations=lookup, input_columns=["text_ids"]) | |||
| dataset = dataset.map(operations=ops.PadEnd([max_seq_len], 0), input_columns=["text_ids"]) | |||
| ### Generating mask_ids | |||
| dataset = dataset.map(input_columns=["text_ids"], output_columns=["text_ids", "mask_ids"], | |||
| column_order=["text_ids", "mask_ids", "segment_ids", "label_id"], operations=ops.Duplicate()) | |||
| dataset = dataset.map(input_columns=["mask_ids"], operations=ops.Mask(ops.Relational.NE, 0, mstype.int32)) | |||
| dataset = dataset.map(operations=ops.Duplicate(), input_columns=["text_ids"], | |||
| output_columns=["text_ids", "mask_ids"], | |||
| column_order=["text_ids", "mask_ids", "segment_ids", "label_id"]) | |||
| dataset = dataset.map(operations=ops.Mask(ops.Relational.NE, 0, mstype.int32), input_columns=["mask_ids"]) | |||
| dataset = dataset.batch(batch_size, drop_remainder=drop_remainder) | |||
| return dataset | |||
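
In the CLUE preprocessing above, the attention-mask column is derived by duplicating the padded text_ids column and masking it against zero. The following small sketch isolates that Duplicate/Mask step; the token ids are invented and assumed to be already padded, while the operators and keyword order are the ones used in the hunks.

    import numpy as np
    import mindspore.common.dtype as mstype
    import mindspore.dataset as de
    import mindspore.dataset.transforms.c_transforms as ops

    def gen():
        # One toy row: token ids already padded to a fixed length of 6.
        yield (np.array([101, 7592, 2088, 102, 0, 0], dtype=np.int32),)

    dataset = de.GeneratorDataset(gen, column_names=["text_ids"])
    # Duplicate text_ids into a second column, then turn it into a 0/1 mask of real tokens.
    dataset = dataset.map(operations=ops.Duplicate(), input_columns=["text_ids"],
                          output_columns=["text_ids", "mask_ids"],
                          column_order=["text_ids", "mask_ids"])
    dataset = dataset.map(operations=ops.Mask(ops.Relational.NE, 0, mstype.int32),
                          input_columns=["mask_ids"])
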
| @@ -39,12 +39,12 @@ def create_bert_dataset(device_num=1, rank=0, do_shuffle="true", data_dir=None, | |||
| ori_dataset_size = ds.get_dataset_size() | |||
| print('origin dataset size: ', ori_dataset_size) | |||
| type_cast_op = C.TypeCast(mstype.int32) | |||
| ds = ds.map(input_columns="masked_lm_ids", operations=type_cast_op) | |||
| ds = ds.map(input_columns="masked_lm_positions", operations=type_cast_op) | |||
| ds = ds.map(input_columns="next_sentence_labels", operations=type_cast_op) | |||
| ds = ds.map(input_columns="segment_ids", operations=type_cast_op) | |||
| ds = ds.map(input_columns="input_mask", operations=type_cast_op) | |||
| ds = ds.map(input_columns="input_ids", operations=type_cast_op) | |||
| ds = ds.map(operations=type_cast_op, input_columns="masked_lm_ids") | |||
| ds = ds.map(operations=type_cast_op, input_columns="masked_lm_positions") | |||
| ds = ds.map(operations=type_cast_op, input_columns="next_sentence_labels") | |||
| ds = ds.map(operations=type_cast_op, input_columns="segment_ids") | |||
| ds = ds.map(operations=type_cast_op, input_columns="input_mask") | |||
| ds = ds.map(operations=type_cast_op, input_columns="input_ids") | |||
| # apply batch operations | |||
| ds = ds.batch(bert_net_cfg.batch_size, drop_remainder=True) | |||
| logger.info("data size: {}".format(ds.get_dataset_size())) | |||
| @@ -60,12 +60,12 @@ def create_ner_dataset(batch_size=1, repeat_count=1, assessment_method="accuracy | |||
| columns_list=["input_ids", "input_mask", "segment_ids", "label_ids"], shuffle=do_shuffle) | |||
| if assessment_method == "Spearman_correlation": | |||
| type_cast_op_float = C.TypeCast(mstype.float32) | |||
| ds = ds.map(input_columns="label_ids", operations=type_cast_op_float) | |||
| ds = ds.map(operations=type_cast_op_float, input_columns="label_ids") | |||
| else: | |||
| ds = ds.map(input_columns="label_ids", operations=type_cast_op) | |||
| ds = ds.map(input_columns="segment_ids", operations=type_cast_op) | |||
| ds = ds.map(input_columns="input_mask", operations=type_cast_op) | |||
| ds = ds.map(input_columns="input_ids", operations=type_cast_op) | |||
| ds = ds.map(operations=type_cast_op, input_columns="label_ids") | |||
| ds = ds.map(operations=type_cast_op, input_columns="segment_ids") | |||
| ds = ds.map(operations=type_cast_op, input_columns="input_mask") | |||
| ds = ds.map(operations=type_cast_op, input_columns="input_ids") | |||
| ds = ds.repeat(repeat_count) | |||
| # apply batch operations | |||
| ds = ds.batch(batch_size, drop_remainder=True) | |||
| @@ -80,12 +80,12 @@ def create_classification_dataset(batch_size=1, repeat_count=1, assessment_metho | |||
| columns_list=["input_ids", "input_mask", "segment_ids", "label_ids"], shuffle=do_shuffle) | |||
| if assessment_method == "Spearman_correlation": | |||
| type_cast_op_float = C.TypeCast(mstype.float32) | |||
| ds = ds.map(input_columns="label_ids", operations=type_cast_op_float) | |||
| ds = ds.map(operations=type_cast_op_float, input_columns="label_ids") | |||
| else: | |||
| ds = ds.map(input_columns="label_ids", operations=type_cast_op) | |||
| ds = ds.map(input_columns="segment_ids", operations=type_cast_op) | |||
| ds = ds.map(input_columns="input_mask", operations=type_cast_op) | |||
| ds = ds.map(input_columns="input_ids", operations=type_cast_op) | |||
| ds = ds.map(operations=type_cast_op, input_columns="label_ids") | |||
| ds = ds.map(operations=type_cast_op, input_columns="segment_ids") | |||
| ds = ds.map(operations=type_cast_op, input_columns="input_mask") | |||
| ds = ds.map(operations=type_cast_op, input_columns="input_ids") | |||
| ds = ds.repeat(repeat_count) | |||
| # apply batch operations | |||
| ds = ds.batch(batch_size, drop_remainder=True) | |||
| @@ -101,14 +101,14 @@ def create_squad_dataset(batch_size=1, repeat_count=1, data_file_path=None, sche | |||
| columns_list=["input_ids", "input_mask", "segment_ids", "start_positions", | |||
| "end_positions", "unique_ids", "is_impossible"], | |||
| shuffle=do_shuffle) | |||
| ds = ds.map(input_columns="start_positions", operations=type_cast_op) | |||
| ds = ds.map(input_columns="end_positions", operations=type_cast_op) | |||
| ds = ds.map(operations=type_cast_op, input_columns="start_positions") | |||
| ds = ds.map(operations=type_cast_op, input_columns="end_positions") | |||
| else: | |||
| ds = de.TFRecordDataset([data_file_path], schema_file_path if schema_file_path != "" else None, | |||
| columns_list=["input_ids", "input_mask", "segment_ids", "unique_ids"]) | |||
| ds = ds.map(input_columns="segment_ids", operations=type_cast_op) | |||
| ds = ds.map(input_columns="input_mask", operations=type_cast_op) | |||
| ds = ds.map(input_columns="input_ids", operations=type_cast_op) | |||
| ds = ds.map(operations=type_cast_op, input_columns="segment_ids") | |||
| ds = ds.map(operations=type_cast_op, input_columns="input_mask") | |||
| ds = ds.map(operations=type_cast_op, input_columns="input_ids") | |||
| ds = ds.repeat(repeat_count) | |||
| # apply batch operations | |||
| ds = ds.batch(batch_size, drop_remainder=True) | |||
| @@ -117,12 +117,12 @@ def get_enwiki_512_dataset(batch_size=1, repeat_count=1, distribute_file=''): | |||
| "masked_lm_weights", | |||
| "next_sentence_labels"]) | |||
| type_cast_op = C.TypeCast(mstype.int32) | |||
| ds = ds.map(input_columns="segment_ids", operations=type_cast_op) | |||
| ds = ds.map(input_columns="input_mask", operations=type_cast_op) | |||
| ds = ds.map(input_columns="input_ids", operations=type_cast_op) | |||
| ds = ds.map(input_columns="masked_lm_ids", operations=type_cast_op) | |||
| ds = ds.map(input_columns="masked_lm_positions", operations=type_cast_op) | |||
| ds = ds.map(input_columns="next_sentence_labels", operations=type_cast_op) | |||
| ds = ds.map(operations=type_cast_op, input_columns="segment_ids") | |||
| ds = ds.map(operations=type_cast_op, input_columns="input_mask") | |||
| ds = ds.map(operations=type_cast_op, input_columns="input_ids") | |||
| ds = ds.map(operations=type_cast_op, input_columns="masked_lm_ids") | |||
| ds = ds.map(operations=type_cast_op, input_columns="masked_lm_positions") | |||
| ds = ds.map(operations=type_cast_op, input_columns="next_sentence_labels") | |||
| ds = ds.repeat(repeat_count) | |||
| # apply batch operations | |||
| @@ -40,12 +40,12 @@ def create_bert_dataset(device_num=1, rank=0, do_shuffle="true", data_dir=None, | |||
| ori_dataset_size = ds.get_dataset_size() | |||
| print('origin dataset size: ', ori_dataset_size) | |||
| type_cast_op = C.TypeCast(mstype.int32) | |||
| ds = ds.map(input_columns="masked_lm_ids", operations=type_cast_op) | |||
| ds = ds.map(input_columns="masked_lm_positions", operations=type_cast_op) | |||
| ds = ds.map(input_columns="next_sentence_labels", operations=type_cast_op) | |||
| ds = ds.map(input_columns="segment_ids", operations=type_cast_op) | |||
| ds = ds.map(input_columns="input_mask", operations=type_cast_op) | |||
| ds = ds.map(input_columns="input_ids", operations=type_cast_op) | |||
| ds = ds.map(operations=type_cast_op, input_columns="masked_lm_ids") | |||
| ds = ds.map(operations=type_cast_op, input_columns="masked_lm_positions") | |||
| ds = ds.map(operations=type_cast_op, input_columns="next_sentence_labels") | |||
| ds = ds.map(operations=type_cast_op, input_columns="segment_ids") | |||
| ds = ds.map(operations=type_cast_op, input_columns="input_mask") | |||
| ds = ds.map(operations=type_cast_op, input_columns="input_ids") | |||
| # apply batch operations | |||
| ds = ds.batch(bert_net_cfg.batch_size, drop_remainder=True) | |||
| logger.info("data size: {}".format(ds.get_dataset_size())) | |||
| @@ -61,12 +61,12 @@ def create_ner_dataset(batch_size=1, repeat_count=1, assessment_method="accuracy | |||
| columns_list=["input_ids", "input_mask", "segment_ids", "label_ids"]) | |||
| if assessment_method == "Spearman_correlation": | |||
| type_cast_op_float = C.TypeCast(mstype.float32) | |||
| ds = ds.map(input_columns="label_ids", operations=type_cast_op_float) | |||
| ds = ds.map(operations=type_cast_op_float, input_columns="label_ids") | |||
| else: | |||
| ds = ds.map(input_columns="label_ids", operations=type_cast_op) | |||
| ds = ds.map(input_columns="segment_ids", operations=type_cast_op) | |||
| ds = ds.map(input_columns="input_mask", operations=type_cast_op) | |||
| ds = ds.map(input_columns="input_ids", operations=type_cast_op) | |||
| ds = ds.map(operations=type_cast_op, input_columns="label_ids") | |||
| ds = ds.map(operations=type_cast_op, input_columns="segment_ids") | |||
| ds = ds.map(operations=type_cast_op, input_columns="input_mask") | |||
| ds = ds.map(operations=type_cast_op, input_columns="input_ids") | |||
| ds = ds.repeat(repeat_count) | |||
| # apply shuffle operation | |||
| buffer_size = 960 | |||
| @@ -84,12 +84,12 @@ def create_classification_dataset(batch_size=1, repeat_count=1, assessment_metho | |||
| columns_list=["input_ids", "input_mask", "segment_ids", "label_ids"]) | |||
| if assessment_method == "Spearman_correlation": | |||
| type_cast_op_float = C.TypeCast(mstype.float32) | |||
| ds = ds.map(input_columns="label_ids", operations=type_cast_op_float) | |||
| ds = ds.map(operations=type_cast_op_float, input_columns="label_ids") | |||
| else: | |||
| ds = ds.map(input_columns="label_ids", operations=type_cast_op) | |||
| ds = ds.map(input_columns="segment_ids", operations=type_cast_op) | |||
| ds = ds.map(input_columns="input_mask", operations=type_cast_op) | |||
| ds = ds.map(input_columns="input_ids", operations=type_cast_op) | |||
| ds = ds.map(operations=type_cast_op, input_columns="label_ids") | |||
| ds = ds.map(operations=type_cast_op, input_columns="segment_ids") | |||
| ds = ds.map(operations=type_cast_op, input_columns="input_mask") | |||
| ds = ds.map(operations=type_cast_op, input_columns="input_ids") | |||
| ds = ds.repeat(repeat_count) | |||
| # apply shuffle operation | |||
| buffer_size = 960 | |||
| @@ -107,17 +107,17 @@ def create_squad_dataset(batch_size=1, repeat_count=1, data_file_path=None, sche | |||
| columns_list=["input_ids", "input_mask", "segment_ids", | |||
| "start_positions", "end_positions", | |||
| "unique_ids", "is_impossible"]) | |||
| ds = ds.map(input_columns="start_positions", operations=type_cast_op) | |||
| ds = ds.map(input_columns="end_positions", operations=type_cast_op) | |||
| ds = ds.map(operations=type_cast_op, input_columns="start_positions") | |||
| ds = ds.map(operations=type_cast_op, input_columns="end_positions") | |||
| else: | |||
| ds = de.TFRecordDataset([data_file_path], schema_file_path if schema_file_path != "" else None, | |||
| columns_list=["input_ids", "input_mask", "segment_ids", "unique_ids"]) | |||
| ds = ds.map(input_columns="input_ids", operations=type_cast_op) | |||
| ds = ds.map(input_columns="input_mask", operations=type_cast_op) | |||
| ds = ds.map(input_columns="segment_ids", operations=type_cast_op) | |||
| ds = ds.map(input_columns="segment_ids", operations=type_cast_op) | |||
| ds = ds.map(input_columns="input_mask", operations=type_cast_op) | |||
| ds = ds.map(input_columns="input_ids", operations=type_cast_op) | |||
| ds = ds.map(operations=type_cast_op, input_columns="input_ids") | |||
| ds = ds.map(operations=type_cast_op, input_columns="input_mask") | |||
| ds = ds.map(operations=type_cast_op, input_columns="segment_ids") | |||
| ds = ds.map(operations=type_cast_op, input_columns="segment_ids") | |||
| ds = ds.map(operations=type_cast_op, input_columns="input_mask") | |||
| ds = ds.map(operations=type_cast_op, input_columns="input_ids") | |||
| ds = ds.repeat(repeat_count) | |||
| # apply shuffle operation | |||
| buffer_size = 960 | |||
| @@ -60,12 +60,12 @@ def _load_dataset(input_files, batch_size, epoch_count=1, | |||
| repeat_count = epoch_count | |||
| type_cast_op = deC.TypeCast(mstype.int32) | |||
| ds = ds.map(input_columns="src", operations=type_cast_op) | |||
| ds = ds.map(input_columns="src_padding", operations=type_cast_op) | |||
| ds = ds.map(input_columns="prev_opt", operations=type_cast_op) | |||
| ds = ds.map(input_columns="prev_padding", operations=type_cast_op) | |||
| ds = ds.map(input_columns="target", operations=type_cast_op) | |||
| ds = ds.map(input_columns="tgt_padding", operations=type_cast_op) | |||
| ds = ds.map(operations=type_cast_op, input_columns="src") | |||
| ds = ds.map(operations=type_cast_op, input_columns="src_padding") | |||
| ds = ds.map(operations=type_cast_op, input_columns="prev_opt") | |||
| ds = ds.map(operations=type_cast_op, input_columns="prev_padding") | |||
| ds = ds.map(operations=type_cast_op, input_columns="target") | |||
| ds = ds.map(operations=type_cast_op, input_columns="tgt_padding") | |||
| ds = ds.rename( | |||
| input_columns=["src", | |||
| @@ -49,11 +49,11 @@ def create_tinybert_dataset(task='td', batch_size=32, device_num=1, rank=0, | |||
| shard_equal_rows=True) | |||
| type_cast_op = C.TypeCast(mstype.int32) | |||
| ds = ds.map(input_columns="segment_ids", operations=type_cast_op) | |||
| ds = ds.map(input_columns="input_mask", operations=type_cast_op) | |||
| ds = ds.map(input_columns="input_ids", operations=type_cast_op) | |||
| ds = ds.map(operations=type_cast_op, input_columns="segment_ids") | |||
| ds = ds.map(operations=type_cast_op, input_columns="input_mask") | |||
| ds = ds.map(operations=type_cast_op, input_columns="input_ids") | |||
| if task == "td": | |||
| ds = ds.map(input_columns="label_ids", operations=type_cast_op) | |||
| ds = ds.map(operations=type_cast_op, input_columns="label_ids") | |||
| # apply batch operations | |||
| ds = ds.batch(batch_size, drop_remainder=True) | |||
| @@ -40,12 +40,12 @@ def load_test_data(batch_size=1, data_file=None): | |||
| "target_eos_ids", "target_eos_mask"], | |||
| shuffle=False) | |||
| type_cast_op = deC.TypeCast(mstype.int32) | |||
| ds = ds.map(input_columns="source_eos_ids", operations=type_cast_op) | |||
| ds = ds.map(input_columns="source_eos_mask", operations=type_cast_op) | |||
| ds = ds.map(input_columns="target_sos_ids", operations=type_cast_op) | |||
| ds = ds.map(input_columns="target_sos_mask", operations=type_cast_op) | |||
| ds = ds.map(input_columns="target_eos_ids", operations=type_cast_op) | |||
| ds = ds.map(input_columns="target_eos_mask", operations=type_cast_op) | |||
| ds = ds.map(operations=type_cast_op, input_columns="source_eos_ids") | |||
| ds = ds.map(operations=type_cast_op, input_columns="source_eos_mask") | |||
| ds = ds.map(operations=type_cast_op, input_columns="target_sos_ids") | |||
| ds = ds.map(operations=type_cast_op, input_columns="target_sos_mask") | |||
| ds = ds.map(operations=type_cast_op, input_columns="target_eos_ids") | |||
| ds = ds.map(operations=type_cast_op, input_columns="target_eos_mask") | |||
| # apply batch operations | |||
| ds = ds.batch(batch_size, drop_remainder=True) | |||
| ds.channel_name = 'transformer' | |||
| @@ -30,12 +30,12 @@ def create_transformer_dataset(epoch_count=1, rank_size=1, rank_id=0, do_shuffle | |||
| shuffle=(do_shuffle == "true"), num_shards=rank_size, shard_id=rank_id) | |||
| type_cast_op = deC.TypeCast(mstype.int32) | |||
| ds = ds.map(input_columns="source_eos_ids", operations=type_cast_op) | |||
| ds = ds.map(input_columns="source_eos_mask", operations=type_cast_op) | |||
| ds = ds.map(input_columns="target_sos_ids", operations=type_cast_op) | |||
| ds = ds.map(input_columns="target_sos_mask", operations=type_cast_op) | |||
| ds = ds.map(input_columns="target_eos_ids", operations=type_cast_op) | |||
| ds = ds.map(input_columns="target_eos_mask", operations=type_cast_op) | |||
| ds = ds.map(operations=type_cast_op, input_columns="source_eos_ids") | |||
| ds = ds.map(operations=type_cast_op, input_columns="source_eos_mask") | |||
| ds = ds.map(operations=type_cast_op, input_columns="target_sos_ids") | |||
| ds = ds.map(operations=type_cast_op, input_columns="target_sos_mask") | |||
| ds = ds.map(operations=type_cast_op, input_columns="target_eos_ids") | |||
| ds = ds.map(operations=type_cast_op, input_columns="target_eos_mask") | |||
| # apply batch operations | |||
| ds = ds.batch(transformer_net_cfg.batch_size, drop_remainder=True) | |||
| @@ -89,8 +89,8 @@ def create_dataset(repeat_num=1, training=True): | |||
| changeswap_op] | |||
| # apply map operations on images | |||
| ds = ds.map(input_columns="label", operations=type_cast_op) | |||
| ds = ds.map(input_columns="image", operations=c_trans) | |||
| ds = ds.map(operations=type_cast_op, input_columns="label") | |||
| ds = ds.map(operations=c_trans, input_columns="image") | |||
| # apply repeat operations | |||
| ds = ds.repeat(repeat_num) | |||
| @@ -89,8 +89,8 @@ def create_dataset(repeat_num=1, training=True): | |||
| changeswap_op] | |||
| # apply map operations on images | |||
| ds = ds.map(input_columns="label", operations=type_cast_op) | |||
| ds = ds.map(input_columns="image", operations=c_trans) | |||
| ds = ds.map(operations=type_cast_op, input_columns="label") | |||
| ds = ds.map(operations=c_trans, input_columns="image") | |||
| # apply repeat operations | |||
| ds = ds.repeat(repeat_num) | |||
| @@ -298,21 +298,21 @@ def create_yolo_dataset(mindrecord_dir, batch_size=32, repeat_num=10, device_num | |||
| ds = de.MindDataset(mindrecord_dir, columns_list=["image", "annotation"], num_shards=device_num, shard_id=rank, | |||
| num_parallel_workers=num_parallel_workers, shuffle=False) | |||
| decode = C.Decode() | |||
| ds = ds.map(input_columns=["image"], operations=decode) | |||
| ds = ds.map(operations=decode, input_columns=["image"]) | |||
| compose_map_func = (lambda image, annotation: preprocess_fn(image, annotation, is_training)) | |||
| if is_training: | |||
| hwc_to_chw = C.HWC2CHW() | |||
| ds = ds.map(input_columns=["image", "annotation"], | |||
| ds = ds.map(operations=compose_map_func, input_columns=["image", "annotation"], | |||
| output_columns=["image", "bbox_1", "bbox_2", "bbox_3", "gt_box1", "gt_box2", "gt_box3"], | |||
| column_order=["image", "bbox_1", "bbox_2", "bbox_3", "gt_box1", "gt_box2", "gt_box3"], | |||
| operations=compose_map_func, num_parallel_workers=num_parallel_workers) | |||
| ds = ds.map(input_columns=["image"], operations=hwc_to_chw, num_parallel_workers=num_parallel_workers) | |||
| num_parallel_workers=num_parallel_workers) | |||
| ds = ds.map(operations=hwc_to_chw, input_columns=["image"], num_parallel_workers=num_parallel_workers) | |||
| ds = ds.batch(batch_size, drop_remainder=True) | |||
| ds = ds.repeat(repeat_num) | |||
| else: | |||
| ds = ds.map(input_columns=["image", "annotation"], | |||
| ds = ds.map(operations=compose_map_func, input_columns=["image", "annotation"], | |||
| output_columns=["image", "image_shape", "annotation"], | |||
| column_order=["image", "image_shape", "annotation"], | |||
| operations=compose_map_func, num_parallel_workers=num_parallel_workers) | |||
| num_parallel_workers=num_parallel_workers) | |||
| return ds | |||
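The YOLO hunk above shows the new order combined with a multi-output callable plus `output_columns`, `column_order` and `num_parallel_workers`. A self-contained sketch of that shape; the splitting function, column names and worker count below are invented for illustration, not part of create_yolo_dataset:

    import numpy as np
    import mindspore.dataset as ds

    def gen():
        yield (np.arange(6, dtype=np.float32),)

    def split_record(record):
        # toy stand-in for a preprocess function that returns several columns
        return record[:3], record[3:]

    data = ds.GeneratorDataset(gen, column_names=["record"])
    data = data.map(operations=split_record, input_columns=["record"],
                    output_columns=["head", "tail"],
                    column_order=["head", "tail"],
                    num_parallel_workers=2)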
| @@ -102,12 +102,12 @@ def me_de_train_dataset(sink_mode=False): | |||
| if sink_mode: | |||
| sink_size = 100 | |||
| new_repeat_count = 3 | |||
| ds = ds.map(input_columns="masked_lm_ids", operations=type_cast_op) | |||
| ds = ds.map(input_columns="masked_lm_positions", operations=type_cast_op) | |||
| ds = ds.map(input_columns="next_sentence_labels", operations=type_cast_op) | |||
| ds = ds.map(input_columns="segment_ids", operations=type_cast_op) | |||
| ds = ds.map(input_columns="input_mask", operations=type_cast_op) | |||
| ds = ds.map(input_columns="input_ids", operations=type_cast_op) | |||
| ds = ds.map(operations=type_cast_op, input_columns="masked_lm_ids") | |||
| ds = ds.map(operations=type_cast_op, input_columns="masked_lm_positions") | |||
| ds = ds.map(operations=type_cast_op, input_columns="next_sentence_labels") | |||
| ds = ds.map(operations=type_cast_op, input_columns="segment_ids") | |||
| ds = ds.map(operations=type_cast_op, input_columns="input_mask") | |||
| ds = ds.map(operations=type_cast_op, input_columns="input_ids") | |||
| # apply batch operations | |||
| ds = ds.batch(batch_size, drop_remainder=True) | |||
| logger.info("data size: {}".format(ds.get_dataset_size())) | |||
| @@ -102,12 +102,12 @@ def me_de_train_dataset(sink_mode=False): | |||
| if sink_mode: | |||
| sink_size = 100 | |||
| new_repeat_count = 3 | |||
| ds = ds.map(input_columns="masked_lm_ids", operations=type_cast_op) | |||
| ds = ds.map(input_columns="masked_lm_positions", operations=type_cast_op) | |||
| ds = ds.map(input_columns="next_sentence_labels", operations=type_cast_op) | |||
| ds = ds.map(input_columns="segment_ids", operations=type_cast_op) | |||
| ds = ds.map(input_columns="input_mask", operations=type_cast_op) | |||
| ds = ds.map(input_columns="input_ids", operations=type_cast_op) | |||
| ds = ds.map(operations=type_cast_op, input_columns="masked_lm_ids") | |||
| ds = ds.map(operations=type_cast_op, input_columns="masked_lm_positions") | |||
| ds = ds.map(operations=type_cast_op, input_columns="next_sentence_labels") | |||
| ds = ds.map(operations=type_cast_op, input_columns="segment_ids") | |||
| ds = ds.map(operations=type_cast_op, input_columns="input_mask") | |||
| ds = ds.map(operations=type_cast_op, input_columns="input_ids") | |||
| # apply batch operations | |||
| ds = ds.batch(batch_size, drop_remainder=True) | |||
| logger.info("data size: {}".format(ds.get_dataset_size())) | |||
| @@ -41,12 +41,12 @@ def create_bert_dataset(epoch_size=1, device_num=1, rank=0, do_shuffle="true", d | |||
| print('origin dataset size: ', ori_dataset_size) | |||
| new_repeat_count = int(repeat_count * ori_dataset_size // ds.get_dataset_size()) | |||
| type_cast_op = C.TypeCast(mstype.int32) | |||
| ds = ds.map(input_columns="masked_lm_ids", operations=type_cast_op) | |||
| ds = ds.map(input_columns="masked_lm_positions", operations=type_cast_op) | |||
| ds = ds.map(input_columns="next_sentence_labels", operations=type_cast_op) | |||
| ds = ds.map(input_columns="segment_ids", operations=type_cast_op) | |||
| ds = ds.map(input_columns="input_mask", operations=type_cast_op) | |||
| ds = ds.map(input_columns="input_ids", operations=type_cast_op) | |||
| ds = ds.map(operations=type_cast_op, input_columns="masked_lm_ids") | |||
| ds = ds.map(operations=type_cast_op, input_columns="masked_lm_positions") | |||
| ds = ds.map(operations=type_cast_op, input_columns="next_sentence_labels") | |||
| ds = ds.map(operations=type_cast_op, input_columns="segment_ids") | |||
| ds = ds.map(operations=type_cast_op, input_columns="input_mask") | |||
| ds = ds.map(operations=type_cast_op, input_columns="input_ids") | |||
| # apply batch operations | |||
| ds = ds.batch(bert_net_cfg.batch_size, drop_remainder=True) | |||
| ds = ds.repeat(max(new_repeat_count, repeat_count)) | |||
| @@ -92,12 +92,12 @@ def me_de_train_dataset(): | |||
| "next_sentence_labels", "masked_lm_positions", | |||
| "masked_lm_ids", "masked_lm_weights"], shuffle=False) | |||
| type_cast_op = C.TypeCast(mstype.int32) | |||
| ds = ds.map(input_columns="masked_lm_ids", operations=type_cast_op) | |||
| ds = ds.map(input_columns="masked_lm_positions", operations=type_cast_op) | |||
| ds = ds.map(input_columns="next_sentence_labels", operations=type_cast_op) | |||
| ds = ds.map(input_columns="segment_ids", operations=type_cast_op) | |||
| ds = ds.map(input_columns="input_mask", operations=type_cast_op) | |||
| ds = ds.map(input_columns="input_ids", operations=type_cast_op) | |||
| ds = ds.map(operations=type_cast_op, input_columns="masked_lm_ids") | |||
| ds = ds.map(operations=type_cast_op, input_columns="masked_lm_positions") | |||
| ds = ds.map(operations=type_cast_op, input_columns="next_sentence_labels") | |||
| ds = ds.map(operations=type_cast_op, input_columns="segment_ids") | |||
| ds = ds.map(operations=type_cast_op, input_columns="input_mask") | |||
| ds = ds.map(operations=type_cast_op, input_columns="input_ids") | |||
| # apply batch operations | |||
| batch_size = int(os.getenv('BATCH_SIZE', '16')) | |||
| ds = ds.batch(batch_size, drop_remainder=True) | |||
| @@ -97,10 +97,10 @@ def create_dataset(args, data_url, epoch_num=1, batch_size=1, usage="train", shu | |||
| # wrapped with GeneratorDataset | |||
| dataset = de.GeneratorDataset(dataset, ["image", "label"], sampler=None) | |||
| dataset = dataset.map(input_columns=["image", "label"], operations=DataTransform(args, usage=usage)) | |||
| dataset = dataset.map(operations=DataTransform(args, usage=usage), input_columns=["image", "label"]) | |||
| channelswap_op = C.HWC2CHW() | |||
| dataset = dataset.map(input_columns="image", operations=channelswap_op) | |||
| dataset = dataset.map(operations=channelswap_op, input_columns="image") | |||
| # 1464 samples / batch_size 8 = 183 batches | |||
| # epoch_num is num of steps | |||
| @@ -68,8 +68,8 @@ def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32): | |||
| type_cast_op = C2.TypeCast(mstype.int32) | |||
| ds = ds.map(input_columns="image", num_parallel_workers=8, operations=trans) | |||
| ds = ds.map(input_columns="label", num_parallel_workers=8, operations=type_cast_op) | |||
| ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=8) | |||
| ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=8) | |||
| # apply batch operations | |||
| ds = ds.batch(batch_size, drop_remainder=True) | |||
| @@ -71,8 +71,8 @@ def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32): | |||
| type_cast_op = C2.TypeCast(mstype.int32) | |||
| ds = ds.map(input_columns="image", num_parallel_workers=8, operations=trans) | |||
| ds = ds.map(input_columns="label", num_parallel_workers=8, operations=type_cast_op) | |||
| ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=8) | |||
| ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=8) | |||
| # apply batch operations | |||
| ds = ds.batch(batch_size, drop_remainder=True) | |||
| @@ -171,11 +171,11 @@ def create_dataset(data_path, batch_size=32, repeat_size=1, | |||
| type_cast_op = C.TypeCast(mstype.int32) | |||
| # apply map operations on images | |||
| mnist_ds = mnist_ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(input_columns="image", operations=resize_op, num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_op, num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_nml_op, num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(input_columns="image", operations=hwc2chw_op, num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(operations=resize_op, input_columns="image", num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(operations=rescale_op, input_columns="image", num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(operations=rescale_nml_op, input_columns="image", num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(operations=hwc2chw_op, input_columns="image", num_parallel_workers=num_parallel_workers) | |||
| # apply DatasetOps | |||
| buffer_size = 10000 | |||
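Because `operations` also accepts a list applied in sequence, the four image maps above could be fused into a single call if preferred; a sketch reusing the op names from this hunk (keeping them as separate maps, as the diff does, is equally valid):

    mnist_ds = mnist_ds.map(operations=[resize_op, rescale_op, rescale_nml_op, hwc2chw_op],
                            input_columns="image",
                            num_parallel_workers=num_parallel_workers)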
| @@ -47,11 +47,11 @@ def test_me_de_train_dataset(): | |||
| rescale_op = vision.Rescale(rescale, shift) | |||
| # apply map operations on images | |||
| data_set_new = data_set_new.map(input_columns="image/encoded", operations=decode_op) | |||
| data_set_new = data_set_new.map(input_columns="image/encoded", operations=resize_op) | |||
| data_set_new = data_set_new.map(input_columns="image/encoded", operations=rescale_op) | |||
| data_set_new = data_set_new.map(operations=decode_op, input_columns="image/encoded") | |||
| data_set_new = data_set_new.map(operations=resize_op, input_columns="image/encoded") | |||
| data_set_new = data_set_new.map(operations=rescale_op, input_columns="image/encoded") | |||
| hwc2chw_op = vision.HWC2CHW() | |||
| data_set_new = data_set_new.map(input_columns="image/encoded", operations=hwc2chw_op) | |||
| data_set_new = data_set_new.map(operations=hwc2chw_op, input_columns="image/encoded") | |||
| data_set_new = data_set_new.repeat(1) | |||
| # apply batch operations | |||
| batch_size_new = 32 | |||
| @@ -45,11 +45,11 @@ def create_dataset(data_path, batch_size=32, repeat_size=1, | |||
| type_cast_op = C.TypeCast(mstype.int32) | |||
| # apply map operations on images | |||
| mnist_ds = mnist_ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(input_columns="image", operations=resize_op, num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_op, num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_nml_op, num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(input_columns="image", operations=hwc2chw_op, num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(operations=resize_op, input_columns="image", num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(operations=rescale_op, input_columns="image", num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(operations=rescale_nml_op, input_columns="image", num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(operations=hwc2chw_op, input_columns="image", num_parallel_workers=num_parallel_workers) | |||
| # apply DatasetOps | |||
| buffer_size = 10000 | |||
| @@ -87,9 +87,9 @@ def create_dataset(data_path, batch_size=32, repeat_size=1, | |||
| hwc2chw_op = CV.HWC2CHW() | |||
| # apply map operations on images | |||
| mnist_ds = mnist_ds.map(input_columns="image", operations=resize_op, num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_op, num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(input_columns="image", operations=hwc2chw_op, num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(operations=resize_op, input_columns="image", num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(operations=rescale_op, input_columns="image", num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(operations=hwc2chw_op, input_columns="image", num_parallel_workers=num_parallel_workers) | |||
| # apply DatasetOps | |||
| mnist_ds = mnist_ds.batch(batch_size) | |||
| @@ -77,9 +77,9 @@ def create_dataset(data_path, batch_size=32, repeat_size=1, | |||
| hwc2chw_op = CV.HWC2CHW() | |||
| # apply map operations on images | |||
| mnist_ds = mnist_ds.map(input_columns="image", operations=resize_op, num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_op, num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(input_columns="image", operations=hwc2chw_op, num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(operations=resize_op, input_columns="image", num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(operations=rescale_op, input_columns="image", num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(operations=hwc2chw_op, input_columns="image", num_parallel_workers=num_parallel_workers) | |||
| # apply DatasetOps | |||
| mnist_ds = mnist_ds.batch(batch_size) | |||
| @@ -145,9 +145,9 @@ def create_dataset(data_path, batch_size=32, repeat_size=1, | |||
| hwc2chw_op = CV.HWC2CHW() | |||
| # apply map operations on images | |||
| mnist_ds = mnist_ds.map(input_columns="image", operations=resize_op, num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_op, num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(input_columns="image", operations=hwc2chw_op, num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(operations=resize_op, input_columns="image", num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(operations=rescale_op, input_columns="image", num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(operations=hwc2chw_op, input_columns="image", num_parallel_workers=num_parallel_workers) | |||
| # apply DatasetOps | |||
| mnist_ds = mnist_ds.batch(batch_size) | |||
| @@ -98,11 +98,11 @@ def create_dataset(data_path, batch_size=32, repeat_size=1, | |||
| type_cast_op = C.TypeCast(mstype.int32) | |||
| # apply map operations on images | |||
| mnist_ds = mnist_ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(input_columns="image", operations=resize_op, num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_op, num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_nml_op, num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(input_columns="image", operations=hwc2chw_op, num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(operations=resize_op, input_columns="image", num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(operations=rescale_op, input_columns="image", num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(operations=rescale_nml_op, input_columns="image", num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(operations=hwc2chw_op, input_columns="image", num_parallel_workers=num_parallel_workers) | |||
| # apply DatasetOps | |||
| buffer_size = 10000 | |||
| @@ -107,11 +107,11 @@ def create_dataset(data_path, batch_size=32, repeat_size=1, | |||
| type_cast_op = C.TypeCast(mstype.int32) | |||
| # apply map operations on images | |||
| mnist_ds = mnist_ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(input_columns="image", operations=resize_op, num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_op, num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_nml_op, num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(input_columns="image", operations=hwc2chw_op, num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(operations=resize_op, input_columns="image", num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(operations=rescale_op, input_columns="image", num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(operations=rescale_nml_op, input_columns="image", num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(operations=hwc2chw_op, input_columns="image", num_parallel_workers=num_parallel_workers) | |||
| # apply DatasetOps | |||
| buffer_size = 10000 | |||
| @@ -351,8 +351,8 @@ def create_dataset(repeat_num=1, training=True, batch_size=32): | |||
| changeswap_op] | |||
| # apply map operations on images | |||
| data_set = data_set.map(input_columns="label", operations=type_cast_op) | |||
| data_set = data_set.map(input_columns="image", operations=c_trans) | |||
| data_set = data_set.map(operations=type_cast_op, input_columns="label") | |||
| data_set = data_set.map(operations=c_trans, input_columns="image") | |||
| # apply shuffle operations | |||
| data_set = data_set.shuffle(buffer_size=1000) | |||
| @@ -45,11 +45,11 @@ def create_dataset(data_path, batch_size=32, repeat_size=1, | |||
| type_cast_op = C.TypeCast(mstype.int32) | |||
| # apply map operations on images | |||
| mnist_ds = mnist_ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(input_columns="image", operations=resize_op, num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_op, num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_nml_op, num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(input_columns="image", operations=hwc2chw_op, num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(operations=resize_op, input_columns="image", num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(operations=rescale_op, input_columns="image", num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(operations=rescale_nml_op, input_columns="image", num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(operations=hwc2chw_op, input_columns="image", num_parallel_workers=num_parallel_workers) | |||
| # apply DatasetOps | |||
| buffer_size = 10000 | |||
| @@ -114,11 +114,11 @@ def create_dataset(data_path, batch_size=32, repeat_size=1, num_parallel_workers | |||
| type_cast_op = C.TypeCast(mstype.int32) | |||
| # apply map operations on images | |||
| mnist_ds = mnist_ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(input_columns="image", operations=resize_op, num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_op, num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_nml_op, num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(input_columns="image", operations=hwc2chw_op, num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(operations=resize_op, input_columns="image", num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(operations=rescale_op, input_columns="image", num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(operations=rescale_nml_op, input_columns="image", num_parallel_workers=num_parallel_workers) | |||
| mnist_ds = mnist_ds.map(operations=hwc2chw_op, input_columns="image", num_parallel_workers=num_parallel_workers) | |||
| # apply DatasetOps | |||
| mnist_ds = mnist_ds.shuffle(buffer_size=10000) # 10000 as in LeNet train script | |||
| @@ -90,8 +90,8 @@ def create_dataset(repeat_num=1, training=True): | |||
| changeswap_op] | |||
| # apply map operations on images | |||
| data_set = data_set.map(input_columns="label", operations=type_cast_op) | |||
| data_set = data_set.map(input_columns="image", operations=c_trans) | |||
| data_set = data_set.map(operations=type_cast_op, input_columns="label") | |||
| data_set = data_set.map(operations=c_trans, input_columns="image") | |||
| # apply repeat operations | |||
| data_set = data_set.repeat(repeat_num) | |||
| @@ -68,8 +68,8 @@ def create_dataset(repeat_num=1, training=True, batch_size=32): | |||
| changeswap_op] | |||
| # apply map operations on images | |||
| data_set = data_set.map(input_columns="label", operations=type_cast_op) | |||
| data_set = data_set.map(input_columns="image", operations=c_trans) | |||
| data_set = data_set.map(operations=type_cast_op, input_columns="label") | |||
| data_set = data_set.map(operations=c_trans, input_columns="image") | |||
| # apply shuffle operations | |||
| data_set = data_set.shuffle(buffer_size=1000) | |||
| @@ -79,8 +79,8 @@ def create_dataset(repeat_num=1, training=True, batch_size=32, rank_id=0, rank_s | |||
| changeswap_op] | |||
| # apply map operations on images | |||
| data_set = data_set.map(input_columns="label", operations=type_cast_op) | |||
| data_set = data_set.map(input_columns="image", operations=c_trans) | |||
| data_set = data_set.map(operations=type_cast_op, input_columns="label") | |||
| data_set = data_set.map(operations=c_trans, input_columns="image") | |||
| # apply shuffle operations | |||
| data_set = data_set.shuffle(buffer_size=1000) | |||
| @@ -29,7 +29,7 @@ def test_case_0(): | |||
| # apply dataset operations | |||
| ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False) | |||
| ds1 = ds1.map(input_columns=col, output_columns="out", operations=(lambda x: x + x)) | |||
| ds1 = ds1.map(operations=(lambda x: x + x), input_columns=col, output_columns="out") | |||
| print("************** Output Tensor *****************") | |||
| for data in ds1.create_dict_iterator(): # each data is a dictionary | |||
| @@ -49,7 +49,7 @@ def test_case_1(): | |||
| # apply dataset operations | |||
| ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False) | |||
| ds1 = ds1.map(input_columns=col, output_columns=["out0", "out1"], operations=(lambda x: (x, x + x))) | |||
| ds1 = ds1.map(operations=(lambda x: (x, x + x)), input_columns=col, output_columns=["out0", "out1"]) | |||
| print("************** Output Tensor *****************") | |||
| for data in ds1.create_dict_iterator(): # each data is a dictionary | |||
| @@ -72,7 +72,7 @@ def test_case_2(): | |||
| # apply dataset operations | |||
| ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False) | |||
| ds1 = ds1.map(input_columns=col, output_columns="out", operations=(lambda x, y: x + y)) | |||
| ds1 = ds1.map(operations=(lambda x, y: x + y), input_columns=col, output_columns="out") | |||
| print("************** Output Tensor *****************") | |||
| for data in ds1.create_dict_iterator(): # each data is a dictionary | |||
| @@ -93,8 +93,8 @@ def test_case_3(): | |||
| # apply dataset operations | |||
| ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False) | |||
| ds1 = ds1.map(input_columns=col, output_columns=["out0", "out1", "out2"], | |||
| operations=(lambda x, y: (x, x + y, x + x + y))) | |||
| ds1 = ds1.map(operations=(lambda x, y: (x, x + y, x + x + y)), input_columns=col, | |||
| output_columns=["out0", "out1", "out2"]) | |||
| print("************** Output Tensor *****************") | |||
| for data in ds1.create_dict_iterator(): # each data is a dictionary | |||
| @@ -119,8 +119,8 @@ def test_case_4(): | |||
| # apply dataset operations | |||
| ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False) | |||
| ds1 = ds1.map(input_columns=col, output_columns=["out0", "out1", "out2"], num_parallel_workers=4, | |||
| operations=(lambda x, y: (x, x + y, x + x + y))) | |||
| ds1 = ds1.map(operations=(lambda x, y: (x, x + y, x + x + y)), input_columns=col, | |||
| output_columns=["out0", "out1", "out2"], num_parallel_workers=4) | |||
| print("************** Output Tensor *****************") | |||
| for data in ds1.create_dict_iterator(): # each data is a dictionary | |||
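For tests like these, the dictionary iterator also makes it easy to check that the renamed output columns exist after the new-style map; a sketch, assuming the `ds1` built in test_case_3 or test_case_4 above:

    for row in ds1.create_dict_iterator():
        # the mapped output columns should all be present in each row
        assert {"out0", "out1", "out2"} <= set(row.keys())
        break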
| @@ -39,12 +39,12 @@ def test_HWC2CHW(plot=False): | |||
| data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) | |||
| decode_op = c_vision.Decode() | |||
| hwc2chw_op = c_vision.HWC2CHW() | |||
| data1 = data1.map(input_columns=["image"], operations=decode_op) | |||
| data1 = data1.map(input_columns=["image"], operations=hwc2chw_op) | |||
| data1 = data1.map(operations=decode_op, input_columns=["image"]) | |||
| data1 = data1.map(operations=hwc2chw_op, input_columns=["image"]) | |||
| # Second dataset | |||
| data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) | |||
| data2 = data2.map(input_columns=["image"], operations=decode_op) | |||
| data2 = data2.map(operations=decode_op, input_columns=["image"]) | |||
| image_transposed = [] | |||
| image = [] | |||
| @@ -72,8 +72,8 @@ def test_HWC2CHW_md5(): | |||
| data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) | |||
| decode_op = c_vision.Decode() | |||
| hwc2chw_op = c_vision.HWC2CHW() | |||
| data1 = data1.map(input_columns=["image"], operations=decode_op) | |||
| data1 = data1.map(input_columns=["image"], operations=hwc2chw_op) | |||
| data1 = data1.map(operations=decode_op, input_columns=["image"]) | |||
| data1 = data1.map(operations=hwc2chw_op, input_columns=["image"]) | |||
| # Compare with expected md5 from images | |||
| filename = "HWC2CHW_01_result.npz" | |||
| @@ -90,8 +90,8 @@ def test_HWC2CHW_comp(plot=False): | |||
| data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) | |||
| decode_op = c_vision.Decode() | |||
| hwc2chw_op = c_vision.HWC2CHW() | |||
| data1 = data1.map(input_columns=["image"], operations=decode_op) | |||
| data1 = data1.map(input_columns=["image"], operations=hwc2chw_op) | |||
| data1 = data1.map(operations=decode_op, input_columns=["image"]) | |||
| data1 = data1.map(operations=hwc2chw_op, input_columns=["image"]) | |||
| # Second dataset | |||
| data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) | |||
| @@ -101,7 +101,7 @@ def test_HWC2CHW_comp(plot=False): | |||
| py_vision.HWC2CHW() | |||
| ] | |||
| transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) | |||
| data2 = data2.map(input_columns=["image"], operations=transform) | |||
| data2 = data2.map(operations=transform, input_columns=["image"]) | |||
| image_c_transposed = [] | |||
| image_py_transposed = [] | |||
| @@ -42,8 +42,7 @@ def test_auto_contrast_py(plot=False): | |||
| F.Resize((224, 224)), | |||
| F.ToTensor()]) | |||
| ds_original = ds.map(input_columns="image", | |||
| operations=transforms_original) | |||
| ds_original = ds.map(operations=transforms_original, input_columns="image") | |||
| ds_original = ds_original.batch(512) | |||
| @@ -64,8 +63,7 @@ def test_auto_contrast_py(plot=False): | |||
| F.AutoContrast(cutoff=10.0, ignore=[10, 20]), | |||
| F.ToTensor()]) | |||
| ds_auto_contrast = ds.map(input_columns="image", | |||
| operations=transforms_auto_contrast) | |||
| ds_auto_contrast = ds.map(operations=transforms_auto_contrast, input_columns="image") | |||
| ds_auto_contrast = ds_auto_contrast.batch(512) | |||
| @@ -99,17 +97,14 @@ def test_auto_contrast_c(plot=False): | |||
| # AutoContrast Images | |||
| ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False) | |||
| ds = ds.map(input_columns=["image"], | |||
| operations=[C.Decode(), | |||
| C.Resize((224, 224))]) | |||
| ds = ds.map(operations=[C.Decode(), C.Resize((224, 224))], input_columns=["image"]) | |||
| python_op = F.AutoContrast(cutoff=10.0, ignore=[10, 20]) | |||
| c_op = C.AutoContrast(cutoff=10.0, ignore=[10, 20]) | |||
| transforms_op = mindspore.dataset.transforms.py_transforms.Compose([lambda img: F.ToPIL()(img.astype(np.uint8)), | |||
| python_op, | |||
| np.array]) | |||
| ds_auto_contrast_py = ds.map(input_columns="image", | |||
| operations=transforms_op) | |||
| ds_auto_contrast_py = ds.map(operations=transforms_op, input_columns="image") | |||
| ds_auto_contrast_py = ds_auto_contrast_py.batch(512) | |||
| @@ -122,12 +117,9 @@ def test_auto_contrast_c(plot=False): | |||
| axis=0) | |||
| ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False) | |||
| ds = ds.map(input_columns=["image"], | |||
| operations=[C.Decode(), | |||
| C.Resize((224, 224))]) | |||
| ds = ds.map(operations=[C.Decode(), C.Resize((224, 224))], input_columns=["image"]) | |||
| ds_auto_contrast_c = ds.map(input_columns="image", | |||
| operations=c_op) | |||
| ds_auto_contrast_c = ds.map(operations=c_op, input_columns="image") | |||
| ds_auto_contrast_c = ds_auto_contrast_c.batch(512) | |||
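The Compose used above wraps a python-vision op so it can consume the numpy images produced by the C pipeline. A standalone sketch of that wrapping pattern, where the random image is only a stand-in for real data:

    import numpy as np
    import mindspore.dataset.transforms.py_transforms as py_transforms
    import mindspore.dataset.vision.py_transforms as F

    # numpy array -> PIL image -> python AutoContrast -> back to numpy
    wrap = py_transforms.Compose([lambda img: F.ToPIL()(img.astype(np.uint8)),
                                  F.AutoContrast(cutoff=10.0, ignore=[10, 20]),
                                  np.array])
    fake_img = np.random.randint(0, 255, size=(224, 224, 3), dtype=np.uint8)
    out = wrap(fake_img)  # numpy array in, numpy array out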
| @@ -162,9 +154,7 @@ def test_auto_contrast_one_channel_c(plot=False): | |||
| # AutoContrast Images | |||
| ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False) | |||
| ds = ds.map(input_columns=["image"], | |||
| operations=[C.Decode(), | |||
| C.Resize((224, 224))]) | |||
| ds = ds.map(operations=[C.Decode(), C.Resize((224, 224))], input_columns=["image"]) | |||
| python_op = F.AutoContrast() | |||
| c_op = C.AutoContrast() | |||
| # not using F.ToTensor() since it converts to floats | |||
| @@ -174,8 +164,7 @@ def test_auto_contrast_one_channel_c(plot=False): | |||
| python_op, | |||
| np.array]) | |||
| ds_auto_contrast_py = ds.map(input_columns="image", | |||
| operations=transforms_op) | |||
| ds_auto_contrast_py = ds.map(operations=transforms_op, input_columns="image") | |||
| ds_auto_contrast_py = ds_auto_contrast_py.batch(512) | |||
| @@ -188,13 +177,10 @@ def test_auto_contrast_one_channel_c(plot=False): | |||
| axis=0) | |||
| ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False) | |||
| ds = ds.map(input_columns=["image"], | |||
| operations=[C.Decode(), | |||
| C.Resize((224, 224)), | |||
| lambda img: np.array(img[:, :, 0])]) | |||
| ds = ds.map(operations=[C.Decode(), C.Resize((224, 224)), lambda img: np.array(img[:, :, 0])], | |||
| input_columns=["image"]) | |||
| ds_auto_contrast_c = ds.map(input_columns="image", | |||
| operations=c_op) | |||
| ds_auto_contrast_c = ds.map(operations=c_op, input_columns="image") | |||
| ds_auto_contrast_c = ds_auto_contrast_c.batch(512) | |||
| @@ -223,8 +209,7 @@ def test_auto_contrast_mnist_c(plot=False): | |||
| """ | |||
| logger.info("Test AutoContrast C Op With MNIST Images") | |||
| ds = de.MnistDataset(dataset_dir=MNIST_DATA_DIR, num_samples=2, shuffle=False) | |||
| ds_auto_contrast_c = ds.map(input_columns="image", | |||
| operations=C.AutoContrast(cutoff=1, ignore=(0, 255))) | |||
| ds_auto_contrast_c = ds.map(operations=C.AutoContrast(cutoff=1, ignore=(0, 255)), input_columns="image") | |||
| ds_orig = de.MnistDataset(dataset_dir=MNIST_DATA_DIR, num_samples=2, shuffle=False) | |||
| images = [] | |||
| @@ -252,25 +237,20 @@ def test_auto_contrast_invalid_ignore_param_c(): | |||
| logger.info("Test AutoContrast C Op with invalid ignore parameter") | |||
| try: | |||
| ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False) | |||
| ds = ds.map(input_columns=["image"], | |||
| operations=[C.Decode(), | |||
| ds = ds.map(operations=[C.Decode(), | |||
| C.Resize((224, 224)), | |||
| lambda img: np.array(img[:, :, 0])]) | |||
| lambda img: np.array(img[:, :, 0])], input_columns=["image"]) | |||
| # invalid ignore | |||
| ds = ds.map(input_columns="image", | |||
| operations=C.AutoContrast(ignore=255.5)) | |||
| ds = ds.map(operations=C.AutoContrast(ignore=255.5), input_columns="image") | |||
| except TypeError as error: | |||
| logger.info("Got an exception in DE: {}".format(str(error))) | |||
| assert "Argument ignore with value 255.5 is not of type" in str(error) | |||
| try: | |||
| ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False) | |||
| ds = ds.map(input_columns=["image"], | |||
| operations=[C.Decode(), | |||
| C.Resize((224, 224)), | |||
| lambda img: np.array(img[:, :, 0])]) | |||
| ds = ds.map(operations=[C.Decode(), C.Resize((224, 224)), | |||
| lambda img: np.array(img[:, :, 0])], input_columns=["image"]) | |||
| # invalid ignore | |||
| ds = ds.map(input_columns="image", | |||
| operations=C.AutoContrast(ignore=(10, 100))) | |||
| ds = ds.map(operations=C.AutoContrast(ignore=(10, 100)), input_columns="image") | |||
| except TypeError as error: | |||
| logger.info("Got an exception in DE: {}".format(str(error))) | |||
| assert "Argument ignore with value (10,100) is not of type" in str(error) | |||
| @@ -283,25 +263,21 @@ def test_auto_contrast_invalid_cutoff_param_c(): | |||
| logger.info("Test AutoContrast C Op with invalid cutoff parameter") | |||
| try: | |||
| ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False) | |||
| ds = ds.map(input_columns=["image"], | |||
| operations=[C.Decode(), | |||
| ds = ds.map(operations=[C.Decode(), | |||
| C.Resize((224, 224)), | |||
| lambda img: np.array(img[:, :, 0])]) | |||
| lambda img: np.array(img[:, :, 0])], input_columns=["image"]) | |||
| # invalid ignore | |||
| ds = ds.map(input_columns="image", | |||
| operations=C.AutoContrast(cutoff=-10.0)) | |||
| ds = ds.map(operations=C.AutoContrast(cutoff=-10.0), input_columns="image") | |||
| except ValueError as error: | |||
| logger.info("Got an exception in DE: {}".format(str(error))) | |||
| assert "Input cutoff is not within the required interval of (0 to 100)." in str(error) | |||
| try: | |||
| ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False) | |||
| ds = ds.map(input_columns=["image"], | |||
| operations=[C.Decode(), | |||
| ds = ds.map(operations=[C.Decode(), | |||
| C.Resize((224, 224)), | |||
| lambda img: np.array(img[:, :, 0])]) | |||
| lambda img: np.array(img[:, :, 0])], input_columns=["image"]) | |||
| # invalid ignore | |||
| ds = ds.map(input_columns="image", | |||
| operations=C.AutoContrast(cutoff=120.0)) | |||
| ds = ds.map(operations=C.AutoContrast(cutoff=120.0), input_columns="image") | |||
| except ValueError as error: | |||
| logger.info("Got an exception in DE: {}".format(str(error))) | |||
| assert "Input cutoff is not within the required interval of (0 to 100)." in str(error) | |||
| @@ -314,21 +290,21 @@ def test_auto_contrast_invalid_ignore_param_py(): | |||
| logger.info("Test AutoContrast python Op with invalid ignore parameter") | |||
| try: | |||
| ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False) | |||
| ds = ds.map(input_columns=["image"], | |||
| operations=[mindspore.dataset.transforms.py_transforms.Compose([F.Decode(), | |||
| ds = ds.map(operations=[mindspore.dataset.transforms.py_transforms.Compose([F.Decode(), | |||
| F.Resize((224, 224)), | |||
| F.AutoContrast(ignore=255.5), | |||
| F.ToTensor()])]) | |||
| F.ToTensor()])], | |||
| input_columns=["image"]) | |||
| except TypeError as error: | |||
| logger.info("Got an exception in DE: {}".format(str(error))) | |||
| assert "Argument ignore with value 255.5 is not of type" in str(error) | |||
| try: | |||
| ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False) | |||
| ds = ds.map(input_columns=["image"], | |||
| operations=[mindspore.dataset.transforms.py_transforms.Compose([F.Decode(), | |||
| ds = ds.map(operations=[mindspore.dataset.transforms.py_transforms.Compose([F.Decode(), | |||
| F.Resize((224, 224)), | |||
| F.AutoContrast(ignore=(10, 100)), | |||
| F.ToTensor()])]) | |||
| F.ToTensor()])], | |||
| input_columns=["image"]) | |||
| except TypeError as error: | |||
| logger.info("Got an exception in DE: {}".format(str(error))) | |||
| assert "Argument ignore with value (10,100) is not of type" in str(error) | |||
| @@ -341,21 +317,22 @@ def test_auto_contrast_invalid_cutoff_param_py(): | |||
| logger.info("Test AutoContrast python Op with invalid cutoff parameter") | |||
| try: | |||
| ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False) | |||
| ds = ds.map(input_columns=["image"], | |||
| operations=[mindspore.dataset.transforms.py_transforms.Compose([F.Decode(), | |||
| ds = ds.map(operations=[mindspore.dataset.transforms.py_transforms.Compose([F.Decode(), | |||
| F.Resize((224, 224)), | |||
| F.AutoContrast(cutoff=-10.0), | |||
| F.ToTensor()])]) | |||
| F.ToTensor()])], | |||
| input_columns=["image"]) | |||
| except ValueError as error: | |||
| logger.info("Got an exception in DE: {}".format(str(error))) | |||
| assert "Input cutoff is not within the required interval of (0 to 100)." in str(error) | |||
| try: | |||
| ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False) | |||
| ds = ds.map(input_columns=["image"], | |||
| operations=[mindspore.dataset.transforms.py_transforms.Compose([F.Decode(), | |||
| F.Resize((224, 224)), | |||
| F.AutoContrast(cutoff=120.0), | |||
| F.ToTensor()])]) | |||
| ds = ds.map( | |||
| operations=[mindspore.dataset.transforms.py_transforms.Compose([F.Decode(), | |||
| F.Resize((224, 224)), | |||
| F.AutoContrast(cutoff=120.0), | |||
| F.ToTensor()])], | |||
| input_columns=["image"]) | |||
| except ValueError as error: | |||
| logger.info("Got an exception in DE: {}".format(str(error))) | |||
| assert "Input cutoff is not within the required interval of (0 to 100)." in str(error) | |||
| @@ -49,10 +49,9 @@ def test_bounding_box_augment_with_rotation_op(plot_vis=False): | |||
| test_op = c_vision.BoundingBoxAugment(c_vision.RandomRotation(90), 1) | |||
| # map to apply ops | |||
| dataVoc2 = dataVoc2.map(input_columns=["image", "bbox"], | |||
| dataVoc2 = dataVoc2.map(operations=[test_op], input_columns=["image", "bbox"], | |||
| output_columns=["image", "bbox"], | |||
| column_order=["image", "bbox"], | |||
| operations=[test_op]) | |||
| column_order=["image", "bbox"]) | |||
| filename = "bounding_box_augment_rotation_c_result.npz" | |||
| save_and_check_md5(dataVoc2, filename, generate_golden=GENERATE_GOLDEN) | |||
| @@ -88,10 +87,9 @@ def test_bounding_box_augment_with_crop_op(plot_vis=False): | |||
| test_op = c_vision.BoundingBoxAugment(c_vision.RandomCrop(50), 0.9) | |||
| # map to apply ops | |||
| dataVoc2 = dataVoc2.map(input_columns=["image", "bbox"], | |||
| dataVoc2 = dataVoc2.map(operations=[test_op], input_columns=["image", "bbox"], | |||
| output_columns=["image", "bbox"], | |||
| column_order=["image", "bbox"], | |||
| operations=[test_op]) | |||
| column_order=["image", "bbox"]) | |||
| filename = "bounding_box_augment_crop_c_result.npz" | |||
| save_and_check_md5(dataVoc2, filename, generate_golden=GENERATE_GOLDEN) | |||
| @@ -126,10 +124,9 @@ def test_bounding_box_augment_valid_ratio_c(plot_vis=False): | |||
| test_op = c_vision.BoundingBoxAugment(c_vision.RandomHorizontalFlip(1), 0.9) | |||
| # map to apply ops | |||
| dataVoc2 = dataVoc2.map(input_columns=["image", "bbox"], | |||
| dataVoc2 = dataVoc2.map(operations=[test_op], input_columns=["image", "bbox"], | |||
| output_columns=["image", "bbox"], | |||
| column_order=["image", "bbox"], | |||
| operations=[test_op]) # Add column for "bbox" | |||
| column_order=["image", "bbox"]) # Add column for "bbox" | |||
| filename = "bounding_box_augment_valid_ratio_c_result.npz" | |||
| save_and_check_md5(dataVoc2, filename, generate_golden=GENERATE_GOLDEN) | |||
| @@ -163,10 +160,9 @@ def test_bounding_box_augment_op_coco_c(plot_vis=False): | |||
| test_op = c_vision.BoundingBoxAugment(c_vision.RandomHorizontalFlip(1), 1) | |||
| dataCoco2 = dataCoco2.map(input_columns=["image", "bbox"], | |||
| dataCoco2 = dataCoco2.map(operations=[test_op], input_columns=["image", "bbox"], | |||
| output_columns=["image", "bbox"], | |||
| column_order=["image", "bbox"], | |||
| operations=[test_op]) | |||
| column_order=["image", "bbox"]) | |||
| unaugSamp, augSamp = [], [] | |||
| @@ -195,20 +191,19 @@ def test_bounding_box_augment_valid_edge_c(plot_vis=False): | |||
| # map to apply ops | |||
| # Add column for "bbox" | |||
| dataVoc1 = dataVoc1.map(input_columns=["image", "bbox"], | |||
| dataVoc1 = dataVoc1.map( | |||
| operations=lambda img, bbox: (img, np.array([[0, 0, img.shape[1], img.shape[0], 0, 0, 0]]).astype(np.float32)), | |||
| input_columns=["image", "bbox"], | |||
| output_columns=["image", "bbox"], | |||
| column_order=["image", "bbox"]) | |||
| dataVoc2 = dataVoc2.map( | |||
| operations=lambda img, bbox: (img, np.array([[0, 0, img.shape[1], img.shape[0], 0, 0, 0]]).astype(np.float32)), | |||
| input_columns=["image", "bbox"], | |||
| output_columns=["image", "bbox"], | |||
| column_order=["image", "bbox"]) | |||
| dataVoc2 = dataVoc2.map(operations=[test_op], input_columns=["image", "bbox"], | |||
| output_columns=["image", "bbox"], | |||
| column_order=["image", "bbox"], | |||
| operations=lambda img, bbox: | |||
| (img, np.array([[0, 0, img.shape[1], img.shape[0], 0, 0, 0]]).astype(np.float32))) | |||
| dataVoc2 = dataVoc2.map(input_columns=["image", "bbox"], | |||
| output_columns=["image", "bbox"], | |||
| column_order=["image", "bbox"], | |||
| operations=lambda img, bbox: | |||
| (img, np.array([[0, 0, img.shape[1], img.shape[0], 0, 0, 0]]).astype(np.float32))) | |||
| dataVoc2 = dataVoc2.map(input_columns=["image", "bbox"], | |||
| output_columns=["image", "bbox"], | |||
| column_order=["image", "bbox"], | |||
| operations=[test_op]) | |||
| column_order=["image", "bbox"]) | |||
| filename = "bounding_box_augment_valid_edge_c_result.npz" | |||
| save_and_check_md5(dataVoc2, filename, generate_golden=GENERATE_GOLDEN) | |||
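For the bounding-box hunks, the three column arguments keep their old meaning; only their position relative to `operations` changes. A sketch using the Coco Detection pattern from these tests, with placeholder paths:

```python
import mindspore.dataset as ds
import mindspore.dataset.vision.c_transforms as c_vision

DATA_DIR = "/path/to/coco/images"              # placeholder
ANNOTATION_FILE = "/path/to/annotations.json"  # placeholder

data_coco = ds.CocoDataset(DATA_DIR, annotation_file=ANNOTATION_FILE,
                           task="Detection", decode=True, shuffle=False)

# Flip each image and adjust its boxes; ratio=1 applies the op to every row.
test_op = c_vision.BoundingBoxAugment(c_vision.RandomHorizontalFlip(1), 1)

data_coco = data_coco.map(operations=[test_op],
                          input_columns=["image", "bbox"],
                          output_columns=["image", "bbox"],
                          column_order=["image", "bbox"])
```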
| @@ -238,10 +233,9 @@ def test_bounding_box_augment_invalid_ratio_c(): | |||
| # ratio range is from 0 - 1 | |||
| test_op = c_vision.BoundingBoxAugment(c_vision.RandomHorizontalFlip(1), 1.5) | |||
| # map to apply ops | |||
| dataVoc2 = dataVoc2.map(input_columns=["image", "bbox"], | |||
| dataVoc2 = dataVoc2.map(operations=[test_op], input_columns=["image", "bbox"], | |||
| output_columns=["image", "bbox"], | |||
| column_order=["image", "bbox"], | |||
| operations=[test_op]) # Add column for "bbox" | |||
| column_order=["image", "bbox"]) # Add column for "bbox" | |||
| except ValueError as error: | |||
| logger.info("Got an exception in DE: {}".format(str(error))) | |||
| assert "Input ratio is not within the required interval of (0.0 to 1.0)." in str(error) | |||
| @@ -25,7 +25,7 @@ def test_compose(): | |||
| def test_config(arr, op_list): | |||
| try: | |||
| data = ds.NumpySlicesDataset(arr, column_names="col", shuffle=False) | |||
| data = data.map(input_columns=["col"], operations=ops.Compose(op_list)) | |||
| data = data.map(operations=ops.Compose(op_list), input_columns=["col"]) | |||
| res = [] | |||
| for i in data.create_dict_iterator(num_epochs=1): | |||
| res.append(i["col"].tolist()) | |||
| @@ -24,7 +24,7 @@ def test_random_apply(): | |||
| def test_config(arr, op_list, prob=0.5): | |||
| try: | |||
| data = ds.NumpySlicesDataset(arr, column_names="col", shuffle=False) | |||
| data = data.map(input_columns=["col"], operations=ops.RandomApply(op_list, prob)) | |||
| data = data.map(operations=ops.RandomApply(op_list, prob), input_columns=["col"]) | |||
| res = [] | |||
| for i in data.create_dict_iterator(num_epochs=1): | |||
| res.append(i["col"].tolist()) | |||
| @@ -48,7 +48,7 @@ def test_cache_map_basic1(): | |||
| # This DATA_DIR only has 2 images in it | |||
| ds1 = ds.ImageFolderDataset(dataset_dir=DATA_DIR, cache=some_cache) | |||
| decode_op = c_vision.Decode() | |||
| ds1 = ds1.map(input_columns=["image"], operations=decode_op) | |||
| ds1 = ds1.map(operations=decode_op, input_columns=["image"]) | |||
| ds1 = ds1.repeat(4) | |||
| filename = "cache_map_01_result.npz" | |||
| @@ -77,7 +77,7 @@ def test_cache_map_basic2(): | |||
| # This DATA_DIR only has 2 images in it | |||
| ds1 = ds.ImageFolderDataset(dataset_dir=DATA_DIR) | |||
| decode_op = c_vision.Decode() | |||
| ds1 = ds1.map(input_columns=["image"], operations=decode_op, cache=some_cache) | |||
| ds1 = ds1.map(operations=decode_op, input_columns=["image"], cache=some_cache) | |||
| ds1 = ds1.repeat(4) | |||
| filename = "cache_map_02_result.npz" | |||
| @@ -107,7 +107,7 @@ def test_cache_map_basic3(): | |||
| ds1 = ds.ImageFolderDataset(dataset_dir=DATA_DIR) | |||
| decode_op = c_vision.Decode() | |||
| ds1 = ds1.repeat(4) | |||
| ds1 = ds1.map(input_columns=["image"], operations=decode_op, cache=some_cache) | |||
| ds1 = ds1.map(operations=decode_op, input_columns=["image"], cache=some_cache) | |||
| logger.info("ds1.dataset_size is ", ds1.get_dataset_size()) | |||
| num_iter = 0 | |||
| @@ -131,7 +131,7 @@ def test_cache_map_basic4(): | |||
| ds1 = ds.ImageFolderDataset(dataset_dir=DATA_DIR, cache=some_cache) | |||
| decode_op = c_vision.Decode() | |||
| ds1 = ds1.repeat(4) | |||
| ds1 = ds1.map(input_columns=["image"], operations=decode_op) | |||
| ds1 = ds1.map(operations=decode_op, input_columns=["image"]) | |||
| logger.info("ds1.dataset_size is ", ds1.get_dataset_size()) | |||
| shape = ds1.output_shapes() | |||
| logger.info(shape) | |||
| @@ -167,7 +167,7 @@ def test_cache_map_failure1(): | |||
| # This DATA_DIR only has 2 images in it | |||
| ds1 = ds.ImageFolderDataset(dataset_dir=DATA_DIR, cache=some_cache) | |||
| decode_op = c_vision.Decode() | |||
| ds1 = ds1.map(input_columns=["image"], operations=decode_op, cache=some_cache) | |||
| ds1 = ds1.map(operations=decode_op, input_columns=["image"], cache=some_cache) | |||
| ds1 = ds1.repeat(4) | |||
| try: | |||
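The cache tests keep `cache=` as a trailing keyword; only `operations`/`input_columns` swap places. A sketch, assuming a dataset cache server is already running and session 1 exists (as these tests require), with a placeholder image folder:

```python
import mindspore.dataset as ds
import mindspore.dataset.vision.c_transforms as c_vision

DATA_DIR = "/path/to/imagefolder"  # placeholder

# Requires a running dataset cache server; session_id=1 is assumed to exist.
some_cache = ds.DatasetCache(session_id=1, size=0, spilling=True)

ds1 = ds.ImageFolderDataset(dataset_dir=DATA_DIR)
ds1 = ds1.map(operations=c_vision.Decode(), input_columns=["image"], cache=some_cache)
ds1 = ds1.repeat(4)

num_iter = sum(1 for _ in ds1.create_dict_iterator(num_epochs=1))
print("rows produced:", num_iter)
```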
| @@ -108,7 +108,7 @@ def test_cache_nomap_basic3(): | |||
| some_cache = ds.DatasetCache(session_id=1, size=0, spilling=True) | |||
| ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False, cache=some_cache) | |||
| decode_op = c_vision.Decode() | |||
| ds1 = ds1.map(input_columns=["image"], operations=decode_op) | |||
| ds1 = ds1.map(operations=decode_op, input_columns=["image"]) | |||
| ds1 = ds1.repeat(4) | |||
| num_iter = 0 | |||
| @@ -160,7 +160,7 @@ def test_cache_nomap_basic4(): | |||
| ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=ds.Shuffle.GLOBAL) | |||
| decode_op = c_vision.Decode() | |||
| ds1 = ds1.map(input_columns=["image"], operations=decode_op, cache=some_cache) | |||
| ds1 = ds1.map(operations=decode_op, input_columns=["image"], cache=some_cache) | |||
| ds1 = ds1.repeat(4) | |||
| num_iter = 0 | |||
| @@ -197,7 +197,7 @@ def test_cache_nomap_basic5(): | |||
| some_cache = ds.DatasetCache(session_id=1, size=0, spilling=True) | |||
| ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], cache=some_cache) | |||
| decode_op = c_vision.Decode() | |||
| ds1 = ds1.map(input_columns=["image"], operations=decode_op) | |||
| ds1 = ds1.map(operations=decode_op, input_columns=["image"]) | |||
| ds1 = ds1.repeat(4) | |||
| num_iter = 0 | |||
| @@ -237,7 +237,7 @@ def test_cache_nomap_basic6(): | |||
| # there was not any cache. | |||
| ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], num_shards=3, shard_id=1, cache=some_cache) | |||
| decode_op = c_vision.Decode() | |||
| ds1 = ds1.map(input_columns=["image"], operations=decode_op) | |||
| ds1 = ds1.map(operations=decode_op, input_columns=["image"]) | |||
| ds1 = ds1.repeat(4) | |||
| num_iter = 0 | |||
| @@ -273,7 +273,7 @@ def test_cache_nomap_basic7(): | |||
| ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=ds.Shuffle.GLOBAL, cache=some_cache) | |||
| decode_op = c_vision.Decode() | |||
| ds1 = ds1.map(input_columns=["image"], operations=decode_op) | |||
| ds1 = ds1.map(operations=decode_op, input_columns=["image"]) | |||
| ds1 = ds1.repeat(4) | |||
| num_iter = 0 | |||
| @@ -343,11 +343,11 @@ def test_cache_nomap_allowed_share2(): | |||
| decode_op = c_vision.Decode() | |||
| ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) | |||
| ds1 = ds1.map(input_columns=["image"], operations=decode_op, cache=some_cache) | |||
| ds1 = ds1.map(operations=decode_op, input_columns=["image"], cache=some_cache) | |||
| ds1 = ds1.repeat(4) | |||
| ds2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) | |||
| ds2 = ds2.map(input_columns=["image"], operations=decode_op, cache=some_cache) | |||
| ds2 = ds2.map(operations=decode_op, input_columns=["image"], cache=some_cache) | |||
| ds2 = ds2.shuffle(buffer_size=2) | |||
| num_iter = 0 | |||
| @@ -418,10 +418,10 @@ def test_cache_nomap_allowed_share4(): | |||
| decode_op = c_vision.Decode() | |||
| ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) | |||
| ds1 = ds1.map(input_columns=["image"], operations=decode_op, cache=some_cache, num_parallel_workers=1) | |||
| ds1 = ds1.map(operations=decode_op, input_columns=["image"], cache=some_cache, num_parallel_workers=1) | |||
| ds2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) | |||
| ds2 = ds2.map(input_columns=["image"], operations=decode_op, cache=some_cache, num_parallel_workers=2) | |||
| ds2 = ds2.map(operations=decode_op, input_columns=["image"], cache=some_cache, num_parallel_workers=2) | |||
| num_iter = 0 | |||
| for _ in ds1.create_dict_iterator(num_epochs=1): | |||
| @@ -458,10 +458,10 @@ def test_cache_nomap_disallowed_share1(): | |||
| rescale_op = c_vision.Rescale(1.0 / 255.0, -1.0) | |||
| ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) | |||
| ds1 = ds1.map(input_columns=["image"], operations=decode_op, cache=some_cache) | |||
| ds1 = ds1.map(operations=decode_op, input_columns=["image"], cache=some_cache) | |||
| ds2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) | |||
| ds2 = ds2.map(input_columns=["image"], operations=rescale_op, cache=some_cache) | |||
| ds2 = ds2.map(operations=rescale_op, input_columns=["image"], cache=some_cache) | |||
| num_iter = 0 | |||
| for _ in ds1.create_dict_iterator(num_epochs=1): | |||
| @@ -40,12 +40,12 @@ def test_center_crop_op(height=375, width=375, plot=False): | |||
| decode_op = vision.Decode() | |||
| # 3 images [375, 500] [600, 500] [512, 512] | |||
| center_crop_op = vision.CenterCrop([height, width]) | |||
| data1 = data1.map(input_columns=["image"], operations=decode_op) | |||
| data1 = data1.map(input_columns=["image"], operations=center_crop_op) | |||
| data1 = data1.map(operations=decode_op, input_columns=["image"]) | |||
| data1 = data1.map(operations=center_crop_op, input_columns=["image"]) | |||
| # Second dataset | |||
| data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"]) | |||
| data2 = data2.map(input_columns=["image"], operations=decode_op) | |||
| data2 = data2.map(operations=decode_op, input_columns=["image"]) | |||
| image_cropped = [] | |||
| image = [] | |||
| @@ -67,8 +67,8 @@ def test_center_crop_md5(height=375, width=375): | |||
| decode_op = vision.Decode() | |||
| # 3 images [375, 500] [600, 500] [512, 512] | |||
| center_crop_op = vision.CenterCrop([height, width]) | |||
| data1 = data1.map(input_columns=["image"], operations=decode_op) | |||
| data1 = data1.map(input_columns=["image"], operations=center_crop_op) | |||
| data1 = data1.map(operations=decode_op, input_columns=["image"]) | |||
| data1 = data1.map(operations=center_crop_op, input_columns=["image"]) | |||
| # Compare with expected md5 from images | |||
| filename = "center_crop_01_result.npz" | |||
| save_and_check_md5(data1, filename, generate_golden=GENERATE_GOLDEN) | |||
| @@ -84,8 +84,8 @@ def test_center_crop_comp(height=375, width=375, plot=False): | |||
| data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) | |||
| decode_op = vision.Decode() | |||
| center_crop_op = vision.CenterCrop([height, width]) | |||
| data1 = data1.map(input_columns=["image"], operations=decode_op) | |||
| data1 = data1.map(input_columns=["image"], operations=center_crop_op) | |||
| data1 = data1.map(operations=decode_op, input_columns=["image"]) | |||
| data1 = data1.map(operations=center_crop_op, input_columns=["image"]) | |||
| # Second dataset | |||
| data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) | |||
| @@ -95,7 +95,7 @@ def test_center_crop_comp(height=375, width=375, plot=False): | |||
| py_vision.ToTensor() | |||
| ] | |||
| transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) | |||
| data2 = data2.map(input_columns=["image"], operations=transform) | |||
| data2 = data2.map(operations=transform, input_columns=["image"]) | |||
| image_c_cropped = [] | |||
| image_py_cropped = [] | |||
| @@ -126,11 +126,11 @@ def test_crop_grayscale(height=375, width=375): | |||
| transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) | |||
| data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) | |||
| data1 = data1.map(input_columns=["image"], operations=transform) | |||
| data1 = data1.map(operations=transform, input_columns=["image"]) | |||
| # If input is grayscale, the output dimensions should be single channel | |||
| crop_gray = vision.CenterCrop([height, width]) | |||
| data1 = data1.map(input_columns=["image"], operations=crop_gray) | |||
| data1 = data1.map(operations=crop_gray, input_columns=["image"]) | |||
| for item1 in data1.create_dict_iterator(num_epochs=1): | |||
| c_image = item1["image"] | |||
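The CenterCrop hunks show the same reordering for both styles: C++ ops go one per `map()` call, while the Python chain is composed and mapped once. A sketch with placeholder TFRecord inputs:

```python
import mindspore.dataset as ds
import mindspore.dataset.vision.c_transforms as vision
import mindspore.dataset.vision.py_transforms as py_vision
import mindspore.dataset.transforms.py_transforms as py_trans

DATA_DIR = ["/path/to/images.tfrecord"]     # placeholder
SCHEMA_DIR = "/path/to/datasetSchema.json"  # placeholder
height, width = 375, 375

# C++ pipeline: one op per map() call, operations first.
data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
data1 = data1.map(operations=vision.Decode(), input_columns=["image"])
data1 = data1.map(operations=vision.CenterCrop([height, width]), input_columns=["image"])

# Python pipeline: compose the whole chain and map it once.
transform = py_trans.Compose([py_vision.Decode(),
                              py_vision.CenterCrop([height, width]),
                              py_vision.ToTensor()])
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
data2 = data2.map(operations=transform, input_columns=["image"])
```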
| @@ -121,7 +121,7 @@ def test_concat_05(): | |||
| data2 = ds.GeneratorDataset(generator_10, ["col1"]) | |||
| type_cast_op = C.TypeCast(mstype.float32) | |||
| data1 = data1.map(input_columns=["col1"], operations=type_cast_op) | |||
| data1 = data1.map(operations=type_cast_op, input_columns=["col1"]) | |||
| data3 = data1 + data2 | |||
| @@ -319,8 +319,8 @@ def test_concat_14(): | |||
| F.Resize((224, 224)), | |||
| F.ToTensor()]) | |||
| data1 = data1.map(input_columns=["image"], operations=transforms1) | |||
| data2 = data2.map(input_columns=["image"], operations=transforms1) | |||
| data1 = data1.map(operations=transforms1, input_columns=["image"]) | |||
| data2 = data2.map(operations=transforms1, input_columns=["image"]) | |||
| data3 = data1 + data2 | |||
| expected, output = [], [] | |||
| @@ -31,7 +31,7 @@ def test_concatenate_op_all(): | |||
| append_tensor = np.array([9., 10.3, 11., 12.], dtype=np.float) | |||
| data = ds.GeneratorDataset(gen, column_names=["col"]) | |||
| concatenate_op = data_trans.Concatenate(0, prepend_tensor, append_tensor) | |||
| data = data.map(input_columns=["col"], operations=concatenate_op) | |||
| data = data.map(operations=concatenate_op, input_columns=["col"]) | |||
| expected = np.array([1.4, 2., 3., 4., 4.5, 5., 6., 7., 8., 9., 10.3, | |||
| 11., 12.]) | |||
| for data_row in data: | |||
| @@ -45,7 +45,7 @@ def test_concatenate_op_none(): | |||
| data = ds.GeneratorDataset(gen, column_names=["col"]) | |||
| concatenate_op = data_trans.Concatenate() | |||
| data = data.map(input_columns=["col"], operations=concatenate_op) | |||
| data = data.map(operations=concatenate_op, input_columns=["col"]) | |||
| for data_row in data: | |||
| np.testing.assert_array_equal(data_row[0], np.array([5., 6., 7., 8.], dtype=np.float)) | |||
| @@ -59,7 +59,7 @@ def test_concatenate_op_string(): | |||
| data = ds.GeneratorDataset(gen, column_names=["col"]) | |||
| concatenate_op = data_trans.Concatenate(0, prepend_tensor, append_tensor) | |||
| data = data.map(input_columns=["col"], operations=concatenate_op) | |||
| data = data.map(operations=concatenate_op, input_columns=["col"]) | |||
| expected = np.array(["dw", "df", "ss", "ad", "dwsdf", "df"], dtype='S') | |||
| for data_row in data: | |||
| np.testing.assert_array_equal(data_row[0], expected) | |||
| @@ -74,8 +74,8 @@ def test_concatenate_op_multi_input_string(): | |||
| concatenate_op = data_trans.Concatenate(0, prepend=prepend_tensor, append=append_tensor) | |||
| data = data.map(input_columns=["col1", "col2"], column_order=["out1"], output_columns=["out1"], | |||
| operations=concatenate_op) | |||
| data = data.map(operations=concatenate_op, input_columns=["col1", "col2"], column_order=["out1"], | |||
| output_columns=["out1"]) | |||
| expected = np.array(["dw", "df", "1", "2", "d", "3", "4", "e", "dwsdf", "df"], dtype='S') | |||
| for data_row in data: | |||
| np.testing.assert_array_equal(data_row[0], expected) | |||
| @@ -89,8 +89,8 @@ def test_concatenate_op_multi_input_numeric(): | |||
| concatenate_op = data_trans.Concatenate(0, prepend=prepend_tensor) | |||
| data = data.map(input_columns=["col1", "col2"], column_order=["out1"], output_columns=["out1"], | |||
| operations=concatenate_op) | |||
| data = data.map(operations=concatenate_op, input_columns=["col1", "col2"], column_order=["out1"], | |||
| output_columns=["out1"]) | |||
| expected = np.array([3, 5, 1, 2, 3, 4]) | |||
| for data_row in data: | |||
| np.testing.assert_array_equal(data_row[0], expected) | |||
| @@ -104,7 +104,7 @@ def test_concatenate_op_type_mismatch(): | |||
| data = ds.GeneratorDataset(gen, column_names=["col"]) | |||
| concatenate_op = data_trans.Concatenate(0, prepend_tensor) | |||
| data = data.map(input_columns=["col"], operations=concatenate_op) | |||
| data = data.map(operations=concatenate_op, input_columns=["col"]) | |||
| with pytest.raises(RuntimeError) as error_info: | |||
| for _ in data: | |||
| pass | |||
| @@ -119,7 +119,7 @@ def test_concatenate_op_type_mismatch2(): | |||
| data = ds.GeneratorDataset(gen, column_names=["col"]) | |||
| concatenate_op = data_trans.Concatenate(0, prepend_tensor) | |||
| data = data.map(input_columns=["col"], operations=concatenate_op) | |||
| data = data.map(operations=concatenate_op, input_columns=["col"]) | |||
| with pytest.raises(RuntimeError) as error_info: | |||
| for _ in data: | |||
| pass | |||
| @@ -134,7 +134,7 @@ def test_concatenate_op_incorrect_dim(): | |||
| concatenate_op = data_trans.Concatenate(0, prepend_tensor) | |||
| data = ds.GeneratorDataset(gen, column_names=["col"]) | |||
| data = data.map(input_columns=["col"], operations=concatenate_op) | |||
| data = data.map(operations=concatenate_op, input_columns=["col"]) | |||
| with pytest.raises(RuntimeError) as error_info: | |||
| for _ in data: | |||
| pass | |||
| @@ -155,7 +155,7 @@ def test_concatenate_op_negative_axis(): | |||
| append_tensor = np.array([9., 10.3, 11., 12.], dtype=np.float) | |||
| data = ds.GeneratorDataset(gen, column_names=["col"]) | |||
| concatenate_op = data_trans.Concatenate(-1, prepend_tensor, append_tensor) | |||
| data = data.map(input_columns=["col"], operations=concatenate_op) | |||
| data = data.map(operations=concatenate_op, input_columns=["col"]) | |||
| expected = np.array([1.4, 2., 3., 4., 4.5, 5., 6., 7., 8., 9., 10.3, | |||
| 11., 12.]) | |||
| for data_row in data: | |||
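The Concatenate hunks follow the same pattern; a self-contained sketch of the single-column case from these tests:

```python
import numpy as np
import mindspore.dataset as ds
import mindspore.dataset.transforms.c_transforms as data_trans

def gen():
    # One 1-D float column, as in the Concatenate tests above.
    yield (np.array([5., 6., 7., 8.], dtype=np.float64),)

prepend_tensor = np.array([1.4, 2., 3., 4., 4.5], dtype=np.float64)
append_tensor = np.array([9., 10.3, 11., 12.], dtype=np.float64)

data = ds.GeneratorDataset(gen, column_names=["col"])
concatenate_op = data_trans.Concatenate(0, prepend_tensor, append_tensor)

# operations leads; the op prepends and appends along axis 0.
data = data.map(operations=concatenate_op, input_columns=["col"])

for row in data.create_dict_iterator(num_epochs=1):
    print(row["col"])  # [1.4 2. 3. 4. 4.5 5. 6. 7. 8. 9. 10.3 11. 12.]
```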
| @@ -86,12 +86,12 @@ def test_pipeline(): | |||
| num_parallel_workers_original = ds.config.get_num_parallel_workers() | |||
| data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False) | |||
| data1 = data1.map(input_columns=["image"], operations=[c_vision.Decode(True)]) | |||
| data1 = data1.map(operations=[c_vision.Decode(True)], input_columns=["image"]) | |||
| ds.serialize(data1, "testpipeline.json") | |||
| data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, num_parallel_workers=num_parallel_workers_original, | |||
| shuffle=False) | |||
| data2 = data2.map(input_columns=["image"], operations=[c_vision.Decode(True)]) | |||
| data2 = data2.map(operations=[c_vision.Decode(True)], input_columns=["image"]) | |||
| ds.serialize(data2, "testpipeline2.json") | |||
| # check that the generated output is different | |||
| @@ -131,14 +131,14 @@ def test_deterministic_run_fail(): | |||
| # outputs a deterministic series of numbers, e.g. "a" = [1, 2, 3, 4, 5, 6] <- pretend these are random | |||
| random_crop_op = c_vision.RandomCrop([512, 512], [200, 200, 200, 200]) | |||
| decode_op = c_vision.Decode() | |||
| data1 = data1.map(input_columns=["image"], operations=decode_op) | |||
| data1 = data1.map(input_columns=["image"], operations=random_crop_op) | |||
| data1 = data1.map(operations=decode_op, input_columns=["image"]) | |||
| data1 = data1.map(operations=random_crop_op, input_columns=["image"]) | |||
| # Second dataset | |||
| data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) | |||
| data2 = data2.map(input_columns=["image"], operations=decode_op) | |||
| data2 = data2.map(operations=decode_op, input_columns=["image"]) | |||
| # If the seed is set in the constructor | |||
| data2 = data2.map(input_columns=["image"], operations=random_crop_op) | |||
| data2 = data2.map(operations=random_crop_op, input_columns=["image"]) | |||
| try: | |||
| dataset_equal(data1, data2, 0) | |||
| @@ -171,16 +171,16 @@ def test_seed_undeterministic(): | |||
| # We get the seed when constructor is called | |||
| random_crop_op = c_vision.RandomCrop([512, 512], [200, 200, 200, 200]) | |||
| decode_op = c_vision.Decode() | |||
| data1 = data1.map(input_columns=["image"], operations=decode_op) | |||
| data1 = data1.map(input_columns=["image"], operations=random_crop_op) | |||
| data1 = data1.map(operations=decode_op, input_columns=["image"]) | |||
| data1 = data1.map(operations=random_crop_op, input_columns=["image"]) | |||
| # Second dataset | |||
| data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) | |||
| data2 = data2.map(input_columns=["image"], operations=decode_op) | |||
| data2 = data2.map(operations=decode_op, input_columns=["image"]) | |||
| # Since the seed is set in the constructor, the two ops output a deterministic sequence. | |||
| # Assume the generated random sequence "a" = [1, 2, 3, 4, 5, 6] <- pretend these are random | |||
| random_crop_op2 = c_vision.RandomCrop([512, 512], [200, 200, 200, 200]) | |||
| data2 = data2.map(input_columns=["image"], operations=random_crop_op2) | |||
| data2 = data2.map(operations=random_crop_op2, input_columns=["image"]) | |||
| try: | |||
| dataset_equal(data1, data2, 0) | |||
| except Exception as e: | |||
| @@ -211,15 +211,15 @@ def test_seed_deterministic(): | |||
| # seed will be read in during constructor call | |||
| random_crop_op = c_vision.RandomCrop([512, 512], [200, 200, 200, 200]) | |||
| decode_op = c_vision.Decode() | |||
| data1 = data1.map(input_columns=["image"], operations=decode_op) | |||
| data1 = data1.map(input_columns=["image"], operations=random_crop_op) | |||
| data1 = data1.map(operations=decode_op, input_columns=["image"]) | |||
| data1 = data1.map(operations=random_crop_op, input_columns=["image"]) | |||
| # Second dataset | |||
| data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) | |||
| data2 = data2.map(input_columns=["image"], operations=decode_op) | |||
| data2 = data2.map(operations=decode_op, input_columns=["image"]) | |||
| # If the seed is set in the constructor, the two ops output a deterministic sequence | |||
| random_crop_op2 = c_vision.RandomCrop([512, 512], [200, 200, 200, 200]) | |||
| data2 = data2.map(input_columns=["image"], operations=random_crop_op2) | |||
| data2 = data2.map(operations=random_crop_op2, input_columns=["image"]) | |||
| dataset_equal(data1, data2, 0) | |||
| @@ -246,15 +246,15 @@ def test_deterministic_run_distribution(): | |||
| data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) | |||
| random_horizontal_flip_op = c_vision.RandomHorizontalFlip(0.1) | |||
| decode_op = c_vision.Decode() | |||
| data1 = data1.map(input_columns=["image"], operations=decode_op) | |||
| data1 = data1.map(input_columns=["image"], operations=random_horizontal_flip_op) | |||
| data1 = data1.map(operations=decode_op, input_columns=["image"]) | |||
| data1 = data1.map(operations=random_horizontal_flip_op, input_columns=["image"]) | |||
| # Second dataset | |||
| data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) | |||
| data2 = data2.map(input_columns=["image"], operations=decode_op) | |||
| data2 = data2.map(operations=decode_op, input_columns=["image"]) | |||
| # If the seed is set in the constructor, the two ops output a deterministic sequence | |||
| random_horizontal_flip_op2 = c_vision.RandomHorizontalFlip(0.1) | |||
| data2 = data2.map(input_columns=["image"], operations=random_horizontal_flip_op2) | |||
| data2 = data2.map(operations=random_horizontal_flip_op2, input_columns=["image"]) | |||
| dataset_equal(data1, data2, 0) | |||
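The determinism tests rely on the random ops reading the global seed at construction time; the `map()` reordering does not change that. A sketch of two pipelines built after one `set_seed()` call, with placeholder TFRecord inputs, which these tests expect to produce identical output:

```python
import mindspore.dataset as ds
import mindspore.dataset.vision.c_transforms as c_vision

DATA_DIR = ["/path/to/images.tfrecord"]     # placeholder
SCHEMA_DIR = "/path/to/datasetSchema.json"  # placeholder

ds.config.set_seed(1)  # RandomCrop picks up this seed when constructed
decode_op = c_vision.Decode()

data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
data1 = data1.map(operations=decode_op, input_columns=["image"])
data1 = data1.map(operations=c_vision.RandomCrop([512, 512], [200, 200, 200, 200]),
                  input_columns=["image"])

data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
data2 = data2.map(operations=decode_op, input_columns=["image"])
data2 = data2.map(operations=c_vision.RandomCrop([512, 512], [200, 200, 200, 200]),
                  input_columns=["image"])
```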
| @@ -285,7 +285,7 @@ def test_deterministic_python_seed(): | |||
| py_vision.ToTensor(), | |||
| ] | |||
| transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) | |||
| data1 = data1.map(input_columns=["image"], operations=transform) | |||
| data1 = data1.map(operations=transform, input_columns=["image"]) | |||
| data1_output = [] | |||
| # config.set_seed() calls random.seed() | |||
| for data_one in data1.create_dict_iterator(num_epochs=1): | |||
| @@ -293,7 +293,7 @@ def test_deterministic_python_seed(): | |||
| # Second dataset | |||
| data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) | |||
| data2 = data2.map(input_columns=["image"], operations=transform) | |||
| data2 = data2.map(operations=transform, input_columns=["image"]) | |||
| # config.set_seed() calls random.seed(), resets seed for next dataset iterator | |||
| ds.config.set_seed(0) | |||
| @@ -328,7 +328,7 @@ def test_deterministic_python_seed_multi_thread(): | |||
| py_vision.ToTensor(), | |||
| ] | |||
| transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) | |||
| data1 = data1.map(input_columns=["image"], operations=transform, python_multiprocessing=True) | |||
| data1 = data1.map(operations=transform, input_columns=["image"], python_multiprocessing=True) | |||
| data1_output = [] | |||
| # config.set_seed() calls random.seed() | |||
| for data_one in data1.create_dict_iterator(num_epochs=1): | |||
| @@ -337,7 +337,7 @@ def test_deterministic_python_seed_multi_thread(): | |||
| # Second dataset | |||
| data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) | |||
| # If the seed is set in the constructor | |||
| data2 = data2.map(input_columns=["image"], operations=transform, python_multiprocessing=True) | |||
| data2 = data2.map(operations=transform, input_columns=["image"], python_multiprocessing=True) | |||
| # config.set_seed() calls random.seed() | |||
| ds.config.set_seed(0) | |||
| @@ -30,6 +30,7 @@ SCHEMA_DIR = "../data/dataset/test_tf_file_3_images/datasetSchema.json" | |||
| GENERATE_GOLDEN = False | |||
| def test_cut_out_op(plot=False): | |||
| """ | |||
| Test Cutout | |||
| @@ -45,7 +46,7 @@ def test_cut_out_op(plot=False): | |||
| f.RandomErasing(value='random') | |||
| ] | |||
| transform_1 = mindspore.dataset.transforms.py_transforms.Compose(transforms_1) | |||
| data1 = data1.map(input_columns=["image"], operations=transform_1) | |||
| data1 = data1.map(operations=transform_1, input_columns=["image"]) | |||
| # Second dataset | |||
| data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) | |||
| @@ -57,7 +58,7 @@ def test_cut_out_op(plot=False): | |||
| cut_out_op | |||
| ] | |||
| data2 = data2.map(input_columns=["image"], operations=transforms_2) | |||
| data2 = data2.map(operations=transforms_2, input_columns=["image"]) | |||
| num_iter = 0 | |||
| for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): | |||
| @@ -91,7 +92,7 @@ def test_cut_out_op_multicut(plot=False): | |||
| f.ToTensor(), | |||
| ] | |||
| transform_1 = mindspore.dataset.transforms.py_transforms.Compose(transforms_1) | |||
| data1 = data1.map(input_columns=["image"], operations=transform_1) | |||
| data1 = data1.map(operations=transform_1, input_columns=["image"]) | |||
| # Second dataset | |||
| data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) | |||
| @@ -103,7 +104,7 @@ def test_cut_out_op_multicut(plot=False): | |||
| cut_out_op | |||
| ] | |||
| data2 = data2.map(input_columns=["image"], operations=transforms_2) | |||
| data2 = data2.map(operations=transforms_2, input_columns=["image"]) | |||
| num_iter = 0 | |||
| image_list_1, image_list_2 = [], [] | |||
| @@ -136,8 +137,8 @@ def test_cut_out_md5(): | |||
| data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) | |||
| decode_op = c.Decode() | |||
| cut_out_op = c.CutOut(100) | |||
| data1 = data1.map(input_columns=["image"], operations=decode_op) | |||
| data1 = data1.map(input_columns=["image"], operations=cut_out_op) | |||
| data1 = data1.map(operations=decode_op, input_columns=["image"]) | |||
| data1 = data1.map(operations=cut_out_op, input_columns=["image"]) | |||
| data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) | |||
| transforms = [ | |||
| @@ -146,7 +147,7 @@ def test_cut_out_md5(): | |||
| f.Cutout(100) | |||
| ] | |||
| transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) | |||
| data2 = data2.map(input_columns=["image"], operations=transform) | |||
| data2 = data2.map(operations=transform, input_columns=["image"]) | |||
| # Compare with expected md5 from images | |||
| filename1 = "cut_out_01_c_result.npz" | |||
| @@ -174,7 +175,7 @@ def test_cut_out_comp(plot=False): | |||
| f.Cutout(200) | |||
| ] | |||
| transform_1 = mindspore.dataset.transforms.py_transforms.Compose(transforms_1) | |||
| data1 = data1.map(input_columns=["image"], operations=transform_1) | |||
| data1 = data1.map(operations=transform_1, input_columns=["image"]) | |||
| # Second dataset | |||
| data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) | |||
| @@ -184,7 +185,7 @@ def test_cut_out_comp(plot=False): | |||
| c.CutOut(200) | |||
| ] | |||
| data2 = data2.map(input_columns=["image"], operations=transforms_2) | |||
| data2 = data2.map(operations=transforms_2, input_columns=["image"]) | |||
| num_iter = 0 | |||
| image_list_1, image_list_2 = [], [] | |||
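Same change for the CutOut tests: the C++ op and the composed Python chain both move `operations` to the front. A sketch with placeholder TFRecord inputs:

```python
import mindspore.dataset as ds
import mindspore.dataset.vision.c_transforms as c
import mindspore.dataset.vision.py_transforms as f
import mindspore.dataset.transforms.py_transforms as py_trans

DATA_DIR = ["/path/to/images.tfrecord"]     # placeholder
SCHEMA_DIR = "/path/to/datasetSchema.json"  # placeholder

# C++ pipeline: Decode then CutOut, one map() per op.
data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
data1 = data1.map(operations=c.Decode(), input_columns=["image"])
data1 = data1.map(operations=c.CutOut(100), input_columns=["image"])

# Python pipeline: the equivalent chain composed and mapped once
# (py Cutout runs on the tensor produced by ToTensor).
transform = py_trans.Compose([f.Decode(), f.ToTensor(), f.Cutout(100)])
data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
data2 = data2.map(operations=transform, input_columns=["image"])
```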
| @@ -51,12 +51,12 @@ def test_cutmix_batch_success1(plot=False): | |||
| # CutMix Images | |||
| data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False) | |||
| hwc2chw_op = vision.HWC2CHW() | |||
| data1 = data1.map(input_columns=["image"], operations=hwc2chw_op) | |||
| data1 = data1.map(operations=hwc2chw_op, input_columns=["image"]) | |||
| one_hot_op = data_trans.OneHot(num_classes=10) | |||
| data1 = data1.map(input_columns=["label"], operations=one_hot_op) | |||
| data1 = data1.map(operations=one_hot_op, input_columns=["label"]) | |||
| cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NCHW, 2.0, 0.5) | |||
| data1 = data1.batch(5, drop_remainder=True) | |||
| data1 = data1.map(input_columns=["image", "label"], operations=cutmix_batch_op) | |||
| data1 = data1.map(operations=cutmix_batch_op, input_columns=["image", "label"]) | |||
| images_cutmix = None | |||
| for idx, (image, _) in enumerate(data1): | |||
| @@ -94,12 +94,12 @@ def test_cutmix_batch_success2(plot=False): | |||
| # CutMix Images | |||
| data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False) | |||
| one_hot_op = data_trans.OneHot(num_classes=10) | |||
| data1 = data1.map(input_columns=["label"], operations=one_hot_op) | |||
| rescale_op = vision.Rescale((1.0/255.0), 0.0) | |||
| data1 = data1.map(input_columns=["image"], operations=rescale_op) | |||
| data1 = data1.map(operations=one_hot_op, input_columns=["label"]) | |||
| rescale_op = vision.Rescale((1.0 / 255.0), 0.0) | |||
| data1 = data1.map(operations=rescale_op, input_columns=["image"]) | |||
| cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NHWC) | |||
| data1 = data1.batch(5, drop_remainder=True) | |||
| data1 = data1.map(input_columns=["image", "label"], operations=cutmix_batch_op) | |||
| data1 = data1.map(operations=cutmix_batch_op, input_columns=["image", "label"]) | |||
| images_cutmix = None | |||
| for idx, (image, _) in enumerate(data1): | |||
| @@ -125,7 +125,7 @@ def test_cutmix_batch_success3(plot=False): | |||
| ds_original = ds.ImageFolderDataset(dataset_dir=DATA_DIR2, shuffle=False) | |||
| decode_op = vision.Decode() | |||
| ds_original = ds_original.map(input_columns=["image"], operations=[decode_op]) | |||
| ds_original = ds_original.map(operations=[decode_op], input_columns=["image"]) | |||
| ds_original = ds_original.batch(4, pad_info={}, drop_remainder=True) | |||
| images_original = None | |||
| @@ -139,14 +139,14 @@ def test_cutmix_batch_success3(plot=False): | |||
| data1 = ds.ImageFolderDataset(dataset_dir=DATA_DIR2, shuffle=False) | |||
| decode_op = vision.Decode() | |||
| data1 = data1.map(input_columns=["image"], operations=[decode_op]) | |||
| data1 = data1.map(operations=[decode_op], input_columns=["image"]) | |||
| one_hot_op = data_trans.OneHot(num_classes=10) | |||
| data1 = data1.map(input_columns=["label"], operations=one_hot_op) | |||
| data1 = data1.map(operations=one_hot_op, input_columns=["label"]) | |||
| cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NHWC) | |||
| data1 = data1.batch(4, pad_info={}, drop_remainder=True) | |||
| data1 = data1.map(input_columns=["image", "label"], operations=cutmix_batch_op) | |||
| data1 = data1.map(operations=cutmix_batch_op, input_columns=["image", "label"]) | |||
| images_cutmix = None | |||
| for idx, (image, _) in enumerate(data1): | |||
| @@ -172,7 +172,7 @@ def test_cutmix_batch_success4(plot=False): | |||
| ds_original = ds.CelebADataset(DATA_DIR3, shuffle=False) | |||
| decode_op = vision.Decode() | |||
| ds_original = ds_original.map(input_columns=["image"], operations=[decode_op]) | |||
| ds_original = ds_original.map(operations=[decode_op], input_columns=["image"]) | |||
| ds_original = ds_original.batch(2, drop_remainder=True) | |||
| images_original = None | |||
| @@ -186,14 +186,14 @@ def test_cutmix_batch_success4(plot=False): | |||
| data1 = ds.CelebADataset(dataset_dir=DATA_DIR3, shuffle=False) | |||
| decode_op = vision.Decode() | |||
| data1 = data1.map(input_columns=["image"], operations=[decode_op]) | |||
| data1 = data1.map(operations=[decode_op], input_columns=["image"]) | |||
| one_hot_op = data_trans.OneHot(num_classes=100) | |||
| data1 = data1.map(input_columns=["attr"], operations=one_hot_op) | |||
| data1 = data1.map(operations=one_hot_op, input_columns=["attr"]) | |||
| cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NHWC, 0.5, 0.9) | |||
| data1 = data1.batch(2, drop_remainder=True) | |||
| data1 = data1.map(input_columns=["image", "attr"], operations=cutmix_batch_op) | |||
| data1 = data1.map(operations=cutmix_batch_op, input_columns=["image", "attr"]) | |||
| images_cutmix = None | |||
| for idx, (image, _) in enumerate(data1): | |||
| @@ -223,10 +223,10 @@ def test_cutmix_batch_nhwc_md5(): | |||
| data = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False) | |||
| one_hot_op = data_trans.OneHot(num_classes=10) | |||
| data = data.map(input_columns=["label"], operations=one_hot_op) | |||
| data = data.map(operations=one_hot_op, input_columns=["label"]) | |||
| cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NHWC) | |||
| data = data.batch(5, drop_remainder=True) | |||
| data = data.map(input_columns=["image", "label"], operations=cutmix_batch_op) | |||
| data = data.map(operations=cutmix_batch_op, input_columns=["image", "label"]) | |||
| filename = "cutmix_batch_c_nhwc_result.npz" | |||
| save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN) | |||
| @@ -247,12 +247,12 @@ def test_cutmix_batch_nchw_md5(): | |||
| # CutMixBatch Images | |||
| data = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False) | |||
| hwc2chw_op = vision.HWC2CHW() | |||
| data = data.map(input_columns=["image"], operations=hwc2chw_op) | |||
| data = data.map(operations=hwc2chw_op, input_columns=["image"]) | |||
| one_hot_op = data_trans.OneHot(num_classes=10) | |||
| data = data.map(input_columns=["label"], operations=one_hot_op) | |||
| data = data.map(operations=one_hot_op, input_columns=["label"]) | |||
| cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NCHW) | |||
| data = data.batch(5, drop_remainder=True) | |||
| data = data.map(input_columns=["image", "label"], operations=cutmix_batch_op) | |||
| data = data.map(operations=cutmix_batch_op, input_columns=["image", "label"]) | |||
| filename = "cutmix_batch_c_nchw_result.npz" | |||
| save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN) | |||
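CutMixBatch is the multi-column case: labels are one-hot encoded, the data is batched, and only then is the batch-level op mapped over ["image", "label"]. A sketch with a placeholder CIFAR-10 directory, assuming ImageBatchFormat is imported from mindspore.dataset.vision.utils as in these tests:

```python
import mindspore.dataset as ds
import mindspore.dataset.vision.c_transforms as vision
import mindspore.dataset.transforms.c_transforms as data_trans
import mindspore.dataset.vision.utils as mode

DATA_DIR = "/path/to/cifar10"  # placeholder CIFAR-10 binary directory

data = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)

# One-hot the labels and batch first: CutMixBatch mixes within a batch.
data = data.map(operations=data_trans.OneHot(num_classes=10), input_columns=["label"])
data = data.batch(5, drop_remainder=True)
data = data.map(operations=vision.CutMixBatch(mode.ImageBatchFormat.NHWC),
                input_columns=["image", "label"])

for image, label in data:
    print(image.shape, label.shape)  # expected (5, 32, 32, 3) and (5, 10)
```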
| @@ -273,10 +273,10 @@ def test_cutmix_batch_fail1(): | |||
| data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False) | |||
| one_hot_op = data_trans.OneHot(num_classes=10) | |||
| data1 = data1.map(input_columns=["label"], operations=one_hot_op) | |||
| data1 = data1.map(operations=one_hot_op, input_columns=["label"]) | |||
| cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NHWC) | |||
| with pytest.raises(RuntimeError) as error: | |||
| data1 = data1.map(input_columns=["image", "label"], operations=cutmix_batch_op) | |||
| data1 = data1.map(operations=cutmix_batch_op, input_columns=["image", "label"]) | |||
| for idx, (image, _) in enumerate(data1): | |||
| if idx == 0: | |||
| images_cutmix = image | |||
| @@ -297,7 +297,7 @@ def test_cutmix_batch_fail2(): | |||
| data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False) | |||
| one_hot_op = data_trans.OneHot(num_classes=10) | |||
| data1 = data1.map(input_columns=["label"], operations=one_hot_op) | |||
| data1 = data1.map(operations=one_hot_op, input_columns=["label"]) | |||
| with pytest.raises(ValueError) as error: | |||
| vision.CutMixBatch(mode.ImageBatchFormat.NHWC, -1) | |||
| error_message = "Input is not within the required interval" | |||
| @@ -315,7 +315,7 @@ def test_cutmix_batch_fail3(): | |||
| data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False) | |||
| one_hot_op = data_trans.OneHot(num_classes=10) | |||
| data1 = data1.map(input_columns=["label"], operations=one_hot_op) | |||
| data1 = data1.map(operations=one_hot_op, input_columns=["label"]) | |||
| with pytest.raises(ValueError) as error: | |||
| vision.CutMixBatch(mode.ImageBatchFormat.NHWC, 1, 2) | |||
| error_message = "Input is not within the required interval" | |||
| @@ -333,7 +333,7 @@ def test_cutmix_batch_fail4(): | |||
| data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False) | |||
| one_hot_op = data_trans.OneHot(num_classes=10) | |||
| data1 = data1.map(input_columns=["label"], operations=one_hot_op) | |||
| data1 = data1.map(operations=one_hot_op, input_columns=["label"]) | |||
| with pytest.raises(ValueError) as error: | |||
| vision.CutMixBatch(mode.ImageBatchFormat.NHWC, 1, -1) | |||
| error_message = "Input is not within the required interval" | |||
| @@ -351,10 +351,10 @@ def test_cutmix_batch_fail5(): | |||
| data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False) | |||
| one_hot_op = data_trans.OneHot(num_classes=10) | |||
| data1 = data1.map(input_columns=["label"], operations=one_hot_op) | |||
| data1 = data1.map(operations=one_hot_op, input_columns=["label"]) | |||
| cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NHWC) | |||
| data1 = data1.batch(5, drop_remainder=True) | |||
| data1 = data1.map(input_columns=["image"], operations=cutmix_batch_op) | |||
| data1 = data1.map(operations=cutmix_batch_op, input_columns=["image"]) | |||
| with pytest.raises(RuntimeError) as error: | |||
| images_cutmix = np.array([]) | |||
| @@ -378,10 +378,10 @@ def test_cutmix_batch_fail6(): | |||
| data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False) | |||
| one_hot_op = data_trans.OneHot(num_classes=10) | |||
| data1 = data1.map(input_columns=["label"], operations=one_hot_op) | |||
| data1 = data1.map(operations=one_hot_op, input_columns=["label"]) | |||
| cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NCHW) | |||
| data1 = data1.batch(5, drop_remainder=True) | |||
| data1 = data1.map(input_columns=["image", "label"], operations=cutmix_batch_op) | |||
| data1 = data1.map(operations=cutmix_batch_op, input_columns=["image", "label"]) | |||
| with pytest.raises(RuntimeError) as error: | |||
| images_cutmix = np.array([]) | |||
| @@ -406,7 +406,7 @@ def test_cutmix_batch_fail7(): | |||
| cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NHWC) | |||
| data1 = data1.batch(5, drop_remainder=True) | |||
| data1 = data1.map(input_columns=["image", "label"], operations=cutmix_batch_op) | |||
| data1 = data1.map(operations=cutmix_batch_op, input_columns=["image", "label"]) | |||
| with pytest.raises(RuntimeError) as error: | |||
| images_cutmix = np.array([]) | |||
| @@ -430,7 +430,7 @@ def test_cutmix_batch_fail8(): | |||
| data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False) | |||
| one_hot_op = data_trans.OneHot(num_classes=10) | |||
| data1 = data1.map(input_columns=["label"], operations=one_hot_op) | |||
| data1 = data1.map(operations=one_hot_op, input_columns=["label"]) | |||
| with pytest.raises(ValueError) as error: | |||
| vision.CutMixBatch(mode.ImageBatchFormat.NHWC, 0.0) | |||
| error_message = "Input is not within the required interval" | |||
| @@ -59,7 +59,7 @@ def test_numpy_slices_list_append(): | |||
| data1 = de.TFRecordDataset(DATA_DIR) | |||
| resize_op = vision.Resize((resize_height, resize_width)) | |||
| data1 = data1.map(input_columns=["image"], operations=[vision.Decode(True), resize_op]) | |||
| data1 = data1.map(operations=[vision.Decode(True), resize_op], input_columns=["image"]) | |||
| res = [] | |||
| for data in data1.create_dict_iterator(num_epochs=1): | |||
| @@ -46,8 +46,8 @@ def test_celeba_dataset_op(): | |||
| data = data.repeat(2) | |||
| center_crop = vision.CenterCrop(crop_size) | |||
| resize_op = vision.Resize(resize_size, Inter.LINEAR) # Bilinear mode | |||
| data = data.map(input_columns=["image"], operations=center_crop) | |||
| data = data.map(input_columns=["image"], operations=resize_op) | |||
| data = data.map(operations=center_crop, input_columns=["image"]) | |||
| data = data.map(operations=resize_op, input_columns=["image"]) | |||
| count = 0 | |||
| for item in data.create_dict_iterator(num_epochs=1): | |||
| @@ -25,6 +25,7 @@ INVALID_FILE = "../data/dataset/testCOCO/annotations/invalid.json" | |||
| LACKOFIMAGE_FILE = "../data/dataset/testCOCO/annotations/lack_of_images.json" | |||
| INVALID_CATEGORY_ID_FILE = "../data/dataset/testCOCO/annotations/invalid_category_id.json" | |||
| def test_coco_detection(): | |||
| data1 = ds.CocoDataset(DATA_DIR, annotation_file=ANNOTATION_FILE, task="Detection", | |||
| decode=True, shuffle=False) | |||
| @@ -57,6 +58,7 @@ def test_coco_detection(): | |||
| np.testing.assert_array_equal(np.array([[5]]), category_id[4]) | |||
| np.testing.assert_array_equal(np.array([[6]]), category_id[5]) | |||
| def test_coco_stuff(): | |||
| data1 = ds.CocoDataset(DATA_DIR, annotation_file=ANNOTATION_FILE, task="Stuff", | |||
| decode=True, shuffle=False) | |||
| @@ -97,6 +99,7 @@ def test_coco_stuff(): | |||
| segmentation[5]) | |||
| np.testing.assert_array_equal(np.array([[0]]), iscrowd[5]) | |||
| def test_coco_keypoint(): | |||
| data1 = ds.CocoDataset(DATA_DIR, annotation_file=KEYPOINT_FILE, task="Keypoint", | |||
| decode=True, shuffle=False) | |||
| @@ -124,6 +127,7 @@ def test_coco_keypoint(): | |||
| keypoints[1]) | |||
| np.testing.assert_array_equal(np.array([[10]]), num_keypoints[1]) | |||
| def test_coco_panoptic(): | |||
| data1 = ds.CocoDataset(DATA_DIR, annotation_file=PANOPTIC_FILE, task="Panoptic", decode=True, shuffle=False) | |||
| num_iter = 0 | |||
| @@ -151,6 +155,7 @@ def test_coco_panoptic(): | |||
| np.testing.assert_array_equal(np.array([[0], [0]]), iscrowd[1]) | |||
| np.testing.assert_array_equal(np.array([[43102], [6079]]), area[1]) | |||
| def test_coco_detection_classindex(): | |||
| data1 = ds.CocoDataset(DATA_DIR, annotation_file=ANNOTATION_FILE, task="Detection", decode=True) | |||
| class_index = data1.get_class_indexing() | |||
| @@ -161,6 +166,7 @@ def test_coco_detection_classindex(): | |||
| num_iter += 1 | |||
| assert num_iter == 6 | |||
| def test_coco_panootic_classindex(): | |||
| data1 = ds.CocoDataset(DATA_DIR, annotation_file=PANOPTIC_FILE, task="Panoptic", decode=True) | |||
| class_index = data1.get_class_indexing() | |||
| @@ -170,6 +176,7 @@ def test_coco_panootic_classindex(): | |||
| num_iter += 1 | |||
| assert num_iter == 2 | |||
| def test_coco_case_0(): | |||
| data1 = ds.CocoDataset(DATA_DIR, annotation_file=ANNOTATION_FILE, task="Detection", decode=True) | |||
| data1 = data1.shuffle(10) | |||
| @@ -179,6 +186,7 @@ def test_coco_case_0(): | |||
| num_iter += 1 | |||
| assert num_iter == 2 | |||
| def test_coco_case_1(): | |||
| data1 = ds.CocoDataset(DATA_DIR, annotation_file=ANNOTATION_FILE, task="Detection", decode=True) | |||
| sizes = [0.5, 0.5] | |||
| @@ -194,28 +202,31 @@ def test_coco_case_1(): | |||
| num_iter += 1 | |||
| assert num_iter == 3 | |||
| def test_coco_case_2(): | |||
| data1 = ds.CocoDataset(DATA_DIR, annotation_file=ANNOTATION_FILE, task="Detection", decode=True) | |||
| resize_op = vision.Resize((224, 224)) | |||
| data1 = data1.map(input_columns=["image"], operations=resize_op) | |||
| data1 = data1.map(operations=resize_op, input_columns=["image"]) | |||
| data1 = data1.repeat(4) | |||
| num_iter = 0 | |||
| for _ in data1.__iter__(): | |||
| num_iter += 1 | |||
| assert num_iter == 24 | |||
| def test_coco_case_3(): | |||
| data1 = ds.CocoDataset(DATA_DIR_2, annotation_file=ANNOTATION_FILE, task="Detection", decode=True) | |||
| resize_op = vision.Resize((224, 224)) | |||
| data1 = data1.map(input_columns=["image"], operations=resize_op) | |||
| data1 = data1.map(operations=resize_op, input_columns=["image"]) | |||
| data1 = data1.repeat(4) | |||
| num_iter = 0 | |||
| for _ in data1.__iter__(): | |||
| num_iter += 1 | |||
| assert num_iter == 24 | |||
| def test_coco_case_exception(): | |||
| try: | |||
| data1 = ds.CocoDataset("path_not_exist/", annotation_file=ANNOTATION_FILE, task="Detection") | |||
| @@ -25,6 +25,7 @@ def generator_1d(): | |||
| for i in range(64): | |||
| yield (np.array([i]),) | |||
| class DatasetGenerator: | |||
| def __init__(self): | |||
| pass | |||
| @@ -241,11 +242,11 @@ def test_generator_8(): | |||
| # apply dataset operations | |||
| data1 = ds.GeneratorDataset(generator_mc(2048), ["col0", "col1"]) | |||
| data1 = data1.map(input_columns="col0", output_columns="out0", operations=(lambda x: x * 3), | |||
| data1 = data1.map(operations=(lambda x: x * 3), input_columns="col0", output_columns="out0", | |||
| num_parallel_workers=2) | |||
| data1 = data1.map(input_columns="col1", output_columns=["out1", "out2"], operations=(lambda x: (x * 7, x)), | |||
| data1 = data1.map(operations=(lambda x: (x * 7, x)), input_columns="col1", output_columns=["out1", "out2"], | |||
| num_parallel_workers=2, column_order=["out0", "out1", "out2"]) | |||
| data1 = data1.map(input_columns="out2", output_columns="out2", operations=(lambda x: x + 1), | |||
| data1 = data1.map(operations=(lambda x: x + 1), input_columns="out2", output_columns="out2", | |||
| num_parallel_workers=2) | |||
| i = 0 | |||
| @@ -268,9 +269,9 @@ def test_generator_9(): | |||
| # apply dataset operations | |||
| data1 = ds.GeneratorDataset(generator_mc(2048), ["image", "label"]) | |||
| data2 = ds.GeneratorDataset(generator_mc(2048), ["label", "image"]) | |||
| data1 = data1.map(input_columns="label", operations=(lambda x: x * 3), | |||
| data1 = data1.map(operations=(lambda x: x * 3), input_columns="label", | |||
| num_parallel_workers=4) | |||
| data2 = data2.map(input_columns="label", operations=(lambda x: x * 3), | |||
| data2 = data2.map(operations=(lambda x: x * 3), input_columns="label", | |||
| num_parallel_workers=4) | |||
| # Expected column order is not changed. | |||
| @@ -298,7 +299,7 @@ def test_generator_10(): | |||
| # apply dataset operations | |||
| data1 = ds.GeneratorDataset(generator_mc(2048), ["col0", "col1"]) | |||
| data1 = data1.map(input_columns="col1", output_columns=["out1", "out2"], operations=(lambda x: (x, x * 5)), | |||
| data1 = data1.map(operations=(lambda x: (x, x * 5)), input_columns="col1", output_columns=["out1", "out2"], | |||
| column_order=['col0', 'out1', 'out2'], num_parallel_workers=2) | |||
| # Expected column order is |col0|out1|out2| | |||
| @@ -322,7 +323,7 @@ def test_generator_11(): | |||
| # apply dataset operations | |||
| data1 = ds.GeneratorDataset(generator_mc(2048), ["col0", "col1"]) | |||
| data1 = data1.map(input_columns="col1", output_columns=["out1", "out2"], operations=(lambda x: (x, x * 5)), | |||
| data1 = data1.map(operations=(lambda x: (x, x * 5)), input_columns="col1", output_columns=["out1", "out2"], | |||
| column_order=['out1', 'out2'], num_parallel_workers=2) | |||
| # Expected column order is |out1|out2| | |||
| @@ -503,7 +504,7 @@ def test_generator_error_3(): | |||
| with pytest.raises(ValueError) as info: | |||
| # apply dataset operations | |||
| data1 = ds.GeneratorDataset(generator_mc(2048), ["label", "image"]) | |||
| data1 = data1.map(input_columns=["label"], output_columns=["out1", "out2"], operations=(lambda x: (x, x * 5)), | |||
| data1 = data1.map(operations=(lambda x: (x, x * 5)), input_columns=["label"], output_columns=["out1", "out2"], | |||
| num_parallel_workers=2) | |||
| for _ in data1: | |||
| @@ -515,7 +516,7 @@ def test_generator_error_4(): | |||
| with pytest.raises(RuntimeError) as info: | |||
| # apply dataset operations | |||
| data1 = ds.GeneratorDataset(generator_mc(2048), ["label", "image"]) | |||
| data1 = data1.map(input_columns=["label"], operations=(lambda x: (x, x * 5)), | |||
| data1 = data1.map(operations=(lambda x: (x, x * 5)), input_columns=["label"], | |||
| num_parallel_workers=2) | |||
| for _ in data1: | |||
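For GeneratorDataset, the one-to-many lambdas need output_columns and column_order alongside the reordered operations keyword. A sketch with a hypothetical two-column generator, mirroring test_generator_10 above:

```python
import numpy as np
import mindspore.dataset as ds

def generator_mc(maxid=64):
    # Hypothetical two-column generator, as in the tests above.
    for i in range(maxid):
        yield (np.array([i]), np.array([[i, i + 1], [i + 2, i + 3]]))

data1 = ds.GeneratorDataset(generator_mc(64), ["col0", "col1"])

# One input column becomes two output columns, so both output_columns and
# column_order are required next to operations.
data1 = data1.map(operations=(lambda x: (x, x * 5)),
                  input_columns="col1",
                  output_columns=["out1", "out2"],
                  column_order=["col0", "out1", "out2"],
                  num_parallel_workers=2)

for row in data1.create_dict_iterator(num_epochs=1):
    print(row["col0"], row["out1"].shape, row["out2"].shape)
    break
```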
| @@ -706,6 +707,7 @@ def test_generator_dataset_size_4(): | |||
| num_rows = num_rows + 1 | |||
| assert data_size == num_rows | |||
| def test_generator_dataset_size_5(): | |||
| """ | |||
| Test get_dataset_size after create_dict_iterator | |||
| @@ -103,8 +103,8 @@ def test_manifest_dataset_multi_label_onehot(): | |||
| data = ds.ManifestDataset(DATA_FILE, decode=True, shuffle=False) | |||
| expect_label = [[[0, 1, 0], [1, 0, 0]], [[1, 0, 0], [1, 0, 1]]] | |||
| one_hot_encode = data_trans.OneHot(3) | |||
| data = data.map(input_columns=["label"], operations=one_hot_encode) | |||
| data = data.map(input_columns=["label"], operations=multi_label_hot) | |||
| data = data.map(operations=one_hot_encode, input_columns=["label"]) | |||
| data = data.map(operations=multi_label_hot, input_columns=["label"]) | |||
| data = data.batch(2) | |||
| count = 0 | |||
| for item in data.create_dict_iterator(num_epochs=1): | |||
| @@ -85,8 +85,8 @@ def test_case_0(): | |||
| resize_op = vision.Resize((224, 224)) | |||
| data1 = data1.map(input_columns=["image"], operations=resize_op) | |||
| data1 = data1.map(input_columns=["target"], operations=resize_op) | |||
| data1 = data1.map(operations=resize_op, input_columns=["image"]) | |||
| data1 = data1.map(operations=resize_op, input_columns=["target"]) | |||
| repeat_num = 4 | |||
| data1 = data1.repeat(repeat_num) | |||
| batch_size = 2 | |||
| @@ -103,7 +103,7 @@ def test_case_1(): | |||
| resize_op = vision.Resize((224, 224)) | |||
| data1 = data1.map(input_columns=["image"], operations=resize_op) | |||
| data1 = data1.map(operations=resize_op, input_columns=["image"]) | |||
| repeat_num = 4 | |||
| data1 = data1.repeat(repeat_num) | |||
| batch_size = 2 | |||
| @@ -36,7 +36,7 @@ def test_decode_op(): | |||
| data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) | |||
| # Serialize and Load dataset requires using vision.Decode instead of vision.Decode(). | |||
| data1 = data1.map(input_columns=["image"], operations=[vision.Decode(True)]) | |||
| data1 = data1.map(operations=[vision.Decode(True)], input_columns=["image"]) | |||
| # Second dataset | |||
| data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) | |||
| @@ -57,7 +57,7 @@ def test_decode_op_tf_file_dataset(): | |||
| # Decode with rgb format set to True | |||
| data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=ds.Shuffle.FILES) | |||
| data1 = data1.map(input_columns=["image"], operations=vision.Decode(True)) | |||
| data1 = data1.map(operations=vision.Decode(True), input_columns=["image"]) | |||
| for item in data1.create_dict_iterator(num_epochs=1): | |||
| logger.info('decode == {}'.format(item['image'])) | |||
| @@ -54,8 +54,8 @@ def test_case_1(): | |||
| resize_op = vision.Resize((resize_height, resize_width)) | |||
| # apply map operations on images | |||
| data = data.map(input_columns=["image"], operations=decode_op) | |||
| data = data.map(input_columns=["image"], operations=resize_op) | |||
| data = data.map(operations=decode_op, input_columns=["image"]) | |||
| data = data.map(operations=resize_op, input_columns=["image"]) | |||
| batch_size = 3 | |||
| data = data.batch(batch_size, drop_remainder=True) | |||
| @@ -79,8 +79,8 @@ def test_case_2(): | |||
| resize_op = vision.Resize((resize_height, resize_width)) | |||
| # apply map operations on images | |||
| data = data.map(input_columns=["image"], operations=decode_op) | |||
| data = data.map(input_columns=["image"], operations=resize_op) | |||
| data = data.map(operations=decode_op, input_columns=["image"]) | |||
| data = data.map(operations=resize_op, input_columns=["image"]) | |||
| batch_size = 2 | |||
| data = data.batch(batch_size, drop_remainder=True) | |||
| @@ -107,8 +107,8 @@ def test_case_3(): | |||
| resize_op = vision.Resize((resize_height, resize_width)) | |||
| # apply map operations on images | |||
| data = data.map(input_columns=["image"], operations=decode_op) | |||
| data = data.map(input_columns=["image"], operations=resize_op) | |||
| data = data.map(operations=decode_op, input_columns=["image"]) | |||
| data = data.map(operations=resize_op, input_columns=["image"]) | |||
| data = data.repeat(2) | |||
| @@ -24,8 +24,8 @@ import mindspore.dataset.transforms.c_transforms as ops | |||
| def compare(array): | |||
| data = ds.NumpySlicesDataset([array], column_names="x") | |||
| array = np.array(array) | |||
| data = data.map(input_columns=["x"], output_columns=["x", "y"], column_order=["x", "y"], | |||
| operations=ops.Duplicate()) | |||
| data = data.map(operations=ops.Duplicate(), input_columns=["x"], output_columns=["x", "y"], | |||
| column_order=["x", "y"]) | |||
| for d in data.create_dict_iterator(num_epochs=1): | |||
| np.testing.assert_array_equal(array, d["x"]) | |||
| np.testing.assert_array_equal(array, d["y"]) | |||
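Duplicate is the simplest one-to-two case; a self-contained sketch of the call from the test above:

```python
import numpy as np
import mindspore.dataset as ds
import mindspore.dataset.transforms.c_transforms as ops

# Duplicate turns one input column into two identical output columns, so the
# map() call names both outputs and fixes their order explicitly.
data = ds.NumpySlicesDataset([[1, 2, 3]], column_names="x")
data = data.map(operations=ops.Duplicate(), input_columns=["x"],
                output_columns=["x", "y"], column_order=["x", "y"])

for row in data.create_dict_iterator(num_epochs=1):
    np.testing.assert_array_equal(row["x"], row["y"])
```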
| @@ -79,7 +79,7 @@ def test_decode_op(): | |||
| data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) | |||
| # Serialize and Load dataset requires using vision.Decode instead of vision.Decode(). | |||
| data1 = data1.map(input_columns=["image"], operations=[vision.Decode(True)]) | |||
| data1 = data1.map(operations=[vision.Decode(True)], input_columns=["image"]) | |||
| # Second dataset | |||
| data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) | |||
| @@ -43,8 +43,7 @@ def test_equalize_py(plot=False): | |||
| F.Resize((224, 224)), | |||
| F.ToTensor()]) | |||
| ds_original = ds.map(input_columns="image", | |||
| operations=transforms_original) | |||
| ds_original = ds.map(operations=transforms_original, input_columns="image") | |||
| ds_original = ds_original.batch(512) | |||
| @@ -64,8 +63,7 @@ def test_equalize_py(plot=False): | |||
| F.Equalize(), | |||
| F.ToTensor()]) | |||
| ds_equalize = ds.map(input_columns="image", | |||
| operations=transforms_equalize) | |||
| ds_equalize = ds.map(operations=transforms_equalize, input_columns="image") | |||
| ds_equalize = ds_equalize.batch(512) | |||
| @@ -98,8 +96,7 @@ def test_equalize_c(plot=False): | |||
| transforms_original = [C.Decode(), C.Resize(size=[224, 224])] | |||
| ds_original = ds.map(input_columns="image", | |||
| operations=transforms_original) | |||
| ds_original = ds.map(operations=transforms_original, input_columns="image") | |||
| ds_original = ds_original.batch(512) | |||
| @@ -117,8 +114,7 @@ def test_equalize_c(plot=False): | |||
| transform_equalize = [C.Decode(), C.Resize(size=[224, 224]), | |||
| C.Equalize()] | |||
| ds_equalize = ds.map(input_columns="image", | |||
| operations=transform_equalize) | |||
| ds_equalize = ds.map(operations=transform_equalize, input_columns="image") | |||
| ds_equalize = ds_equalize.batch(512) | |||
| @@ -147,11 +143,9 @@ def test_equalize_py_c(plot=False): | |||
| # equalize Images in cpp | |||
| ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False) | |||
| ds = ds.map(input_columns=["image"], | |||
| operations=[C.Decode(), C.Resize((224, 224))]) | |||
| ds = ds.map(operations=[C.Decode(), C.Resize((224, 224))], input_columns=["image"]) | |||
| ds_c_equalize = ds.map(input_columns="image", | |||
| operations=C.Equalize()) | |||
| ds_c_equalize = ds.map(operations=C.Equalize(), input_columns="image") | |||
| ds_c_equalize = ds_c_equalize.batch(512) | |||
| @@ -165,16 +159,14 @@ def test_equalize_py_c(plot=False): | |||
| # Equalize images in python | |||
| ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False) | |||
| ds = ds.map(input_columns=["image"], | |||
| operations=[C.Decode(), C.Resize((224, 224))]) | |||
| ds = ds.map(operations=[C.Decode(), C.Resize((224, 224))], input_columns=["image"]) | |||
| transforms_p_equalize = mindspore.dataset.transforms.py_transforms.Compose([lambda img: img.astype(np.uint8), | |||
| F.ToPIL(), | |||
| F.Equalize(), | |||
| np.array]) | |||
| ds_p_equalize = ds.map(input_columns="image", | |||
| operations=transforms_p_equalize) | |||
| ds_p_equalize = ds.map(operations=transforms_p_equalize, input_columns="image") | |||
| ds_p_equalize = ds_p_equalize.batch(512) | |||
| @@ -206,13 +198,10 @@ def test_equalize_one_channel(): | |||
| try: | |||
| ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False) | |||
| ds = ds.map(input_columns=["image"], | |||
| operations=[C.Decode(), | |||
| C.Resize((224, 224)), | |||
| lambda img: np.array(img[:, :, 0])]) | |||
| ds = ds.map(operations=[C.Decode(), C.Resize((224, 224)), | |||
| lambda img: np.array(img[:, :, 0])], input_columns=["image"]) | |||
| ds.map(input_columns="image", | |||
| operations=c_op) | |||
| ds.map(operations=c_op, input_columns="image") | |||
| except RuntimeError as e: | |||
| logger.info("Got an exception in DE: {}".format(str(e))) | |||
| @@ -225,8 +214,7 @@ def test_equalize_mnist_c(plot=False): | |||
| """ | |||
| logger.info("Test Equalize C Op With MNIST Images") | |||
| ds = de.MnistDataset(dataset_dir=MNIST_DATA_DIR, num_samples=2, shuffle=False) | |||
| ds_equalize_c = ds.map(input_columns="image", | |||
| operations=C.Equalize()) | |||
| ds_equalize_c = ds.map(operations=C.Equalize(), input_columns="image") | |||
| ds_orig = de.MnistDataset(dataset_dir=MNIST_DATA_DIR, num_samples=2, shuffle=False) | |||
| images = [] | |||
| @@ -259,7 +247,7 @@ def test_equalize_md5_py(): | |||
| F.Equalize(), | |||
| F.ToTensor()]) | |||
| data1 = data1.map(input_columns="image", operations=transforms) | |||
| data1 = data1.map(operations=transforms, input_columns="image") | |||
| # Compare with expected md5 from images | |||
| filename = "equalize_01_result.npz" | |||
| save_and_check_md5(data1, filename, generate_golden=GENERATE_GOLDEN) | |||
| @@ -279,7 +267,7 @@ def test_equalize_md5_c(): | |||
| C.Equalize(), | |||
| F.ToTensor()] | |||
| data = ds.map(input_columns="image", operations=transforms_equalize) | |||
| data = ds.map(operations=transforms_equalize, input_columns="image") | |||
| # Compare with expected md5 from images | |||
| filename = "equalize_01_result_c.npz" | |||
| save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN) | |||
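The Equalize and Invert tests funnel the Python-side operators through `Compose` before the map call, so only the keyword order changes there. A sketch of that pattern, assuming the py_transforms module paths (the dataset directory is a placeholder):

    import mindspore.dataset as ds
    import mindspore.dataset.transforms.py_transforms
    import mindspore.dataset.vision.py_transforms as F

    DATA_DIR = "/path/to/imagefolder"  # placeholder

    transform = mindspore.dataset.transforms.py_transforms.Compose([F.Decode(),
                                                                    F.Resize((224, 224)),
                                                                    F.Equalize(),
                                                                    F.ToTensor()])
    data = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
    # A single Compose object is passed as the map's operations argument.
    data = data.map(operations=transform, input_columns="image")
    data = data.batch(512)

The C++ operators (the `C.`-prefixed ones) are passed as a plain list instead, as the `_c` variants of these tests do.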
| @@ -29,7 +29,7 @@ def test_exception_01(): | |||
| logger.info("test_exception_01") | |||
| data = ds.TFRecordDataset(DATA_DIR, columns_list=["image"]) | |||
| with pytest.raises(TypeError) as info: | |||
| data.map(input_columns=["image"], operations=vision.Resize(100, 100)) | |||
| data.map(operations=vision.Resize(100, 100), input_columns=["image"]) | |||
| assert "Argument interpolation with value 100 is not of type (<enum 'Inter'>,)" in str(info.value) | |||
| @@ -45,8 +45,8 @@ def test_exception_02(): | |||
| num_samples = 1 | |||
| data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], num_samples=num_samples) | |||
| data = data.map(input_columns=["image"], operations=vision.Decode()) | |||
| data = data.map(input_columns=["image"], operations=vision.Resize((100, 100))) | |||
| data = data.map(operations=vision.Decode(), input_columns=["image"]) | |||
| data = data.map(operations=vision.Resize((100, 100)), input_columns=["image"]) | |||
| # Confirm 1 sample in dataset | |||
| assert sum([1 for _ in data]) == 1 | |||
| num_iters = 0 | |||
| @@ -28,7 +28,7 @@ def test_fillop_basic(): | |||
| data = ds.GeneratorDataset(gen, column_names=["col"]) | |||
| fill_op = data_trans.Fill(3) | |||
| data = data.map(input_columns=["col"], operations=fill_op) | |||
| data = data.map(operations=fill_op, input_columns=["col"]) | |||
| expected = np.array([3, 3, 3, 3], dtype=np.uint8) | |||
| for data_row in data: | |||
| np.testing.assert_array_equal(data_row[0], expected) | |||
| @@ -41,7 +41,7 @@ def test_fillop_down_type_cast(): | |||
| data = ds.GeneratorDataset(gen, column_names=["col"]) | |||
| fill_op = data_trans.Fill(-3) | |||
| data = data.map(input_columns=["col"], operations=fill_op) | |||
| data = data.map(operations=fill_op, input_columns=["col"]) | |||
| expected = np.array([253, 253, 253, 253], dtype=np.uint8) | |||
| for data_row in data: | |||
| np.testing.assert_array_equal(data_row[0], expected) | |||
| @@ -54,7 +54,7 @@ def test_fillop_up_type_cast(): | |||
| data = ds.GeneratorDataset(gen, column_names=["col"]) | |||
| fill_op = data_trans.Fill(3) | |||
| data = data.map(input_columns=["col"], operations=fill_op) | |||
| data = data.map(operations=fill_op, input_columns=["col"]) | |||
| expected = np.array([3., 3., 3., 3.], dtype=np.float) | |||
| for data_row in data: | |||
| np.testing.assert_array_equal(data_row[0], expected) | |||
| @@ -67,7 +67,7 @@ def test_fillop_string(): | |||
| data = ds.GeneratorDataset(gen, column_names=["col"]) | |||
| fill_op = data_trans.Fill("error") | |||
| data = data.map(input_columns=["col"], operations=fill_op) | |||
| data = data.map(operations=fill_op, input_columns=["col"]) | |||
| expected = np.array(['error', 'error'], dtype='S') | |||
| for data_row in data: | |||
| np.testing.assert_array_equal(data_row[0], expected) | |||
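`Fill` replaces every element of the input tensor with the given scalar, cast to the column's dtype, which is why the -3 case above comes back as 253 for uint8. A compact sketch mirroring the basic case (the generator contents are arbitrary):

    import numpy as np
    import mindspore.dataset as ds
    import mindspore.dataset.transforms.c_transforms as data_trans

    def gen():
        yield (np.array([4, 5, 4, 5], dtype=np.uint8),)

    data = ds.GeneratorDataset(gen, column_names=["col"])
    # Every element becomes 3; the column keeps its uint8 dtype.
    data = data.map(operations=data_trans.Fill(3), input_columns=["col"])
    for row in data.create_dict_iterator(num_epochs=1):
        np.testing.assert_array_equal(row["col"], np.array([3, 3, 3, 3], dtype=np.uint8))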
| @@ -79,7 +79,7 @@ def test_fillop_error_handling(): | |||
| data = ds.GeneratorDataset(gen, column_names=["col"]) | |||
| fill_op = data_trans.Fill("words") | |||
| data = data.map(input_columns=["col"], operations=fill_op) | |||
| data = data.map(operations=fill_op, input_columns=["col"]) | |||
| with pytest.raises(RuntimeError) as error_info: | |||
| for _ in data: | |||
| @@ -30,7 +30,7 @@ def test_diff_predicate_func(): | |||
| cde.Resize([64, 64]) | |||
| ] | |||
| dataset = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image", "label"], shuffle=False) | |||
| dataset = dataset.map(input_columns=["image"], operations=transforms, num_parallel_workers=1) | |||
| dataset = dataset.map(operations=transforms, input_columns=["image"], num_parallel_workers=1) | |||
| dataset = dataset.filter(input_columns=["image", "label"], predicate=predicate_func, num_parallel_workers=4) | |||
| num_iter = 0 | |||
| @@ -261,8 +261,8 @@ def func_map_part(data_col1): | |||
| # test with map | |||
| def test_filter_by_generator_with_map_all_col(): | |||
| dataset = ds.GeneratorDataset(generator_mc(12), ["col1", "col2"]) | |||
| dataset_map = dataset.map(input_columns=["col1"], output_columns=["col1"], operations=func_map_part) | |||
| # dataset_map = dataset.map( operations=func_map_part) | |||
| dataset_map = dataset.map(operations=func_map_part, input_columns=["col1"], output_columns=["col1"]) | |||
| # dataset_map = dataset.map(operations=func_map_part) | |||
| dataset_f = dataset_map.filter(input_columns=["col1"], predicate=filter_func_map_part, num_parallel_workers=1) | |||
| num_iter = 0 | |||
| ret_data = [] | |||
| @@ -277,7 +277,7 @@ def test_filter_by_generator_with_map_all_col(): | |||
| # test with map | |||
| def test_filter_by_generator_with_map_part_col(): | |||
| dataset = ds.GeneratorDataset(generator_mc(12), ["col1", "col2"]) | |||
| dataset_map = dataset.map(input_columns=["col1"], output_columns=["out1"], operations=func_map_part) | |||
| dataset_map = dataset.map(operations=func_map_part, input_columns=["col1"], output_columns=["out1"]) | |||
| dataset_f = dataset_map.filter(input_columns=["out1", "col2"], predicate=filter_func_map, num_parallel_workers=4) | |||
| num_iter = 0 | |||
| @@ -328,7 +328,7 @@ def filter_func_input_column3(col1): | |||
| # test with input_columns | |||
| def test_filter_by_generator_with_input_column(): | |||
| dataset = ds.GeneratorDataset(generator_mc(64), ["col1", "col2"]) | |||
| dataset_map = dataset.map(input_columns=["col1"], output_columns=["out1"], operations=func_map_part) | |||
| dataset_map = dataset.map(operations=func_map_part, input_columns=["col1"], output_columns=["out1"]) | |||
| dataset_f1 = dataset_map.filter(input_columns=["out1", "col2"], predicate=filter_func_input_column1, | |||
| num_parallel_workers=4) | |||
| dataset_f2 = dataset_f1.filter(input_columns=["out1"], predicate=filter_func_input_column2, num_parallel_workers=4) | |||
| @@ -382,7 +382,7 @@ def test_filter_by_generator_Partial1(): | |||
| dataset2 = ds.GeneratorDataset(source=generator_mc_p1(), column_names=["col3", "col4"]) | |||
| dataset_zip = ds.zip((dataset1, dataset2)) | |||
| dataset_f1 = dataset_zip.filter(predicate=filter_func_Partial_0, num_parallel_workers=2) | |||
| dataset_map = dataset_f1.map(input_columns=["col1"], output_columns=["out1"], operations=lambda x1: x1 + 400) | |||
| dataset_map = dataset_f1.map(operations=lambda x1: x1 + 400, input_columns=["col1"], output_columns=["out1"]) | |||
| ret = [] | |||
| for item in dataset_map.create_dict_iterator(num_epochs=1): | |||
| ret.append(item["out1"]) | |||
| @@ -399,8 +399,8 @@ def test_filter_by_generator_Partial2(): | |||
| dataset2f = dataset2.filter(input_columns=["col3"], predicate=lambda x: x not in [203, 207, 209], | |||
| num_parallel_workers=2) | |||
| dataset_zip = ds.zip((dataset1f, dataset2f)) | |||
| dataset_map = dataset_zip.map(input_columns=["col1", "col3"], output_columns=["out1", "out3"], | |||
| operations=lambda x1, x3: (x1 + 400, x3 + 500)) | |||
| dataset_map = dataset_zip.map(operations=lambda x1, x3: (x1 + 400, x3 + 500), input_columns=["col1", "col3"], | |||
| output_columns=["out1", "out3"]) | |||
| ret1 = [] | |||
| ret3 = [] | |||
| for item in dataset_map.create_dict_iterator(num_epochs=1): | |||
| @@ -484,6 +484,7 @@ def test_filter_by_generator_with_map_all_sort(): | |||
| assert ret_data[0]["col1"] == 0 | |||
| assert ret_data[9]["col6"] == 509 | |||
| def test_filter_by_generator_get_dataset_size(): | |||
| dataset = ds.GeneratorDataset(generator_1d, ["data"]) | |||
| dataset = dataset.filter(predicate=filter_func_shuffle_after, num_parallel_workers=4) | |||
| @@ -41,7 +41,7 @@ def test_five_crop_op(plot=False): | |||
| vision.ToTensor(), | |||
| ] | |||
| transform_1 = mindspore.dataset.transforms.py_transforms.Compose(transforms_1) | |||
| data1 = data1.map(input_columns=["image"], operations=transform_1) | |||
| data1 = data1.map(operations=transform_1, input_columns=["image"]) | |||
| # Second dataset | |||
| data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) | |||
| @@ -51,7 +51,7 @@ def test_five_crop_op(plot=False): | |||
| lambda images: np.stack([vision.ToTensor()(image) for image in images]) # 4D stack of 5 images | |||
| ] | |||
| transform_2 = mindspore.dataset.transforms.py_transforms.Compose(transforms_2) | |||
| data2 = data2.map(input_columns=["image"], operations=transform_2) | |||
| data2 = data2.map(operations=transform_2, input_columns=["image"]) | |||
| num_iter = 0 | |||
| for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): | |||
| @@ -85,7 +85,7 @@ def test_five_crop_error_msg(): | |||
| vision.ToTensor() | |||
| ] | |||
| transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) | |||
| data = data.map(input_columns=["image"], operations=transform) | |||
| data = data.map(operations=transform, input_columns=["image"]) | |||
| with pytest.raises(RuntimeError) as info: | |||
| for _ in data: | |||
| @@ -110,7 +110,7 @@ def test_five_crop_md5(): | |||
| lambda images: np.stack([vision.ToTensor()(image) for image in images]) # 4D stack of 5 images | |||
| ] | |||
| transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) | |||
| data = data.map(input_columns=["image"], operations=transform) | |||
| data = data.map(operations=transform, input_columns=["image"]) | |||
| # Compare with expected md5 from images | |||
| filename = "five_crop_01_result.npz" | |||
| save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN) | |||
| @@ -26,7 +26,7 @@ def test_demo_basic_from_dataset(): | |||
| vocab = text.Vocab.from_dataset(data, "text", freq_range=None, top_k=None, | |||
| special_tokens=["<pad>", "<unk>"], | |||
| special_first=True) | |||
| data = data.map(input_columns=["text"], operations=text.Lookup(vocab, "<unk>")) | |||
| data = data.map(operations=text.Lookup(vocab, "<unk>"), input_columns=["text"]) | |||
| res = [] | |||
| for d in data.create_dict_iterator(num_epochs=1): | |||
| res.append(d["text"].item()) | |||
| @@ -36,10 +36,10 @@ def test_demo_basic_from_dataset(): | |||
| def test_demo_basic_from_dataset_with_tokenizer(): | |||
| """ this is a tutorial on how from_dataset should be used in a normal use case with tokenizer""" | |||
| data = ds.TextFileDataset("../data/dataset/testTokenizerData/1.txt", shuffle=False) | |||
| data = data.map(input_columns=["text"], operations=text.UnicodeCharTokenizer()) | |||
| data = data.map(operations=text.UnicodeCharTokenizer(), input_columns=["text"]) | |||
| vocab = text.Vocab.from_dataset(data, None, freq_range=None, top_k=None, special_tokens=["<pad>", "<unk>"], | |||
| special_first=True) | |||
| data = data.map(input_columns=["text"], operations=text.Lookup(vocab, "<unk>")) | |||
| data = data.map(operations=text.Lookup(vocab, "<unk>"), input_columns=["text"]) | |||
| res = [] | |||
| for d in data.create_dict_iterator(num_epochs=1): | |||
| res.append(list(d["text"])) | |||
| @@ -60,7 +60,7 @@ def test_from_dataset(): | |||
| corpus_dataset = ds.GeneratorDataset(gen_corpus, column_names=["text"]) | |||
| vocab = text.Vocab.from_dataset(corpus_dataset, None, freq_range, top_k, special_tokens=["<pad>", "<unk>"], | |||
| special_first=True) | |||
| corpus_dataset = corpus_dataset.map(input_columns="text", operations=text.Lookup(vocab, "<unk>")) | |||
| corpus_dataset = corpus_dataset.map(operations=text.Lookup(vocab, "<unk>"), input_columns="text") | |||
| res = [] | |||
| for d in corpus_dataset.create_dict_iterator(num_epochs=1): | |||
| res.append(list(d["text"])) | |||
| @@ -108,7 +108,7 @@ def test_from_dataset_special_token(): | |||
| corpus_dataset = ds.GeneratorDataset(gen_corpus, column_names=["text"]) | |||
| vocab = text.Vocab.from_dataset(corpus_dataset, None, None, top_k, special_tokens, special_first) | |||
| data = ds.GeneratorDataset(gen_input(texts), column_names=["text"]) | |||
| data = data.map(input_columns="text", operations=text.Lookup(vocab, "<unk>")) | |||
| data = data.map(operations=text.Lookup(vocab, "<unk>"), input_columns="text") | |||
| res = [] | |||
| for d in data.create_dict_iterator(num_epochs=1): | |||
| res.append(d["text"].item()) | |||
| @@ -95,16 +95,16 @@ def test_get_column_name_manifest(): | |||
| def test_get_column_name_map(): | |||
| data = ds.Cifar10Dataset(CIFAR10_DIR) | |||
| center_crop_op = vision.CenterCrop(10) | |||
| data = data.map(input_columns=["image"], operations=center_crop_op) | |||
| data = data.map(operations=center_crop_op, input_columns=["image"]) | |||
| assert data.get_col_names() == ["image", "label"] | |||
| data = ds.Cifar10Dataset(CIFAR10_DIR) | |||
| data = data.map(input_columns=["image"], operations=center_crop_op, output_columns=["image"]) | |||
| data = data.map(operations=center_crop_op, input_columns=["image"], output_columns=["image"]) | |||
| assert data.get_col_names() == ["image", "label"] | |||
| data = ds.Cifar10Dataset(CIFAR10_DIR) | |||
| data = data.map(input_columns=["image"], operations=center_crop_op, output_columns=["col1"]) | |||
| data = data.map(operations=center_crop_op, input_columns=["image"], output_columns=["col1"]) | |||
| assert data.get_col_names() == ["col1", "label"] | |||
| data = ds.Cifar10Dataset(CIFAR10_DIR) | |||
| data = data.map(input_columns=["image"], operations=center_crop_op, output_columns=["col1", "col2"], | |||
| data = data.map(operations=center_crop_op, input_columns=["image"], output_columns=["col1", "col2"], | |||
| column_order=["col2", "col1"]) | |||
| assert data.get_col_names() == ["col2", "col1"] | |||
| @@ -42,8 +42,7 @@ def test_invert_py(plot=False): | |||
| F.Resize((224, 224)), | |||
| F.ToTensor()]) | |||
| ds_original = ds.map(input_columns="image", | |||
| operations=transforms_original) | |||
| ds_original = ds.map(operations=transforms_original, input_columns="image") | |||
| ds_original = ds_original.batch(512) | |||
| @@ -63,8 +62,7 @@ def test_invert_py(plot=False): | |||
| F.Invert(), | |||
| F.ToTensor()]) | |||
| ds_invert = ds.map(input_columns="image", | |||
| operations=transforms_invert) | |||
| ds_invert = ds.map(operations=transforms_invert, input_columns="image") | |||
| ds_invert = ds_invert.batch(512) | |||
| @@ -97,8 +95,7 @@ def test_invert_c(plot=False): | |||
| transforms_original = [C.Decode(), C.Resize(size=[224, 224])] | |||
| ds_original = ds.map(input_columns="image", | |||
| operations=transforms_original) | |||
| ds_original = ds.map(operations=transforms_original, input_columns="image") | |||
| ds_original = ds_original.batch(512) | |||
| @@ -116,8 +113,7 @@ def test_invert_c(plot=False): | |||
| transform_invert = [C.Decode(), C.Resize(size=[224, 224]), | |||
| C.Invert()] | |||
| ds_invert = ds.map(input_columns="image", | |||
| operations=transform_invert) | |||
| ds_invert = ds.map(operations=transform_invert, input_columns="image") | |||
| ds_invert = ds_invert.batch(512) | |||
| @@ -146,11 +142,9 @@ def test_invert_py_c(plot=False): | |||
| # Invert Images in cpp | |||
| ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False) | |||
| ds = ds.map(input_columns=["image"], | |||
| operations=[C.Decode(), C.Resize((224, 224))]) | |||
| ds = ds.map(operations=[C.Decode(), C.Resize((224, 224))], input_columns=["image"]) | |||
| ds_c_invert = ds.map(input_columns="image", | |||
| operations=C.Invert()) | |||
| ds_c_invert = ds.map(operations=C.Invert(), input_columns="image") | |||
| ds_c_invert = ds_c_invert.batch(512) | |||
| @@ -164,16 +158,14 @@ def test_invert_py_c(plot=False): | |||
| # invert images in python | |||
| ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False) | |||
| ds = ds.map(input_columns=["image"], | |||
| operations=[C.Decode(), C.Resize((224, 224))]) | |||
| ds = ds.map(operations=[C.Decode(), C.Resize((224, 224))], input_columns=["image"]) | |||
| transforms_p_invert = mindspore.dataset.transforms.py_transforms.Compose([lambda img: img.astype(np.uint8), | |||
| F.ToPIL(), | |||
| F.Invert(), | |||
| np.array]) | |||
| ds_p_invert = ds.map(input_columns="image", | |||
| operations=transforms_p_invert) | |||
| ds_p_invert = ds.map(operations=transforms_p_invert, input_columns="image") | |||
| ds_p_invert = ds_p_invert.batch(512) | |||
| @@ -205,13 +197,10 @@ def test_invert_one_channel(): | |||
| try: | |||
| ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False) | |||
| ds = ds.map(input_columns=["image"], | |||
| operations=[C.Decode(), | |||
| C.Resize((224, 224)), | |||
| lambda img: np.array(img[:, :, 0])]) | |||
| ds = ds.map(operations=[C.Decode(), C.Resize((224, 224)), | |||
| lambda img: np.array(img[:, :, 0])], input_columns=["image"]) | |||
| ds.map(input_columns="image", | |||
| operations=c_op) | |||
| ds.map(operations=c_op, input_columns="image") | |||
| except RuntimeError as e: | |||
| logger.info("Got an exception in DE: {}".format(str(e))) | |||
| @@ -231,7 +220,7 @@ def test_invert_md5_py(): | |||
| F.Invert(), | |||
| F.ToTensor()]) | |||
| data = ds.map(input_columns="image", operations=transforms_invert) | |||
| data = ds.map(operations=transforms_invert, input_columns="image") | |||
| # Compare with expected md5 from images | |||
| filename = "invert_01_result_py.npz" | |||
| save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN) | |||
| @@ -251,7 +240,7 @@ def test_invert_md5_c(): | |||
| C.Invert(), | |||
| F.ToTensor()] | |||
| data = ds.map(input_columns="image", operations=transforms_invert) | |||
| data = ds.map(operations=transforms_invert, input_columns="image") | |||
| # Compare with expected md5 from images | |||
| filename = "invert_01_result_c.npz" | |||
| save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN) | |||
| @@ -51,15 +51,15 @@ def test_linear_transformation_op(plot=False): | |||
| # First dataset | |||
| data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) | |||
| data1 = data1.map(input_columns=["image"], operations=transform) | |||
| data1 = data1.map(operations=transform, input_columns=["image"]) | |||
| # Note: if transformation matrix is diagonal matrix with all 1 in diagonal, | |||
| # the output matrix in expected to be the same as the input matrix. | |||
| data1 = data1.map(input_columns=["image"], | |||
| operations=py_vision.LinearTransformation(transformation_matrix, mean_vector)) | |||
| data1 = data1.map(operations=py_vision.LinearTransformation(transformation_matrix, mean_vector), | |||
| input_columns=["image"]) | |||
| # Second dataset | |||
| data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) | |||
| data2 = data2.map(input_columns=["image"], operations=transform) | |||
| data2 = data2.map(operations=transform, input_columns=["image"]) | |||
| image_transformed = [] | |||
| image = [] | |||
| @@ -98,7 +98,7 @@ def test_linear_transformation_md5(): | |||
| py_vision.LinearTransformation(transformation_matrix, mean_vector) | |||
| ] | |||
| transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) | |||
| data1 = data1.map(input_columns=["image"], operations=transform) | |||
| data1 = data1.map(operations=transform, input_columns=["image"]) | |||
| # Compare with expected md5 from images | |||
| filename = "linear_transformation_01_result.npz" | |||
| @@ -128,7 +128,7 @@ def test_linear_transformation_exception_01(): | |||
| py_vision.LinearTransformation(None, mean_vector) | |||
| ] | |||
| transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) | |||
| data1 = data1.map(input_columns=["image"], operations=transform) | |||
| data1 = data1.map(operations=transform, input_columns=["image"]) | |||
| except TypeError as e: | |||
| logger.info("Got an exception in DE: {}".format(str(e))) | |||
| assert "Argument transformation_matrix with value None is not of type (<class 'numpy.ndarray'>,)" in str(e) | |||
| @@ -157,7 +157,7 @@ def test_linear_transformation_exception_02(): | |||
| py_vision.LinearTransformation(transformation_matrix, None) | |||
| ] | |||
| transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) | |||
| data1 = data1.map(input_columns=["image"], operations=transform) | |||
| data1 = data1.map(operations=transform, input_columns=["image"]) | |||
| except TypeError as e: | |||
| logger.info("Got an exception in DE: {}".format(str(e))) | |||
| assert "Argument mean_vector with value None is not of type (<class 'numpy.ndarray'>,)" in str(e) | |||
| @@ -187,7 +187,7 @@ def test_linear_transformation_exception_03(): | |||
| py_vision.LinearTransformation(transformation_matrix, mean_vector) | |||
| ] | |||
| transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) | |||
| data1 = data1.map(input_columns=["image"], operations=transform) | |||
| data1 = data1.map(operations=transform, input_columns=["image"]) | |||
| except ValueError as e: | |||
| logger.info("Got an exception in DE: {}".format(str(e))) | |||
| assert "square matrix" in str(e) | |||
| @@ -217,7 +217,7 @@ def test_linear_transformation_exception_04(): | |||
| py_vision.LinearTransformation(transformation_matrix, mean_vector) | |||
| ] | |||
| transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) | |||
| data1 = data1.map(input_columns=["image"], operations=transform) | |||
| data1 = data1.map(operations=transform, input_columns=["image"]) | |||
| except ValueError as e: | |||
| logger.info("Got an exception in DE: {}".format(str(e))) | |||
| assert "should match" in str(e) | |||
| @@ -73,6 +73,7 @@ def add_and_remove_cv_file(): | |||
| os.remove("{}".format(x)) | |||
| os.remove("{}.db".format(x)) | |||
| @pytest.fixture | |||
| def add_and_remove_nlp_file(): | |||
| """add/remove nlp file""" | |||
| @@ -265,6 +266,7 @@ def test_cv_minddataset_partition_tutorial(add_and_remove_cv_file): | |||
| assert partitions(5) == 2 | |||
| assert partitions(9) == 2 | |||
| def test_cv_minddataset_partition_num_samples_0(add_and_remove_cv_file): | |||
| """tutorial for cv minddataset.""" | |||
| columns_list = ["data", "file_name", "label"] | |||
| @@ -287,6 +289,7 @@ def test_cv_minddataset_partition_num_samples_0(add_and_remove_cv_file): | |||
| assert partitions(5) == 1 | |||
| assert partitions(9) == 1 | |||
| def test_cv_minddataset_partition_num_samples_1(add_and_remove_cv_file): | |||
| """tutorial for cv minddataset.""" | |||
| columns_list = ["data", "file_name", "label"] | |||
| @@ -309,6 +312,7 @@ def test_cv_minddataset_partition_num_samples_1(add_and_remove_cv_file): | |||
| assert partitions(5) == 2 | |||
| assert partitions(9) == 2 | |||
| def test_cv_minddataset_partition_num_samples_2(add_and_remove_cv_file): | |||
| """tutorial for cv minddataset.""" | |||
| columns_list = ["data", "file_name", "label"] | |||
| @@ -354,11 +358,11 @@ def test_cv_minddataset_partition_tutorial_check_shuffle_result(add_and_remove_c | |||
| logger.info("-------------- item[label]: {} -----------------------".format(item["label"])) | |||
| num_iter += 1 | |||
| if num_iter <= 4: | |||
| epoch1.append(item["file_name"]) # save epoch 1 list | |||
| epoch1.append(item["file_name"]) # save epoch 1 list | |||
| elif num_iter <= 8: | |||
| epoch2.append(item["file_name"]) # save epoch 2 list | |||
| epoch2.append(item["file_name"]) # save epoch 2 list | |||
| else: | |||
| epoch3.append(item["file_name"]) # save epoch 3 list | |||
| epoch3.append(item["file_name"]) # save epoch 3 list | |||
| assert num_iter == 12 | |||
| assert len(epoch1) == 4 | |||
| assert len(epoch2) == 4 | |||
| @@ -376,9 +380,9 @@ def test_cv_minddataset_partition_tutorial_check_whole_reshuffle_result_per_epoc | |||
| columns_list = ["data", "file_name", "label"] | |||
| num_readers = 4 | |||
| num_shards = 3 | |||
| epoch_result = [[["", "", "", ""], ["", "", "", ""], ["", "", "", ""]], # save partition 0 result | |||
| [["", "", "", ""], ["", "", "", ""], ["", "", "", ""]], # save partition 1 result | |||
| [["", "", "", ""], ["", "", "", ""], ["", "", "", ""]]] # svae partition 2 result | |||
| epoch_result = [[["", "", "", ""], ["", "", "", ""], ["", "", "", ""]], # save partition 0 result | |||
| [["", "", "", ""], ["", "", "", ""], ["", "", "", ""]], # save partition 1 result | |||
| [["", "", "", ""], ["", "", "", ""], ["", "", "", ""]]] # svae partition 2 result | |||
| for partition_id in range(num_shards): | |||
| data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, | |||
| @@ -392,7 +396,7 @@ def test_cv_minddataset_partition_tutorial_check_whole_reshuffle_result_per_epoc | |||
| logger.info("-------------- item[file_name]: {}-----------------------".format(item["file_name"])) | |||
| logger.info("-------------- item[label]: {} -----------------------".format(item["label"])) | |||
| # total 3 partition, 4 result per epoch, total 12 result | |||
| epoch_result[partition_id][int(num_iter / 4)][num_iter % 4] = item["file_name"] # save epoch result | |||
| epoch_result[partition_id][int(num_iter / 4)][num_iter % 4] = item["file_name"] # save epoch result | |||
| num_iter += 1 | |||
| assert num_iter == 12 | |||
| assert epoch_result[partition_id][0] not in (epoch_result[partition_id][1], epoch_result[partition_id][2]) | |||
| @@ -425,11 +429,11 @@ def test_cv_minddataset_check_shuffle_result(add_and_remove_cv_file): | |||
| logger.info("-------------- item[label]: {} -----------------------".format(item["label"])) | |||
| num_iter += 1 | |||
| if num_iter <= 10: | |||
| epoch1.append(item["file_name"]) # save epoch 1 list | |||
| epoch1.append(item["file_name"]) # save epoch 1 list | |||
| elif num_iter <= 20: | |||
| epoch2.append(item["file_name"]) # save epoch 2 list | |||
| epoch2.append(item["file_name"]) # save epoch 2 list | |||
| else: | |||
| epoch3.append(item["file_name"]) # save epoch 3 list | |||
| epoch3.append(item["file_name"]) # save epoch 3 list | |||
| assert num_iter == 30 | |||
| assert len(epoch1) == 10 | |||
| assert len(epoch2) == 10 | |||
| @@ -451,11 +455,11 @@ def test_cv_minddataset_check_shuffle_result(add_and_remove_cv_file): | |||
| logger.info("-------------- item[label]: {} -----------------------".format(item["label"])) | |||
| num_iter += 1 | |||
| if num_iter <= 10: | |||
| epoch1_new_dataset.append(item["file_name"]) # save epoch 1 list | |||
| epoch1_new_dataset.append(item["file_name"]) # save epoch 1 list | |||
| elif num_iter <= 20: | |||
| epoch2_new_dataset.append(item["file_name"]) # save epoch 2 list | |||
| epoch2_new_dataset.append(item["file_name"]) # save epoch 2 list | |||
| else: | |||
| epoch3_new_dataset.append(item["file_name"]) # save epoch 3 list | |||
| epoch3_new_dataset.append(item["file_name"]) # save epoch 3 list | |||
| assert num_iter == 30 | |||
| assert len(epoch1_new_dataset) == 10 | |||
| assert len(epoch2_new_dataset) == 10 | |||
| @@ -482,11 +486,11 @@ def test_cv_minddataset_check_shuffle_result(add_and_remove_cv_file): | |||
| logger.info("-------------- item[label]: {} -----------------------".format(item["label"])) | |||
| num_iter += 1 | |||
| if num_iter <= 10: | |||
| epoch1_new_dataset2.append(item["file_name"]) # save epoch 1 list | |||
| epoch1_new_dataset2.append(item["file_name"]) # save epoch 1 list | |||
| elif num_iter <= 20: | |||
| epoch2_new_dataset2.append(item["file_name"]) # save epoch 2 list | |||
| epoch2_new_dataset2.append(item["file_name"]) # save epoch 2 list | |||
| else: | |||
| epoch3_new_dataset2.append(item["file_name"]) # save epoch 3 list | |||
| epoch3_new_dataset2.append(item["file_name"]) # save epoch 3 list | |||
| assert num_iter == 30 | |||
| assert len(epoch1_new_dataset2) == 10 | |||
| assert len(epoch2_new_dataset2) == 10 | |||
| @@ -532,8 +536,8 @@ def test_cv_minddataset_repeat_reshuffle(add_and_remove_cv_file): | |||
| data_set = data_set.map( | |||
| input_columns=["data"], operations=decode_op, num_parallel_workers=2) | |||
| resize_op = vision.Resize((32, 32), interpolation=Inter.LINEAR) | |||
| data_set = data_set.map(input_columns="data", | |||
| operations=resize_op, num_parallel_workers=2) | |||
| data_set = data_set.map(operations=resize_op, input_columns="data", | |||
| num_parallel_workers=2) | |||
| data_set = data_set.batch(2) | |||
| data_set = data_set.repeat(2) | |||
| num_iter = 0 | |||
| @@ -563,8 +567,8 @@ def test_cv_minddataset_batch_size_larger_than_records(add_and_remove_cv_file): | |||
| data_set = data_set.map( | |||
| input_columns=["data"], operations=decode_op, num_parallel_workers=2) | |||
| resize_op = vision.Resize((32, 32), interpolation=Inter.LINEAR) | |||
| data_set = data_set.map(input_columns="data", | |||
| operations=resize_op, num_parallel_workers=2) | |||
| data_set = data_set.map(operations=resize_op, input_columns="data", | |||
| num_parallel_workers=2) | |||
| data_set = data_set.batch(32, drop_remainder=True) | |||
| num_iter = 0 | |||
| for item in data_set.create_dict_iterator(num_epochs=1): | |||
| @@ -707,6 +711,7 @@ def test_cv_minddataset_reader_two_dataset(add_and_remove_cv_file): | |||
| if os.path.exists("{}.db".format(CV2_FILE_NAME)): | |||
| os.remove("{}.db".format(CV2_FILE_NAME)) | |||
| def test_cv_minddataset_reader_two_dataset_partition(add_and_remove_cv_file): | |||
| paths = ["{}{}".format(CV1_FILE_NAME, str(x).rjust(1, '0')) | |||
| for x in range(FILES_NUM)] | |||
| @@ -757,6 +762,7 @@ def test_cv_minddataset_reader_two_dataset_partition(add_and_remove_cv_file): | |||
| os.remove("{}".format(x)) | |||
| os.remove("{}.db".format(x)) | |||
| def test_cv_minddataset_reader_basic_tutorial(add_and_remove_cv_file): | |||
| """tutorial for cv minderdataset.""" | |||
| columns_list = ["data", "file_name", "label"] | |||
| @@ -778,6 +784,7 @@ def test_cv_minddataset_reader_basic_tutorial(add_and_remove_cv_file): | |||
| num_iter += 1 | |||
| assert num_iter == 10 | |||
| def test_nlp_minddataset_reader_basic_tutorial(add_and_remove_nlp_file): | |||
| """tutorial for nlp minderdataset.""" | |||
| num_readers = 4 | |||
| @@ -1522,6 +1529,7 @@ def test_write_with_multi_bytes_and_MindDataset(): | |||
| os.remove("{}".format(mindrecord_file_name)) | |||
| os.remove("{}.db".format(mindrecord_file_name)) | |||
| def test_write_with_multi_array_and_MindDataset(): | |||
| mindrecord_file_name = "test.mindrecord" | |||
| try: | |||
| @@ -1741,9 +1749,9 @@ def test_numpy_generic(): | |||
| for idx in range(10): | |||
| row = {} | |||
| row['label1'] = np.int32(idx) | |||
| row['label2'] = np.int64(idx*10) | |||
| row['label3'] = np.float32(idx+0.12345) | |||
| row['label4'] = np.float64(idx+0.12345789) | |||
| row['label2'] = np.int64(idx * 10) | |||
| row['label3'] = np.float32(idx + 0.12345) | |||
| row['label4'] = np.float64(idx + 0.12345789) | |||
| data.append(row) | |||
| writer.add_schema(cv_schema_json, "img_schema") | |||
| writer.write_raw_data(data) | |||
| @@ -1923,6 +1931,7 @@ def test_write_with_float32_float64_float32_array_float64_array_and_MindDataset( | |||
| os.remove("{}".format(mindrecord_file_name)) | |||
| os.remove("{}.db".format(mindrecord_file_name)) | |||
| if __name__ == '__main__': | |||
| test_nlp_compress_data(add_and_remove_nlp_compress_file) | |||
| test_nlp_compress_data_old_version(add_and_remove_nlp_compress_file) | |||
| @@ -37,9 +37,9 @@ def test_one_hot_op(): | |||
| num_classes = 2 | |||
| epsilon_para = 0.1 | |||
| transforms = [f.OneHotOp(num_classes=num_classes, smoothing_rate=epsilon_para),] | |||
| transforms = [f.OneHotOp(num_classes=num_classes, smoothing_rate=epsilon_para)] | |||
| transform_label = f.Compose(transforms) | |||
| dataset = dataset.map(input_columns=["label"], operations=transform_label) | |||
| dataset = dataset.map(operations=transform_label, input_columns=["label"]) | |||
| golden_label = np.ones(num_classes) * epsilon_para / num_classes | |||
| golden_label[1] = 1 - epsilon_para / num_classes | |||
| @@ -69,9 +69,9 @@ def test_mix_up_single(): | |||
| resize_op = c_vision.Resize((resize_height, resize_width), c_vision.Inter.LINEAR) | |||
| one_hot_encode = c.OneHot(num_classes) # num_classes is input argument | |||
| ds1 = ds1.map(input_columns=["image"], operations=decode_op) | |||
| ds1 = ds1.map(input_columns=["image"], operations=resize_op) | |||
| ds1 = ds1.map(input_columns=["label"], operations=one_hot_encode) | |||
| ds1 = ds1.map(operations=decode_op, input_columns=["image"]) | |||
| ds1 = ds1.map(operations=resize_op, input_columns=["image"]) | |||
| ds1 = ds1.map(operations=one_hot_encode, input_columns=["label"]) | |||
| # apply batch operations | |||
| batch_size = 3 | |||
| @@ -81,7 +81,7 @@ def test_mix_up_single(): | |||
| alpha = 0.2 | |||
| transforms = [py_vision.MixUp(batch_size=batch_size, alpha=alpha, is_single=True) | |||
| ] | |||
| ds1 = ds1.map(input_columns=["image", "label"], operations=transforms) | |||
| ds1 = ds1.map(operations=transforms, input_columns=["image", "label"]) | |||
| for data1, data2 in zip(ds1.create_dict_iterator(num_epochs=1), ds2.create_dict_iterator(num_epochs=1)): | |||
| image1 = data1["image"] | |||
| @@ -118,9 +118,9 @@ def test_mix_up_multi(): | |||
| resize_op = c_vision.Resize((resize_height, resize_width), c_vision.Inter.LINEAR) | |||
| one_hot_encode = c.OneHot(num_classes) # num_classes is input argument | |||
| ds1 = ds1.map(input_columns=["image"], operations=decode_op) | |||
| ds1 = ds1.map(input_columns=["image"], operations=resize_op) | |||
| ds1 = ds1.map(input_columns=["label"], operations=one_hot_encode) | |||
| ds1 = ds1.map(operations=decode_op, input_columns=["image"]) | |||
| ds1 = ds1.map(operations=resize_op, input_columns=["image"]) | |||
| ds1 = ds1.map(operations=one_hot_encode, input_columns=["label"]) | |||
| # apply batch operations | |||
| batch_size = 3 | |||
| @@ -130,7 +130,7 @@ def test_mix_up_multi(): | |||
| alpha = 0.2 | |||
| transforms = [py_vision.MixUp(batch_size=batch_size, alpha=alpha, is_single=False) | |||
| ] | |||
| ds1 = ds1.map(input_columns=["image", "label"], operations=transforms) | |||
| ds1 = ds1.map(operations=transforms, input_columns=["image", "label"]) | |||
| num_iter = 0 | |||
| batch1_image1 = 0 | |||
| for data1, data2 in zip(ds1.create_dict_iterator(num_epochs=1), ds2.create_dict_iterator(num_epochs=1)): | |||
| @@ -30,6 +30,7 @@ DATA_DIR3 = "../data/dataset/testCelebAData/" | |||
| GENERATE_GOLDEN = False | |||
| def test_mixup_batch_success1(plot=False): | |||
| """ | |||
| Test MixUpBatch op with specified alpha parameter | |||
| @@ -51,10 +52,10 @@ def test_mixup_batch_success1(plot=False): | |||
| data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False) | |||
| one_hot_op = data_trans.OneHot(num_classes=10) | |||
| data1 = data1.map(input_columns=["label"], operations=one_hot_op) | |||
| data1 = data1.map(operations=one_hot_op, input_columns=["label"]) | |||
| mixup_batch_op = vision.MixUpBatch(2) | |||
| data1 = data1.batch(5, drop_remainder=True) | |||
| data1 = data1.map(input_columns=["image", "label"], operations=mixup_batch_op) | |||
| data1 = data1.map(operations=mixup_batch_op, input_columns=["image", "label"]) | |||
| images_mixup = None | |||
| for idx, (image, _) in enumerate(data1): | |||
| @@ -81,7 +82,7 @@ def test_mixup_batch_success2(plot=False): | |||
| # Original Images | |||
| ds_original = ds.ImageFolderDataset(dataset_dir=DATA_DIR2, shuffle=False) | |||
| decode_op = vision.Decode() | |||
| ds_original = ds_original.map(input_columns=["image"], operations=[decode_op]) | |||
| ds_original = ds_original.map(operations=[decode_op], input_columns=["image"]) | |||
| ds_original = ds_original.batch(4, pad_info={}, drop_remainder=True) | |||
| images_original = None | |||
| @@ -95,14 +96,14 @@ def test_mixup_batch_success2(plot=False): | |||
| data1 = ds.ImageFolderDataset(dataset_dir=DATA_DIR2, shuffle=False) | |||
| decode_op = vision.Decode() | |||
| data1 = data1.map(input_columns=["image"], operations=[decode_op]) | |||
| data1 = data1.map(operations=[decode_op], input_columns=["image"]) | |||
| one_hot_op = data_trans.OneHot(num_classes=10) | |||
| data1 = data1.map(input_columns=["label"], operations=one_hot_op) | |||
| data1 = data1.map(operations=one_hot_op, input_columns=["label"]) | |||
| mixup_batch_op = vision.MixUpBatch(2.0) | |||
| data1 = data1.batch(4, pad_info={}, drop_remainder=True) | |||
| data1 = data1.map(input_columns=["image", "label"], operations=mixup_batch_op) | |||
| data1 = data1.map(operations=mixup_batch_op, input_columns=["image", "label"]) | |||
| images_mixup = None | |||
| for idx, (image, _) in enumerate(data1): | |||
| @@ -142,10 +143,10 @@ def test_mixup_batch_success3(plot=False): | |||
| data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False) | |||
| one_hot_op = data_trans.OneHot(num_classes=10) | |||
| data1 = data1.map(input_columns=["label"], operations=one_hot_op) | |||
| data1 = data1.map(operations=one_hot_op, input_columns=["label"]) | |||
| mixup_batch_op = vision.MixUpBatch() | |||
| data1 = data1.batch(5, drop_remainder=True) | |||
| data1 = data1.map(input_columns=["image", "label"], operations=mixup_batch_op) | |||
| data1 = data1.map(operations=mixup_batch_op, input_columns=["image", "label"]) | |||
| images_mixup = np.array([]) | |||
| for idx, (image, _) in enumerate(data1): | |||
| @@ -173,7 +174,7 @@ def test_mixup_batch_success4(plot=False): | |||
| # Original Images | |||
| ds_original = ds.CelebADataset(DATA_DIR3, shuffle=False) | |||
| decode_op = vision.Decode() | |||
| ds_original = ds_original.map(input_columns=["image"], operations=[decode_op]) | |||
| ds_original = ds_original.map(operations=[decode_op], input_columns=["image"]) | |||
| ds_original = ds_original.batch(2, drop_remainder=True) | |||
| images_original = None | |||
| @@ -187,14 +188,14 @@ def test_mixup_batch_success4(plot=False): | |||
| data1 = ds.CelebADataset(DATA_DIR3, shuffle=False) | |||
| decode_op = vision.Decode() | |||
| data1 = data1.map(input_columns=["image"], operations=[decode_op]) | |||
| data1 = data1.map(operations=[decode_op], input_columns=["image"]) | |||
| one_hot_op = data_trans.OneHot(num_classes=100) | |||
| data1 = data1.map(input_columns=["attr"], operations=one_hot_op) | |||
| data1 = data1.map(operations=one_hot_op, input_columns=["attr"]) | |||
| mixup_batch_op = vision.MixUpBatch() | |||
| data1 = data1.batch(2, drop_remainder=True) | |||
| data1 = data1.map(input_columns=["image", "attr"], operations=mixup_batch_op) | |||
| data1 = data1.map(operations=mixup_batch_op, input_columns=["image", "attr"]) | |||
| images_mixup = np.array([]) | |||
| for idx, (image, _) in enumerate(data1): | |||
| @@ -224,10 +225,10 @@ def test_mixup_batch_md5(): | |||
| data = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False) | |||
| one_hot_op = data_trans.OneHot(num_classes=10) | |||
| data = data.map(input_columns=["label"], operations=one_hot_op) | |||
| data = data.map(operations=one_hot_op, input_columns=["label"]) | |||
| mixup_batch_op = vision.MixUpBatch() | |||
| data = data.batch(5, drop_remainder=True) | |||
| data = data.map(input_columns=["image", "label"], operations=mixup_batch_op) | |||
| data = data.map(operations=mixup_batch_op, input_columns=["image", "label"]) | |||
| filename = "mixup_batch_c_result.npz" | |||
| save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN) | |||
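`MixUpBatch` is the one operator in this file that must see batched data and both columns at once, which is why every passing case one-hot encodes the labels, batches, and only then maps over ["image", "label"]; several of the failure cases around it exercise exactly those preconditions. A condensed sketch of that ordering, with a placeholder Cifar10 directory:

    import mindspore.dataset as ds
    import mindspore.dataset.transforms.c_transforms as data_trans
    import mindspore.dataset.vision.c_transforms as vision

    DATA_DIR = "/path/to/cifar-10-batches-bin"  # placeholder

    data = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
    # 1. One-hot encode labels so they can be blended as soft labels.
    data = data.map(operations=data_trans.OneHot(num_classes=10), input_columns=["label"])
    # 2. Batch first: MixUpBatch mixes samples within a batch.
    data = data.batch(5, drop_remainder=True)
    # 3. Apply MixUpBatch to both columns together.
    data = data.map(operations=vision.MixUpBatch(2), input_columns=["image", "label"])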
| @@ -259,10 +260,10 @@ def test_mixup_batch_fail1(): | |||
| data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False) | |||
| one_hot_op = data_trans.OneHot(num_classes=10) | |||
| data1 = data1.map(input_columns=["label"], operations=one_hot_op) | |||
| data1 = data1.map(operations=one_hot_op, input_columns=["label"]) | |||
| mixup_batch_op = vision.MixUpBatch(0.1) | |||
| with pytest.raises(RuntimeError) as error: | |||
| data1 = data1.map(input_columns=["image", "label"], operations=mixup_batch_op) | |||
| data1 = data1.map(operations=mixup_batch_op, input_columns=["image", "label"]) | |||
| for idx, (image, _) in enumerate(data1): | |||
| if idx == 0: | |||
| images_mixup = image | |||
| @@ -294,7 +295,7 @@ def test_mixup_batch_fail2(): | |||
| data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False) | |||
| one_hot_op = data_trans.OneHot(num_classes=10) | |||
| data1 = data1.map(input_columns=["label"], operations=one_hot_op) | |||
| data1 = data1.map(operations=one_hot_op, input_columns=["label"]) | |||
| with pytest.raises(ValueError) as error: | |||
| vision.MixUpBatch(-1) | |||
| error_message = "Input is not within the required interval" | |||
| @@ -322,10 +323,10 @@ def test_mixup_batch_fail3(): | |||
| data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False) | |||
| one_hot_op = data_trans.OneHot(num_classes=10) | |||
| data1 = data1.map(input_columns=["label"], operations=one_hot_op) | |||
| data1 = data1.map(operations=one_hot_op, input_columns=["label"]) | |||
| mixup_batch_op = vision.MixUpBatch() | |||
| data1 = data1.batch(5, drop_remainder=True) | |||
| data1 = data1.map(input_columns=["image"], operations=mixup_batch_op) | |||
| data1 = data1.map(operations=mixup_batch_op, input_columns=["image"]) | |||
| with pytest.raises(RuntimeError) as error: | |||
| images_mixup = np.array([]) | |||
| @@ -337,6 +338,7 @@ def test_mixup_batch_fail3(): | |||
| error_message = "Both images and labels columns are required" | |||
| assert error_message in str(error.value) | |||
| def test_mixup_batch_fail4(): | |||
| """ | |||
| Test MixUpBatch Fail 2 | |||
| @@ -359,7 +361,7 @@ def test_mixup_batch_fail4(): | |||
| data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False) | |||
| one_hot_op = data_trans.OneHot(num_classes=10) | |||
| data1 = data1.map(input_columns=["label"], operations=one_hot_op) | |||
| data1 = data1.map(operations=one_hot_op, input_columns=["label"]) | |||
| with pytest.raises(ValueError) as error: | |||
| vision.MixUpBatch(0.0) | |||
| error_message = "Input is not within the required interval" | |||
| @@ -389,7 +391,7 @@ def test_mixup_batch_fail5(): | |||
| mixup_batch_op = vision.MixUpBatch() | |||
| data1 = data1.batch(5, drop_remainder=True) | |||
| data1 = data1.map(input_columns=["image", "label"], operations=mixup_batch_op) | |||
| data1 = data1.map(operations=mixup_batch_op, input_columns=["image", "label"]) | |||
| with pytest.raises(RuntimeError) as error: | |||
| images_mixup = np.array([]) | |||
| @@ -39,7 +39,7 @@ def test_multiple_ngrams(): | |||
| yield (np.array(line.split(" "), dtype='S'),) | |||
| dataset = ds.GeneratorDataset(gen(plates_mottos), column_names=["text"]) | |||
| dataset = dataset.map(input_columns=["text"], operations=text.Ngram([1, 2, 3], ("_", 2), ("_", 2), " ")) | |||
| dataset = dataset.map(operations=text.Ngram([1, 2, 3], ("_", 2), ("_", 2), " "), input_columns="text") | |||
| i = 0 | |||
| for data in dataset.create_dict_iterator(num_epochs=1): | |||
| @@ -61,7 +61,7 @@ def test_simple_ngram(): | |||
| yield (np.array(line.split(" "), dtype='S'),) | |||
| dataset = ds.GeneratorDataset(gen(plates_mottos), column_names=["text"]) | |||
| dataset = dataset.map(input_columns=["text"], operations=text.Ngram(3, separator=" ")) | |||
| dataset = dataset.map(operations=text.Ngram(3, separator=" "), input_columns="text") | |||
| i = 0 | |||
| for data in dataset.create_dict_iterator(num_epochs=1): | |||
| @@ -78,7 +78,7 @@ def test_corner_cases(): | |||
| try: | |||
| dataset = ds.GeneratorDataset(gen(input_line), column_names=["text"]) | |||
| dataset = dataset.map(input_columns=["text"], operations=text.Ngram(n, l_pad, r_pad, separator=sep)) | |||
| dataset = dataset.map(operations=text.Ngram(n, l_pad, r_pad, separator=sep), input_columns=["text"]) | |||
| for data in dataset.create_dict_iterator(num_epochs=1): | |||
| return [d.decode("utf8") for d in data["text"]] | |||
| except (ValueError, TypeError) as e: | |||
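`text.Ngram` takes the n value (or a list of them) first, then the left/right padding tuples and the separator; the map around it now leads with `operations` like everywhere else. A small sketch mirroring the tests' generator, with stand-in mottos:

    import numpy as np
    import mindspore.dataset as ds
    import mindspore.dataset.text as text

    def gen(lines):
        for line in lines:
            yield (np.array(line.split(" "), dtype='S'),)

    dataset = ds.GeneratorDataset(gen(["friendly manitoba", "yours to discover"]),
                                  column_names=["text"])
    # 2-grams joined by a space; ("_", 2) pads two "_" tokens on each side.
    dataset = dataset.map(operations=text.Ngram(2, ("_", 2), ("_", 2), " "),
                          input_columns=["text"])
    for row in dataset.create_dict_iterator(num_epochs=1):
        print([d.decode("utf8") for d in row["text"]])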
| @@ -32,10 +32,10 @@ def test_on_tokenized_line(): | |||
| for line in f: | |||
| word = line.split(',')[0] | |||
| jieba_op.add_word(word) | |||
| data = data.map(input_columns=["text"], operations=jieba_op) | |||
| data = data.map(operations=jieba_op, input_columns=["text"]) | |||
| vocab = text.Vocab.from_file(VOCAB_FILE, ",", special_tokens=["<pad>", "<unk>"]) | |||
| lookup = text.Lookup(vocab, "<unk>") | |||
| data = data.map(input_columns=["text"], operations=lookup) | |||
| data = data.map(operations=lookup, input_columns=["text"]) | |||
| res = np.array([[10, 1, 11, 1, 12, 1, 15, 1, 13, 1, 14], | |||
| [11, 1, 12, 1, 10, 1, 14, 1, 13, 1, 15]], dtype=np.int32) | |||
| for i, d in enumerate(data.create_dict_iterator(num_epochs=1)): | |||
| @@ -50,10 +50,10 @@ def test_on_tokenized_line_with_no_special_tokens(): | |||
| word = line.split(',')[0] | |||
| jieba_op.add_word(word) | |||
| data = data.map(input_columns=["text"], operations=jieba_op) | |||
| data = data.map(operations=jieba_op, input_columns=["text"]) | |||
| vocab = text.Vocab.from_file(VOCAB_FILE, ",") | |||
| lookup = text.Lookup(vocab, "not") | |||
| data = data.map(input_columns=["text"], operations=lookup) | |||
| data = data.map(operations=lookup, input_columns=["text"]) | |||
| res = np.array([[8, 0, 9, 0, 10, 0, 13, 0, 11, 0, 12], | |||
| [9, 0, 10, 0, 8, 0, 12, 0, 11, 0, 13]], dtype=np.int32) | |||
| for i, d in enumerate(data.create_dict_iterator(num_epochs=1)): | |||
| @@ -51,8 +51,8 @@ def util_test_normalize(mean, std, op_type): | |||
| normalize_op = c_vision.Normalize(mean, std) | |||
| # Generate dataset | |||
| data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) | |||
| data = data.map(input_columns=["image"], operations=decode_op) | |||
| data = data.map(input_columns=["image"], operations=normalize_op) | |||
| data = data.map(operations=decode_op, input_columns=["image"]) | |||
| data = data.map(operations=normalize_op, input_columns=["image"]) | |||
| elif op_type == "python": | |||
| # define map operations | |||
| transforms = [ | |||
| @@ -63,7 +63,7 @@ def util_test_normalize(mean, std, op_type): | |||
| transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) | |||
| # Generate dataset | |||
| data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) | |||
| data = data.map(input_columns=["image"], operations=transform) | |||
| data = data.map(operations=transform, input_columns=["image"]) | |||
| else: | |||
| raise ValueError("Wrong parameter value") | |||
| return data | |||
| @@ -82,7 +82,7 @@ def util_test_normalize_grayscale(num_output_channels, mean, std): | |||
| transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) | |||
| # Generate dataset | |||
| data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) | |||
| data = data.map(input_columns=["image"], operations=transform) | |||
| data = data.map(operations=transform, input_columns=["image"]) | |||
| return data | |||
| @@ -99,12 +99,12 @@ def test_normalize_op_c(plot=False): | |||
| # First dataset | |||
| data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) | |||
| data1 = data1.map(input_columns=["image"], operations=decode_op) | |||
| data1 = data1.map(input_columns=["image"], operations=normalize_op) | |||
| data1 = data1.map(operations=decode_op, input_columns=["image"]) | |||
| data1 = data1.map(operations=normalize_op, input_columns=["image"]) | |||
| # Second dataset | |||
| data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) | |||
| data2 = data2.map(input_columns=["image"], operations=decode_op) | |||
| data2 = data2.map(operations=decode_op, input_columns=["image"]) | |||
| num_iter = 0 | |||
| for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): | |||
| @@ -136,12 +136,12 @@ def test_normalize_op_py(plot=False): | |||
| # First dataset | |||
| data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) | |||
| data1 = data1.map(input_columns=["image"], operations=transform) | |||
| data1 = data1.map(input_columns=["image"], operations=normalize_op) | |||
| data1 = data1.map(operations=transform, input_columns=["image"]) | |||
| data1 = data1.map(operations=normalize_op, input_columns=["image"]) | |||
| # Second dataset | |||
| data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) | |||
| data2 = data2.map(input_columns=["image"], operations=transform) | |||
| data2 = data2.map(operations=transform, input_columns=["image"]) | |||
| num_iter = 0 | |||
| for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): | |||
| @@ -169,7 +169,7 @@ def test_decode_op(): | |||
| decode_op = c_vision.Decode() | |||
| # apply map operations on images | |||
| data1 = data1.map(input_columns=["image"], operations=decode_op) | |||
| data1 = data1.map(operations=decode_op, input_columns=["image"]) | |||
| num_iter = 0 | |||
| for item in data1.create_dict_iterator(num_epochs=1): | |||
| @@ -192,7 +192,7 @@ def test_decode_normalize_op(): | |||
| normalize_op = c_vision.Normalize([121.0, 115.0, 100.0], [70.0, 68.0, 71.0]) | |||
| # apply map operations on images | |||
| data1 = data1.map(input_columns=["image"], operations=[decode_op, normalize_op]) | |||
| data1 = data1.map(operations=[decode_op, normalize_op], input_columns=["image"]) | |||
| num_iter = 0 | |||
| for item in data1.create_dict_iterator(num_epochs=1): | |||
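The decode-and-normalize hunk shows that a list passed to `operations` is applied in order inside a single map node, equivalent to the two separate maps used earlier in the file. A sketch of the fused form with placeholder TFRecord paths:

    import mindspore.dataset as ds
    import mindspore.dataset.vision.c_transforms as c_vision

    DATA_DIR = ["/path/to/images.tfrecord"]   # placeholder
    SCHEMA_DIR = "/path/to/schema.json"       # placeholder

    decode_op = c_vision.Decode()
    normalize_op = c_vision.Normalize([121.0, 115.0, 100.0], [70.0, 68.0, 71.0])

    data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    # Both operators run in sequence inside one map.
    data = data.map(operations=[decode_op, normalize_op], input_columns=["image"])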
| @@ -47,13 +47,14 @@ def test_one_hot(): | |||
| # First dataset | |||
| data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False) | |||
| one_hot_op = data_trans.OneHot(num_classes=depth) | |||
| data1 = data1.map(input_columns=["label"], operations=one_hot_op, column_order=["label"]) | |||
| data1 = data1.map(operations=one_hot_op, input_columns=["label"], column_order=["label"]) | |||
| # Second dataset | |||
| data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["label"], shuffle=False) | |||
| assert dataset_equal_with_function(data1, data2, 0, one_hot, depth) | |||
| def test_one_hot_post_aug(): | |||
| """ | |||
| Test One Hot Encoding after Multiple Data Augmentation Operators | |||
| @@ -72,14 +73,14 @@ def test_one_hot_post_aug(): | |||
| resize_op = c_vision.Resize((resize_height, resize_width)) | |||
| # Apply map operations on images | |||
| data1 = data1.map(input_columns=["image"], operations=decode_op) | |||
| data1 = data1.map(input_columns=["image"], operations=rescale_op) | |||
| data1 = data1.map(input_columns=["image"], operations=resize_op) | |||
| data1 = data1.map(operations=decode_op, input_columns=["image"]) | |||
| data1 = data1.map(operations=rescale_op, input_columns=["image"]) | |||
| data1 = data1.map(operations=resize_op, input_columns=["image"]) | |||
| # Apply one-hot encoding on labels | |||
| depth = 4 | |||
| one_hot_encode = data_trans.OneHot(depth) | |||
| data1 = data1.map(input_columns=["label"], operations=one_hot_encode) | |||
| data1 = data1.map(operations=one_hot_encode, input_columns=["label"]) | |||
| # Apply datasets ops | |||
| buffer_size = 100 | |||
| @@ -16,6 +16,7 @@ import numpy as np | |||
| import mindspore.dataset as ds | |||
| # tests the construction of multiple ops from a single dataset. | |||
| # map dataset with columns order arguments should produce a ProjectOp over MapOp | |||
| # This test does not utilize the compiling passes at this time. | |||
| @@ -27,12 +28,13 @@ def test_map_reorder0(): | |||
| # Generator -> Map | |||
| data0 = ds.GeneratorDataset(generator_mc, ["col0", "col1"]) | |||
| data0 = data0.map(input_columns="col0", output_columns="out", column_order=["col1", "out"], | |||
| operations=(lambda x: x)) | |||
| data0 = data0.map(operations=(lambda x: x), input_columns="col0", output_columns="out", | |||
| column_order=["col1", "out"]) | |||
| for item in data0.create_tuple_iterator(num_epochs=1): # each data is a dictionary | |||
| assert item == [np.array(1), np.array(0)] | |||
| # tests the construction of multiple ops from a single dataset. | |||
| # map dataset with columns order arguments should produce a ProjectOp over MapOp | |||
| # This test does not utilize the compiling passes at this time. | |||
| @@ -43,20 +45,20 @@ def test_map_reorder1(): | |||
| # Three map and zip | |||
| data0 = ds.GeneratorDataset(generator_mc, ["a0", "a1", "a2"]) | |||
| data0 = data0.map(input_columns="a0", column_order=["a2", "a1", "a0"], operations=(lambda x: x)) | |||
| data0 = data0.map(operations=(lambda x: x), input_columns="a0", column_order=["a2", "a1", "a0"]) | |||
| data1 = ds.GeneratorDataset(generator_mc, ["b0", "b1", "b2"]) | |||
| data1 = data1.map(input_columns="b0", column_order=["b1", "b2", "b0"], operations=(lambda x: x)) | |||
| data1 = data1.map(operations=(lambda x: x), input_columns="b0", column_order=["b1", "b2", "b0"]) | |||
| data2 = ds.zip((data0, data1)) | |||
| data2 = data2.map(input_columns="a0", column_order=["b2", "a2", "b1", "a1", "b0", "a0"], operations=(lambda x: x)) | |||
| data2 = data2.map(operations=(lambda x: x), input_columns="a0", column_order=["b2", "a2", "b1", "a1", "b0", "a0"]) | |||
| for item in data2.create_tuple_iterator(num_epochs=1): | |||
| assert item == [np.array(2), np.array(2), np.array(1), np.array(1), np.array(0), np.array(0)] | |||
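After zip, the column_order argument on a single map can interleave and reorder columns coming from both branches, which is what the assertion above checks. A sketch with hypothetical two-column generators:

import numpy as np
import mindspore.dataset as ds

# Hypothetical two-column source used for both branches of the zip.
def gen_ab():
    for i in range(2):
        yield (np.array(i), np.array(i + 10))

left = ds.GeneratorDataset(gen_ab, ["a0", "a1"])
right = ds.GeneratorDataset(gen_ab, ["b0", "b1"])
zipped = ds.zip((left, right))
# One map over the zipped dataset reorders the combined column set.
zipped = zipped.map(operations=(lambda x: x), input_columns="a0",
                    column_order=["b1", "a1", "b0", "a0"])
for item in zipped.create_tuple_iterator(num_epochs=1):
    print(item)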
| # tests the construction of multiple ops from a single dataset. | |||
| # TFRecordDataset with global shuffle should produce a ShuffleOp over TfReaderOp. | |||
| # This test does not utilize the compiling passes at this time. | |||
| def test_shuffle(): | |||
| FILES = ["../data/dataset/testTFTestAllTypes/test.data"] | |||
| SCHEMA_FILE = "../data/dataset/testTFTestAllTypes/datasetSchema.json" | |||
| @@ -44,7 +44,7 @@ def test_pad_op(): | |||
| pad_op, | |||
| ] | |||
| data1 = data1.map(input_columns=["image"], operations=ctrans) | |||
| data1 = data1.map(operations=ctrans, input_columns=["image"]) | |||
| # Second dataset | |||
| transforms = [ | |||
| @@ -54,7 +54,7 @@ def test_pad_op(): | |||
| ] | |||
| transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) | |||
| data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) | |||
| data2 = data2.map(input_columns=["image"], operations=transform) | |||
| data2 = data2.map(operations=transform, input_columns=["image"]) | |||
| for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): | |||
| c_image = item1["image"] | |||
| @@ -88,11 +88,11 @@ def test_pad_grayscale(): | |||
| transform = mindspore.dataset.transforms.py_transforms.Compose(transforms) | |||
| data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) | |||
| data1 = data1.map(input_columns=["image"], operations=transform) | |||
| data1 = data1.map(operations=transform, input_columns=["image"]) | |||
| # if input is grayscale, the output dimensions should be single channel | |||
| pad_gray = c_vision.Pad(100, fill_value=(20, 20, 20)) | |||
| data1 = data1.map(input_columns=["image"], operations=pad_gray) | |||
| data1 = data1.map(operations=pad_gray, input_columns=["image"]) | |||
| dataset_shape_1 = [] | |||
| for item1 in data1.create_dict_iterator(num_epochs=1): | |||
| c_image = item1["image"] | |||
| @@ -106,7 +106,7 @@ def test_pad_grayscale(): | |||
| ctrans = [decode_op, pad_gray] | |||
| dataset_shape_2 = [] | |||
| data2 = data2.map(input_columns=["image"], operations=ctrans) | |||
| data2 = data2.map(operations=ctrans, input_columns=["image"]) | |||
| for item2 in data2.create_dict_iterator(num_epochs=1): | |||
| c_image = item2["image"] | |||
| @@ -132,7 +132,7 @@ def test_pad_md5(): | |||
| pad_op, | |||
| ] | |||
| data1 = data1.map(input_columns=["image"], operations=ctrans) | |||
| data1 = data1.map(operations=ctrans, input_columns=["image"]) | |||
| # Second dataset | |||
| data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) | |||
| @@ -142,7 +142,7 @@ def test_pad_md5(): | |||
| py_vision.ToTensor(), | |||
| ] | |||
| transform = mindspore.dataset.transforms.py_transforms.Compose(pytrans) | |||
| data2 = data2.map(input_columns=["image"], operations=transform) | |||
| data2 = data2.map(operations=transform, input_columns=["image"]) | |||
| # Compare with expected md5 from images | |||
| filename1 = "pad_01_c_result.npz" | |||
| save_and_check_md5(data1, filename1, generate_golden=GENERATE_GOLDEN) | |||
| @@ -127,7 +127,7 @@ def batch_padding_performance_1d(): | |||
| cifar10_dir = "../data/dataset/testCifar10Data" | |||
| data1 = ds.Cifar10Dataset(cifar10_dir, shuffle=False) # shape = [32,32,3] | |||
| data1 = data1.repeat(24) | |||
| data1 = data1.map(input_columns="image", operations=(lambda x: x.reshape(-1))) | |||
| data1 = data1.map(operations=(lambda x: x.reshape(-1)), input_columns="image") | |||
| pad_info = {"image": ([3888], 0)} # 3888 =36*36*3 | |||
| # pad_info = None | |||
| data1 = data1.batch(batch_size=24, drop_remainder=True, pad_info=pad_info) | |||
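For reference, pad_info on its own: batch pads the named column to the given shape with the given fill value, so no explicit np.pad map is needed. A small sketch with a hypothetical variable-length generator:

import numpy as np
import mindspore.dataset as ds

# Hypothetical 1-D rows of lengths 1, 2 and 3.
def var_len_gen():
    for n in range(1, 4):
        yield (np.zeros(n, dtype=np.float32),)

data = ds.GeneratorDataset(var_len_gen, ["image"])
# pad_info pads "image" to shape [4] with fill value 0 before the rows are stacked.
data = data.batch(batch_size=3, drop_remainder=True, pad_info={"image": ([4], 0)})
for item in data.create_dict_iterator(num_epochs=1):
    print(item["image"].shape)  # (3, 4)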
| @@ -144,7 +144,7 @@ def batch_pyfunc_padding_3d(): | |||
| data1 = ds.Cifar10Dataset(cifar10_dir, shuffle=False) # shape = [32,32,3] | |||
| data1 = data1.repeat(24) | |||
| # pad_info = {"image": ([36, 36, 3], 0)} | |||
| data1 = data1.map(input_columns="image", operations=(lambda x: np.pad(x, ((0, 4), (0, 4), (0, 0)))), | |||
| data1 = data1.map(operations=(lambda x: np.pad(x, ((0, 4), (0, 4), (0, 0)))), input_columns="image", | |||
| python_multiprocessing=False) | |||
| data1 = data1.batch(batch_size=24, drop_remainder=True) | |||
| start_time = time.time() | |||
| @@ -159,8 +159,8 @@ def batch_pyfunc_padding_1d(): | |||
| cifar10_dir = "../data/dataset/testCifar10Data" | |||
| data1 = ds.Cifar10Dataset(cifar10_dir, shuffle=False) # shape = [32,32,3] | |||
| data1 = data1.repeat(24) | |||
| data1 = data1.map(input_columns="image", operations=(lambda x: x.reshape(-1))) | |||
| data1 = data1.map(input_columns="image", operations=(lambda x: np.pad(x, (0, 816))), python_multiprocessing=False) | |||
| data1 = data1.map(operations=(lambda x: x.reshape(-1)), input_columns="image") | |||
| data1 = data1.map(operations=(lambda x: np.pad(x, (0, 816))), input_columns="image", python_multiprocessing=False) | |||
| data1 = data1.batch(batch_size=24, drop_remainder=True) | |||
| start_time = time.time() | |||
| num_batches = 0 | |||
| @@ -176,8 +176,8 @@ def test_pad_via_map(): | |||
| def pad_map_config(): | |||
| data1 = ds.Cifar10Dataset(cifar10_dir, shuffle=False, num_samples=1000) # shape = [32,32,3] | |||
| data1 = data1.map(input_columns="image", operations=(lambda x: x.reshape(-1))) # reshape to 1d | |||
| data1 = data1.map(input_columns="image", operations=(lambda x: np.pad(x, (0, 816)))) | |||
| data1 = data1.map(operations=(lambda x: x.reshape(-1)), input_columns="image") # reshape to 1d | |||
| data1 = data1.map(operations=(lambda x: np.pad(x, (0, 816))), input_columns="image") | |||
| data1 = data1.batch(batch_size=25, drop_remainder=True) | |||
| res = [] | |||
| for data in data1.create_dict_iterator(num_epochs=1): | |||
| @@ -186,7 +186,7 @@ def test_pad_via_map(): | |||
| def pad_batch_config(): | |||
| data2 = ds.Cifar10Dataset(cifar10_dir, shuffle=False, num_samples=1000) # shape = [32,32,3] | |||
| data2 = data2.map(input_columns="image", operations=(lambda x: x.reshape(-1))) # reshape to 1d | |||
| data2 = data2.map(operations=(lambda x: x.reshape(-1)), input_columns="image") # reshape to 1d | |||
| data2 = data2.batch(batch_size=25, drop_remainder=True, pad_info={"image": ([3888], 0)}) | |||
| res = [] | |||
| for data in data2.create_dict_iterator(num_epochs=1): | |||
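Both configs above yield the same padded batches. As a standalone sketch under the new argument order (path and sizes taken from the test: 32*32*3 = 3072 after flattening, padded by 816 to 3888), the map-based variant is:

import numpy as np
import mindspore.dataset as ds

cifar10_dir = "../data/dataset/testCifar10Data"  # path as used by the test fixtures
data = ds.Cifar10Dataset(cifar10_dir, shuffle=False, num_samples=1000)               # images are [32, 32, 3]
data = data.map(operations=(lambda x: x.reshape(-1)), input_columns="image")         # flatten to 3072
data = data.map(operations=(lambda x: np.pad(x, (0, 816))), input_columns="image")   # 3072 + 816 = 3888
data = data.batch(batch_size=25, drop_remainder=True)
# Equivalent: keep the reshape map, drop the np.pad map, and pass
# pad_info={"image": ([3888], 0)} to batch, as pad_batch_config does.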