|
- # Copyright (c) Alibaba, Inc. and its affiliates.
-
- import time
- from collections.abc import Sequence
- from typing import Mapping
-
- import numpy as np
- import torch
-
- from .builder import PREPROCESSORS, build_preprocessor
-
-
@PREPROCESSORS.register_module()
class Compose(object):
    """Compose a data pipeline with a sequence of transforms.

    Args:
        transforms (list[dict | callable]):
            Either config dicts of transforms or transform objects.
        field_name (optional): Group passed to ``build_preprocessor`` when
            a transform is given as a config dict. If building under this
            group raises ``KeyError``, the build is retried with ``None``
            (the default group). Defaults to None.
        profiling (bool, optional): If set True, will profile and
            print preprocess time for each step.

    Raises:
        TypeError: If an item of ``transforms`` is neither a dict nor a
            callable.
    """

    def __init__(self, transforms, field_name=None, profiling=False):
        assert isinstance(transforms, Sequence)
        self.profiling = profiling
        self.field_name = field_name
        self.transforms = []
        for transform in transforms:
            if isinstance(transform, dict):
                transform = self._build_from_cfg(transform)
            elif not callable(transform):
                raise TypeError('transform must be callable or a dict, but got'
                                f' {type(transform)}')
            self.transforms.append(transform)

    def _build_from_cfg(self, cfg):
        """Build one transform from its config dict, with group fallback."""
        try:
            return build_preprocessor(cfg, self.field_name)
        except KeyError:
            # With no explicit group there is nothing to fall back to, so
            # the lookup failure is reported to the caller unchanged.
            if self.field_name is None:
                raise
        # Not found under ``field_name``: retry in the default group.
        return build_preprocessor(cfg, None)

    def __call__(self, data):
        """Apply every transform in order.

        Args:
            data: The sample handed to the first transform.

        Returns:
            The output of the last transform, or None as soon as any
            transform returns None (remaining transforms are skipped).
        """
        for t in self.transforms:
            if self.profiling:
                # perf_counter is monotonic and high resolution, so the
                # measured step duration is unaffected by system clock
                # adjustments, unlike time.time().
                start = time.perf_counter()
                data = t(data)
                print(f'{t} time {time.perf_counter()-start}')
            else:
                data = t(data)

            if data is None:
                return None
        return data

    def __repr__(self):
        body = ''.join(f'\n {t}' for t in self.transforms)
        return self.__class__.__name__ + '(' + body + '\n)'
-
-
def to_tensor(data):
    """Turn a supported python object into a :obj:`torch.Tensor`.

    Handled inputs, checked in this order:

    * :class:`torch.Tensor` -- returned as is.
    * :class:`numpy.ndarray` -- passed to ``torch.from_numpy``.
    * :class:`Sequence` other than ``str`` -- passed to ``torch.tensor``.
    * :class:`int` -- wrapped in a one-element ``torch.LongTensor``.
    * :class:`float` -- wrapped in a one-element ``torch.FloatTensor``.

    Args:
        data (torch.Tensor | numpy.ndarray | Sequence | int | float): Data to
            be converted.

    Returns:
        torch.Tensor: The converted data.

    Raises:
        TypeError: If ``data`` is none of the supported types (this
            includes ``str``).
    """
    if isinstance(data, torch.Tensor):
        return data
    if isinstance(data, np.ndarray):
        return torch.from_numpy(data)
    # Strings are sequences too, but must not be converted element-wise.
    if not isinstance(data, str) and isinstance(data, Sequence):
        return torch.tensor(data)

    # Scalars: int is tested before float, matching the documented order.
    scalar_wrappers = ((int, torch.LongTensor), (float, torch.FloatTensor))
    for scalar_type, tensor_cls in scalar_wrappers:
        if isinstance(data, scalar_type):
            return tensor_cls([data])

    raise TypeError(f'type {type(data)} cannot be converted to tensor.')
-
-
@PREPROCESSORS.register_module()
class ToTensor(object):
    """Convert target object to tensor.

    Args:
        keys (Sequence[str]): Key of data to be converted to Tensor.
            Only valid when data is type of `Mapping`. If `keys` is None,
            all values of keys will be converted to tensor by default.
    """

    def __init__(self, keys=None):
        self.keys = keys

    def __call__(self, data):
        """Convert ``data`` (or the selected values of it) with ``to_tensor``.

        Args:
            data: Either a `Mapping`, whose selected values are replaced
                in place by their tensor versions, or any other object,
                which is converted as a whole.

        Returns:
            The same mapping object with converted values, or the
            converted object when ``data`` is not a `Mapping`.
        """
        if not isinstance(data, Mapping):
            return to_tensor(data)

        # Resolve the key list per call instead of storing it on the
        # instance: caching the first sample's keys in ``self.keys`` would
        # make later samples with a different key set fail with KeyError
        # or leave their extra keys unconverted.
        keys = list(data.keys()) if self.keys is None else self.keys
        for key in keys:
            data[key] = to_tensor(data[key])
        return data

    def __repr__(self):
        return self.__class__.__name__ + f'(keys={self.keys})'
-
-
@PREPROCESSORS.register_module()
class Filter(object):
    """Keep only a fixed set of keys from a mapping.

    This is usually the last stage of the dataloader transform: only data
    of reserved keys will be kept and passed directly to the model, others
    will be removed.

    Args:
        reserved_keys (Sequence[str]): Keys of data to be reserved, others
            will be removed.
    """

    def __init__(self, reserved_keys):
        self.reserved_keys = reserved_keys

    def __call__(self, data):
        """Return a new dict holding only the reserved entries of ``data``.

        A reserved key that is missing from ``data`` raises ``KeyError``.
        """
        assert isinstance(data, Mapping)
        return {name: data[name] for name in self.reserved_keys}

    def __repr__(self):
        cls_name = self.__class__.__name__
        return cls_name + f'(keys={self.reserved_keys})'
|