You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

c_transforms.py 14 kB

5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388
  1. # Copyright 2019 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ==============================================================================
  15. """
  16. The module transforms.c_transforms provides common operations, including OneHotOp and TypeCast.
  17. """
  18. from enum import IntEnum
  19. import numpy as np
  20. import mindspore.common.dtype as mstype
  21. import mindspore._c_dataengine as cde
  22. from .validators import check_num_classes, check_de_type, check_fill_value, check_slice_option, check_slice_op, \
  23. check_mask_op, check_pad_end, check_concat_type, check_random_transform_ops
  24. from ..core.datatypes import mstype_to_detype
  25. class OneHot(cde.OneHotOp):
  26. """
  27. Tensor operation to apply one hot encoding.
  28. Args:
  29. num_classes (int): Number of classes of the label.
  30. It should be larger than the largest label number in the dataset.
  31. Raises:
  32. RuntimeError: feature size is bigger than num_classes.
  33. Examples:
  34. >>> import mindspore.dataset.transforms.c_transforms as c_transforms
  35. >>> import mindspore.dataset.vision.c_transforms as c_vision
  36. >>>
  37. >>> onehot_op = c_transforms.OneHot(num_classes=10)
  38. >>> data1 = data1.map(operations=onehot_op, input_columns=["label"])
  39. >>> mixup_batch_op = c_vision.MixUpBatch(alpha=0.8)
  40. >>> data1 = data1.batch(4)
  41. >>> data1 = data1.map(operations=mixup_batch_op, input_columns=["image", "label"])
  42. """
  43. @check_num_classes
  44. def __init__(self, num_classes):
  45. self.num_classes = num_classes
  46. super().__init__(num_classes)
  47. class Fill(cde.FillOp):
  48. """
  49. Tensor operation to create a tensor filled with input scalar value.
  50. The output tensor will have the same shape and type as the input tensor.
  51. Args:
  52. fill_value (Union[str, bytes, int, float, bool])) : scalar value
  53. to fill created tensor with.
  54. Examples:
  55. >>> import mindspore.dataset.transforms.c_transforms as c_transforms
  56. >>>
  57. >>> fill_op = c_transforms.Fill(3)
  58. """
  59. @check_fill_value
  60. def __init__(self, fill_value):
  61. super().__init__(cde.Tensor(np.array(fill_value)))
  62. class TypeCast(cde.TypeCastOp):
  63. """
  64. Tensor operation to cast to a given MindSpore data type.
  65. Args:
  66. data_type (mindspore.dtype): mindspore.dtype to be cast to.
  67. Examples:
  68. >>> import mindspore.dataset.transforms.c_transforms as c_transforms
  69. >>> import mindspore.common.dtype as mstype
  70. >>>
  71. >>> type_cast_op = c_transforms.TypeCast(mstype.int32)
  72. """
  73. @check_de_type
  74. def __init__(self, data_type):
  75. data_type = mstype_to_detype(data_type)
  76. self.data_type = str(data_type)
  77. super().__init__(data_type)
  78. class _SliceOption(cde.SliceOption):
  79. """
  80. Internal class SliceOption to be used with SliceOperation
  81. Args:
  82. _SliceOption(Union[int, list(int), slice, None, Ellipses, bool, _SliceOption]):
  83. 1. :py:obj:`int`: Slice this index only along the dimension. Negative index is supported.
  84. 2. :py:obj:`list(int)`: Slice these indices along the dimension. Negative indices are supported.
  85. 3. :py:obj:`slice`: Slice the generated indices from the slice object along the dimension.
  86. 4. :py:obj:`None`: Slice the whole dimension. Similar to `:` in Python indexing.
  87. 5. :py:obj:`Ellipses`: Slice the whole dimension. Similar to `:` in Python indexing.
  88. 6. :py:obj:`boolean`: Slice the whole dimension. Similar to `:` in Python indexing.
  89. """
  90. @check_slice_option
  91. def __init__(self, slice_option):
  92. if isinstance(slice_option, int) and not isinstance(slice_option, bool):
  93. slice_option = [slice_option]
  94. elif slice_option is Ellipsis:
  95. slice_option = True
  96. elif slice_option is None:
  97. slice_option = True
  98. super().__init__(slice_option)
  99. class Slice(cde.SliceOp):
  100. """
  101. Slice operation to extract a tensor out using the given n slices.
  102. The functionality of Slice is similar to NumPy's indexing feature.
  103. (Currently only rank-1 tensors are supported).
  104. Args:
  105. *slices(Union[int, list(int), slice, None, Ellipses]):
  106. Maximum `n` number of arguments to slice a tensor of rank `n`.
  107. One object in slices can be one of:
  108. 1. :py:obj:`int`: Slice this index only along the first dimension. Negative index is supported.
  109. 2. :py:obj:`list(int)`: Slice these indices along the first dimension. Negative indices are supported.
  110. 3. :py:obj:`slice`: Slice the generated indices from the slice object along the first dimension.
  111. Similar to `start:stop:step`.
  112. 4. :py:obj:`None`: Slice the whole dimension. Similar to `:` in Python indexing.
  113. 5. :py:obj:`Ellipses`: Slice the whole dimension. Similar to `:` in Python indexing.
  114. Examples:
  115. >>> import mindspore.dataset.transforms.c_transforms as c_transforms
  116. >>>
  117. >>> # Data before
  118. >>> # | col |
  119. >>> # +---------+
  120. >>> # | [1,2,3] |
  121. >>> # +---------|
  122. >>> data1 = data1.map(operations=c_transforms.Slice(slice(1,3))) # slice indices 1 and 2 only
  123. >>> # Data after
  124. >>> # | col |
  125. >>> # +---------+
  126. >>> # | [2,3] |
  127. >>> # +---------|
  128. """
  129. @check_slice_op
  130. def __init__(self, *slices):
  131. slice_input_ = list(slices)
  132. slice_input_ = [_SliceOption(slice_dim) for slice_dim in slice_input_]
  133. super().__init__(slice_input_)
  134. class Relational(IntEnum):
  135. EQ = 0
  136. NE = 1
  137. GT = 2
  138. GE = 3
  139. LT = 4
  140. LE = 5
  141. DE_C_RELATIONAL = {Relational.EQ: cde.RelationalOp.EQ,
  142. Relational.NE: cde.RelationalOp.NE,
  143. Relational.GT: cde.RelationalOp.GT,
  144. Relational.GE: cde.RelationalOp.GE,
  145. Relational.LT: cde.RelationalOp.LT,
  146. Relational.LE: cde.RelationalOp.LE}
  147. class Mask(cde.MaskOp):
  148. """
  149. Mask content of the input tensor with the given predicate.
  150. Any element of the tensor that matches the predicate will be evaluated to True, otherwise False.
  151. Args:
  152. operator (Relational): One of the relational operators EQ, NE LT, GT, LE or GE
  153. constant (Union[str, int, float, bool]): Constant to be compared to.
  154. Constant will be cast to the type of the input tensor.
  155. dtype (mindspore.dtype, optional): Type of the generated mask (Default to bool).
  156. Examples:
  157. >>> import mindspore.dataset.transforms.c_transforms as c_transforms
  158. >>>
  159. >>> # Data before
  160. >>> # | col1 |
  161. >>> # +---------+
  162. >>> # | [1,2,3] |
  163. >>> # +---------+
  164. >>> data1 = data1.map(operations=c_transforms.Mask(Relational.EQ, 2))
  165. >>> # Data after
  166. >>> # | col1 |
  167. >>> # +--------------------+
  168. >>> # | [False,True,False] |
  169. >>> # +--------------------+
  170. """
  171. @check_mask_op
  172. def __init__(self, operator, constant, dtype=mstype.bool_):
  173. dtype = mstype_to_detype(dtype)
  174. constant = cde.Tensor(np.array(constant))
  175. super().__init__(DE_C_RELATIONAL[operator], constant, dtype)
  176. class PadEnd(cde.PadEndOp):
  177. """
  178. Pad input tensor according to `pad_shape`, need to have same rank.
  179. Args:
  180. pad_shape (list(int)): List of integers representing the shape needed. Dimensions that set to `None` will
  181. not be padded (i.e., original dim will be used). Shorter dimensions will truncate the values.
  182. pad_value (Union[str, bytes, int, float, bool]), optional): Value used to pad. Default to 0 or empty
  183. string in case of tensors of strings.
  184. Examples:
  185. >>> import mindspore.dataset.transforms.c_transforms as c_transforms
  186. >>>
  187. >>> # Data before
  188. >>> # | col |
  189. >>> # +---------+
  190. >>> # | [1,2,3] |
  191. >>> # +---------|
  192. >>> data1 = data1.map(operations=c_transforms.PadEnd(pad_shape=[4], pad_value=10))
  193. >>> # Data after
  194. >>> # | col |
  195. >>> # +------------+
  196. >>> # | [1,2,3,10] |
  197. >>> # +------------|
  198. """
  199. @check_pad_end
  200. def __init__(self, pad_shape, pad_value=None):
  201. if pad_value is not None:
  202. pad_value = cde.Tensor(np.array(pad_value))
  203. super().__init__(cde.TensorShape(pad_shape), pad_value)
  204. class Concatenate(cde.ConcatenateOp):
  205. """
  206. Tensor operation that concatenates all columns into a single tensor.
  207. Args:
  208. axis (int, optional): Concatenate the tensors along given axis (Default=0).
  209. prepend (numpy.array, optional): NumPy array to be prepended to the already concatenated tensors (Default=None).
  210. append (numpy.array, optional): NumPy array to be appended to the already concatenated tensors (Default=None).
  211. Examples:
  212. >>> import mindspore.dataset.transforms.c_transforms as c_transforms
  213. >>>
  214. >>> # concatenate string
  215. >>> prepend_tensor = np.array(["dw", "df"], dtype='S')
  216. >>> append_tensor = np.array(["dwsdf", "df"], dtype='S')
  217. >>> concatenate_op = c_transforms.Concatenate(0, prepend_tensor, append_tensor)
  218. """
  219. @check_concat_type
  220. def __init__(self, axis=0, prepend=None, append=None):
  221. if prepend is not None:
  222. prepend = cde.Tensor(np.array(prepend))
  223. if append is not None:
  224. append = cde.Tensor(np.array(append))
  225. super().__init__(axis, prepend, append)
  226. class Duplicate(cde.DuplicateOp):
  227. """
  228. Duplicate the input tensor to a new output tensor. The input tensor is carried over to the output list.
  229. Examples:
  230. >>> import mindspore.dataset.transforms.c_transforms as c_transforms
  231. >>>
  232. >>> # Data before
  233. >>> # | x |
  234. >>> # +---------+
  235. >>> # | [1,2,3] |
  236. >>> # +---------+
  237. >>> data1 = data1.map(operations=c_transforms.Duplicate(), input_columns=["x"],
  238. >>> output_columns=["x", "y"], column_order=["x", "y"])
  239. >>> # Data after
  240. >>> # | x | y |
  241. >>> # +---------+---------+
  242. >>> # | [1,2,3] | [1,2,3] |
  243. >>> # +---------+---------+
  244. """
  245. class Unique(cde.UniqueOp):
  246. """
  247. Return an output tensor containing all the unique elements of the input tensor in
  248. the same order that they occur in the input tensor.
  249. Also return an index tensor that contains the index of each element of the
  250. input tensor in the Unique output tensor.
  251. Finally, return a count tensor that constains the count of each element of
  252. the output tensor in the input tensor.
  253. Note:
  254. Call batch op before calling this function.
  255. Examples:
  256. >>> import mindspore.dataset.transforms.c_transforms as c_transforms
  257. >>>
  258. >>> # Data before
  259. >>> # | x |
  260. >>> # +--------------------+
  261. >>> # | [[0,1,2], [1,2,3]] |
  262. >>> # +--------------------+
  263. >>> data1 = data1.map(operations=c_transforms.Unique(), input_columns=["x"],
  264. >>> output_columns=["x", "y", "z"], column_order=["x", "y", "z"])
  265. >>> # Data after
  266. >>> # | x | y |z |
  267. >>> # +---------+-----------------+---------+
  268. >>> # | [0,1,2,3] | [0,1,2,1,2,3] | [1,2,2,1]
  269. >>> # +---------+-----------------+---------+
  270. """
  271. class Compose(cde.ComposeOp):
  272. """
  273. Compose a list of transforms into a single transform.
  274. Args:
  275. transforms (list): List of transformations to be applied.
  276. Examples:
  277. >>> import mindspore.dataset.transforms.c_transforms as c_transforms
  278. >>> import mindspore.dataset.vision.c_transforms as c_vision
  279. >>>
  280. >>> compose = c_transforms.Compose([c_vision.Decode(), c_vision.RandomCrop()])
  281. >>> data1 = data1.map(operations=compose)
  282. """
  283. @check_random_transform_ops
  284. def __init__(self, transforms):
  285. super().__init__(transforms)
  286. class RandomApply(cde.RandomApplyOp):
  287. """
  288. Randomly perform a series of transforms with a given probability.
  289. Args:
  290. transforms (list): List of transformations to be applied.
  291. prob (float, optional): The probability to apply the transformation list (default=0.5)
  292. Examples:
  293. >>> import mindspore.dataset.transforms.c_transforms as c_transforms
  294. >>> import mindspore.dataset.vision.c_transforms as c_vision
  295. >>>
  296. >>> rand_apply = c_transforms.RandomApply([c_vision.RandomCrop()])
  297. >>> data1 = data1.map(operations=rand_apply)
  298. """
  299. @check_random_transform_ops
  300. def __init__(self, transforms, prob=0.5):
  301. super().__init__(prob, transforms)
  302. class RandomChoice(cde.RandomChoiceOp):
  303. """
  304. Randomly selects one transform from a list of transforms to perform operation.
  305. Args:
  306. transforms (list): List of transformations to be chosen from to apply.
  307. Examples:
  308. >>> import mindspore.dataset.transforms.c_transforms as c_transforms
  309. >>> import mindspore.dataset.vision.c_transforms as c_vision
  310. >>>
  311. >>> rand_choice = c_transforms.RandomChoice([c_vision.CenterCrop(), c_vision.RandomCrop()])
  312. >>> data1 = data1.map(operations=rand_choice)
  313. """
  314. @check_random_transform_ops
  315. def __init__(self, transforms):
  316. super().__init__(transforms)