You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; can include dashes ('-'); and can be up to 35 characters long.

data_utils.h 7.9 kB

5 years ago
(line-number gutter 1–163 from the original code listing)
  1. /**
  2. * Copyright 2019 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef DATASET_KERNELS_DATA_DATA_UTILS_H_
  17. #define DATASET_KERNELS_DATA_DATA_UTILS_H_
  18. #include <memory>
  19. #include <string>
  20. #include <vector>
  21. #include "dataset/core/constants.h"
  22. #include "dataset/core/cv_tensor.h"
  23. #include "dataset/core/data_type.h"
  24. #include "dataset/core/tensor.h"
  25. #include "dataset/core/tensor_row.h"
  26. namespace mindspore {
  27. namespace dataset {
  28. // Returns Onehot encoding of the input tensor.
  29. // Example: if input=2 and numClasses=3, the output is [0 0 1].
  30. // @param input: Tensor has type DE_UINT64, the non-one hot values are stored
  31. // along the first dimensions or rows..
  32. // If the rank of input is not 1 or the type is not DE_UINT64,
  33. // then it will fail.
  34. // @param output: Tensor. The shape of the output tensor is <input_shape, numClasses>
  35. // and the type is same as input.
  36. // @param num_classes: Number of classes to.
  37. Status OneHotEncoding(std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *output, dsize_t num_classes);
  38. Status OneHotEncodingUnsigned(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output,
  39. dsize_t num_classes, int64_t index);
  40. Status OneHotEncodingSigned(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, dsize_t num_classes,
  41. int64_t index);
  42. // Returns a tensor of shape input filled with the passed fill_value
  43. // @param input Tensor
  44. // @param output Tensor. The shape and type of the output tensor is same as input
  45. // @param fill_value Tensor. A scalar tensor used to fill the output tensor
  46. Status Fill(const std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *output, std::shared_ptr<Tensor> fill_value);
  47. // Returns a type changed input tensor.
  48. // Example: if input tensor is float64, the output will the specified dataType. See DataTypes.cpp
  49. // @param input Tensor
  50. // @param output Tensor. The shape of the output tensor is same as input with the type changed.
  51. // @param data_type: type of data to cast data to
  52. // @note: this operation will do a memcpy and if the value is truncated then precision will be lost
  53. template <typename T>
  54. void CastFrom(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output);
  55. template <typename FROM, typename TO>
  56. void Cast(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output);
  57. Status ToFloat16(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output);
  58. Status TypeCast(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const DataType &data_type);
  59. // Pad input tensor according pad_shape, need to have same rank.
  60. // Based on the type of the input tensor, PadEndNumeric/String will be called.
  61. // @param std::shared_ptr<Tensor> src - tensor to pad from
  62. // @param std::shared_ptr<Tensor> *dst - return tensor padded
  63. // @param std::vector<dsize_t> pad_shape - shape to pad to
  64. // @param std::shared_ptr<Tensor> pad_val - value to pad with in Tensor format,
  65. // @return - The error code return
  66. Status PadEnd(const std::shared_ptr<Tensor> &src, std::shared_ptr<Tensor> *dst, const std::vector<dsize_t> &pad_shape,
  67. const std::shared_ptr<Tensor> &pad_val);
  68. // Pad input numeric tensor according pad_shape, need to have same rank.
  69. // @param std::shared_ptr<Tensor> src - tensor to pad from
  70. // @param std::shared_ptr<Tensor> *dst - return tensor padded
  71. // @param std::vector<dsize_t> pad_shape - shape to pad to
  72. // @param float pad_val - value to pad with
  73. // @return - The error code return
  74. Status PadEndNumeric(const std::shared_ptr<Tensor> &src, std::shared_ptr<Tensor> *dst,
  75. const std::vector<dsize_t> &pad_shape, float pad_val);
  76. // recursive helper function for padding numric tensors. This function could be very expensive if called on a
  77. // multi-dimensional tensor it is only meant to be called by PadEndNumeric.
  78. // @tparam T - type of tensor and fill value
  79. // @param std::shared_ptr<Tensor> src - Tensor to pad from
  80. // @param std::shared_ptr<Tensor>* dst - Tensor to pad to, return value
  81. // @param std::vector<dsize_t> cur_ind - recursion helper
  82. // @param T pad_val - value to pad tensor with
  83. // @param size_t cur_dim - recursion helper
  84. // @return Status - The error code return
  85. Status PadEndNumericHelper(const std::shared_ptr<Tensor> &src, std::shared_ptr<Tensor> dst,
  86. std::vector<dsize_t> cur_ind, size_t cur_dim = 0);
  87. // Pad input string tensor according pad_shape, need to have same rank.
  88. // @param std::shared_ptr<Tensor> src - tensor to pad from
  89. // @param std::shared_ptr<Tensor> *dst - return tensor padded
  90. // @param std::vector<dsize_t> pad_shape - shape to pad to
  91. // @param std::string pad_val - value to pad with
  92. // @return - The error code return
  93. Status PadEndString(const std::shared_ptr<Tensor> &src, std::shared_ptr<Tensor> *dst,
  94. const std::vector<dsize_t> &pad_shape, const std::string &pad_val);
  95. // recursive helper function for padding string tensors. This function could be very expensive if called on a
  96. // multi-dimensional tensor it is only meant to be called by PadEndString.
  97. // @tparam T - type of tensor and fill value
  98. // @param std::shared_ptr<Tensor> src - Tensor to pad from
  99. // @param std::shared_ptr<Tensor>* dst - Tensor to pad to, return value
  100. // @param std::vector<dsize_t> cur_ind - recursion helperas text
  101. // @param std::string pad_val - value to pad tensor with
  102. // @param size_t cur_dim - recursion helper
  103. // @return Status - The error code return
  104. Status PadEndStringHelper(const std::shared_ptr<Tensor> &src, std::vector<std::string> *dst,
  105. const TensorShape &dst_shape, std::vector<dsize_t> cur_ind, size_t cur_dim,
  106. const std::string &pad_value);
  107. enum class RelationalOp {
  108. kEqual = 0, // ==
  109. kNotEqual, // !=
  110. kLess, // <
  111. kLessEqual, // <=
  112. kGreater, // >
  113. kGreaterEqual, // >=
  114. };
  115. /// Helper method that masks the input tensor
  116. /// @tparam T type of the tensor
  117. /// @param input[in] input tensor
  118. /// @param output[out] output tensor
  119. /// @param value_tensor[in] scalar tensor value to compared with
  120. /// @param op[in] RelationalOp enum
  121. /// @return Status ok/error
  122. template <typename T>
  123. Status MaskHelper(const std::shared_ptr<Tensor> &input, const std::shared_ptr<Tensor> &output,
  124. const std::shared_ptr<Tensor> &value_tensor, RelationalOp op);
  125. /// Mask the input tensor
  126. /// @param input[in] input tensor
  127. /// @param output[out] output tensor
  128. /// @param value[in] scalar tensor value to compared with
  129. /// @param op[in] RelationalOp enum
  130. /// @return Status ok/error
  131. Status Mask(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const std::shared_ptr<Tensor> &value,
  132. RelationalOp op);
  133. Status Concatenate(const TensorRow &input, TensorRow *output, int8_t axis, std::shared_ptr<Tensor> prepend,
  134. std::shared_ptr<Tensor> append);
  135. // helper for concat, always append to the input, and pass that to the output
  136. Status ConcatenateHelper(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int8_t axis,
  137. std::shared_ptr<Tensor> append);
  138. } // namespace dataset
  139. } // namespace mindspore
  140. #endif // DATASET_KERNELS_DATA_DATA_UTILS_H_