You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

data_utils.h 6.0 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118
  1. /**
  2. * Copyright 2019 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef DATASET_KERNELS_DATA_DATA_UTILS_H_
  17. #define DATASET_KERNELS_DATA_DATA_UTILS_H_
  18. #include <memory>
  19. #include <string>
  20. #include <vector>
  21. #include "dataset/core/constants.h"
  22. #include "dataset/core/cv_tensor.h"
  23. #include "dataset/core/data_type.h"
  24. #include "dataset/core/tensor.h"
  25. namespace mindspore {
  26. namespace dataset {
  27. // Returns the one-hot encoding of the input tensor.
  28. // Example: if input=2 and num_classes=3, the output is [0 0 1].
  29. // @param input: Tensor of type DE_UINT64; the non-one-hot values are stored
  30. // along the first dimension (rows).
  31. // If the rank of input is not 1 or the type is not DE_UINT64,
  32. // then it will fail.
  33. // @param output: Tensor. The shape of the output tensor is <input_shape, num_classes>
  34. // and the type is the same as input.
  35. // @param num_classes: Number of classes, i.e. the length of each one-hot vector (3 in the example above).
  36. Status OneHotEncoding(std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *output, dsize_t num_classes);
  37. Status OneHotEncodingUnsigned(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output,
  38. dsize_t num_classes, int64_t index);  // NOTE(review): presumably the unsigned-integer variant; role of `index` is not documented here - confirm
  39. Status OneHotEncodingSigned(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, dsize_t num_classes,
  40. int64_t index);  // NOTE(review): presumably the signed-integer variant; role of `index` is not documented here - confirm
  41. // Returns the input tensor converted to another data type.
  42. // Example: if the input tensor is float64, the output will have the specified data type. See DataTypes.cpp
  43. // @param input: Tensor
  44. // @param output: Tensor. The shape of the output tensor is the same as input, with the type changed.
  45. // @param data_type: type to cast the data to
  46. // @note: this operation will do a memcpy, and if a value is truncated then precision will be lost
  47. template <typename T>
  48. void CastFrom(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output);  // NOTE(review): T is presumably the source element type - confirm
  49. template <typename FROM, typename TO>
  50. void Cast(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output);  // element types are given by the FROM and TO template parameters
  51. Status ToFloat16(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output);
  52. Status TypeCast(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const DataType &data_type);
  53. // Pad the input tensor according to pad_shape, which needs to have the same rank.
  54. // Based on the type of the input tensor, PadEndNumeric/String will be called.
  55. // @param const std::shared_ptr<Tensor> &src - tensor to pad from
  56. // @param std::shared_ptr<Tensor> *dst - returned padded tensor
  57. // @param const std::vector<dsize_t> &pad_shape - shape to pad to
  58. // @param const std::shared_ptr<Tensor> &pad_val - value to pad with, in Tensor format
  59. // @return Status - The error code return
  60. Status PadEnd(const std::shared_ptr<Tensor> &src, std::shared_ptr<Tensor> *dst, const std::vector<dsize_t> &pad_shape,
  61. const std::shared_ptr<Tensor> &pad_val);
  62. // Pad the input numeric tensor according to pad_shape, which needs to have the same rank.
  63. // @param const std::shared_ptr<Tensor> &src - tensor to pad from
  64. // @param std::shared_ptr<Tensor> *dst - returned padded tensor
  65. // @param const std::vector<dsize_t> &pad_shape - shape to pad to
  66. // @param float pad_val - value to pad with
  67. // @return Status - The error code return
  68. Status PadEndNumeric(const std::shared_ptr<Tensor> &src, std::shared_ptr<Tensor> *dst,
  69. const std::vector<dsize_t> &pad_shape, float pad_val);
  70. // Recursive helper function for padding numeric tensors. This function could be very expensive if called on a
  71. // multi-dimensional tensor; it is only meant to be called by PadEndNumeric.
  72. // NOTE(review): this declaration is not a template and takes no pad value argument.
  73. // @param const std::shared_ptr<Tensor> &src - Tensor to pad from
  74. // @param std::shared_ptr<Tensor> dst - Tensor to pad to (shared_ptr passed by value), receives the result
  75. // @param std::vector<dsize_t> cur_ind - recursion helper
  76. // NOTE(review): presumably dst already holds the pad value where src has no data - confirm in the implementation.
  77. // @param size_t cur_dim - recursion helper, defaults to 0
  78. // @return Status - The error code return
  79. Status PadEndNumericHelper(const std::shared_ptr<Tensor> &src, std::shared_ptr<Tensor> dst,
  80. std::vector<dsize_t> cur_ind, size_t cur_dim = 0);
  81. // Pad the input string tensor according to pad_shape, which needs to have the same rank.
  82. // @param const std::shared_ptr<Tensor> &src - tensor to pad from
  83. // @param std::shared_ptr<Tensor> *dst - returned padded tensor
  84. // @param const std::vector<dsize_t> &pad_shape - shape to pad to
  85. // @param const std::string &pad_val - value to pad with
  86. // @return Status - The error code return
  87. Status PadEndString(const std::shared_ptr<Tensor> &src, std::shared_ptr<Tensor> *dst,
  88. const std::vector<dsize_t> &pad_shape, const std::string &pad_val);
  89. // Recursive helper function for padding string tensors. This function could be very expensive if called on a
  90. // multi-dimensional tensor; it is presumably only meant to be called by PadEndString (NOTE(review): confirm caller).
  91. // @param const std::shared_ptr<Tensor> &src - Tensor to pad from
  92. // @param std::vector<std::string> *dst - vector of strings to pad to, return value
  93. // @param const TensorShape &dst_shape - NOTE(review): presumably the shape of the padded output - confirm
  94. // @param std::vector<dsize_t> cur_ind - recursion helper
  95. // @param size_t cur_dim - recursion helper
  96. // @param const std::string &pad_value - value to pad tensor with
  97. // @return Status - The error code return
  98. Status PadEndStringHelper(const std::shared_ptr<Tensor> &src, std::vector<std::string> *dst,
  99. const TensorShape &dst_shape, std::vector<dsize_t> cur_ind, size_t cur_dim,
  100. const std::string &pad_value);
  101. } // namespace dataset
  102. } // namespace mindspore
  103. #endif // DATASET_KERNELS_DATA_DATA_UTILS_H_