Merge pull request !1192 from h.farahat/string_Tensortags/v0.3.0-alpha
| @@ -237,6 +237,11 @@ void bindTensor(py::module *m) { | |||||
| .def("type", &Tensor::type) | .def("type", &Tensor::type) | ||||
| .def("as_array", [](py::object &t) { | .def("as_array", [](py::object &t) { | ||||
| auto &tensor = py::cast<Tensor &>(t); | auto &tensor = py::cast<Tensor &>(t); | ||||
| if (tensor.type() == DataType::DE_STRING) { | |||||
| py::array res; | |||||
| tensor.GetDataAsNumpyStrings(&res); | |||||
| return res; | |||||
| } | |||||
| py::buffer_info info; | py::buffer_info info; | ||||
| THROW_IF_ERROR(Tensor::GetBufferInfo(tensor, &info)); | THROW_IF_ERROR(Tensor::GetBufferInfo(tensor, &info)); | ||||
| return py::array(pybind11::dtype(info), info.shape, info.strides, info.ptr, t); | return py::array(pybind11::dtype(info), info.shape, info.strides, info.ptr, t); | ||||
| @@ -24,15 +24,15 @@ | |||||
| namespace mindspore { | namespace mindspore { | ||||
| namespace dataset { | namespace dataset { | ||||
| CVTensor::CVTensor(const TensorShape &shape, const DataType &type) : Tensor(shape, type) { | CVTensor::CVTensor(const TensorShape &shape, const DataType &type) : Tensor(shape, type) { | ||||
| (void)this->MatInit(StartAddr(), shape_, type_, &mat_); | |||||
| (void)this->MatInit(GetMutableBuffer(), shape_, type_, &mat_); | |||||
| } | } | ||||
| CVTensor::CVTensor(const TensorShape &shape, const DataType &type, const uchar *data) : Tensor(shape, type, data) { | CVTensor::CVTensor(const TensorShape &shape, const DataType &type, const uchar *data) : Tensor(shape, type, data) { | ||||
| (void)this->MatInit(StartAddr(), shape_, type_, &mat_); | |||||
| (void)this->MatInit(GetMutableBuffer(), shape_, type_, &mat_); | |||||
| } | } | ||||
| CVTensor::CVTensor(std::shared_ptr<Tensor> tensor) : Tensor(std::move(*tensor)) { | CVTensor::CVTensor(std::shared_ptr<Tensor> tensor) : Tensor(std::move(*tensor)) { | ||||
| (void)this->MatInit(StartAddr(), shape_, type_, &mat_); | |||||
| (void)this->MatInit(GetMutableBuffer(), shape_, type_, &mat_); | |||||
| } | } | ||||
| std::pair<std::array<int, 2>, int> CVTensor::IsValidImage(const TensorShape &shape, const DataType &type) { | std::pair<std::array<int, 2>, int> CVTensor::IsValidImage(const TensorShape &shape, const DataType &type) { | ||||
| @@ -83,19 +83,19 @@ Status CVTensor::MatInit(uchar *data, const TensorShape &shape, const DataType & | |||||
| Status CVTensor::Reshape(const TensorShape &shape) { | Status CVTensor::Reshape(const TensorShape &shape) { | ||||
| RETURN_IF_NOT_OK(Tensor::Reshape(shape)); | RETURN_IF_NOT_OK(Tensor::Reshape(shape)); | ||||
| RETURN_IF_NOT_OK(this->MatInit(StartAddr(), shape_, type_, &mat_)); | |||||
| RETURN_IF_NOT_OK(this->MatInit(GetMutableBuffer(), shape_, type_, &mat_)); | |||||
| return Status::OK(); | return Status::OK(); | ||||
| } | } | ||||
| Status CVTensor::ExpandDim(const dsize_t &axis) { | Status CVTensor::ExpandDim(const dsize_t &axis) { | ||||
| RETURN_IF_NOT_OK(Tensor::ExpandDim(axis)); | RETURN_IF_NOT_OK(Tensor::ExpandDim(axis)); | ||||
| RETURN_IF_NOT_OK(this->MatInit(StartAddr(), shape_, type_, &mat_)); | |||||
| RETURN_IF_NOT_OK(this->MatInit(GetMutableBuffer(), shape_, type_, &mat_)); | |||||
| return Status::OK(); | return Status::OK(); | ||||
| } | } | ||||
| void CVTensor::Squeeze() { | void CVTensor::Squeeze() { | ||||
| Tensor::Squeeze(); | Tensor::Squeeze(); | ||||
| (void)this->MatInit(StartAddr(), shape_, type_, &mat_); | |||||
| (void)this->MatInit(GetMutableBuffer(), shape_, type_, &mat_); | |||||
| } | } | ||||
| } // namespace dataset | } // namespace dataset | ||||
| } // namespace mindspore | } // namespace mindspore | ||||
| @@ -15,116 +15,40 @@ | |||||
| */ | */ | ||||
| #include "dataset/core/data_type.h" | #include "dataset/core/data_type.h" | ||||
| #include <opencv2/core/hal/interface.h> | |||||
| #include "utils/log_adapter.h" | #include "utils/log_adapter.h" | ||||
| #include "dataset/core/constants.h" | |||||
| #include "dataset/core/pybind_support.h" | #include "dataset/core/pybind_support.h" | ||||
| #include "dataset/util/de_error.h" | #include "dataset/util/de_error.h" | ||||
| namespace mindspore { | namespace mindspore { | ||||
| namespace dataset { | namespace dataset { | ||||
| uint8_t DataType::SizeInBytes() const { | uint8_t DataType::SizeInBytes() const { | ||||
| switch (type_) { | |||||
| case DataType::DE_BOOL: | |||||
| case DataType::DE_INT8: | |||||
| case DataType::DE_UINT8: | |||||
| return 1; | |||||
| case DataType::DE_INT16: | |||||
| case DataType::DE_UINT16: | |||||
| case DataType::DE_FLOAT16: | |||||
| return 2; | |||||
| case DataType::DE_INT32: | |||||
| case DataType::DE_UINT32: | |||||
| case DataType::DE_FLOAT32: | |||||
| return 4; | |||||
| case DataType::DE_INT64: | |||||
| case DataType::DE_UINT64: | |||||
| case DataType::DE_FLOAT64: | |||||
| return 8; | |||||
| default: | |||||
| return 0; | |||||
| } | |||||
| if (type_ < DataType::NUM_OF_TYPES) | |||||
| return SIZE_IN_BYTES[type_]; | |||||
| else | |||||
| return 0; | |||||
| } | } | ||||
| py::dtype DataType::AsNumpyType() const { | py::dtype DataType::AsNumpyType() const { | ||||
| std::string s; | |||||
| switch (type_) { | |||||
| case DataType::DE_BOOL: | |||||
| s = "bool"; | |||||
| break; | |||||
| case DataType::DE_INT8: | |||||
| s = "int8"; | |||||
| break; | |||||
| case DataType::DE_UINT8: | |||||
| s = "uint8"; | |||||
| break; | |||||
| case DataType::DE_INT16: | |||||
| s = "int16"; | |||||
| break; | |||||
| case DataType::DE_UINT16: | |||||
| s = "uint16"; | |||||
| break; | |||||
| case DataType::DE_INT32: | |||||
| s = "int32"; | |||||
| break; | |||||
| case DataType::DE_UINT32: | |||||
| s = "uint32"; | |||||
| break; | |||||
| case DataType::DE_INT64: | |||||
| s = "int64"; | |||||
| break; | |||||
| case DataType::DE_UINT64: | |||||
| s = "uint64"; | |||||
| break; | |||||
| case DataType::DE_FLOAT16: | |||||
| s = "float16"; | |||||
| break; | |||||
| case DataType::DE_FLOAT32: | |||||
| s = "float32"; | |||||
| break; | |||||
| case DataType::DE_FLOAT64: | |||||
| s = "double"; | |||||
| break; | |||||
| case DataType::DE_UNKNOWN: | |||||
| s = "unknown"; | |||||
| break; | |||||
| default: | |||||
| s = "unknown"; | |||||
| break; | |||||
| } | |||||
| return py::dtype(s); | |||||
| if (type_ < DataType::NUM_OF_TYPES) | |||||
| return py::dtype(PYBIND_TYPES[type_]); | |||||
| else | |||||
| return py::dtype("unknown"); | |||||
| } | } | ||||
| uint8_t DataType::AsCVType() const { | uint8_t DataType::AsCVType() const { | ||||
| switch (type_) { | |||||
| case DataType::DE_BOOL: | |||||
| return CV_8U; | |||||
| case DataType::DE_INT8: | |||||
| return CV_8S; | |||||
| case DataType::DE_UINT8: | |||||
| return CV_8U; | |||||
| case DataType::DE_INT16: | |||||
| return CV_16S; | |||||
| case DataType::DE_UINT16: | |||||
| return CV_16U; | |||||
| case DataType::DE_INT32: | |||||
| return CV_32S; | |||||
| case DataType::DE_FLOAT16: | |||||
| return CV_16F; | |||||
| case DataType::DE_FLOAT32: | |||||
| return CV_32F; | |||||
| case DataType::DE_FLOAT64: | |||||
| return CV_64F; | |||||
| case DataType::DE_UINT32: | |||||
| case DataType::DE_INT64: | |||||
| case DataType::DE_UINT64: | |||||
| default: | |||||
| MS_LOG(ERROR) << "Cannot convert to OpenCV type. Return invalid type!"; | |||||
| return kCVInvalidType; | |||||
| uint8_t res = kCVInvalidType; | |||||
| if (type_ < DataType::NUM_OF_TYPES) { | |||||
| res = CV_TYPES[type_]; | |||||
| } | } | ||||
| } | |||||
| if (res == kCVInvalidType) { | |||||
| MS_LOG(ERROR) << "Cannot convert to OpenCV type. Return invalid type!"; | |||||
| } | |||||
| return res; | |||||
| } // namespace dataset | |||||
| DataType DataType::FromCVType(int cv_type) { | DataType DataType::FromCVType(int cv_type) { | ||||
| auto depth = static_cast<uchar>(cv_type) & static_cast<uchar>(CV_MAT_DEPTH_MASK); | auto depth = static_cast<uchar>(cv_type) & static_cast<uchar>(CV_MAT_DEPTH_MASK); | ||||
| @@ -176,72 +100,17 @@ DataType::DataType(const std::string &type_str) { | |||||
| type_ = DE_FLOAT32; | type_ = DE_FLOAT32; | ||||
| else if (type_str == "float64") | else if (type_str == "float64") | ||||
| type_ = DE_FLOAT64; | type_ = DE_FLOAT64; | ||||
| else if (type_str == "string") | |||||
| type_ = DE_STRING; | |||||
| else | else | ||||
| type_ = DE_UNKNOWN; | type_ = DE_UNKNOWN; | ||||
| } | } | ||||
| std::string DataType::ToString() const { | std::string DataType::ToString() const { | ||||
| switch (type_) { | |||||
| case DataType::DE_BOOL: | |||||
| return "bool"; | |||||
| case DataType::DE_INT8: | |||||
| return "int8"; | |||||
| case DataType::DE_UINT8: | |||||
| return "uint8"; | |||||
| case DataType::DE_INT16: | |||||
| return "int16"; | |||||
| case DataType::DE_UINT16: | |||||
| return "uint16"; | |||||
| case DataType::DE_INT32: | |||||
| return "int32"; | |||||
| case DataType::DE_UINT32: | |||||
| return "uint32"; | |||||
| case DataType::DE_INT64: | |||||
| return "int64"; | |||||
| case DataType::DE_UINT64: | |||||
| return "uint64"; | |||||
| case DataType::DE_FLOAT16: | |||||
| return "float16"; | |||||
| case DataType::DE_FLOAT32: | |||||
| return "float32"; | |||||
| case DataType::DE_FLOAT64: | |||||
| return "float64"; | |||||
| case DataType::DE_UNKNOWN: | |||||
| return "unknown"; | |||||
| default: | |||||
| return "unknown"; | |||||
| } | |||||
| } | |||||
| DataType DataType::FromNpType(const py::dtype &type) { | |||||
| if (type.is(py::dtype("bool"))) { | |||||
| return DataType(DataType::DE_BOOL); | |||||
| } else if (type.is(py::dtype("int8"))) { | |||||
| return DataType(DataType::DE_INT8); | |||||
| } else if (type.is(py::dtype("uint8"))) { | |||||
| return DataType(DataType::DE_UINT8); | |||||
| } else if (type.is(py::dtype("int16"))) { | |||||
| return DataType(DataType::DE_INT16); | |||||
| } else if (type.is(py::dtype("uint16"))) { | |||||
| return DataType(DataType::DE_UINT16); | |||||
| } else if (type.is(py::dtype("int32"))) { | |||||
| return DataType(DataType::DE_INT32); | |||||
| } else if (type.is(py::dtype("uint32"))) { | |||||
| return DataType(DataType::DE_UINT32); | |||||
| } else if (type.is(py::dtype("int64"))) { | |||||
| return DataType(DataType::DE_INT64); | |||||
| } else if (type.is(py::dtype("uint64"))) { | |||||
| return DataType(DataType::DE_UINT64); | |||||
| } else if (type.is(py::dtype("float16"))) { | |||||
| return DataType(DataType::DE_FLOAT16); | |||||
| } else if (type.is(py::dtype("float32"))) { | |||||
| return DataType(DataType::DE_FLOAT32); | |||||
| } else if (type.is(py::dtype("double"))) { | |||||
| return DataType(DataType::DE_FLOAT64); | |||||
| } else { | |||||
| MS_LOG(ERROR) << "Cannot convert from numpy type. Unknown data type is returned!"; | |||||
| return DataType(DataType::DE_UNKNOWN); | |||||
| } | |||||
| if (type_ < DataType::NUM_OF_TYPES) | |||||
| return TO_STRINGS[type_]; | |||||
| else | |||||
| return "unknown"; | |||||
| } | } | ||||
| DataType DataType::FromNpArray(const py::array &arr) { | DataType DataType::FromNpArray(const py::array &arr) { | ||||
| @@ -269,6 +138,8 @@ DataType DataType::FromNpArray(const py::array &arr) { | |||||
| return DataType(DataType::DE_FLOAT32); | return DataType(DataType::DE_FLOAT32); | ||||
| } else if (py::isinstance<py::array_t<std::double_t>>(arr)) { | } else if (py::isinstance<py::array_t<std::double_t>>(arr)) { | ||||
| return DataType(DataType::DE_FLOAT64); | return DataType(DataType::DE_FLOAT64); | ||||
| } else if (arr.dtype().kind() == 'S') { | |||||
| return DataType(DataType::DE_STRING); | |||||
| } else { | } else { | ||||
| MS_LOG(ERROR) << "Cannot convert from numpy type. Unknown data type is returned!"; | MS_LOG(ERROR) << "Cannot convert from numpy type. Unknown data type is returned!"; | ||||
| return DataType(DataType::DE_UNKNOWN); | return DataType(DataType::DE_UNKNOWN); | ||||
| @@ -276,36 +147,16 @@ DataType DataType::FromNpArray(const py::array &arr) { | |||||
| } | } | ||||
| std::string DataType::GetPybindFormat() const { | std::string DataType::GetPybindFormat() const { | ||||
| switch (type_) { | |||||
| case DataType::DE_BOOL: | |||||
| return py::format_descriptor<bool>::format(); | |||||
| case DataType::DE_INT8: | |||||
| return py::format_descriptor<int8_t>::format(); | |||||
| case DataType::DE_UINT8: | |||||
| return py::format_descriptor<uint8_t>::format(); | |||||
| case DataType::DE_INT16: | |||||
| return py::format_descriptor<int16_t>::format(); | |||||
| case DataType::DE_UINT16: | |||||
| return py::format_descriptor<uint16_t>::format(); | |||||
| case DataType::DE_INT32: | |||||
| return py::format_descriptor<int32_t>::format(); | |||||
| case DataType::DE_UINT32: | |||||
| return py::format_descriptor<uint32_t>::format(); | |||||
| case DataType::DE_INT64: | |||||
| return py::format_descriptor<int64_t>::format(); | |||||
| case DataType::DE_UINT64: | |||||
| return py::format_descriptor<uint64_t>::format(); | |||||
| case DataType::DE_FLOAT16: | |||||
| // Eigen 3.3.7 doesn't support py::format_descriptor<Eigen::half>::format() | |||||
| return "e"; | |||||
| case DataType::DE_FLOAT32: | |||||
| return py::format_descriptor<float>::format(); | |||||
| case DataType::DE_FLOAT64: | |||||
| return py::format_descriptor<double>::format(); | |||||
| default: | |||||
| MS_LOG(ERROR) << "Cannot convert from data type to pybind format descriptor!"; | |||||
| return ""; | |||||
| std::string res; | |||||
| if (type_ < DataType::NUM_OF_TYPES) { | |||||
| res = PYBIND_FORMAT_DESCRIPTOR[type_]; | |||||
| } | |||||
| if (res.empty()) { | |||||
| MS_LOG(ERROR) << "Cannot convert from data type to pybind format descriptor!"; | |||||
| } | } | ||||
| return res; | |||||
| } | } | ||||
| } // namespace dataset | } // namespace dataset | ||||
| } // namespace mindspore | } // namespace mindspore | ||||
| @@ -16,18 +16,25 @@ | |||||
| #ifndef DATASET_CORE_DATA_TYPE_H_ | #ifndef DATASET_CORE_DATA_TYPE_H_ | ||||
| #define DATASET_CORE_DATA_TYPE_H_ | #define DATASET_CORE_DATA_TYPE_H_ | ||||
| #include <opencv2/core/hal/interface.h> | |||||
| #include <string> | #include <string> | ||||
| #include "pybind11/numpy.h" | #include "pybind11/numpy.h" | ||||
| #include "pybind11/pybind11.h" | #include "pybind11/pybind11.h" | ||||
| #include "dataset/core/constants.h" | |||||
| #include "dataset/core/pybind_support.h" | #include "dataset/core/pybind_support.h" | ||||
| namespace py = pybind11; | namespace py = pybind11; | ||||
| namespace mindspore { | namespace mindspore { | ||||
| namespace dataset { | namespace dataset { | ||||
| // Class that represents basic data types in DataEngine. | // Class that represents basic data types in DataEngine. | ||||
| class DataType { | class DataType { | ||||
| public: | public: | ||||
| enum Type : uint8_t { | enum Type : uint8_t { | ||||
| DE_UNKNOWN = 0, | |||||
| DE_BOOL, | DE_BOOL, | ||||
| DE_INT8, | DE_INT8, | ||||
| DE_UINT8, | DE_UINT8, | ||||
| @@ -40,20 +47,60 @@ class DataType { | |||||
| DE_FLOAT16, | DE_FLOAT16, | ||||
| DE_FLOAT32, | DE_FLOAT32, | ||||
| DE_FLOAT64, | DE_FLOAT64, | ||||
| DE_UNKNOWN | |||||
| DE_STRING, | |||||
| NUM_OF_TYPES | |||||
| }; | }; | ||||
| static constexpr uint8_t DE_BOOL_SIZE = 1; | |||||
| static constexpr uint8_t DE_UINT8_SIZE = 1; | |||||
| static constexpr uint8_t DE_INT8_SIZE = 1; | |||||
| static constexpr uint8_t DE_UINT16_SIZE = 2; | |||||
| static constexpr uint8_t DE_INT16_SIZE = 2; | |||||
| static constexpr uint8_t DE_UINT32_SIZE = 4; | |||||
| static constexpr uint8_t DE_INT32_SIZE = 4; | |||||
| static constexpr uint8_t DE_INT64_SIZE = 8; | |||||
| static constexpr uint8_t DE_UINT64_SIZE = 8; | |||||
| static constexpr uint8_t DE_FLOAT32_SIZE = 4; | |||||
| static constexpr uint8_t DE_FLOAT64_SIZE = 8; | |||||
| inline static constexpr uint8_t SIZE_IN_BYTES[] = {0, // DE_UNKNOWN | |||||
| 1, // DE_BOOL | |||||
| 1, // DE_INT8 | |||||
| 1, // DE_UINT8 | |||||
| 2, // DE_INT16 | |||||
| 2, // DE_UINT16 | |||||
| 4, // DE_INT32 | |||||
| 4, // DE_UINT32 | |||||
| 8, // DE_INT64 | |||||
| 8, // DE_UINT64 | |||||
| 2, // DE_FLOAT16 | |||||
| 4, // DE_FLOAT32 | |||||
| 8, // DE_FLOAT64 | |||||
| 0}; // DE_STRING | |||||
| inline static const char *TO_STRINGS[] = {"unknown", "bool", "int8", "uint8", "int16", "uint16", "int32", | |||||
| "uint32", "int64", "uint64", "float16", "float32", "float64", "string"}; | |||||
| inline static const char *PYBIND_TYPES[] = {"object", "bool", "int8", "uint8", "int16", "uint16", "int32", | |||||
| "uint32", "int64", "uint64", "float16", "float32", "double", "bytes"}; | |||||
| inline static const std::string PYBIND_FORMAT_DESCRIPTOR[] = {"", // DE_UNKNOWN | |||||
| py::format_descriptor<bool>::format(), // DE_BOOL | |||||
| py::format_descriptor<int8_t>::format(), // DE_INT8 | |||||
| py::format_descriptor<uint8_t>::format(), // DE_UINT8 | |||||
| py::format_descriptor<int16_t>::format(), // DE_INT16 | |||||
| py::format_descriptor<uint16_t>::format(), // DE_UINT16 | |||||
| py::format_descriptor<int32_t>::format(), // DE_INT32 | |||||
| py::format_descriptor<uint32_t>::format(), // DE_UINT32 | |||||
| py::format_descriptor<int64_t>::format(), // DE_INT64 | |||||
| py::format_descriptor<uint64_t>::format(), // DE_UINT64 | |||||
| "e", // DE_FLOAT16 | |||||
| py::format_descriptor<float>::format(), // DE_FLOAT32 | |||||
| py::format_descriptor<double>::format(), // DE_FLOAT64 | |||||
| "S"}; // DE_STRING | |||||
| inline static constexpr uint8_t CV_TYPES[] = {kCVInvalidType, // DE_UNKNOWN | |||||
| CV_8U, // DE_BOOL | |||||
| CV_8S, // DE_INT8 | |||||
| CV_8U, // DE_UINT8 | |||||
| CV_16S, // DE_INT16 | |||||
| CV_16U, // DE_UINT16 | |||||
| CV_32S, // DE_INT32 | |||||
| kCVInvalidType, // DE_UINT32 | |||||
| kCVInvalidType, // DE_INT64 | |||||
| kCVInvalidType, // DE_UINT64 | |||||
| CV_16F, // DE_FLOAT16 | |||||
| CV_32F, // DE_FLOAT32 | |||||
| CV_64F, // DE_FLOAT64 | |||||
| kCVInvalidType}; // DE_STRING | |||||
| // No arg constructor to create an unknown shape | // No arg constructor to create an unknown shape | ||||
| DataType() : type_(DE_UNKNOWN) {} | DataType() : type_(DE_UNKNOWN) {} | ||||
| @@ -160,6 +207,8 @@ class DataType { | |||||
| bool IsBool() const { return type_ == DataType::DE_BOOL; } | bool IsBool() const { return type_ == DataType::DE_BOOL; } | ||||
| bool IsNumeric() const { return type_ != DataType::DE_STRING; } | |||||
| Type value() const { return type_; } | Type value() const { return type_; } | ||||
| private: | private: | ||||
| @@ -226,6 +275,11 @@ inline bool DataType::IsCompatible<uint8_t>() const { | |||||
| return type_ == DataType::DE_UINT8; | return type_ == DataType::DE_UINT8; | ||||
| } | } | ||||
| template <> | |||||
| inline bool DataType::IsCompatible<std::string_view>() const { | |||||
| return type_ == DataType::DE_STRING; | |||||
| } | |||||
| template <> | template <> | ||||
| inline bool DataType::IsLooselyCompatible<bool>() const { | inline bool DataType::IsLooselyCompatible<bool>() const { | ||||
| return type_ == DataType::DE_BOOL; | return type_ == DataType::DE_BOOL; | ||||
| @@ -15,6 +15,7 @@ | |||||
| */ | */ | ||||
| #include "dataset/core/tensor.h" | #include "dataset/core/tensor.h" | ||||
| #include <algorithm> | |||||
| #include <iomanip> | #include <iomanip> | ||||
| #include <iostream> | #include <iostream> | ||||
| #include <memory> | #include <memory> | ||||
| @@ -60,7 +61,7 @@ Tensor::Tensor(const TensorShape &shape, const DataType &type, const unsigned ch | |||||
| if (data != nullptr) { | if (data != nullptr) { | ||||
| // Given the shape/type of this tensor, compute the data size and copy in the input bytes. | // Given the shape/type of this tensor, compute the data size and copy in the input bytes. | ||||
| int64_t byte_size = this->SizeInBytes(); | int64_t byte_size = this->SizeInBytes(); | ||||
| static_cast<void>(this->StartAddr()); // Allocates data_ inside itself | |||||
| static_cast<void>(this->GetMutableBuffer()); // Allocates data_ inside itself | |||||
| if (data_ != nullptr) { | if (data_ != nullptr) { | ||||
| int ret_code = memcpy_s(data_, byte_size, data, byte_size); | int ret_code = memcpy_s(data_, byte_size, data, byte_size); | ||||
| if (ret_code != 0) { | if (ret_code != 0) { | ||||
| @@ -75,7 +76,7 @@ Tensor::Tensor(const TensorShape &shape, const DataType &type, const unsigned ch | |||||
| Tensor::Tensor(Tensor &&other) noexcept | Tensor::Tensor(Tensor &&other) noexcept | ||||
| : shape_(other.shape()), | : shape_(other.shape()), | ||||
| type_(other.type()), | type_(other.type()), | ||||
| data_(other.StartAddr()), | |||||
| data_(other.GetMutableBuffer()), | |||||
| data_allocator_(std::move(other.data_allocator_)) { | data_allocator_(std::move(other.data_allocator_)) { | ||||
| other.Invalidate(); | other.Invalidate(); | ||||
| } | } | ||||
| @@ -84,7 +85,7 @@ Tensor &Tensor::operator=(Tensor &&other) noexcept { | |||||
| if (&other != this) { | if (&other != this) { | ||||
| shape_ = other.shape(); | shape_ = other.shape(); | ||||
| type_ = other.type(); | type_ = other.type(); | ||||
| data_ = other.StartAddr(); | |||||
| data_ = other.GetMutableBuffer(); | |||||
| data_end_ = other.data_end_; | data_end_ = other.data_end_; | ||||
| data_allocator_ = std::move(other.data_allocator_); | data_allocator_ = std::move(other.data_allocator_); | ||||
| other.Invalidate(); | other.Invalidate(); | ||||
| @@ -92,6 +93,37 @@ Tensor &Tensor::operator=(Tensor &&other) noexcept { | |||||
| return *this; | return *this; | ||||
| } | } | ||||
| Tensor::Tensor(const std::vector<std::string> &strings, const TensorShape &shape) | |||||
| : Tensor(TensorShape({static_cast<dsize_t>(strings.size())}), DataType(DataType::DE_STRING)) { | |||||
| auto length_sum = [](dsize_t sum, const std::string &s) { return s.length() + sum; }; | |||||
| dsize_t total_length = std::accumulate(strings.begin(), strings.end(), 0, length_sum); | |||||
| dsize_t num_bytes = (kOffsetSize + 1) * shape_.NumOfElements() + total_length; | |||||
| data_ = data_allocator_->allocate(num_bytes); | |||||
| auto offset_arr = reinterpret_cast<offset_t *>(data_); | |||||
| uchar *buf = GetStringsBuffer(); | |||||
| offset_t offset = -1; | |||||
| uint32_t i = 0; | |||||
| for (const auto &str : strings) { | |||||
| // insert the end index of the string | |||||
| // end index of a string is the end index of previous string + the length (including \0) | |||||
| offset = offset + str.length() + 1; | |||||
| offset_arr[i++] = offset; | |||||
| // total bytes are reduced by kOffsetSize | |||||
| num_bytes -= kOffsetSize; | |||||
| // insert actual string | |||||
| memcpy_s(buf, num_bytes, str.c_str(), str.length() + 1); | |||||
| buf += str.length() + 1; | |||||
| num_bytes -= str.length() + 1; | |||||
| } | |||||
| this->data_end_ = buf; | |||||
| DS_ASSERT(num_bytes == 0); | |||||
| if (shape.known()) Tensor::Reshape(shape); | |||||
| } | |||||
| Status Tensor::CreateTensor(std::shared_ptr<Tensor> *ptr, TensorImpl tensor_impl, const TensorShape &shape, | Status Tensor::CreateTensor(std::shared_ptr<Tensor> *ptr, TensorImpl tensor_impl, const TensorShape &shape, | ||||
| DataType type, const unsigned char *data) { | DataType type, const unsigned char *data) { | ||||
| if (!shape.known()) { | if (!shape.known()) { | ||||
| @@ -120,8 +152,28 @@ Status Tensor::CreateTensor(std::shared_ptr<Tensor> *ptr, TensorImpl tensor_impl | |||||
| } | } | ||||
| return Status::OK(); // returns base-class shared_ptr | return Status::OK(); // returns base-class shared_ptr | ||||
| } | } | ||||
| std::string to(std::string x) { return x; } | |||||
| Status Tensor::CreateTensorFromNumpyString(std::shared_ptr<Tensor> *ptr, py::array arr) { | |||||
| std::vector<dsize_t> shape; | |||||
| for (dsize_t i = 0; i < arr.ndim(); i++) { | |||||
| shape.push_back(static_cast<dsize_t>(arr.shape()[i])); | |||||
| } | |||||
| arr.resize({arr.size()}); | |||||
| auto itr = arr.begin(); | |||||
| std::vector<std::string> strings; | |||||
| for (; itr != arr.end(); itr++) { | |||||
| std::string s = to(py::cast<py::bytes>(*itr)); | |||||
| strings.push_back(s); | |||||
| } | |||||
| arr.resize(shape); | |||||
| return CreateTensor(ptr, strings, TensorShape{shape}); | |||||
| } | |||||
| Status Tensor::CreateTensor(std::shared_ptr<Tensor> *ptr, py::array arr) { | Status Tensor::CreateTensor(std::shared_ptr<Tensor> *ptr, py::array arr) { | ||||
| if (DataType::FromNpArray(arr) == DataType::DE_STRING) { | |||||
| return CreateTensorFromNumpyString(ptr, arr); | |||||
| } | |||||
| const TensorAlloc *alloc = GlobalContext::Instance()->tensor_allocator(); | const TensorAlloc *alloc = GlobalContext::Instance()->tensor_allocator(); | ||||
| *ptr = std::allocate_shared<Tensor>(*alloc, TensorShape({}), DataType(DataType::DE_UNKNOWN)); | *ptr = std::allocate_shared<Tensor>(*alloc, TensorShape({}), DataType(DataType::DE_UNKNOWN)); | ||||
| @@ -138,7 +190,7 @@ Status Tensor::CreateTensor(std::shared_ptr<Tensor> *ptr, py::array arr) { | |||||
| std::shared_ptr<MemoryPool> global_pool = GlobalContext::Instance()->mem_pool(); | std::shared_ptr<MemoryPool> global_pool = GlobalContext::Instance()->mem_pool(); | ||||
| (*ptr)->data_allocator_ = std::make_unique<Allocator<unsigned char>>(global_pool); | (*ptr)->data_allocator_ = std::make_unique<Allocator<unsigned char>>(global_pool); | ||||
| static_cast<void>((*ptr)->StartAddr()); | |||||
| static_cast<void>((*ptr)->GetMutableBuffer()); | |||||
| int64_t byte_size = (*ptr)->SizeInBytes(); | int64_t byte_size = (*ptr)->SizeInBytes(); | ||||
| unsigned char *data = static_cast<unsigned char *>(arr.request().ptr); | unsigned char *data = static_cast<unsigned char *>(arr.request().ptr); | ||||
| if ((*ptr)->data_ == nullptr) { | if ((*ptr)->data_ == nullptr) { | ||||
| @@ -173,6 +225,13 @@ Status Tensor::CreateTensor(std::shared_ptr<Tensor> *ptr, py::array arr) { | |||||
| return Status::OK(); // returns base-class shared_ptr | return Status::OK(); // returns base-class shared_ptr | ||||
| } | } | ||||
| Status Tensor::CreateTensor(std::shared_ptr<Tensor> *ptr, const std::vector<std::string> &strings, | |||||
| const TensorShape &shape) { | |||||
| const TensorAlloc *alloc = GlobalContext::Instance()->tensor_allocator(); | |||||
| *ptr = std::allocate_shared<Tensor>(*alloc, strings, shape); | |||||
| return Status::OK(); | |||||
| } | |||||
| // Memcpy the given strided array's used part to consecutive memory | // Memcpy the given strided array's used part to consecutive memory | ||||
| // Consider a 3-d array | // Consider a 3-d array | ||||
| // A[(i * shape[1] + j) * shape[2] + k] = B[i][j][k] = C[i * strides[0] + j * strides[1] + k * strides[2]] | // A[(i * shape[1] + j) * shape[2] + k] = B[i][j][k] = C[i * strides[0] + j * strides[1] + k * strides[2]] | ||||
| @@ -264,6 +323,12 @@ void Tensor::PrintItemAt(const std::vector<dsize_t> &index, std::ostream &out) c | |||||
| CASE_PRINT(DataType::DE_FLOAT64, double); | CASE_PRINT(DataType::DE_FLOAT64, double); | ||||
| case DataType::DE_STRING: { | |||||
| std::string_view o{""}; | |||||
| GetItemAt(&o, index); | |||||
| out << "\"" << o << "\""; | |||||
| break; | |||||
| } | |||||
| default: { | default: { | ||||
| out << "?"; | out << "?"; | ||||
| break; | break; | ||||
| @@ -324,12 +389,12 @@ Status Tensor::ToFlatIndex(const std::vector<dsize_t> &index, dsize_t *flat_inde | |||||
| return Status::OK(); | return Status::OK(); | ||||
| } | } | ||||
| const unsigned char *Tensor::StartAddr() const { | |||||
| const unsigned char *Tensor::GetBuffer() const { | |||||
| // This version cannot modify anything. data_ could possibly be null. | // This version cannot modify anything. data_ could possibly be null. | ||||
| return data_; | return data_; | ||||
| } | } | ||||
| unsigned char *Tensor::StartAddr() { | |||||
| unsigned char *Tensor::GetMutableBuffer() { | |||||
| if (!shape_.known() || type_ == DataType::DE_UNKNOWN) { | if (!shape_.known() || type_ == DataType::DE_UNKNOWN) { | ||||
| return nullptr; | return nullptr; | ||||
| } | } | ||||
| @@ -381,6 +446,25 @@ Status Tensor::GetItemPtr(T **ptr, const std::vector<dsize_t> &index) const { | |||||
| dsize_t flat_idx; | dsize_t flat_idx; | ||||
| RETURN_IF_NOT_OK(ToFlatIndex(index, &flat_idx)); | RETURN_IF_NOT_OK(ToFlatIndex(index, &flat_idx)); | ||||
| *ptr = reinterpret_cast<T *>(data_ + flat_idx * type_.SizeInBytes()); | *ptr = reinterpret_cast<T *>(data_ + flat_idx * type_.SizeInBytes()); | ||||
| return Status::OK(); | |||||
| } else { | |||||
| std::string err = "data type not compatible"; | |||||
| RETURN_STATUS_UNEXPECTED(err); | |||||
| } | |||||
| } | |||||
| Status Tensor::GetItemPtr(uchar **ptr, const std::vector<dsize_t> &index, offset_t *length) const { | |||||
| if (type_ == DataType::DE_STRING) { | |||||
| if (data_ == nullptr) { | |||||
| std::string err = "Data is not allocated yet"; | |||||
| RETURN_STATUS_UNEXPECTED(err); | |||||
| } | |||||
| dsize_t flat_idx; | |||||
| RETURN_IF_NOT_OK(ToFlatIndex(index, &flat_idx)); | |||||
| offset_t length_temp = 0; | |||||
| RETURN_IF_NOT_OK(GetStringAt(flat_idx, ptr, &length_temp)); | |||||
| if (length != nullptr) *length = length_temp; | |||||
| return Status::OK(); | return Status::OK(); | ||||
| } else { | } else { | ||||
| std::string err = "data type not compatible"; | std::string err = "data type not compatible"; | ||||
| @@ -389,23 +473,27 @@ Status Tensor::GetItemPtr(T **ptr, const std::vector<dsize_t> &index) const { | |||||
| } | } | ||||
| Status Tensor::StartAddrOfIndex(std::vector<dsize_t> ind, uchar **start_addr_of_index, TensorShape *remaining) { | Status Tensor::StartAddrOfIndex(std::vector<dsize_t> ind, uchar **start_addr_of_index, TensorShape *remaining) { | ||||
| if (type() == DataType::DE_STRING) { | |||||
| RETURN_STATUS_UNEXPECTED("StartAddrOfIndex does not support string tensors yet."); | |||||
| } | |||||
| dsize_t flat_ind; | dsize_t flat_ind; | ||||
| std::vector<dsize_t> t_shape = shape().AsVector(); | std::vector<dsize_t> t_shape = shape().AsVector(); | ||||
| std::vector<dsize_t> r(t_shape.begin() + ind.size(), t_shape.end()); | std::vector<dsize_t> r(t_shape.begin() + ind.size(), t_shape.end()); | ||||
| *remaining = TensorShape(r); | *remaining = TensorShape(r); | ||||
| ind.resize(this->Rank(), 0); // same as -> while (ind.size() < this->Rank()) ind.push_back(0); | ind.resize(this->Rank(), 0); // same as -> while (ind.size() < this->Rank()) ind.push_back(0); | ||||
| RETURN_IF_NOT_OK(ToFlatIndex(ind, &flat_ind)); | RETURN_IF_NOT_OK(ToFlatIndex(ind, &flat_ind)); | ||||
| // check if StartAddr() returns null, we should flag this as an error, this sanity check will only | |||||
| // check if GetBuffer() returns null, we should flag this as an error, this sanity check will only | |||||
| // be true is the tensor failed to allocate memory. | // be true is the tensor failed to allocate memory. | ||||
| if (StartAddr() == nullptr) { | |||||
| RETURN_STATUS_UNEXPECTED("Invalid StartAddr in Tensor, got nullptr"); | |||||
| if (GetMutableBuffer() == nullptr) { | |||||
| RETURN_STATUS_UNEXPECTED("Invalid GetBuffer in Tensor, got nullptr"); | |||||
| } | } | ||||
| *start_addr_of_index = StartAddr() + flat_ind * this->type().SizeInBytes(); | |||||
| *start_addr_of_index = GetMutableBuffer() + flat_ind * this->type().SizeInBytes(); | |||||
| return Status::OK(); | return Status::OK(); | ||||
| } | } | ||||
| Status Tensor::InsertTensor(const std::vector<dsize_t> &ind, const std::shared_ptr<Tensor> &tensor) { | Status Tensor::InsertTensor(const std::vector<dsize_t> &ind, const std::shared_ptr<Tensor> &tensor) { | ||||
| std::string err_msg; | std::string err_msg; | ||||
| err_msg += (this->type() == DataType::DE_STRING) ? "[Tensor] Cannot batch tensors of type string\n" : ""; | |||||
| err_msg += (!this->shape().known() || !tensor->shape().known()) ? "[Tensor] unknown shape\n" : ""; | err_msg += (!this->shape().known() || !tensor->shape().known()) ? "[Tensor] unknown shape\n" : ""; | ||||
| err_msg += (ind.size() + tensor->Rank() != this->Rank()) ? "[Tensor] incorrect index\n" : ""; | err_msg += (ind.size() + tensor->Rank() != this->Rank()) ? "[Tensor] incorrect index\n" : ""; | ||||
| err_msg += tensor->type().SizeInBytes() != this->type().SizeInBytes() ? "[Tensor] incorrect datatype\n" : ""; | err_msg += tensor->type().SizeInBytes() != this->type().SizeInBytes() ? "[Tensor] incorrect datatype\n" : ""; | ||||
| @@ -418,7 +506,8 @@ Status Tensor::InsertTensor(const std::vector<dsize_t> &ind, const std::shared_p | |||||
| RETURN_STATUS_UNEXPECTED(err_msg); | RETURN_STATUS_UNEXPECTED(err_msg); | ||||
| } else { | } else { | ||||
| if (start_addr_of_ind != nullptr) { | if (start_addr_of_ind != nullptr) { | ||||
| int ret_code = memcpy_s(start_addr_of_ind, tensor->SizeInBytes(), tensor->StartAddr(), tensor->SizeInBytes()); | |||||
| int ret_code = | |||||
| memcpy_s(start_addr_of_ind, tensor->SizeInBytes(), tensor->GetMutableBuffer(), tensor->SizeInBytes()); | |||||
| if (ret_code == 0) { | if (ret_code == 0) { | ||||
| return Status::OK(); | return Status::OK(); | ||||
| } else { | } else { | ||||
| @@ -446,21 +535,20 @@ Status Tensor::ExpandDim(const dsize_t &axis) { | |||||
| } | } | ||||
| std::vector<dsize_t> Tensor::Strides() { | std::vector<dsize_t> Tensor::Strides() { | ||||
| std::vector<dsize_t> strides(Rank()); | |||||
| dsize_t count = shape_.NumOfElements(); | |||||
| for (dsize_t i = 0; i < Rank(); i++) { | |||||
| count /= shape_[i]; | |||||
| strides[i] = type_.SizeInBytes() * count; | |||||
| } | |||||
| std::vector<dsize_t> strides = shape_.Strides(); | |||||
| uint8_t size = type_.SizeInBytes(); | |||||
| std::transform(strides.begin(), strides.end(), strides.begin(), [&size](const auto &c) { return c * size; }); | |||||
| return strides; | return strides; | ||||
| } | } | ||||
| Status Tensor::GetBufferInfo(Tensor &t, py::buffer_info *out) { | Status Tensor::GetBufferInfo(Tensor &t, py::buffer_info *out) { | ||||
| CHECK_FAIL_RETURN_UNEXPECTED(t.type().IsNumeric(), "Cannot use GetBufferInfo on tensor of strings."); | |||||
| std::string format_desc = t.type().GetPybindFormat(); | std::string format_desc = t.type().GetPybindFormat(); | ||||
| if (format_desc.empty()) { | if (format_desc.empty()) { | ||||
| RETURN_STATUS_UNEXPECTED("Cannot convert DE type tp pybind format"); | RETURN_STATUS_UNEXPECTED("Cannot convert DE type tp pybind format"); | ||||
| } | } | ||||
| *out = py::buffer_info(t.StartAddr(), /* Pointer to buffer */ | |||||
| *out = py::buffer_info(t.GetMutableBuffer(), /* Pointer to buffer */ | |||||
| t.type().SizeInBytes(), /* Size of one scalar */ | t.type().SizeInBytes(), /* Size of one scalar */ | ||||
| format_desc, /* Python struct-style format descriptor */ | format_desc, /* Python struct-style format descriptor */ | ||||
| t.Rank(), /* Number of dimensions */ | t.Rank(), /* Number of dimensions */ | ||||
| @@ -495,6 +583,18 @@ Status Tensor::GetItemAt(T *o, const std::vector<dsize_t> &index) const { | |||||
| return Status::OK(); | return Status::OK(); | ||||
| } | } | ||||
| Status Tensor::GetItemAt(std::string_view *o, const std::vector<dsize_t> &index) const { | |||||
| RETURN_UNEXPECTED_IF_NULL(data_); | |||||
| RETURN_UNEXPECTED_IF_NULL(o); | |||||
| CHECK_FAIL_RETURN_UNEXPECTED(type_ == DataType::DE_STRING, "Type is not DE_STRING"); | |||||
| uchar *buf = nullptr; | |||||
| offset_t length = 0; | |||||
| RETURN_IF_NOT_OK(GetItemPtr(&buf, index, &length)); | |||||
| std::string_view sv{reinterpret_cast<const char *>(buf), length}; | |||||
| o->swap(sv); | |||||
| return Status::OK(); | |||||
| } | |||||
| // return data as numpy, should return status | // return data as numpy, should return status | ||||
| Status Tensor::GetDataAsNumpy(py::array *data) { | Status Tensor::GetDataAsNumpy(py::array *data) { | ||||
| RETURN_UNEXPECTED_IF_NULL(data_); | RETURN_UNEXPECTED_IF_NULL(data_); | ||||
| @@ -523,11 +623,36 @@ Status Tensor::GetDataAsNumpy(py::array *data) { | |||||
| *data = py::array_t<float>(shape_.AsVector(), reinterpret_cast<float *>(data_)); | *data = py::array_t<float>(shape_.AsVector(), reinterpret_cast<float *>(data_)); | ||||
| } else if (type_ == DataType::DE_FLOAT64) { | } else if (type_ == DataType::DE_FLOAT64) { | ||||
| *data = py::array_t<double>(shape_.AsVector(), reinterpret_cast<double *>(data_)); | *data = py::array_t<double>(shape_.AsVector(), reinterpret_cast<double *>(data_)); | ||||
| } else if (type_ == DataType::DE_STRING) { | |||||
| GetDataAsNumpyStrings(data); | |||||
| } else { | } else { | ||||
| RETURN_STATUS_UNEXPECTED("Got unexpected type when returning numpy"); | RETURN_STATUS_UNEXPECTED("Got unexpected type when returning numpy"); | ||||
| } | } | ||||
| return Status::OK(); | return Status::OK(); | ||||
| } | } | ||||
| Status Tensor::GetDataAsNumpyStrings(py::array *data) { | |||||
| auto itr = begin<std::string_view>(); | |||||
| uint64_t max = 0; | |||||
| for (; itr != end<std::string_view>(); itr++) { | |||||
| max = std::max((*itr).length(), max); | |||||
| } | |||||
| uint64_t total_size = shape_.NumOfElements() * max; | |||||
| char *tmp_data = reinterpret_cast<char *>(data_allocator_->allocate(total_size)); | |||||
| if (tmp_data == nullptr) RETURN_STATUS_UNEXPECTED("Cannot create temp array."); | |||||
| memset(tmp_data, 0, total_size); | |||||
| itr = begin<std::string_view>(); | |||||
| uint64_t i = 0; | |||||
| for (; itr != end<std::string_view>(); itr++) { | |||||
| (void)memcpy_s(tmp_data + i * max, total_size, (*itr).data(), (*itr).length()); | |||||
| i++; | |||||
| } | |||||
| auto strides = shape_.Strides(); | |||||
| std::transform(strides.begin(), strides.end(), strides.begin(), [&max](const auto &s) { return s * max; }); | |||||
| *data = py::array(py::dtype("S" + std::to_string(max)), shape_.AsVector(), strides, tmp_data); | |||||
| data_allocator_->deallocate(reinterpret_cast<uchar *>(tmp_data)); | |||||
| return Status::OK(); | |||||
| } | |||||
| void Tensor::Squeeze() { shape_ = shape_.Squeeze(); } | void Tensor::Squeeze() { shape_ = shape_.Squeeze(); } | ||||
| @@ -647,5 +772,19 @@ Status Tensor::GetFloatAt(T *o, const std::vector<dsize_t> &index) const { | |||||
| } | } | ||||
| return Status::OK(); | return Status::OK(); | ||||
| } | } | ||||
| Status Tensor::GetStringAt(dsize_t index, uchar **string_start, offset_t *length) const { | |||||
| CHECK_FAIL_RETURN_UNEXPECTED(type_ == DataType::DE_STRING, "Type is not string"); | |||||
| RETURN_UNEXPECTED_IF_NULL(data_); | |||||
| RETURN_UNEXPECTED_IF_NULL(string_start); | |||||
| RETURN_UNEXPECTED_IF_NULL(length); | |||||
| auto *offset_ptr = reinterpret_cast<offset_t *>(data_); // offsets starts here | |||||
| offset_t end = offset_ptr[index]; | |||||
| offset_t start = 0; | |||||
| if (index != 0) start = offset_ptr[index - 1] + 1; // string starts at where the previous string ends + 1 | |||||
| uchar *buf = GetStringsBuffer(); // string data starts here | |||||
| *string_start = buf + start; | |||||
| *length = end - start; | |||||
| return Status::OK(); | |||||
| } | |||||
| } // namespace dataset | } // namespace dataset | ||||
| } // namespace mindspore | } // namespace mindspore | ||||
| @@ -47,8 +47,6 @@ using TensorRow = std::vector<std::shared_ptr<Tensor>>; // A row is a set of | |||||
| using TensorTable = std::vector<TensorRow>; // The table of tensors is a vector of rows | using TensorTable = std::vector<TensorRow>; // The table of tensors is a vector of rows | ||||
| using TensorQTable = std::deque<TensorRow>; // A different flavour of tensor table, this one has queue functionality | using TensorQTable = std::deque<TensorRow>; // A different flavour of tensor table, this one has queue functionality | ||||
| // Tensor base class which holds the data in an unsigned char* buffer. | |||||
| class Tensor { | class Tensor { | ||||
| public: | public: | ||||
| Tensor() = delete; | Tensor() = delete; | ||||
| @@ -74,6 +72,27 @@ class Tensor { | |||||
| Tensor &operator=(Tensor &&other) noexcept; | Tensor &operator=(Tensor &&other) noexcept; | ||||
| // type of offest values to store strings information | |||||
| using offset_t = uint32_t; | |||||
| // const of the size of the offset variable | |||||
| static constexpr uint8_t kOffsetSize = sizeof(offset_t); | |||||
| // Tensor base class which holds the data in an unsigned char* buffer. | |||||
| // Construct a scalar string Tensor | |||||
| explicit Tensor(const std::string &str) : Tensor(std::vector<std::string>{str}, TensorShape::CreateScalar()) {} | |||||
| // Construct a tensor from a list of strings. Reshape the tensor with `shape` if given, otherwise assume the shape is | |||||
| // the size of the vector `strings`. | |||||
| // The memory layout of a Tensor of strings consists of the Offset_array followed by the strings. | |||||
| // OFFSET1, OFFSET2, ... String1, String2, ... | |||||
| // The value of each offset is the end index of the corresponding string | |||||
| // Offsets is of type offest_t | |||||
| // strings will ne null-terminated | |||||
| // example: Tensor(['abc', 'de'], shape={2}, type=DE_STRING) | |||||
| // 3 6 a b c \0 d e \0 | |||||
| explicit Tensor(const std::vector<std::string> &strings, | |||||
| const TensorShape &shape = TensorShape::CreateUnknownRankShape()); | |||||
| // A static factory method to create the given flavour of derived Tensor | // A static factory method to create the given flavour of derived Tensor | ||||
| // Returns the base class reference for the Tensor. | // Returns the base class reference for the Tensor. | ||||
| // @param ptr output argument to hold the created Tensor of given tensor_impl | // @param ptr output argument to hold the created Tensor of given tensor_impl | ||||
| @@ -91,6 +110,17 @@ class Tensor { | |||||
| // @return Status Code | // @return Status Code | ||||
| static Status CreateTensor(std::shared_ptr<Tensor> *ptr, py::array arr); | static Status CreateTensor(std::shared_ptr<Tensor> *ptr, py::array arr); | ||||
| // Helper function to create a tensor from Numpy of strings | |||||
| static Status CreateTensorFromNumpyString(std::shared_ptr<Tensor> *ptr, py::array arr); | |||||
| // A static factory method to create a Tensor from a given list of strings. | |||||
| // @param ptr output argument to hold the created Tensor | |||||
| // @param strings elements of the tensor | |||||
| // @param shape shape of the tensor | |||||
| // @return Status Code | |||||
| static Status CreateTensor(std::shared_ptr<Tensor> *ptr, const std::vector<std::string> &strings, | |||||
| const TensorShape &shape = TensorShape::CreateUnknownRankShape()); | |||||
| // Copy raw data of a array based on shape and strides to the destination pointer | // Copy raw data of a array based on shape and strides to the destination pointer | ||||
| // @param dst Pointer to the destination array where the content is to be copied | // @param dst Pointer to the destination array where the content is to be copied | ||||
| // @param src Pointer to the source of strided array to be copied | // @param src Pointer to the source of strided array to be copied | ||||
| @@ -116,6 +146,11 @@ class Tensor { | |||||
| template <typename T> | template <typename T> | ||||
| Status GetItemAt(T *o, const std::vector<dsize_t> &index) const; | Status GetItemAt(T *o, const std::vector<dsize_t> &index) const; | ||||
| // Get string located at `index`. | |||||
| // @param index vector<dsize_t> | |||||
| // @return return std::string_view specified at index | |||||
| Status GetItemAt(std::string_view *o, const std::vector<dsize_t> &index) const; | |||||
| template <typename T> | template <typename T> | ||||
| Status GetUnsignedIntAt(T *o, const std::vector<dsize_t> &index) const; | Status GetUnsignedIntAt(T *o, const std::vector<dsize_t> &index) const; | ||||
| @@ -131,26 +166,44 @@ class Tensor { | |||||
| // @param value of type `T` | // @param value of type `T` | ||||
| template <typename T> | template <typename T> | ||||
| Status SetItemAt(const std::vector<dsize_t> &index, const T &value) { | Status SetItemAt(const std::vector<dsize_t> &index, const T &value) { | ||||
| static_cast<void>(StartAddr()); | |||||
| static_cast<void>(GetMutableBuffer()); | |||||
| T *ptr = nullptr; | T *ptr = nullptr; | ||||
| RETURN_IF_NOT_OK(GetItemPtr<T>(&ptr, index)); | RETURN_IF_NOT_OK(GetItemPtr<T>(&ptr, index)); | ||||
| *ptr = value; | *ptr = value; | ||||
| return Status::OK(); | return Status::OK(); | ||||
| } | } | ||||
| // fill tensor with Zeros | |||||
| // set string item at location specified by index | |||||
| // @param index | |||||
| // @param value of type std::string | |||||
| Status SetItemAt(const std::vector<dsize_t> &index, const std::string &value) { | |||||
| RETURN_UNEXPECTED_IF_NULL(data_); | |||||
| uchar *ptr = nullptr; | |||||
| offset_t length = 0; | |||||
| RETURN_IF_NOT_OK(GetItemPtr(&ptr, index, &length)); | |||||
| if (value.length() != length) { | |||||
| RETURN_STATUS_UNEXPECTED("Length of the new string does not match the item."); | |||||
| } | |||||
| memcpy_s(reinterpret_cast<char *>(ptr), length, value.c_str(), length); | |||||
| return Status::OK(); | |||||
| } | |||||
| // fill tensor with Zeros. Does not support strings. | |||||
| Status Zero() { | Status Zero() { | ||||
| CHECK_FAIL_RETURN_UNEXPECTED(type_ != DataType::DE_STRING, "Cannot use Zero on tensor of strings.."); | |||||
| dsize_t size = SizeInBytes(); | dsize_t size = SizeInBytes(); | ||||
| CHECK_FAIL_RETURN_UNEXPECTED(memset_sp(StartAddr(), size, 0, size) == 0, "Failed to fill tensor with zeroes."); | |||||
| CHECK_FAIL_RETURN_UNEXPECTED(memset_sp(GetMutableBuffer(), size, 0, size) == 0, | |||||
| "Failed to fill tensor with zeroes."); | |||||
| return Status::OK(); | return Status::OK(); | ||||
| } | } | ||||
| // Fill all elements in the Tensor with the given value of type `T` | |||||
| // Fill all elements in the Tensor with the given value of type `T`. Does not support strings. | |||||
| // @tparam T | // @tparam T | ||||
| // @param value | // @param value | ||||
| template <typename T> | template <typename T> | ||||
| Status Fill(const T &value) { | Status Fill(const T &value) { | ||||
| static_cast<void>(StartAddr()); | |||||
| CHECK_FAIL_RETURN_UNEXPECTED(type_ != DataType::DE_STRING, "Cannot use fill on tensor of strings."); | |||||
| static_cast<void>(GetMutableBuffer()); | |||||
| int64_t cellSize = type_.SizeInBytes(); | int64_t cellSize = type_.SizeInBytes(); | ||||
| if ((data_ != nullptr) && type_.IsCompatible<T>()) { | if ((data_ != nullptr) && type_.IsCompatible<T>()) { | ||||
| for (dsize_t i = 0; i < Size(); i++) { | for (dsize_t i = 0; i < Size(); i++) { | ||||
| @@ -177,7 +230,10 @@ class Tensor { | |||||
| dsize_t Size() const { return shape().NumOfElements(); } | dsize_t Size() const { return shape().NumOfElements(); } | ||||
| // @return the number of bytes this tensor is needs | // @return the number of bytes this tensor is needs | ||||
| dsize_t SizeInBytes() const { return Size() * type_.SizeInBytes(); } | |||||
| dsize_t SizeInBytes() const { | |||||
| if (data_end_ == nullptr) return type_.SizeInBytes() * shape_.NumOfElements(); | |||||
| return data_end_ - data_; | |||||
| } | |||||
| // @return the rank of the tensor | // @return the rank of the tensor | ||||
| dsize_t Rank() const { return shape().Rank(); } | dsize_t Rank() const { return shape().Rank(); } | ||||
| @@ -185,12 +241,12 @@ class Tensor { | |||||
| // Get the starting memory address as a constant for the data of the tensor. This potentially | // Get the starting memory address as a constant for the data of the tensor. This potentially | ||||
| // drives an allocation if the data area. | // drives an allocation if the data area. | ||||
| // @return const unsigned char* | // @return const unsigned char* | ||||
| const unsigned char *StartAddr() const; | |||||
| const unsigned char *GetBuffer() const; | |||||
| // Get the starting memory address for the data of the tensor. This potentially | // Get the starting memory address for the data of the tensor. This potentially | ||||
| // drives an allocation if the data area. | // drives an allocation if the data area. | ||||
| // @return unsigned char* | // @return unsigned char* | ||||
| unsigned char *StartAddr(); | |||||
| unsigned char *GetMutableBuffer(); | |||||
| // Getter of the type | // Getter of the type | ||||
| // @return | // @return | ||||
| @@ -236,12 +292,12 @@ class Tensor { | |||||
| virtual void Squeeze(); | virtual void Squeeze(); | ||||
| // Calculates the strides of the Tensor | |||||
| // Ex: Tensor of shape <4,2,2> and type DE_UINT8 (1 byte) | |||||
| // The strides will be {6,2,1}. | |||||
| // Ex: Tensor of shape <4,2,2> and type DE_UINT32 (4 byte) | |||||
| // The strides will be {24,8,4}. | |||||
| // @return vector of integers | |||||
| /// Calculates the strides of the Tensor | |||||
| /// Ex: Tensor of shape <4,2,2> and type DE_UINT8 (1 byte) | |||||
| /// The strides will be {6,2,1}. | |||||
| /// Ex: Tensor of shape <4,2,2> and type DE_UINT32 (4 byte) | |||||
| /// The strides will be {24,8,4}. | |||||
| /// @return vector of integers | |||||
| std::vector<dsize_t> Strides(); | std::vector<dsize_t> Strides(); | ||||
| std::string ToString() { | std::string ToString() { | ||||
| @@ -255,12 +311,14 @@ class Tensor { | |||||
| // @return Status code | // @return Status code | ||||
| Status GetDataAsNumpy(py::array *data); | Status GetDataAsNumpy(py::array *data); | ||||
| Status GetDataAsNumpyStrings(py::array *data); | |||||
| static Status GetBufferInfo(Tensor &t, py::buffer_info *out); | static Status GetBufferInfo(Tensor &t, py::buffer_info *out); | ||||
| // TensorIterator is a linear iterator that can be used to iterate over the elements of the Tensor | // TensorIterator is a linear iterator that can be used to iterate over the elements of the Tensor | ||||
| // The order elements is as the memory layout (i.e., row-major) [[1,2,3],[4,5,6] --> 1,2,3,4,5,6 | // The order elements is as the memory layout (i.e., row-major) [[1,2,3],[4,5,6] --> 1,2,3,4,5,6 | ||||
| // @tparam T type of values in the Tensor Iterator | // @tparam T type of values in the Tensor Iterator | ||||
| template <typename T> | |||||
| template <typename T, bool = true> | |||||
| class TensorIterator { | class TensorIterator { | ||||
| public: | public: | ||||
| using iterator_category = std::random_access_iterator_tag; | using iterator_category = std::random_access_iterator_tag; | ||||
| @@ -271,11 +329,14 @@ class Tensor { | |||||
| explicit TensorIterator(uchar *ptr = nullptr) { ptr_ = reinterpret_cast<T *>(ptr); } | explicit TensorIterator(uchar *ptr = nullptr) { ptr_ = reinterpret_cast<T *>(ptr); } | ||||
| TensorIterator(const TensorIterator<T> &raw_iterator) = default; | |||||
| TensorIterator(const TensorIterator<T> &raw_iterator) { ptr_ = raw_iterator.ptr_; } | |||||
| ~TensorIterator() = default; | ~TensorIterator() = default; | ||||
| TensorIterator<T> &operator=(const TensorIterator<T> &rhs) = default; | |||||
| TensorIterator<T> &operator=(const TensorIterator<T> &rhs) { | |||||
| ptr_ = rhs.ptr_; | |||||
| return *this; | |||||
| } | |||||
| TensorIterator<T> &operator=(T *rhs) { | TensorIterator<T> &operator=(T *rhs) { | ||||
| ptr_ = rhs; | ptr_ = rhs; | ||||
| @@ -346,6 +407,99 @@ class Tensor { | |||||
| T *ptr_; | T *ptr_; | ||||
| }; | }; | ||||
| // Specialization of TensorIterator for strings. It returns std::string_view for every item. | |||||
| // @tparam DUMMY, used to mbe able to specialize the inner class | |||||
| template <bool DUMMY> | |||||
| class TensorIterator<std::string_view, DUMMY> { | |||||
| public: | |||||
| using iterator_category = std::random_access_iterator_tag; | |||||
| using value_type = std::string_view; | |||||
| using difference_type = ptrdiff_t; | |||||
| using pointer = std::string_view *; | |||||
| using reference = std::string_view &; | |||||
| explicit TensorIterator(uchar *offset = nullptr, const uchar *buf = nullptr, dsize_t index = 0) { | |||||
| offset_ = reinterpret_cast<offset_t *>(offset); | |||||
| buf_ = reinterpret_cast<const char *>(buf); | |||||
| index_ = index; | |||||
| } | |||||
| TensorIterator(const TensorIterator<std::string_view, DUMMY> &raw_iterator) { | |||||
| offset_ = raw_iterator.offset_; | |||||
| buf_ = raw_iterator.buf_; | |||||
| index_ = raw_iterator.index_; | |||||
| } | |||||
| ~TensorIterator() = default; | |||||
| bool operator==(const TensorIterator<std::string_view> &rhs) { | |||||
| return buf_ == rhs.buf_ && offset_ == rhs.offset_ && index_ == rhs.index_; | |||||
| } | |||||
| bool operator!=(const TensorIterator<std::string_view> &rhs) { return !(*this == rhs); } | |||||
| operator bool() const { return offset_ != nullptr; } | |||||
| std::string_view operator*() const { | |||||
| offset_t start = 0; | |||||
| if (index_ != 0) start = offset_[index_ - 1] + 1; | |||||
| return std::string_view{buf_ + start}; | |||||
| } | |||||
| TensorIterator<std::string_view> &operator+=(const dsize_t &inc) { | |||||
| index_ += inc; | |||||
| return *this; | |||||
| } | |||||
| TensorIterator<std::string_view> &operator-=(const dsize_t &inc) { | |||||
| index_ -= inc; | |||||
| return *this; | |||||
| } | |||||
| TensorIterator<std::string_view> &operator++() { | |||||
| ++index_; | |||||
| return *this; | |||||
| } | |||||
| TensorIterator<std::string_view> &operator--() { | |||||
| --index_; | |||||
| return *this; | |||||
| } | |||||
| TensorIterator<std::string_view> operator++(int) { | |||||
| auto temp(*this); | |||||
| ++index_; | |||||
| return temp; | |||||
| } | |||||
| TensorIterator<std::string_view> operator--(int) { | |||||
| auto temp(*this); | |||||
| --index_; | |||||
| return temp; | |||||
| } | |||||
| TensorIterator<std::string_view> operator+(const dsize_t &inc) { | |||||
| auto oldPtr = index_; | |||||
| index_ += inc; | |||||
| auto temp(*this); | |||||
| index_ = oldPtr; | |||||
| return temp; | |||||
| } | |||||
| TensorIterator<std::string_view> operator-(const dsize_t &inc) { | |||||
| auto oldPtr = index_; | |||||
| index_ -= inc; | |||||
| auto temp(*this); | |||||
| index_ = oldPtr; | |||||
| return temp; | |||||
| } | |||||
| protected: | |||||
| dsize_t index_; | |||||
| offset_t *offset_; | |||||
| const char *buf_; | |||||
| }; | |||||
| // Return a TensorIterator that points to the start of the Tensor. | // Return a TensorIterator that points to the start of the Tensor. | ||||
| // It's the user responsibility to use the correct type that matches the Tensor type | // It's the user responsibility to use the correct type that matches the Tensor type | ||||
| // @tparam T The type of values in the Tensor | // @tparam T The type of values in the Tensor | ||||
| @@ -391,6 +545,22 @@ class Tensor { | |||||
| template <typename T> | template <typename T> | ||||
| Status GetItemPtr(T **, const std::vector<dsize_t> &index) const; | Status GetItemPtr(T **, const std::vector<dsize_t> &index) const; | ||||
| // Get pointer to string located at `index` and the length of string | |||||
| // @param index vector<dsize_t> | |||||
| // @return return a pointer to the string specified at index and the length of the string | |||||
| Status GetItemPtr(uchar **, const std::vector<dsize_t> &index, offset_t *length = nullptr) const; | |||||
| // Given a flat index of an item string, return the start and length of the item | |||||
| // @param index flat index of the item | |||||
| // @return start address of the ths string | |||||
| // @return length of the string | |||||
| Status GetStringAt(dsize_t index, uchar **string_start, offset_t *length) const; | |||||
| // Skip the offsets and returns the start of the buffer where the real strings is stored. Caller needs to check if the | |||||
| // tensor's type is a string, otherwise undefined address would be returned. | |||||
| // @return address of the first string of the tensor. | |||||
| uchar *GetStringsBuffer() const { return data_ + kOffsetSize * shape_.NumOfElements(); } | |||||
| // all access to shape_ should be via shape | // all access to shape_ should be via shape | ||||
| TensorShape shape_; | TensorShape shape_; | ||||
| // data type of tensor | // data type of tensor | ||||
| @@ -402,6 +572,16 @@ class Tensor { | |||||
| // pointer to the end of the physical data | // pointer to the end of the physical data | ||||
| unsigned char *data_end_ = nullptr; | unsigned char *data_end_ = nullptr; | ||||
| }; | }; | ||||
| template <> | |||||
| inline Tensor::TensorIterator<std::string_view> Tensor::begin<std::string_view>() { | |||||
| uchar *buf = GetStringsBuffer(); | |||||
| return TensorIterator<std::string_view>(data_, buf); | |||||
| } | |||||
| template <> | |||||
| inline Tensor::TensorIterator<std::string_view> Tensor::end<std::string_view>() { | |||||
| uchar *buf = GetStringsBuffer(); | |||||
| return TensorIterator<std::string_view>(data_, buf, shape_.NumOfElements()); | |||||
| } | |||||
| } // namespace dataset | } // namespace dataset | ||||
| } // namespace mindspore | } // namespace mindspore | ||||
| #endif // DATASET_CORE_TENSOR_H_ | #endif // DATASET_CORE_TENSOR_H_ | ||||
| @@ -215,5 +215,17 @@ TensorShape TensorShape::Squeeze() const { | |||||
| } | } | ||||
| return TensorShape(new_shape); | return TensorShape(new_shape); | ||||
| } | } | ||||
| std::vector<dsize_t> TensorShape::Strides() { | |||||
| std::vector<dsize_t> strides(Rank()); | |||||
| dsize_t count = NumOfElements(); | |||||
| for (dsize_t i = 0; i < Rank(); i++) { | |||||
| if (raw_shape_[i] != 0) | |||||
| count /= raw_shape_[i]; | |||||
| else | |||||
| count = 0; | |||||
| strides[i] = count; | |||||
| } | |||||
| return strides; | |||||
| } | |||||
| } // namespace dataset | } // namespace dataset | ||||
| } // namespace mindspore | } // namespace mindspore | ||||
| @@ -156,6 +156,8 @@ class TensorShape { | |||||
| TensorShape Squeeze() const; | TensorShape Squeeze() const; | ||||
| std::vector<dsize_t> Strides(); | |||||
| private: | private: | ||||
| // True if known and valid shape, false otherwise | // True if known and valid shape, false otherwise | ||||
| bool known_; | bool known_; | ||||
| @@ -74,6 +74,10 @@ Status BatchOp::operator()() { | |||||
| std::unique_ptr<TensorQTable> table = std::make_unique<TensorQTable>(); | std::unique_ptr<TensorQTable> table = std::make_unique<TensorQTable>(); | ||||
| child_iterator_ = std::make_unique<ChildIterator>(this, 0, 0); | child_iterator_ = std::make_unique<ChildIterator>(this, 0, 0); | ||||
| RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&new_row)); | RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&new_row)); | ||||
| for (const auto &t : new_row) { | |||||
| CHECK_FAIL_RETURN_UNEXPECTED(t->type().IsNumeric(), | |||||
| "[Batch ERROR] Batch does not support Tensor of type string yet."); | |||||
| } | |||||
| RETURN_IF_NOT_OK(DatasetOp::AssignColMapFromChild()); // must come after the first fetch above | RETURN_IF_NOT_OK(DatasetOp::AssignColMapFromChild()); // must come after the first fetch above | ||||
| int32_t cur_batch_size = 0; | int32_t cur_batch_size = 0; | ||||
| RETURN_IF_NOT_OK(GetBatchSize(&cur_batch_size, CBatchInfo(0, 0, 0))); | RETURN_IF_NOT_OK(GetBatchSize(&cur_batch_size, CBatchInfo(0, 0, 0))); | ||||
| @@ -445,8 +449,8 @@ Status BatchOp::PadHelper(std::shared_ptr<Tensor> src, std::shared_ptr<Tensor> d | |||||
| src_flat_ind += src_s[i] * cur_ind[i]; | src_flat_ind += src_s[i] * cur_ind[i]; | ||||
| dst_flat_ind += dst_s[i] * cur_ind[i]; | dst_flat_ind += dst_s[i] * cur_ind[i]; | ||||
| } | } | ||||
| unsigned char *src_addr = src->StartAddr() + src_flat_ind * type_size; | |||||
| unsigned char *dst_addr = dst->StartAddr() + dst_flat_ind * type_size; | |||||
| unsigned char *src_addr = src->GetMutableBuffer() + src_flat_ind * type_size; | |||||
| unsigned char *dst_addr = dst->GetMutableBuffer() + dst_flat_ind * type_size; | |||||
| CHECK_FAIL_RETURN_UNEXPECTED(memcpy_s(dst_addr, len, src_addr, len) == 0, "memcpy error"); | CHECK_FAIL_RETURN_UNEXPECTED(memcpy_s(dst_addr, len, src_addr, len) == 0, "memcpy error"); | ||||
| } else { // not the last dimension, keep doing recursion | } else { // not the last dimension, keep doing recursion | ||||
| dsize_t min_ind = std::min(dst->shape()[cur_dim], src->shape()[cur_dim]); | dsize_t min_ind = std::min(dst->shape()[cur_dim], src->shape()[cur_dim]); | ||||
| @@ -85,6 +85,13 @@ Status DeviceQueueOp::operator()() { | |||||
| Status DeviceQueueOp::CheckExceptions(const std::unique_ptr<DataBuffer> &buffer) const { | Status DeviceQueueOp::CheckExceptions(const std::unique_ptr<DataBuffer> &buffer) const { | ||||
| // this method checks if the buffer meets the conditions to be sent to TDT | // this method checks if the buffer meets the conditions to be sent to TDT | ||||
| if (buffer->NumRows() != 0) { | |||||
| TensorRow row; | |||||
| buffer->GetRow(0, &row); | |||||
| for (const auto &item : row) { | |||||
| CHECK_FAIL_RETURN_UNEXPECTED(item->type().IsNumeric(), "Cannot send tensor of string type to device."); | |||||
| } | |||||
| } | |||||
| return Status::OK(); | return Status::OK(); | ||||
| } | } | ||||
| @@ -207,7 +214,7 @@ Status DeviceQueueOp::MallocForGPUData(std::vector<device::DataItemGpu> *items, | |||||
| return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "memory malloc failed."); | return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "memory malloc failed."); | ||||
| } | } | ||||
| (void)memset_s(sub_item.data_ptr_, sub_item.data_len_, 0, sub_item.data_len_); | (void)memset_s(sub_item.data_ptr_, sub_item.data_len_, 0, sub_item.data_len_); | ||||
| unsigned char *column_data = curr_row[i]->StartAddr(); | |||||
| unsigned char *column_data = curr_row[i]->GetMutableBuffer(); | |||||
| if (memcpy_s(sub_item.data_ptr_, sub_item.data_len_, column_data, | if (memcpy_s(sub_item.data_ptr_, sub_item.data_len_, column_data, | ||||
| static_cast<uint32_t>(curr_row[i++]->SizeInBytes())) != 0) { | static_cast<uint32_t>(curr_row[i++]->SizeInBytes())) != 0) { | ||||
| MS_LOG(ERROR) << "memcpy_s failed!"; | MS_LOG(ERROR) << "memcpy_s failed!"; | ||||
| @@ -407,7 +407,7 @@ Status CelebAOp::LoadTensorRow(const std::pair<std::string, std::vector<int32_t> | |||||
| RETURN_IF_NOT_OK(Tensor::CreateTensor(&image, data_schema_->column(0).tensorImpl(), | RETURN_IF_NOT_OK(Tensor::CreateTensor(&image, data_schema_->column(0).tensorImpl(), | ||||
| TensorShape(std::vector<dsize_t>(1, num_elements)), | TensorShape(std::vector<dsize_t>(1, num_elements)), | ||||
| data_schema_->column(0).type())); | data_schema_->column(0).type())); | ||||
| (void)handle.read(reinterpret_cast<char *>(image->StartAddr()), num_elements); | |||||
| (void)handle.read(reinterpret_cast<char *>(image->GetMutableBuffer()), num_elements); | |||||
| if (decode_ == true) { | if (decode_ == true) { | ||||
| Status rc = Decode(image, &image); | Status rc = Decode(image, &image); | ||||
| if (rc.IsError()) { | if (rc.IsError()) { | ||||
| @@ -197,7 +197,7 @@ Status CifarOp::LoadTensorRow(uint64_t index, TensorRow *trow) { | |||||
| std::shared_ptr<Tensor> fine_label; | std::shared_ptr<Tensor> fine_label; | ||||
| std::shared_ptr<Tensor> ori_image = cifar_image_label_pairs_[index].first; | std::shared_ptr<Tensor> ori_image = cifar_image_label_pairs_[index].first; | ||||
| std::shared_ptr<Tensor> copy_image = | std::shared_ptr<Tensor> copy_image = | ||||
| std::make_shared<Tensor>(ori_image->shape(), ori_image->type(), ori_image->StartAddr()); | |||||
| std::make_shared<Tensor>(ori_image->shape(), ori_image->type(), ori_image->GetMutableBuffer()); | |||||
| RETURN_IF_NOT_OK(Tensor::CreateTensor(&label, data_schema_->column(1).tensorImpl(), data_schema_->column(1).shape(), | RETURN_IF_NOT_OK(Tensor::CreateTensor(&label, data_schema_->column(1).tensorImpl(), data_schema_->column(1).shape(), | ||||
| data_schema_->column(1).type(), | data_schema_->column(1).type(), | ||||
| reinterpret_cast<unsigned char *>(&cifar_image_label_pairs_[index].second[0]))); | reinterpret_cast<unsigned char *>(&cifar_image_label_pairs_[index].second[0]))); | ||||
| @@ -394,7 +394,7 @@ Status CifarOp::ParseCifarData() { | |||||
| data_schema_->column(0).type())); | data_schema_->column(0).type())); | ||||
| for (int ch = 0; ch < kCifarImageChannel; ++ch) { | for (int ch = 0; ch < kCifarImageChannel; ++ch) { | ||||
| for (int pix = 0; pix < kCifarImageHeight * kCifarImageWidth; ++pix) { | for (int pix = 0; pix < kCifarImageHeight * kCifarImageWidth; ++pix) { | ||||
| (image_tensor->StartAddr())[pix * kCifarImageChannel + ch] = block[cur_block_index++]; | |||||
| (image_tensor->GetMutableBuffer())[pix * kCifarImageChannel + ch] = block[cur_block_index++]; | |||||
| } | } | ||||
| } | } | ||||
| cifar_image_label_pairs_.emplace_back(std::make_pair(image_tensor, labels)); | cifar_image_label_pairs_.emplace_back(std::make_pair(image_tensor, labels)); | ||||
| @@ -216,7 +216,7 @@ Status ImageFolderOp::LoadTensorRow(ImageLabelPair pairPtr, TensorRow *trow) { | |||||
| RETURN_IF_NOT_OK(Tensor::CreateTensor(&image, data_schema_->column(0).tensorImpl(), | RETURN_IF_NOT_OK(Tensor::CreateTensor(&image, data_schema_->column(0).tensorImpl(), | ||||
| TensorShape(std::vector<dsize_t>(1, num_elements)), | TensorShape(std::vector<dsize_t>(1, num_elements)), | ||||
| data_schema_->column(0).type(), nullptr)); | data_schema_->column(0).type(), nullptr)); | ||||
| (void)fs.read(reinterpret_cast<char *>(image->StartAddr()), num_elements); | |||||
| (void)fs.read(reinterpret_cast<char *>(image->GetMutableBuffer()), num_elements); | |||||
| fs.close(); | fs.close(); | ||||
| if (decode_ == true) { | if (decode_ == true) { | ||||
| Status rc = Decode(image, &image); | Status rc = Decode(image, &image); | ||||
| @@ -210,7 +210,7 @@ Status ManifestOp::LoadTensorRow(const std::pair<std::string, std::vector<std::s | |||||
| RETURN_IF_NOT_OK(Tensor::CreateTensor(&image, data_schema_->column(0).tensorImpl(), | RETURN_IF_NOT_OK(Tensor::CreateTensor(&image, data_schema_->column(0).tensorImpl(), | ||||
| TensorShape(std::vector<dsize_t>(1, num_elements)), | TensorShape(std::vector<dsize_t>(1, num_elements)), | ||||
| data_schema_->column(0).type(), nullptr)); | data_schema_->column(0).type(), nullptr)); | ||||
| (void)fs.read(reinterpret_cast<char *>(image->StartAddr()), num_elements); | |||||
| (void)fs.read(reinterpret_cast<char *>(image->GetMutableBuffer()), num_elements); | |||||
| if (fs.fail()) { | if (fs.fail()) { | ||||
| fs.close(); | fs.close(); | ||||
| RETURN_STATUS_UNEXPECTED("Fail to read file: " + data.first); | RETURN_STATUS_UNEXPECTED("Fail to read file: " + data.first); | ||||
| @@ -170,7 +170,7 @@ Status MnistOp::LoadTensorRow(const MnistLabelPair &mnist_pair, TensorRow *trow) | |||||
| int32_t l = mnist_pair.second; | int32_t l = mnist_pair.second; | ||||
| // make a copy of cached tensor | // make a copy of cached tensor | ||||
| RETURN_IF_NOT_OK(Tensor::CreateTensor(&image, data_schema_->column(0).tensorImpl(), mnist_pair.first->shape(), | RETURN_IF_NOT_OK(Tensor::CreateTensor(&image, data_schema_->column(0).tensorImpl(), mnist_pair.first->shape(), | ||||
| mnist_pair.first->type(), mnist_pair.first->StartAddr())); | |||||
| mnist_pair.first->type(), mnist_pair.first->GetMutableBuffer())); | |||||
| RETURN_IF_NOT_OK(Tensor::CreateTensor(&label, data_schema_->column(1).tensorImpl(), data_schema_->column(1).shape(), | RETURN_IF_NOT_OK(Tensor::CreateTensor(&label, data_schema_->column(1).tensorImpl(), data_schema_->column(1).shape(), | ||||
| data_schema_->column(1).type(), reinterpret_cast<unsigned char *>(&l))); | data_schema_->column(1).type(), reinterpret_cast<unsigned char *>(&l))); | ||||
| (*trow) = {std::move(image), std::move(label)}; | (*trow) = {std::move(image), std::move(label)}; | ||||
| @@ -127,7 +127,7 @@ Status RandomDataOp::GenerateSchema() { | |||||
| // For each column: | // For each column: | ||||
| // - choose a datatype | // - choose a datatype | ||||
| // - generate a shape that randomly chooses the number of dimensions and the dimension values. | // - generate a shape that randomly chooses the number of dimensions and the dimension values. | ||||
| DataType::Type newType = static_cast<DataType::Type>(GenRandomInt(0, kMaxDataType)); | |||||
| DataType::Type newType = static_cast<DataType::Type>(GenRandomInt(0, DataType::NUM_OF_TYPES - 2)); | |||||
| int32_t rank = GenRandomInt(1, kMaxRank); | int32_t rank = GenRandomInt(1, kMaxRank); | ||||
| std::vector<dsize_t> dims; | std::vector<dsize_t> dims; | ||||
| for (int32_t d = 0; d < rank; d++) { | for (int32_t d = 0; d < rank; d++) { | ||||
| @@ -43,7 +43,6 @@ class RandomDataOp : public ParallelOp { | |||||
| static constexpr int32_t kMaxNumColumns = 4; | static constexpr int32_t kMaxNumColumns = 4; | ||||
| static constexpr int32_t kMaxRank = 4; | static constexpr int32_t kMaxRank = 4; | ||||
| static constexpr int32_t kMaxDimValue = 2048; | static constexpr int32_t kMaxDimValue = 2048; | ||||
| static constexpr int32_t kMaxDataType = (DataType::DE_UNKNOWN - 1); | |||||
| static constexpr int32_t kMaxTotalRows = 1024; | static constexpr int32_t kMaxTotalRows = 1024; | ||||
| // A nested builder class to aid in the construction of a RandomDataOp | // A nested builder class to aid in the construction of a RandomDataOp | ||||
| @@ -58,7 +58,7 @@ Status DistributedSampler::GetNextBuffer(std::unique_ptr<DataBuffer> *out_buffer | |||||
| (*out_buffer) = std::make_unique<DataBuffer>(cnt_, DataBuffer::kDeBFlagNone); | (*out_buffer) = std::make_unique<DataBuffer>(cnt_, DataBuffer::kDeBFlagNone); | ||||
| std::shared_ptr<Tensor> sample_ids; | std::shared_ptr<Tensor> sample_ids; | ||||
| RETURN_IF_NOT_OK(CreateSamplerTensor(&sample_ids, samples_per_buffer_)); | RETURN_IF_NOT_OK(CreateSamplerTensor(&sample_ids, samples_per_buffer_)); | ||||
| int64_t *id_ptr = reinterpret_cast<int64_t *>(sample_ids->StartAddr()); | |||||
| int64_t *id_ptr = reinterpret_cast<int64_t *>(sample_ids->GetMutableBuffer()); | |||||
| while (cnt_ < samples_per_buffer_) { | while (cnt_ < samples_per_buffer_) { | ||||
| int64_t next_id = (num_devices_ * (cnt_++) + device_id_) % num_rows_; | int64_t next_id = (num_devices_ * (cnt_++) + device_id_) % num_rows_; | ||||
| *(id_ptr++) = shuffle_ ? shuffle_vec_[static_cast<size_t>(next_id)] : next_id; | *(id_ptr++) = shuffle_ ? shuffle_vec_[static_cast<size_t>(next_id)] : next_id; | ||||
| @@ -58,7 +58,7 @@ Status PKSampler::GetNextBuffer(std::unique_ptr<DataBuffer> *out_buffer) { | |||||
| int64_t last_id = | int64_t last_id = | ||||
| (samples_per_buffer_ + next_id_ > num_pk_samples_) ? num_pk_samples_ : samples_per_buffer_ + next_id_; | (samples_per_buffer_ + next_id_ > num_pk_samples_) ? num_pk_samples_ : samples_per_buffer_ + next_id_; | ||||
| RETURN_IF_NOT_OK(CreateSamplerTensor(&sample_ids, last_id - next_id_)); | RETURN_IF_NOT_OK(CreateSamplerTensor(&sample_ids, last_id - next_id_)); | ||||
| int64_t *id_ptr = reinterpret_cast<int64_t *>(sample_ids->StartAddr()); | |||||
| int64_t *id_ptr = reinterpret_cast<int64_t *>(sample_ids->GetMutableBuffer()); | |||||
| while (next_id_ < last_id) { | while (next_id_ < last_id) { | ||||
| int64_t cls_id = next_id_++ / samples_per_class_; | int64_t cls_id = next_id_++ / samples_per_class_; | ||||
| const std::vector<int64_t> &samples = label_to_ids_[labels_[cls_id]]; | const std::vector<int64_t> &samples = label_to_ids_[labels_[cls_id]]; | ||||
| @@ -38,7 +38,7 @@ Status RandomSampler::GetNextBuffer(std::unique_ptr<DataBuffer> *out_buffer) { | |||||
| std::shared_ptr<Tensor> sampleIds; | std::shared_ptr<Tensor> sampleIds; | ||||
| int64_t last_id = samples_per_buffer_ + next_id_ > num_samples_ ? num_samples_ : samples_per_buffer_ + next_id_; | int64_t last_id = samples_per_buffer_ + next_id_ > num_samples_ ? num_samples_ : samples_per_buffer_ + next_id_; | ||||
| RETURN_IF_NOT_OK(CreateSamplerTensor(&sampleIds, last_id - next_id_)); | RETURN_IF_NOT_OK(CreateSamplerTensor(&sampleIds, last_id - next_id_)); | ||||
| int64_t *id_ptr = reinterpret_cast<int64_t *>(sampleIds->StartAddr()); | |||||
| int64_t *id_ptr = reinterpret_cast<int64_t *>(sampleIds->GetMutableBuffer()); | |||||
| for (int64_t i = 0; i < (last_id - next_id_); i++) { | for (int64_t i = 0; i < (last_id - next_id_); i++) { | ||||
| *(id_ptr + i) = replacement_ ? (*dist)(rnd_) : shuffled_ids_[static_cast<size_t>(i + next_id_)]; | *(id_ptr + i) = replacement_ ? (*dist)(rnd_) : shuffled_ids_[static_cast<size_t>(i + next_id_)]; | ||||
| } | } | ||||
| @@ -40,7 +40,7 @@ Status Sampler::CreateSamplerTensor(std::shared_ptr<Tensor> *sample_ids, int64_t | |||||
| } | } | ||||
| TensorShape shape(std::vector<dsize_t>(1, num_elements)); | TensorShape shape(std::vector<dsize_t>(1, num_elements)); | ||||
| RETURN_IF_NOT_OK(Tensor::CreateTensor(sample_ids, col_desc_->tensorImpl(), shape, col_desc_->type())); | RETURN_IF_NOT_OK(Tensor::CreateTensor(sample_ids, col_desc_->tensorImpl(), shape, col_desc_->type())); | ||||
| (void)(*sample_ids)->StartAddr(); // allocate memory in case user forgets! | |||||
| (void)(*sample_ids)->GetMutableBuffer(); // allocate memory in case user forgets! | |||||
| return Status::OK(); | return Status::OK(); | ||||
| } | } | ||||
| @@ -31,7 +31,7 @@ Status SequentialSampler::GetNextBuffer(std::unique_ptr<DataBuffer> *out_buffer) | |||||
| std::shared_ptr<Tensor> sampleIds; | std::shared_ptr<Tensor> sampleIds; | ||||
| int64_t lastId = (samples_per_buffer_ + next_id_ > num_samples_) ? num_samples_ : samples_per_buffer_ + next_id_; | int64_t lastId = (samples_per_buffer_ + next_id_ > num_samples_) ? num_samples_ : samples_per_buffer_ + next_id_; | ||||
| RETURN_IF_NOT_OK(CreateSamplerTensor(&sampleIds, lastId - next_id_)); | RETURN_IF_NOT_OK(CreateSamplerTensor(&sampleIds, lastId - next_id_)); | ||||
| int64_t *idPtr = reinterpret_cast<int64_t *>(sampleIds->StartAddr()); | |||||
| int64_t *idPtr = reinterpret_cast<int64_t *>(sampleIds->GetMutableBuffer()); | |||||
| while (next_id_ < lastId) { | while (next_id_ < lastId) { | ||||
| *(idPtr++) = next_id_++; | *(idPtr++) = next_id_++; | ||||
| } | } | ||||
| @@ -78,7 +78,7 @@ Status SubsetRandomSampler::GetNextBuffer(std::unique_ptr<DataBuffer> *out_buffe | |||||
| RETURN_IF_NOT_OK(CreateSamplerTensor(&outputIds, last_id - sample_id_)); | RETURN_IF_NOT_OK(CreateSamplerTensor(&outputIds, last_id - sample_id_)); | ||||
| // Initialize tensor | // Initialize tensor | ||||
| int64_t *id_ptr = reinterpret_cast<int64_t *>(outputIds->StartAddr()); | |||||
| int64_t *id_ptr = reinterpret_cast<int64_t *>(outputIds->GetMutableBuffer()); | |||||
| while (sample_id_ < last_id) { | while (sample_id_ < last_id) { | ||||
| if (indices_[sample_id_] >= num_rows_) { | if (indices_[sample_id_] >= num_rows_) { | ||||
| std::string err_msg = | std::string err_msg = | ||||
| @@ -111,7 +111,7 @@ Status WeightedRandomSampler::GetNextBuffer(std::unique_ptr<DataBuffer> *out_buf | |||||
| RETURN_IF_NOT_OK(CreateSamplerTensor(&outputIds, last_id - sample_id_)); | RETURN_IF_NOT_OK(CreateSamplerTensor(&outputIds, last_id - sample_id_)); | ||||
| // Initialize tensor. | // Initialize tensor. | ||||
| int64_t *id_ptr = reinterpret_cast<int64_t *>(outputIds->StartAddr()); | |||||
| int64_t *id_ptr = reinterpret_cast<int64_t *>(outputIds->GetMutableBuffer()); | |||||
| // Assign the data to tensor element. | // Assign the data to tensor element. | ||||
| while (sample_id_ < last_id) { | while (sample_id_ < last_id) { | ||||
| int64_t genId; | int64_t genId; | ||||
| @@ -146,10 +146,7 @@ Status TextFileOp::LoadTensor(const std::string &line, std::unique_ptr<TensorQTa | |||||
| (*tensor_table)->push_back(std::move(tRow)); | (*tensor_table)->push_back(std::move(tRow)); | ||||
| std::shared_ptr<Tensor> tensor; | std::shared_ptr<Tensor> tensor; | ||||
| RETURN_IF_NOT_OK( | |||||
| Tensor::CreateTensor(&tensor, data_schema_->column(0).tensorImpl(), | |||||
| TensorShape(std::vector<dsize_t>(1, line.size())), data_schema_->column(0).type(), | |||||
| const_cast<unsigned char *>(reinterpret_cast<const unsigned char *>(common::SafeCStr(line))))); | |||||
| RETURN_IF_NOT_OK(Tensor::CreateTensor(&tensor, {line}, TensorShape::CreateScalar())); | |||||
| (**tensor_table)[row][0] = std::move(tensor); | (**tensor_table)[row][0] = std::move(tensor); | ||||
| return Status::OK(); | return Status::OK(); | ||||
| } | } | ||||
| @@ -759,7 +759,7 @@ Status TFReaderOp::LoadBytesList(const ColDescriptor ¤t_col, const dataeng | |||||
| RETURN_IF_NOT_OK(Tensor::CreateTensor(tensor, current_col.tensorImpl(), current_shape, current_col.type())); | RETURN_IF_NOT_OK(Tensor::CreateTensor(tensor, current_col.tensorImpl(), current_shape, current_col.type())); | ||||
| // Tensors are lazily allocated, this eagerly allocates memory for the tensor. | // Tensors are lazily allocated, this eagerly allocates memory for the tensor. | ||||
| unsigned char *current_tensor_addr = (*tensor)->StartAddr(); | |||||
| unsigned char *current_tensor_addr = (*tensor)->GetMutableBuffer(); | |||||
| int64_t tensor_bytes_remaining = (*num_elements) * pad_size; | int64_t tensor_bytes_remaining = (*num_elements) * pad_size; | ||||
| if (current_tensor_addr == nullptr) { | if (current_tensor_addr == nullptr) { | ||||
| @@ -878,7 +878,7 @@ Status TFReaderOp::LoadIntList(const ColDescriptor ¤t_col, const dataengin | |||||
| RETURN_IF_NOT_OK(Tensor::CreateTensor(tensor, current_col.tensorImpl(), current_shape, current_col.type())); | RETURN_IF_NOT_OK(Tensor::CreateTensor(tensor, current_col.tensorImpl(), current_shape, current_col.type())); | ||||
| // Tensors are lazily allocated, this eagerly allocates memory for the tensor. | // Tensors are lazily allocated, this eagerly allocates memory for the tensor. | ||||
| (void)(*tensor)->StartAddr(); | |||||
| (void)(*tensor)->GetMutableBuffer(); | |||||
| int64_t i = 0; | int64_t i = 0; | ||||
| auto it = (*tensor)->begin<T>(); | auto it = (*tensor)->begin<T>(); | ||||
| @@ -388,7 +388,7 @@ Status VOCOp::ReadImageToTensor(const std::string &path, const ColDescriptor &co | |||||
| (void)fs.seekg(0, std::ios::beg); | (void)fs.seekg(0, std::ios::beg); | ||||
| RETURN_IF_NOT_OK( | RETURN_IF_NOT_OK( | ||||
| Tensor::CreateTensor(tensor, col.tensorImpl(), TensorShape(std::vector<dsize_t>(1, num_elements)), col.type())); | Tensor::CreateTensor(tensor, col.tensorImpl(), TensorShape(std::vector<dsize_t>(1, num_elements)), col.type())); | ||||
| (void)fs.read(reinterpret_cast<char *>((*tensor)->StartAddr()), num_elements); | |||||
| (void)fs.read(reinterpret_cast<char *>((*tensor)->GetMutableBuffer()), num_elements); | |||||
| fs.close(); | fs.close(); | ||||
| if (decode_ == true) { | if (decode_ == true) { | ||||
| Status rc = Decode(*tensor, tensor); | Status rc = Decode(*tensor, tensor); | ||||
| @@ -110,7 +110,7 @@ TdtStatus TdtPlugin::translate(const TensorRow &ts_row, std::vector<DataItem> &i | |||||
| data_item.tensorShape_ = dataShapes; | data_item.tensorShape_ = dataShapes; | ||||
| data_item.tensorType_ = datatype; | data_item.tensorType_ = datatype; | ||||
| data_item.dataLen_ = ts->SizeInBytes(); | data_item.dataLen_ = ts->SizeInBytes(); | ||||
| data_item.dataPtr_ = std::shared_ptr<void>(reinterpret_cast<void *>(ts->StartAddr()), [](void *elem) {}); | |||||
| data_item.dataPtr_ = std::shared_ptr<void>(reinterpret_cast<void *>(ts->GetMutableBuffer()), [](void *elem) {}); | |||||
| items.emplace_back(data_item); | items.emplace_back(data_item); | ||||
| MS_LOG(INFO) << "TDT data type is " << datatype << ", data shape is " << dataShapes << ", data length is " | MS_LOG(INFO) << "TDT data type is " << datatype << ", data shape is " << dataShapes << ", data length is " | ||||
| << ts->Size() << "."; | << ts->Size() << "."; | ||||
| @@ -162,7 +162,7 @@ void CastFrom(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *out | |||||
| Status TypeCast(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const DataType &data_type) { | Status TypeCast(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const DataType &data_type) { | ||||
| RETURN_IF_NOT_OK(Tensor::CreateTensor(output, TensorImpl::kFlexible, input->shape(), data_type)); | RETURN_IF_NOT_OK(Tensor::CreateTensor(output, TensorImpl::kFlexible, input->shape(), data_type)); | ||||
| static_cast<void>((*output)->StartAddr()); | |||||
| static_cast<void>((*output)->GetMutableBuffer()); | |||||
| switch (input->type().value()) { | switch (input->type().value()) { | ||||
| case DataType::DE_BOOL: | case DataType::DE_BOOL: | ||||
| CastFrom<bool>(input, output); | CastFrom<bool>(input, output); | ||||
| @@ -211,7 +211,7 @@ Status ToFloat16(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> * | |||||
| // initiate new tensor for type cast | // initiate new tensor for type cast | ||||
| DataType new_type = DataType("float16"); | DataType new_type = DataType("float16"); | ||||
| RETURN_IF_NOT_OK(Tensor::CreateTensor(output, TensorImpl::kFlexible, input->shape(), new_type)); | RETURN_IF_NOT_OK(Tensor::CreateTensor(output, TensorImpl::kFlexible, input->shape(), new_type)); | ||||
| static_cast<void>((*output)->StartAddr()); | |||||
| static_cast<void>((*output)->GetMutableBuffer()); | |||||
| auto in_itr = input->begin<float>(); | auto in_itr = input->begin<float>(); | ||||
| auto out_itr = (*output)->begin<float16>(); | auto out_itr = (*output)->begin<float16>(); | ||||
| @@ -64,7 +64,7 @@ Status Flip(std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *output, int | |||||
| std::shared_ptr<CVTensor> output_cv = std::make_shared<CVTensor>(input_cv->shape(), input_cv->type()); | std::shared_ptr<CVTensor> output_cv = std::make_shared<CVTensor>(input_cv->shape(), input_cv->type()); | ||||
| RETURN_UNEXPECTED_IF_NULL(output_cv); | RETURN_UNEXPECTED_IF_NULL(output_cv); | ||||
| (void)output_cv->StartAddr(); | |||||
| (void)output_cv->GetMutableBuffer(); | |||||
| if (input_cv->mat().data) { | if (input_cv->mat().data) { | ||||
| try { | try { | ||||
| cv::flip(input_cv->mat(), output_cv->mat(), flip_code); | cv::flip(input_cv->mat(), output_cv->mat(), flip_code); | ||||
| @@ -125,10 +125,10 @@ bool HasJpegMagic(const unsigned char *data, size_t data_size) { | |||||
| } | } | ||||
| Status Decode(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) { | Status Decode(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) { | ||||
| if (input->StartAddr() == nullptr) { | |||||
| if (input->GetMutableBuffer() == nullptr) { | |||||
| RETURN_STATUS_UNEXPECTED("Tensor is nullptr"); | RETURN_STATUS_UNEXPECTED("Tensor is nullptr"); | ||||
| } | } | ||||
| if (HasJpegMagic(input->StartAddr(), input->SizeInBytes())) { | |||||
| if (HasJpegMagic(input->GetMutableBuffer(), input->SizeInBytes())) { | |||||
| return JpegCropAndDecode(input, output); | return JpegCropAndDecode(input, output); | ||||
| } else { | } else { | ||||
| return DecodeCv(input, output); | return DecodeCv(input, output); | ||||
| @@ -282,7 +282,7 @@ Status JpegCropAndDecode(const std::shared_ptr<Tensor> &input, std::shared_ptr<T | |||||
| jerr.pub.error_exit = JpegErrorExitCustom; | jerr.pub.error_exit = JpegErrorExitCustom; | ||||
| try { | try { | ||||
| jpeg_create_decompress(&cinfo); | jpeg_create_decompress(&cinfo); | ||||
| JpegSetSource(&cinfo, input->StartAddr(), input->SizeInBytes()); | |||||
| JpegSetSource(&cinfo, input->GetMutableBuffer(), input->SizeInBytes()); | |||||
| (void)jpeg_read_header(&cinfo, TRUE); | (void)jpeg_read_header(&cinfo, TRUE); | ||||
| RETURN_IF_NOT_OK(JpegSetColorSpace(&cinfo)); | RETURN_IF_NOT_OK(JpegSetColorSpace(&cinfo)); | ||||
| jpeg_calc_output_dimensions(&cinfo); | jpeg_calc_output_dimensions(&cinfo); | ||||
| @@ -311,7 +311,7 @@ Status JpegCropAndDecode(const std::shared_ptr<Tensor> &input, std::shared_ptr<T | |||||
| TensorShape ts = TensorShape({crop_h, crop_w, kOutNumComponents}); | TensorShape ts = TensorShape({crop_h, crop_w, kOutNumComponents}); | ||||
| auto output_tensor = std::make_shared<Tensor>(ts, DataType(DataType::DE_UINT8)); | auto output_tensor = std::make_shared<Tensor>(ts, DataType(DataType::DE_UINT8)); | ||||
| const int buffer_size = output_tensor->SizeInBytes(); | const int buffer_size = output_tensor->SizeInBytes(); | ||||
| JSAMPLE *buffer = static_cast<JSAMPLE *>(output_tensor->StartAddr()); | |||||
| JSAMPLE *buffer = static_cast<JSAMPLE *>(output_tensor->GetMutableBuffer()); | |||||
| const int max_scanlines_to_read = skipped_scanlines + crop_h; | const int max_scanlines_to_read = skipped_scanlines + crop_h; | ||||
| // stride refers to output tensor, which has 3 components at most | // stride refers to output tensor, which has 3 components at most | ||||
| const int stride = crop_w * kOutNumComponents; | const int stride = crop_w * kOutNumComponents; | ||||
| @@ -31,7 +31,7 @@ Status RandomCropDecodeResizeOp::Compute(const std::shared_ptr<Tensor> &input, s | |||||
| if (input == nullptr) { | if (input == nullptr) { | ||||
| RETURN_STATUS_UNEXPECTED("input tensor is null"); | RETURN_STATUS_UNEXPECTED("input tensor is null"); | ||||
| } | } | ||||
| if (!HasJpegMagic(input->StartAddr(), input->SizeInBytes())) { | |||||
| if (!HasJpegMagic(input->GetMutableBuffer(), input->SizeInBytes())) { | |||||
| DecodeOp op(true); | DecodeOp op(true); | ||||
| std::shared_ptr<Tensor> decoded; | std::shared_ptr<Tensor> decoded; | ||||
| RETURN_IF_NOT_OK(op.Compute(input, &decoded)); | RETURN_IF_NOT_OK(op.Compute(input, &decoded)); | ||||
| @@ -43,7 +43,7 @@ Status RandomCropDecodeResizeOp::Compute(const std::shared_ptr<Tensor> &input, s | |||||
| jerr.pub.error_exit = JpegErrorExitCustom; | jerr.pub.error_exit = JpegErrorExitCustom; | ||||
| try { | try { | ||||
| jpeg_create_decompress(&cinfo); | jpeg_create_decompress(&cinfo); | ||||
| JpegSetSource(&cinfo, input->StartAddr(), input->SizeInBytes()); | |||||
| JpegSetSource(&cinfo, input->GetMutableBuffer(), input->SizeInBytes()); | |||||
| (void)jpeg_read_header(&cinfo, TRUE); | (void)jpeg_read_header(&cinfo, TRUE); | ||||
| jpeg_calc_output_dimensions(&cinfo); | jpeg_calc_output_dimensions(&cinfo); | ||||
| } catch (std::runtime_error &e) { | } catch (std::runtime_error &e) { | ||||
| @@ -50,6 +50,7 @@ SET(DE_UT_SRCS | |||||
| storage_op_test.cc | storage_op_test.cc | ||||
| task_manager_test.cc | task_manager_test.cc | ||||
| tensor_test.cc | tensor_test.cc | ||||
| tensor_string_test.cc | |||||
| tensorshape_test.cc | tensorshape_test.cc | ||||
| tfReader_op_test.cc | tfReader_op_test.cc | ||||
| to_float16_op_test.cc | to_float16_op_test.cc | ||||
| @@ -60,7 +60,7 @@ void CVOpCommon::GetInputImage(std::string filename) { | |||||
| TensorShape in_shape({file_size}); | TensorShape in_shape({file_size}); | ||||
| raw_input_tensor_ = std::make_shared<Tensor>(in_shape, DataType(DataType::DE_UINT8)); | raw_input_tensor_ = std::make_shared<Tensor>(in_shape, DataType(DataType::DE_UINT8)); | ||||
| file.read(reinterpret_cast<char *>(raw_input_tensor_->StartAddr()), raw_input_tensor_->SizeInBytes()); | |||||
| file.read(reinterpret_cast<char *>(raw_input_tensor_->GetMutableBuffer()), raw_input_tensor_->SizeInBytes()); | |||||
| raw_cv_image_ = cv::imread(filename, cv::ImreadModes::IMREAD_COLOR); | raw_cv_image_ = cv::imread(filename, cv::ImreadModes::IMREAD_COLOR); | ||||
| input_tensor_ = std::dynamic_pointer_cast<Tensor>(std::make_shared<CVTensor>(raw_cv_image_)); | input_tensor_ = std::dynamic_pointer_cast<Tensor>(std::make_shared<CVTensor>(raw_cv_image_)); | ||||
| SwapRedAndBlue(input_tensor_, &input_tensor_); | SwapRedAndBlue(input_tensor_, &input_tensor_); | ||||
| @@ -32,47 +32,47 @@ class MindDataTestDatatype : public UT::Common { | |||||
| TEST_F(MindDataTestDatatype, TestSizes) { | TEST_F(MindDataTestDatatype, TestSizes) { | ||||
| uint8_t x = DataType::DE_BOOL_SIZE; | |||||
| uint8_t x = DataType::SIZE_IN_BYTES[DataType::DE_BOOL]; | |||||
| DataType d = DataType(DataType::DE_BOOL); | DataType d = DataType(DataType::DE_BOOL); | ||||
| ASSERT_EQ(x, 1); | ASSERT_EQ(x, 1); | ||||
| ASSERT_EQ(d.SizeInBytes(), x); | ASSERT_EQ(d.SizeInBytes(), x); | ||||
| x = DataType::DE_INT8_SIZE; | |||||
| x = DataType::SIZE_IN_BYTES[DataType::DE_INT8]; | |||||
| d = DataType(DataType::DE_INT8); | d = DataType(DataType::DE_INT8); | ||||
| ASSERT_EQ(x, 1); | ASSERT_EQ(x, 1); | ||||
| ASSERT_EQ(d.SizeInBytes(), x); | ASSERT_EQ(d.SizeInBytes(), x); | ||||
| x = DataType::DE_UINT8_SIZE; | |||||
| x = DataType::SIZE_IN_BYTES[DataType::DE_UINT8]; | |||||
| d = DataType(DataType::DE_UINT8); | d = DataType(DataType::DE_UINT8); | ||||
| ASSERT_EQ(x, 1); | ASSERT_EQ(x, 1); | ||||
| ASSERT_EQ(d.SizeInBytes(), x); | ASSERT_EQ(d.SizeInBytes(), x); | ||||
| x = DataType::DE_INT16_SIZE; | |||||
| x = DataType::SIZE_IN_BYTES[DataType::DE_INT16]; | |||||
| d = DataType(DataType::DE_INT16); | d = DataType(DataType::DE_INT16); | ||||
| ASSERT_EQ(x, 2); | ASSERT_EQ(x, 2); | ||||
| ASSERT_EQ(d.SizeInBytes(), x); | ASSERT_EQ(d.SizeInBytes(), x); | ||||
| x = DataType::DE_UINT16_SIZE; | |||||
| x = DataType::SIZE_IN_BYTES[DataType::DE_UINT16]; | |||||
| d = DataType(DataType::DE_UINT16); | d = DataType(DataType::DE_UINT16); | ||||
| ASSERT_EQ(x, 2); | ASSERT_EQ(x, 2); | ||||
| ASSERT_EQ(d.SizeInBytes(), x); | ASSERT_EQ(d.SizeInBytes(), x); | ||||
| x = DataType::DE_INT32_SIZE; | |||||
| x = DataType::SIZE_IN_BYTES[DataType::DE_INT32]; | |||||
| d = DataType(DataType::DE_INT32); | d = DataType(DataType::DE_INT32); | ||||
| ASSERT_EQ(x, 4); | ASSERT_EQ(x, 4); | ||||
| ASSERT_EQ(d.SizeInBytes(), x); | ASSERT_EQ(d.SizeInBytes(), x); | ||||
| x = DataType::DE_UINT32_SIZE; | |||||
| x = DataType::SIZE_IN_BYTES[DataType::DE_UINT32]; | |||||
| d = DataType(DataType::DE_UINT32); | d = DataType(DataType::DE_UINT32); | ||||
| ASSERT_EQ(x, 4); | ASSERT_EQ(x, 4); | ||||
| ASSERT_EQ(d.SizeInBytes(), x); | ASSERT_EQ(d.SizeInBytes(), x); | ||||
| x = DataType::DE_INT64_SIZE; | |||||
| x = DataType::SIZE_IN_BYTES[DataType::DE_INT64]; | |||||
| d = DataType(DataType::DE_INT64); | d = DataType(DataType::DE_INT64); | ||||
| ASSERT_EQ(x, 8); | ASSERT_EQ(x, 8); | ||||
| ASSERT_EQ(d.SizeInBytes(), x); | ASSERT_EQ(d.SizeInBytes(), x); | ||||
| x = DataType::DE_UINT64_SIZE; | |||||
| x = DataType::SIZE_IN_BYTES[DataType::DE_UINT64]; | |||||
| d = DataType(DataType::DE_UINT64); | d = DataType(DataType::DE_UINT64); | ||||
| ASSERT_EQ(x, 8); | ASSERT_EQ(x, 8); | ||||
| ASSERT_EQ(d.SizeInBytes(), x); | ASSERT_EQ(d.SizeInBytes(), x); | ||||
| x = DataType::DE_FLOAT32_SIZE; | |||||
| x = DataType::SIZE_IN_BYTES[DataType::DE_FLOAT32]; | |||||
| d = DataType(DataType::DE_FLOAT32); | d = DataType(DataType::DE_FLOAT32); | ||||
| ASSERT_EQ(x, 4); | ASSERT_EQ(x, 4); | ||||
| ASSERT_EQ(d.SizeInBytes(), x); | ASSERT_EQ(d.SizeInBytes(), x); | ||||
| x = DataType::DE_FLOAT64_SIZE; | |||||
| x = DataType::SIZE_IN_BYTES[DataType::DE_FLOAT64]; | |||||
| d = DataType(DataType::DE_FLOAT64); | d = DataType(DataType::DE_FLOAT64); | ||||
| ASSERT_EQ(x, 8); | ASSERT_EQ(x, 8); | ||||
| ASSERT_EQ(d.SizeInBytes(), x); | ASSERT_EQ(d.SizeInBytes(), x); | ||||
| @@ -74,7 +74,7 @@ Status Create1DTensor(std::shared_ptr<Tensor> *sample_ids, int64_t num_elements, | |||||
| RETURN_IF_NOT_OK( | RETURN_IF_NOT_OK( | ||||
| Tensor::CreateTensor(sample_ids, TensorImpl::kFlexible, shape, DataType(data_type), data)); | Tensor::CreateTensor(sample_ids, TensorImpl::kFlexible, shape, DataType(data_type), data)); | ||||
| if (data == nullptr) { | if (data == nullptr) { | ||||
| (*sample_ids)->StartAddr(); // allocate memory in case user forgets! | |||||
| (*sample_ids)->GetMutableBuffer(); // allocate memory in case user forgets! | |||||
| } | } | ||||
| return Status::OK(); | return Status::OK(); | ||||
| } | } | ||||
| @@ -190,7 +190,7 @@ TEST_F(MindDataTestMapOp, TestByPosition) { | |||||
| EXPECT_EQ(tensor_list[i]->type(), golden_types[i]); | EXPECT_EQ(tensor_list[i]->type(), golden_types[i]); | ||||
| EXPECT_EQ(tensor_list[i]->Rank(), golden_ranks[i]); | EXPECT_EQ(tensor_list[i]->Rank(), golden_ranks[i]); | ||||
| EXPECT_EQ(tensor_list[i]->shape(), golden_shapes[i]); | EXPECT_EQ(tensor_list[i]->shape(), golden_shapes[i]); | ||||
| EXPECT_NE(tensor_list[i]->StartAddr(), nullptr); | |||||
| EXPECT_NE(tensor_list[i]->GetMutableBuffer(), nullptr); | |||||
| } | } | ||||
| } | } | ||||
| @@ -366,7 +366,7 @@ TEST_F(MindDataTestMapOp, Test1to3) { | |||||
| EXPECT_EQ(tensor_list[i]->type(), golden_types[i]); | EXPECT_EQ(tensor_list[i]->type(), golden_types[i]); | ||||
| EXPECT_EQ(tensor_list[i]->Rank(), golden_ranks[i]); | EXPECT_EQ(tensor_list[i]->Rank(), golden_ranks[i]); | ||||
| EXPECT_EQ(tensor_list[i]->shape(), golden_shapes[i]); | EXPECT_EQ(tensor_list[i]->shape(), golden_shapes[i]); | ||||
| EXPECT_NE(tensor_list[i]->StartAddr(), nullptr); | |||||
| EXPECT_NE(tensor_list[i]->GetMutableBuffer(), nullptr); | |||||
| } | } | ||||
| rc = di.FetchNextTensorRow(&tensor_list); | rc = di.FetchNextTensorRow(&tensor_list); | ||||
| EXPECT_TRUE(rc.IsOk()); | EXPECT_TRUE(rc.IsOk()); | ||||
| @@ -700,7 +700,7 @@ TEST_F(MindDataTestMapOp, ImageFolder_Decode_Repeat_Resize) { | |||||
| MS_LOG(DEBUG) << "row:" << i << "\tlabel:" << label << "\n"; | MS_LOG(DEBUG) << "row:" << i << "\tlabel:" << label << "\n"; | ||||
| EXPECT_TRUE(img_class[(i % 44) / 11] == label); | EXPECT_TRUE(img_class[(i % 44) / 11] == label); | ||||
| // Dump all the image into string, to be used as a comparison later. | // Dump all the image into string, to be used as a comparison later. | ||||
| result.append((char *) tensor_map["image"]->StartAddr(), (int64_t) tensor_map["image"]->Size()); | |||||
| result.append((char *)tensor_map["image"]->GetMutableBuffer(), (int64_t) tensor_map["image"]->Size()); | |||||
| di.GetNextAsMap(&tensor_map); | di.GetNextAsMap(&tensor_map); | ||||
| i++; | i++; | ||||
| } | } | ||||
| @@ -745,7 +745,7 @@ TEST_F(MindDataTestMapOp, ImageFolder_Decode_Repeat_Resize) { | |||||
| tensor_map["label"]->GetItemAt<int32_t>(&label, {}); | tensor_map["label"]->GetItemAt<int32_t>(&label, {}); | ||||
| MS_LOG(DEBUG) << "row:" << i << "\tlabel:" << label << "\n"; | MS_LOG(DEBUG) << "row:" << i << "\tlabel:" << label << "\n"; | ||||
| EXPECT_TRUE(img_class[(i % 44) / 11] == label); | EXPECT_TRUE(img_class[(i % 44) / 11] == label); | ||||
| result2.append((char *) tensor_map["image"]->StartAddr(), (int64_t) tensor_map["image"]->Size()); | |||||
| result2.append((char *)tensor_map["image"]->GetMutableBuffer(), (int64_t) tensor_map["image"]->Size()); | |||||
| di2.GetNextAsMap(&tensor_map); | di2.GetNextAsMap(&tensor_map); | ||||
| i++; | i++; | ||||
| } | } | ||||
| @@ -57,8 +57,8 @@ TEST_F(MindDataTestRandomCropDecodeResizeOp, TestOp2) { | |||||
| for (int i = 0; i < 100; i++) { | for (int i = 0; i < 100; i++) { | ||||
| (void)crop_and_decode.Compute(raw_input_tensor_, &crop_and_decode_output); | (void)crop_and_decode.Compute(raw_input_tensor_, &crop_and_decode_output); | ||||
| (void)decode_and_crop.Compute(input_tensor_, &decode_and_crop_output); | (void)decode_and_crop.Compute(input_tensor_, &decode_and_crop_output); | ||||
| cv::Mat output1(target_height, target_width, CV_8UC3, crop_and_decode_output->StartAddr()); | |||||
| cv::Mat output2(target_height, target_width, CV_8UC3, decode_and_crop_output->StartAddr()); | |||||
| cv::Mat output1(target_height, target_width, CV_8UC3, crop_and_decode_output->GetMutableBuffer()); | |||||
| cv::Mat output2(target_height, target_width, CV_8UC3, decode_and_crop_output->GetMutableBuffer()); | |||||
| long int mse_sum = 0; | long int mse_sum = 0; | ||||
| long int count = 0; | long int count = 0; | ||||
| int a, b; | int a, b; | ||||
| @@ -133,8 +133,8 @@ TEST_F(MindDataTestRandomCropDecodeResizeOp, TestOp1) { | |||||
| crop_and_decode_status = Crop(decoded, &decoded_and_cropped, x, y, crop_width, crop_height); | crop_and_decode_status = Crop(decoded, &decoded_and_cropped, x, y, crop_width, crop_height); | ||||
| decode_and_crop_status = JpegCropAndDecode(raw_input_tensor_, &cropped_and_decoded, x, y, crop_width, crop_height); | decode_and_crop_status = JpegCropAndDecode(raw_input_tensor_, &cropped_and_decoded, x, y, crop_width, crop_height); | ||||
| { | { | ||||
| cv::Mat M1(crop_height, crop_width, CV_8UC3, decoded_and_cropped->StartAddr()); | |||||
| cv::Mat M2(crop_height, crop_width, CV_8UC3, cropped_and_decoded->StartAddr()); | |||||
| cv::Mat M1(crop_height, crop_width, CV_8UC3, decoded_and_cropped->GetMutableBuffer()); | |||||
| cv::Mat M2(crop_height, crop_width, CV_8UC3, cropped_and_decoded->GetMutableBuffer()); | |||||
| for (int i = 0; i < crop_height; ++i) { | for (int i = 0; i < crop_height; ++i) { | ||||
| for (int j = 0; j < crop_width; ++j) { | for (int j = 0; j < crop_width; ++j) { | ||||
| m1 = M1.at<cv::Vec3b>(i, j)[1]; | m1 = M1.at<cv::Vec3b>(i, j)[1]; | ||||
| @@ -34,7 +34,7 @@ Status CreateINT64Tensor(std::shared_ptr<Tensor> *sample_ids, int64_t num_elemen | |||||
| RETURN_IF_NOT_OK(Tensor::CreateTensor(sample_ids, TensorImpl::kFlexible, shape, | RETURN_IF_NOT_OK(Tensor::CreateTensor(sample_ids, TensorImpl::kFlexible, shape, | ||||
| DataType(DataType::DE_INT64), data)); | DataType(DataType::DE_INT64), data)); | ||||
| if (data == nullptr) { | if (data == nullptr) { | ||||
| (*sample_ids)->StartAddr(); // allocate memory in case user forgets! | |||||
| (*sample_ids)->GetMutableBuffer(); // allocate memory in case user forgets! | |||||
| } | } | ||||
| return Status::OK(); | return Status::OK(); | ||||
| } | } | ||||
| @@ -0,0 +1,153 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include <memory> | |||||
| #include <string> | |||||
| #include "dataset/core/client.h" | |||||
| #include "common/common.h" | |||||
| #include "gtest/gtest.h" | |||||
| #include "securec.h" | |||||
| #include "dataset/core/tensor.h" | |||||
| #include "dataset/core/cv_tensor.h" | |||||
| #include "dataset/core/data_type.h" | |||||
| #include "dataset/util/de_error.h" | |||||
| using namespace mindspore::dataset; | |||||
| namespace py = pybind11; | |||||
| class MindDataTestStringTensorDE : public UT::Common { | |||||
| public: | |||||
| MindDataTestStringTensorDE() = default; | |||||
| void SetUp() override { GlobalInit(); } | |||||
| }; | |||||
| TEST_F(MindDataTestStringTensorDE, Basics) { | |||||
| std::shared_ptr<Tensor> t = std::make_shared<Tensor>("Hi"); | |||||
| ASSERT_TRUE(t->shape() == TensorShape({})); | |||||
| std::string_view s = ""; | |||||
| t->GetItemAt(&s, {}); | |||||
| ASSERT_TRUE(s == "Hi"); | |||||
| std::shared_ptr<Tensor> t2 = std::make_shared<Tensor>(std::vector<std::string>{"Hi", "Bye"}); | |||||
| ASSERT_TRUE(t2->shape() == TensorShape({2})); | |||||
| t2->GetItemAt(&s, {0}); | |||||
| ASSERT_TRUE(s == "Hi"); | |||||
| t2->GetItemAt(&s, {1}); | |||||
| ASSERT_TRUE(s == "Bye"); | |||||
| std::vector<std::string> strings{"abc", "defg", "hi", "klmno", "123", "789"}; | |||||
| std::shared_ptr<Tensor> t3 = std::make_shared<Tensor>(strings, TensorShape({2, 3})); | |||||
| ASSERT_TRUE(t3->shape() == TensorShape({2, 3})); | |||||
| uint32_t index = 0; | |||||
| for (uint32_t i = 0; i < 2; i++) { | |||||
| for (uint32_t j = 0; j < 3; j++) { | |||||
| std::string_view s = ""; | |||||
| t3->GetItemAt(&s, {i, j}); | |||||
| ASSERT_TRUE(s == strings[index++]); | |||||
| } | |||||
| } | |||||
| } | |||||
| TEST_F(MindDataTestStringTensorDE, Basics2) { | |||||
| std::shared_ptr<Tensor> t = | |||||
| std::make_shared<Tensor>(std::vector<std::string>{"abc", "defg", "hi", "klmno", "123", "789"}, TensorShape({2, 3})); | |||||
| ASSERT_TRUE(t->SizeInBytes() == 6 * 5 + 20); | |||||
| std::vector<uint32_t> offsets = {3, 8, 11, 17, 21, 25}; | |||||
| uint32_t ctr = 0; | |||||
| for (auto i : offsets) { | |||||
| ASSERT_TRUE(*(reinterpret_cast<uint32_t *>(t->GetMutableBuffer() + ctr)) == i); | |||||
| ctr += 4; | |||||
| } | |||||
| const char *buf = reinterpret_cast<char *>(t->GetMutableBuffer()) + 6 * 4; | |||||
| std::vector<uint32_t> starts = {0, 4, 9, 12, 18, 22}; | |||||
| uint32_t index = 0; | |||||
| for (uint32_t i = 0; i < 2; i++) { | |||||
| for (uint32_t j = 0; j < 3; j++) { | |||||
| std::string_view s = ""; | |||||
| t->GetItemAt(&s, {i, j}); | |||||
| ASSERT_TRUE(s.data() == buf + starts[index++]); | |||||
| } | |||||
| } | |||||
| } | |||||
| TEST_F(MindDataTestStringTensorDE, Empty) { | |||||
| std::vector<std::string> strings{"abc", "defg", "", "", "123", ""}; | |||||
| std::shared_ptr<Tensor> t = std::make_shared<Tensor>(strings, TensorShape({2, 3})); | |||||
| // abc_defg___123__ | |||||
| // 0123456789012345 | |||||
| ASSERT_TRUE(t->SizeInBytes() == 6 * 5 + 10); | |||||
| std::vector<uint32_t> offsets = {3, 8, 9, 10, 14, 15}; | |||||
| uint32_t ctr = 0; | |||||
| for (auto i : offsets) { | |||||
| ASSERT_TRUE(*(reinterpret_cast<uint32_t *>(t->GetMutableBuffer() + ctr)) == i); | |||||
| ctr += 4; | |||||
| } | |||||
| const char *buf = reinterpret_cast<char *>(t->GetMutableBuffer()) + 6 * 4; | |||||
| std::vector<uint32_t> starts = {0, 4, 9, 10, 11, 15}; | |||||
| uint32_t index = 0; | |||||
| for (uint32_t i = 0; i < 2; i++) { | |||||
| for (uint32_t j = 0; j < 3; j++) { | |||||
| std::string_view s = ""; | |||||
| t->GetItemAt(&s, {i, j}); | |||||
| ASSERT_TRUE(s.data() == buf + starts[index]); | |||||
| ASSERT_TRUE(s == strings[index++]); | |||||
| } | |||||
| } | |||||
| } | |||||
| TEST_F(MindDataTestStringTensorDE, SetItem) { | |||||
| std::vector<std::string> strings{"abc", "defg", "hi", "klmno", "123", "789"}; | |||||
| std::shared_ptr<Tensor> t3 = std::make_shared<Tensor>(strings, TensorShape({2, 3})); | |||||
| ASSERT_TRUE(t3->shape() == TensorShape({2, 3})); | |||||
| t3->SetItemAt({0, 1}, std::string{"xyzz"}); | |||||
| strings[1] = "xyzz"; | |||||
| t3->SetItemAt({0, 2}, std::string{"07"}); | |||||
| strings[2] = "07"; | |||||
| t3->SetItemAt({1, 2}, std::string{"987"}); | |||||
| strings[5] = "987"; | |||||
| uint32_t index = 0; | |||||
| for (uint32_t i = 0; i < 2; i++) { | |||||
| for (uint32_t j = 0; j < 3; j++) { | |||||
| std::string_view s = ""; | |||||
| t3->GetItemAt(&s, {i, j}); | |||||
| ASSERT_TRUE(s == strings[index++]); | |||||
| } | |||||
| } | |||||
| } | |||||
| TEST_F(MindDataTestStringTensorDE, Iterator) { | |||||
| std::vector<std::string> strings{"abc", "defg", "hi", "klmno", "123", "789"}; | |||||
| std::shared_ptr<Tensor> t = std::make_shared<Tensor>(strings, TensorShape({2, 3})); | |||||
| uint32_t index = 0; | |||||
| auto itr = t->begin<std::string_view>(); | |||||
| for (; itr != t->end<std::string_view>(); itr++) { | |||||
| ASSERT_TRUE(*itr == strings[index++]); | |||||
| } | |||||
| index = 0; | |||||
| itr = t->begin<std::string_view>(); | |||||
| for (; itr != t->end<std::string_view>(); itr += 2) { | |||||
| ASSERT_TRUE(*itr == strings[index]); | |||||
| index += 2; | |||||
| } | |||||
| } | |||||
| @@ -111,17 +111,17 @@ TEST_F(MindDataTestTensorDE, CopyTensor) { | |||||
| int16_t o; | int16_t o; | ||||
| t->GetItemAt<int16_t>(&o, {}); | t->GetItemAt<int16_t>(&o, {}); | ||||
| ASSERT_EQ(o, -66); | ASSERT_EQ(o, -66); | ||||
| unsigned char *addr = t->StartAddr(); | |||||
| unsigned char *addr = t->GetMutableBuffer(); | |||||
| auto t2 = std::make_shared<Tensor>(std::move(*t)); | auto t2 = std::make_shared<Tensor>(std::move(*t)); | ||||
| ASSERT_EQ(t2->shape(), TensorShape({})); | ASSERT_EQ(t2->shape(), TensorShape({})); | ||||
| ASSERT_EQ(t2->type(), DataType::DE_INT16); | ASSERT_EQ(t2->type(), DataType::DE_INT16); | ||||
| t2->GetItemAt<int16_t>(&o, {}); | t2->GetItemAt<int16_t>(&o, {}); | ||||
| ASSERT_EQ(o, -66); | ASSERT_EQ(o, -66); | ||||
| unsigned char *new_addr = t2->StartAddr(); | |||||
| unsigned char *new_addr = t2->GetMutableBuffer(); | |||||
| ASSERT_EQ(addr, new_addr); | ASSERT_EQ(addr, new_addr); | ||||
| ASSERT_EQ(t->shape(), TensorShape::CreateUnknownRankShape()); | ASSERT_EQ(t->shape(), TensorShape::CreateUnknownRankShape()); | ||||
| ASSERT_EQ(t->type(), DataType::DE_UNKNOWN); | ASSERT_EQ(t->type(), DataType::DE_UNKNOWN); | ||||
| ASSERT_EQ(t->StartAddr(), nullptr); | |||||
| ASSERT_EQ(t->GetMutableBuffer(), nullptr); | |||||
| Status rc = t->GetItemAt<int16_t>(&o, {}); | Status rc = t->GetItemAt<int16_t>(&o, {}); | ||||
| ASSERT_TRUE(rc.IsError()); | ASSERT_TRUE(rc.IsError()); | ||||
| } | } | ||||
| @@ -237,7 +237,7 @@ TEST_F(MindDataTestTensorDE, Strides) { | |||||
| void checkCvMat(TensorShape shape, DataType type) { | void checkCvMat(TensorShape shape, DataType type) { | ||||
| std::shared_ptr<CVTensor> t = std::make_shared<CVTensor>(shape, type); | std::shared_ptr<CVTensor> t = std::make_shared<CVTensor>(shape, type); | ||||
| cv::Mat m = t->mat(); | cv::Mat m = t->mat(); | ||||
| ASSERT_EQ(m.data, t->StartAddr()); | |||||
| ASSERT_EQ(m.data, t->GetMutableBuffer()); | |||||
| ASSERT_EQ(static_cast<uchar>(m.type()) & static_cast<uchar>(CV_MAT_DEPTH_MASK), type.AsCVType()); | ASSERT_EQ(static_cast<uchar>(m.type()) & static_cast<uchar>(CV_MAT_DEPTH_MASK), type.AsCVType()); | ||||
| if (shape.Rank() < 4) { | if (shape.Rank() < 4) { | ||||
| if (shape.Rank() > 1) { | if (shape.Rank() > 1) { | ||||
| @@ -311,15 +311,15 @@ TEST_F(MindDataTestTensorDE, CVTensorFromMat) { | |||||
| TEST_F(MindDataTestTensorDE, CVTensorAs) { | TEST_F(MindDataTestTensorDE, CVTensorAs) { | ||||
| std::shared_ptr<Tensor> t = std::make_shared<Tensor>(TensorShape({3, 2}), DataType(DataType::DE_FLOAT64)); | std::shared_ptr<Tensor> t = std::make_shared<Tensor>(TensorShape({3, 2}), DataType(DataType::DE_FLOAT64)); | ||||
| t->Fill<double>(2.2); | t->Fill<double>(2.2); | ||||
| unsigned char *addr = t->StartAddr(); | |||||
| unsigned char *addr = t->GetMutableBuffer(); | |||||
| std::shared_ptr<Tensor> t2 = std::make_shared<Tensor>(TensorShape({3, 2}), DataType(DataType::DE_FLOAT64)); | std::shared_ptr<Tensor> t2 = std::make_shared<Tensor>(TensorShape({3, 2}), DataType(DataType::DE_FLOAT64)); | ||||
| t2->Fill<double>(4.4); | t2->Fill<double>(4.4); | ||||
| std::shared_ptr<CVTensor> ctv = CVTensor::AsCVTensor(t); | std::shared_ptr<CVTensor> ctv = CVTensor::AsCVTensor(t); | ||||
| ASSERT_EQ(t->StartAddr(), nullptr); | |||||
| ASSERT_EQ(ctv->StartAddr(), addr); | |||||
| ASSERT_EQ(t->GetMutableBuffer(), nullptr); | |||||
| ASSERT_EQ(ctv->GetMutableBuffer(), addr); | |||||
| cv::Mat m = ctv->mat(); | cv::Mat m = ctv->mat(); | ||||
| m = 2 * m; | m = 2 * m; | ||||
| ASSERT_EQ(ctv->StartAddr(), addr); | |||||
| ASSERT_EQ(ctv->GetMutableBuffer(), addr); | |||||
| ASSERT_TRUE(*t2 == *ctv); | ASSERT_TRUE(*t2 == *ctv); | ||||
| MS_LOG(DEBUG) << *t2 << std::endl << *ctv; | MS_LOG(DEBUG) << *t2 << std::endl << *ctv; | ||||
| } | } | ||||
| @@ -41,8 +41,8 @@ def test_textline_dataset_totext(): | |||||
| count = 0 | count = 0 | ||||
| line = ["This is a text file.", "Another file.", "Be happy every day.", "End of file.", "Good luck to everyone."] | line = ["This is a text file.", "Another file.", "Be happy every day.", "End of file.", "Good luck to everyone."] | ||||
| for i in data.create_dict_iterator(): | for i in data.create_dict_iterator(): | ||||
| str = nlp.as_text(i["text"]) | |||||
| assert(str == line[count]) | |||||
| str = i["text"].item().decode("utf8") | |||||
| assert(str == line[count]) | |||||
| count += 1 | count += 1 | ||||
| assert(count == 5) | assert(count == 5) | ||||
| @@ -68,8 +68,8 @@ def test_textline_dataset_repeat(): | |||||
| "This is a text file.", "Be happy every day.", "Good luck to everyone.", | "This is a text file.", "Be happy every day.", "Good luck to everyone.", | ||||
| "This is a text file.", "Be happy every day.", "Good luck to everyone."] | "This is a text file.", "Be happy every day.", "Good luck to everyone."] | ||||
| for i in data.create_dict_iterator(): | for i in data.create_dict_iterator(): | ||||
| str = nlp.as_text(i["text"]) | |||||
| assert(str == line[count]) | |||||
| str = i["text"].item().decode("utf8") | |||||
| assert(str == line[count]) | |||||
| count += 1 | count += 1 | ||||
| assert(count == 9) | assert(count == 9) | ||||
| @@ -26,7 +26,7 @@ def test_flat_map_1(): | |||||
| import mindspore.dataset.transforms.nlp.utils as nlp | import mindspore.dataset.transforms.nlp.utils as nlp | ||||
| def flat_map_func(x): | def flat_map_func(x): | ||||
| data_dir = nlp.as_text(x[0]) | |||||
| data_dir = x[0].item().decode('utf8') | |||||
| d = ds.ImageFolderDatasetV2(data_dir) | d = ds.ImageFolderDatasetV2(data_dir) | ||||
| return d | return d | ||||
| @@ -47,12 +47,12 @@ def test_flat_map_2(): | |||||
| import mindspore.dataset.transforms.nlp.utils as nlp | import mindspore.dataset.transforms.nlp.utils as nlp | ||||
| def flat_map_func_1(x): | def flat_map_func_1(x): | ||||
| data_dir = nlp.as_text(x[0]) | |||||
| data_dir = x[0].item().decode('utf8') | |||||
| d = ds.ImageFolderDatasetV2(data_dir) | d = ds.ImageFolderDatasetV2(data_dir) | ||||
| return d | return d | ||||
| def flat_map_func_2(x): | def flat_map_func_2(x): | ||||
| text_file = nlp.as_text(x[0]) | |||||
| text_file = x[0].item().decode('utf8') | |||||
| d = ds.TextFileDataset(text_file) | d = ds.TextFileDataset(text_file) | ||||
| d = d.flat_map(flat_map_func_1) | d = d.flat_map(flat_map_func_1) | ||||
| return d | return d | ||||
| @@ -0,0 +1,65 @@ | |||||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||||
| # | |||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| # you may not use this file except in compliance with the License. | |||||
| # You may obtain a copy of the License at | |||||
| # | |||||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||||
| # | |||||
| # Unless required by applicable law or agreed to in writing, software | |||||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| # See the License for the specific language governing permissions and | |||||
| # limitations under the License. | |||||
| # ============================================================================== | |||||
| import mindspore._c_dataengine as cde | |||||
| import mindspore.dataset as ds | |||||
| import pytest | |||||
| import numpy as np | |||||
def test_basic():
    """A numpy bytes array round-trips through cde.Tensor and as_array().

    The previous version only asserted ``all(y == y)`` on an unrelated array,
    which is vacuously true, and never inspected ``arr`` at all.
    """
    x = np.array([["ab", "cde", "121"], ["x", "km", "789"]], dtype='S')
    n = cde.Tensor(x)
    arr = n.as_array()
    # The conversion back to numpy must reproduce the original array
    # (same shape and same byte-string contents).
    np.testing.assert_array_equal(arr, x)
def compare(strings):
    """Round-trip `strings` through a GeneratorDataset and check they return intact."""
    expected = np.array(strings, dtype='S')

    def gen():
        yield expected,

    pipeline = ds.GeneratorDataset(gen, column_names=["col"])
    for row in pipeline:
        np.testing.assert_array_equal(row[0], expected)
def test_generator():
    """String arrays of rank 1 (one or several elements) and rank 2 survive the pipeline."""
    cases = (
        ["ab"],
        ["ab", "cde", "121"],
        [["ab", "cde", "121"], ["x", "km", "789"]],
    )
    for case in cases:
        compare(case)
def test_batching_strings():
    """Batching string tensors is unsupported and must raise a RuntimeError."""
    def string_gen():
        yield np.array(["ab", "cde", "121"], dtype='S'),

    pipeline = ds.GeneratorDataset(string_gen, column_names=["col"]).batch(10)
    with pytest.raises(RuntimeError) as info:
        for _ in pipeline:
            pass
    assert "[Batch ERROR] Batch does not support" in str(info)
if __name__ == '__main__':
    # Allow running this file directly, without pytest.
    test_generator()
    test_basic()
    test_batching_strings()