@@ -593,14 +593,16 @@ SchemaObj::SchemaObj(const std::vector<char> &schema_file) : data_(std::make_sha
// SchemaObj Init function
Status SchemaObj::Init() {
if (!data_->schema_file_.empty()) {
Path schema_file(data_->schema_file_);
if (data_ != nullptr && !data_->schema_file_.empty()) {
std::string real_path;
RETURN_IF_NOT_OK(Path::RealPath(data_->schema_file_, real_path));
Path schema_file(real_path);
CHECK_FAIL_RETURN_UNEXPECTED(schema_file.Exists(),
"The file " + data_->schema_file_ + " does not exist or permission denied!");
nlohmann::json js;
try {
std::ifstream in(data_->schema_file_);
std::ifstream in(real_path);
in >> js;
CHECK_FAIL_RETURN_UNEXPECTED(js.find("columns") != js.end(),
"\"columns\" node is required in the schema json file.");
@@ -27,7 +27,8 @@ Iterator::~Iterator() { Stop(); }
// Get the next row from the data pipeline.
Status Iterator::GetNextRowCharIF(MSTensorMapChar *row) {
// Clean data row
RETURN_UNEXPECTED_IF_NULL(row);
// Clean data buffer
row->clear();
std::unordered_map<std::string, std::shared_ptr<dataset::Tensor>> md_map;
Status rc = consumer_->GetNextAsMap(&md_map);
@@ -47,6 +48,7 @@ Status Iterator::GetNextRowCharIF(MSTensorMapChar *row) {
// Get the next row from the data pipeline.
Status Iterator::GetNextRow(MSTensorVec *row) {
// Clean data row
RETURN_UNEXPECTED_IF_NULL(row);
row->clear();
// create a dataset tensor row and fetch. Then we convert the output to MSTensor
std::vector<std::shared_ptr<dataset::Tensor>> md_row;
@@ -84,6 +86,7 @@ Status Iterator::BuildAndLaunchTree(std::shared_ptr<Dataset> ds, int32_t num_epo
PullIterator::PullIterator() : pull_consumer_(nullptr) {}
// Get the next row from the data pipeline.
Status PullIterator::GetRows(int32_t num_rows, std::vector<MSTensorVec> *const row) {
RETURN_UNEXPECTED_IF_NULL(row);
for (int i = 0; i < num_rows; i++) {
std::vector<std::shared_ptr<dataset::Tensor>> md_row;
Status rc = pull_consumer_->GetNextAsVector(&md_row);
@@ -105,6 +108,7 @@ Status PullIterator::GetRows(int32_t num_rows, std::vector<MSTensorVec> *const r
}
Status PullIterator::GetNextRow(MSTensorVec *const row) {
RETURN_UNEXPECTED_IF_NULL(row);
CHECK_FAIL_RETURN_UNEXPECTED(pull_consumer_ != nullptr, "Consumer is nullptr.");
std::vector<std::shared_ptr<dataset::Tensor>> md_row;
Status rc = pull_consumer_->GetNextAsVector(&md_row);
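// Illustrative sketch, not part of the patch: the guards added above are early-return
// null checks. Assuming behaviour roughly like the definition below (the exact macro
// body in MindSpore may differ):
//
//   #define RETURN_UNEXPECTED_IF_NULL(ptr)                           \
//     do {                                                           \
//       if ((ptr) == nullptr) {                                      \
//         RETURN_STATUS_UNEXPECTED("Pointer (" #ptr ") is nullptr"); \
//       }                                                            \
//     } while (false)
//
// a call such as iter->GetNextRow(nullptr) now returns an error Status immediately
// instead of dereferencing the null pointer at row->clear().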
@@ -107,6 +107,7 @@ Status ConfigManager::LoadFile(const std::string &settingsFile) {
nlohmann::json js;
in >> js;
rc = FromJson(js);
in.close();
} catch (const nlohmann::json::type_error &e) {
std::ostringstream ss;
ss << "Client file failed to load:\n" << e.what();
@@ -29,8 +29,10 @@ CVTensor::CVTensor(std::shared_ptr<Tensor> tensor) : Tensor(std::move(*tensor))
}
Status CVTensor::CreateEmpty(const TensorShape &shape, DataType type, CVTensorPtr *out) {
RETURN_UNEXPECTED_IF_NULL(out);
const CVTensorAlloc *alloc = GlobalContext::Instance()->cv_tensor_allocator();
*out = std::allocate_shared<CVTensor>(*alloc, shape, type);
RETURN_UNEXPECTED_IF_NULL(out);
int64_t byte_size = (*out)->SizeInBytes();
// Don't allocate if we have a tensor with no elements.
if (byte_size != 0) {
@@ -41,6 +43,7 @@ Status CVTensor::CreateEmpty(const TensorShape &shape, DataType type, CVTensorPt
}
Status CVTensor::CreateFromMat(const cv::Mat &mat, const dsize_t rank, CVTensorPtr *out) {
RETURN_UNEXPECTED_IF_NULL(out);
TensorPtr out_tensor;
cv::Mat mat_local = mat;
// if the input Mat's memory is not continuous, copy it to one block of memory
@@ -78,6 +81,9 @@ std::pair<std::array<int, 2>, int> CVTensor::IsValidImage(const TensorShape &sha
}
std::shared_ptr<CVTensor> CVTensor::AsCVTensor(std::shared_ptr<Tensor> t) {
if (t == nullptr) {
return nullptr;
}
std::shared_ptr<CVTensor> cv_t = std::dynamic_pointer_cast<CVTensor>(t);
if (cv_t != nullptr) {
return cv_t;
@@ -88,13 +94,13 @@ std::shared_ptr<CVTensor> CVTensor::AsCVTensor(std::shared_ptr<Tensor> t) {
}
Status CVTensor::MatInit(uchar *data, const TensorShape &shape, const DataType &type, cv::Mat *mat) {
std::pair<std::array<int, 2>, int> cv_shape_type = IsValidImage(shape, type);
RETURN_UNEXPECTED_IF_NULL(data);
RETURN_UNEXPECTED_IF_NULL(mat);
const int kShapeAsDefault = 2;
std::pair<std::array<int, kShapeAsDefault>, int> cv_shape_type = IsValidImage(shape, type);
if (cv_shape_type.second == -1) {
std::vector<dsize_t> sizes = shape.AsVector();
std::vector<int> sizes32(sizes.begin(), sizes.end()); // convert long to int for usage with OpenCV
if (static_cast<int>(shape.Rank()) != shape.Rank()) {
RETURN_STATUS_UNEXPECTED("Error in creating CV mat. Wrong shape.");
}
uint8_t cv_type = type.AsCVType();
if (cv_type == kCVInvalidType) {
@@ -102,7 +108,7 @@ Status CVTensor::MatInit(uchar *data, const TensorShape &shape, const DataType &
}
*mat = cv::Mat(static_cast<int>(shape.Rank()), &sizes32[0], cv_type, data);
} else {
*mat = cv::Mat(2, &(cv_shape_type.first[0]), cv_shape_type.second, data);
*mat = cv::Mat(kShapeAsDefault, &(cv_shape_type.first[0]), cv_shape_type.second, data);
}
return Status::OK();
}
@@ -121,10 +127,14 @@ Status CVTensor::ExpandDim(const dsize_t &axis) {
void CVTensor::Squeeze() {
Tensor::Squeeze();
(void)this->MatInit(GetMutableBuffer(), shape_, type_, &mat_);
Status rc = this->MatInit(GetMutableBuffer(), shape_, type_, &mat_);
if (rc.IsError()) {
MS_LOG(ERROR) << "Squeeze failed, error details are " << rc;
}
}
Status CVTensor::MatAtIndex(const std::vector<dsize_t> &index, cv::Mat *mat) {
RETURN_UNEXPECTED_IF_NULL(mat);
uchar *start = nullptr;
TensorShape remaining({-1});
RETURN_IF_NOT_OK(this->StartAddrOfIndex(index, &start, &remaining));
@@ -143,15 +143,15 @@ class DataType {
constexpr bool operator!=(const Type a) const { return type_ != a; }
// Disable this usage `if(d)` where d is of type DataType
/// \return
/// \return Nothing, since we disable this function.
operator bool() = delete;
// To be used in Switch/case
/// \return
/// \return the internal data type.
operator Type() const { return type_; }
// The number of bytes needed to store one value of this type
/// \return
/// \return the number of bytes of the type.
uint8_t SizeInBytes() const;
#ifndef ENABLE_ANDROID
@@ -41,15 +41,17 @@ DETensor::DETensor(std::shared_ptr<dataset::DeviceTensor> device_tensor_impl, bo
: device_tensor_impl_(device_tensor_impl), name_("MindDataDeviceTensor"), is_device_(is_device) {
// The sequence of shape_ is (width, widthStride, height, heightStride) in Dvpp module
// We need to add [1]widthStride and [3]heightStride, which are actual YUV image shape, into shape_ attribute
uint8_t flag = 0;
for (auto &i : device_tensor_impl->GetYuvStrideShape()) {
if (flag % 2 == 1) {
int64_t j = static_cast<int64_t>(i);
shape_.emplace_back(j);
if (device_tensor_impl && device_tensor_impl->GetYuvStrideShape().size() > 0) {
uint8_t flag = 0;
for (auto &i : device_tensor_impl->GetYuvStrideShape()) {
if (flag % 2 == 1) {
int64_t j = static_cast<int64_t>(i);
shape_.emplace_back(j);
}
++flag;
}
++flag;
std::reverse(shape_.begin(), shape_.end());
}
std::reverse(shape_.begin(), shape_.end());
MS_LOG(INFO) << "This is a YUV420 format image, one pixel takes 1.5 bytes. Therefore, the shape of"
<< " image is in (H, W) format. You can search for more information about YUV420 format";
}
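// Illustrative walk-through, not part of the patch, based on the comment above that
// GetYuvStrideShape() is ordered as (width, widthStride, height, heightStride).
// With assumed example values {1280, 1280, 720, 720}:
//   keep odd positions (flag % 2 == 1) -> {widthStride, heightStride} = {1280, 720}
//   std::reverse(...)                  -> shape_ = {720, 1280}, i.e. (H, W)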
| @@ -23,7 +23,10 @@ | |||
| namespace mindspore { | |||
| namespace dataset { | |||
| DeviceTensor::DeviceTensor(const TensorShape &shape, const DataType &type) : Tensor(shape, type) { | |||
| const int kYuvDefaultChannels = 4; | |||
| DeviceTensor::DeviceTensor(const TensorShape &shape, const DataType &type) | |||
| : Tensor(shape, type), device_data_(nullptr), size_(0) { | |||
| // grab the mem pool from global context and create the allocator for char data area | |||
| std::shared_ptr<MemoryPool> global_pool = GlobalContext::Instance()->mem_pool(); | |||
| data_allocator_ = std::make_unique<Allocator<unsigned char>>(global_pool); | |||
| @@ -34,6 +37,7 @@ DeviceTensor::DeviceTensor(const TensorShape &shape, const DataType &type) : Ten | |||
| Status DeviceTensor::CreateEmpty(const TensorShape &shape, const DataType &type, std::shared_ptr<DeviceTensor> *out) { | |||
| CHECK_FAIL_RETURN_UNEXPECTED(shape.known(), "Invalid shape."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(type != DataType::DE_UNKNOWN, "Invalid data type."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(out != nullptr, "Invalid nullptr pointer."); | |||
| const DeviceTensorAlloc *alloc = GlobalContext::Instance()->device_tensor_allocator(); | |||
| *out = std::allocate_shared<DeviceTensor>(*alloc, shape, type); | |||
| // if it's a string tensor and it has no elements, Just initialize the shape and type. | |||
| @@ -42,6 +46,7 @@ Status DeviceTensor::CreateEmpty(const TensorShape &shape, const DataType &type, | |||
| } | |||
| CHECK_FAIL_RETURN_UNEXPECTED(type.IsNumeric(), "Number of elements is not 0. The type should be numeric."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(out != nullptr, "Allocate memory faiiled."); | |||
| int64_t bytes = (*out)->SizeInBytes(); | |||
| // Don't allocate if we have a tensor with no elements. | |||
@@ -58,9 +63,11 @@ Status DeviceTensor::CreateFromDeviceMemory(const TensorShape &shape, const Data
CHECK_FAIL_RETURN_UNEXPECTED(type != DataType::DE_UNKNOWN, "Invalid data type.");
CHECK_FAIL_RETURN_UNEXPECTED(data_ptr != nullptr, "Data pointer is NULL");
CHECK_FAIL_RETURN_UNEXPECTED(dataSize > 0, "Invalid data size");
CHECK_FAIL_RETURN_UNEXPECTED(out != nullptr, "Out pointer is NULL");
const DeviceTensorAlloc *alloc = GlobalContext::Instance()->device_tensor_allocator();
*out = std::allocate_shared<DeviceTensor>(*alloc, shape, type);
CHECK_FAIL_RETURN_UNEXPECTED(out != nullptr, "Allocate memory failed.");
// if it's a string tensor and it has no elements, Just initialize the shape and type.
if (!type.IsNumeric() && shape.NumOfElements() == 0) {
@@ -76,6 +83,8 @@ Status DeviceTensor::CreateFromDeviceMemory(const TensorShape &shape, const Data
RETURN_IF_NOT_OK((*out)->AllocateBuffer(byte_size));
}
CHECK_FAIL_RETURN_UNEXPECTED(attributes.size() >= kYuvDefaultChannels,
"Invalid attributes size, should be no less than 4.");
CHECK_FAIL_RETURN_UNEXPECTED(
(*out)->SetAttributes(data_ptr, dataSize, attributes[0], attributes[1], attributes[2], attributes[3]),
"Fail to set attributes for DeviceTensor");
@@ -129,6 +138,7 @@ Status DeviceTensor::SetSize_(const uint32_t &new_size) {
#ifdef ENABLE_ACL
Status DeviceTensor::DataPop_(std::shared_ptr<Tensor> *host_tensor) {
CHECK_FAIL_RETURN_UNEXPECTED(host_tensor != nullptr, "host tensor pointer is NULL.");
void *resHostBuf = nullptr;
APP_ERROR ret = aclrtMallocHost(&resHostBuf, this->DeviceDataSize());
if (ret != APP_ERR_OK) {
@@ -151,13 +161,18 @@ Status DeviceTensor::DataPop_(std::shared_ptr<Tensor> *host_tensor) {
mindspore::dataset::dsize_t dvppDataSize = this->DeviceDataSize();
const mindspore::dataset::TensorShape dvpp_shape({dvppDataSize, 1, 1});
CHECK_FAIL_RETURN_UNEXPECTED(this->GetYuvStrideShape().size() >= kYuvDefaultChannels,
"Invalid YuvShape, size should be no less than 4");
uint32_t _output_width_ = this->GetYuvStrideShape()[0];
uint32_t _output_widthStride_ = this->GetYuvStrideShape()[1];
uint32_t _output_height_ = this->GetYuvStrideShape()[2];
uint32_t _output_heightStride_ = this->GetYuvStrideShape()[3];
const mindspore::dataset::DataType dvpp_data_type(mindspore::dataset::DataType::DE_UINT8);
mindspore::dataset::Tensor::CreateFromMemory(dvpp_shape, dvpp_data_type, ret_ptr, host_tensor);
RETURN_IF_NOT_OK(mindspore::dataset::Tensor::CreateFromMemory(dvpp_shape, dvpp_data_type, ret_ptr, host_tensor));
CHECK_FAIL_RETURN_UNEXPECTED(host_tensor != nullptr, "Allocate memory failed.");
(*host_tensor)->SetYuvShape(_output_width_, _output_widthStride_, _output_height_, _output_heightStride_);
if (!(*host_tensor)->HasData()) {
@@ -39,7 +39,9 @@ struct npy_scalar_caster {
bool load(handle src, bool convert) {
// Taken from Eigen casters. Permits either scalar dtype or scalar array.
handle type = dtype::of<T>().attr("type"); // Could make more efficient.
if (!convert && !isinstance<Array>(src) && !isinstance(src, type)) return false;
if (!convert && !isinstance<Array>(src) && !isinstance(src, type)) {
return false;
}
Array tmp = Array::ensure(src);
if (tmp && tmp.size() == 1 && tmp.ndim() == 0) {
@@ -91,8 +91,10 @@ Tensor &Tensor::operator=(Tensor &&other) noexcept {
Status Tensor::CreateEmpty(const TensorShape &shape, const DataType &type, TensorPtr *out) {
CHECK_FAIL_RETURN_UNEXPECTED(shape.known(), "Invalid shape.");
CHECK_FAIL_RETURN_UNEXPECTED(type != DataType::DE_UNKNOWN, "Invalid data type.");
RETURN_UNEXPECTED_IF_NULL(out);
const TensorAlloc *alloc = GlobalContext::Instance()->tensor_allocator();
*out = std::allocate_shared<Tensor>(*alloc, shape, type);
CHECK_FAIL_RETURN_UNEXPECTED(out != nullptr, "Allocate memory failed.");
// if it's a string tensor and it has no elements, Just initialize the shape and type.
if (!type.IsNumeric() && shape.NumOfElements() == 0) {
return Status::OK();
@@ -110,7 +112,7 @@ Status Tensor::CreateEmpty(const TensorShape &shape, const DataType &type, Tenso
}
Status Tensor::CreateFromMemory(const TensorShape &shape, const DataType &type, const uchar *src, TensorPtr *out) {
RETURN_IF_NOT_OK(CreateEmpty(shape, type, out));
if (src != nullptr) {
if (src != nullptr && out != nullptr) {
// Given the shape/type of this tensor, compute the data size and copy in the input bytes.
int64_t byte_size = (*out)->SizeInBytes();
if (byte_size == 0) {
@@ -129,9 +131,11 @@ Status Tensor::CreateFromMemory(const TensorShape &shape, const DataType &type,
Status Tensor::CreateFromMemory(const TensorShape &shape, const DataType &type, const unsigned char *src,
const dsize_t &length, TensorPtr *out) {
CHECK_FAIL_RETURN_UNEXPECTED(src != nullptr, "Pointer to source data is null.");
RETURN_UNEXPECTED_IF_NULL(src);
RETURN_UNEXPECTED_IF_NULL(out);
const TensorAlloc *alloc = GlobalContext::Instance()->tensor_allocator();
*out = std::allocate_shared<Tensor>(*alloc, shape, type);
CHECK_FAIL_RETURN_UNEXPECTED(out != nullptr, "Allocate memory failed.");
if (type.IsNumeric()) {
dsize_t calculated_length = (*out)->SizeInBytes();
CHECK_FAIL_RETURN_UNEXPECTED(calculated_length == length, "Length of source data does not match the shape.");
@@ -159,6 +163,7 @@ Status Tensor::CreateFromMemory(const TensorShape &shape, const DataType &type,
#ifdef ENABLE_PYTHON
Status Tensor::CreateFromNpString(py::array arr, std::shared_ptr<Tensor> *out) {
RETURN_UNEXPECTED_IF_NULL(out);
std::vector<dsize_t> shape;
for (dsize_t i = 0; i < arr.ndim(); i++) {
shape.push_back(static_cast<dsize_t>(arr.shape()[i]));
@@ -167,9 +172,11 @@ Status Tensor::CreateFromNpString(py::array arr, std::shared_ptr<Tensor> *out) {
std::vector<std::string> strings;
if (arr.dtype().kind() == 'U') {
std::for_each(arr.begin(), arr.end(), [&strings](const auto &s) { strings.emplace_back(py::cast<py::str>(s)); });
(void)std::for_each(arr.begin(), arr.end(),
[&strings](const auto &s) { strings.emplace_back(py::cast<py::str>(s)); });
} else {
std::for_each(arr.begin(), arr.end(), [&strings](const auto &s) { strings.emplace_back(py::cast<py::bytes>(s)); });
(void)std::for_each(arr.begin(), arr.end(),
[&strings](const auto &s) { strings.emplace_back(py::cast<py::bytes>(s)); });
}
arr.resize(shape); // resize arr back to the original shape
@@ -178,6 +185,7 @@ Status Tensor::CreateFromNpString(py::array arr, std::shared_ptr<Tensor> *out) {
}
Status Tensor::CreateFromNpArray(const py::array &arr, std::shared_ptr<Tensor> *out) {
RETURN_UNEXPECTED_IF_NULL(out);
if (DataType::FromNpArray(arr) == DataType::DE_STRING) {
return CreateFromNpString(arr, out);
}
@@ -191,7 +199,7 @@ Status Tensor::CreateFromNpArray(const py::array &arr, std::shared_ptr<Tensor> *
shape.push_back(static_cast<dsize_t>(arr.shape()[i]));
strides.push_back(static_cast<dsize_t>(arr.strides()[i]));
// in case of empty array num_items=0
if (count != 0) {
if (count != 0 && shape.size() > i && shape[i] != 0) {
count /= shape[i];
if (strides[i] != arr.itemsize() * count) {
is_strided = true;
@@ -213,9 +221,11 @@ Status Tensor::CreateFromNpArray(const py::array &arr, std::shared_ptr<Tensor> *
#ifndef ENABLE_ANDROID
Status Tensor::CreateFromByteList(const dataengine::BytesList &bytes_list, const TensorShape &shape, TensorPtr *out) {
RETURN_UNEXPECTED_IF_NULL(out);
const TensorAlloc *alloc = GlobalContext::Instance()->tensor_allocator();
*out = std::allocate_shared<Tensor>(*alloc, TensorShape({static_cast<dsize_t>(bytes_list.value_size())}),
DataType(DataType::DE_STRING));
CHECK_FAIL_RETURN_UNEXPECTED(out != nullptr, "Allocate memory failed.");
// total bytes needed = offset array + strings
// offset array needs to store one offset var per element + 1 extra to get the length of the last string.
// strings will be null-terminated --> need 1 extra byte per element
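// Illustrative arithmetic, not part of the patch, following the layout described in the
// comments above. With assumed example values: two strings "ab" and "c" and 4-byte offsets:
//   offset array : (2 elements + 1 extra) * 4 bytes = 12 bytes
//   string data  : ("ab" + '\0') + ("c" + '\0') = 3 + 2 = 5 bytes
//   total bytes needed = 12 + 5 = 17 bytes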
@@ -236,9 +246,7 @@ Status Tensor::CreateFromByteList(const dataengine::BytesList &bytes_list, const
num_bytes -= kOffsetSize;
// insert actual string
int ret_code = memcpy_s((*out)->data_ + offset, num_bytes, common::SafeCStr(str), str.length() + 1);
if (ret_code != 0) {
MS_LOG(ERROR) << "Cannot copy string into Tensor";
}
CHECK_FAIL_RETURN_UNEXPECTED(ret_code == 0, "Cannot copy string into Tensor");
// next string will be stored right after the current one.
offset = offset + str.length() + 1;
// total bytes are reduced by the length of the string
@@ -257,6 +265,7 @@ Status Tensor::CreateFromByteList(const dataengine::BytesList &bytes_list, const
#endif
Status Tensor::CreateFromFile(const std::string &path, std::shared_ptr<Tensor> *out) {
RETURN_UNEXPECTED_IF_NULL(out);
Path file(path);
if (file.IsDirectory()) {
RETURN_STATUS_UNEXPECTED("Invalid file found: " + path + ", should be file, but got directory.");
@@ -269,8 +278,10 @@ Status Tensor::CreateFromFile(const std::string &path, std::shared_ptr<Tensor> *
CHECK_FAIL_RETURN_UNEXPECTED(fs.seekg(0, std::ios::beg).good(), "Failed to find size of file, check path: " + path);
RETURN_IF_NOT_OK(Tensor::CreateEmpty(TensorShape{num_bytes}, DataType(DataType::DE_UINT8), out));
int64_t written_bytes = fs.read(reinterpret_cast<char *>((*out)->GetMutableBuffer()), num_bytes).gcount();
CHECK_FAIL_RETURN_UNEXPECTED(written_bytes == num_bytes && fs.good(),
"Error in writing to tensor, check path: " + path);
if (!(written_bytes == num_bytes && fs.good())) {
fs.close();
RETURN_STATUS_UNEXPECTED("Error in writing to tensor, check path: " + path);
}
fs.close();
return Status::OK();
}
@@ -278,8 +289,10 @@ Status Tensor::CreateFromFile(const std::string &path, std::shared_ptr<Tensor> *
#ifndef ENABLE_ANDROID
Status Tensor::CreateFromByteList(const dataengine::BytesList &bytes_list, const TensorShape &shape,
const DataType &type, dsize_t pad_size, TensorPtr *out) {
RETURN_UNEXPECTED_IF_NULL(out);
RETURN_IF_NOT_OK(Tensor::CreateEmpty(shape, type, out));
RETURN_UNEXPECTED_IF_NULL(out);
unsigned char *current_tensor_addr = (*out)->GetMutableBuffer();
int64_t tensor_bytes_remaining = bytes_list.value_size() * pad_size;
@@ -313,18 +326,23 @@ Status Tensor::CreateFromByteList(const dataengine::BytesList &bytes_list, const
// Here we convert array C to array A, by memcpy index by index (Note that not all elements in C is copied)
Status Tensor::CopyStridedArray(unsigned char *dst, unsigned char *src, std::vector<dsize_t> shape,
std::vector<dsize_t> strides, uint8_t type_size) {
RETURN_UNEXPECTED_IF_NULL(dst);
RETURN_UNEXPECTED_IF_NULL(src);
dsize_t size = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<>());
for (dsize_t i = 0; i < size; ++i) {
dsize_t offset = 0;
dsize_t count = i;
for (size_t j = 0; j < shape.size(); ++j) {
// convert 1d array's index to 3d array's index (A -> B)
CHECK_FAIL_RETURN_UNEXPECTED(shape[shape.size() - 1 - j] != 0, "Invalid data, shape can't be zero.");
dsize_t idx = count % shape[shape.size() - 1 - j];
count /= shape[shape.size() - 1 - j];
// calculate the raw data offset based on strides (B -> C)
offset += idx * strides[shape.size() - 1 - j];
// once count = 0, the following idxes are all zero, skip them
if (count == 0) break;
if (count == 0) {
break;
}
}
// strides already consider byte size of the data type, but dst doesn't.
// dst[i] = dst + i * type_size = src + offset
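// Illustrative walk-through of the index math above, not part of the patch, with assumed
// example values shape = {2, 3}, byte strides = {24, 8}, type_size = 8, flat index i = 4:
//   j = 0: idx = 4 % 3 = 1, count = 4 / 3 = 1, offset += 1 * 8  -> offset = 8
//   j = 1: idx = 1 % 2 = 1, count = 1 / 2 = 0, offset += 1 * 24 -> offset = 32
//   copy type_size bytes from src + 32 to dst + 4 * 8, then stop since count == 0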
@@ -482,6 +500,7 @@ void Tensor::Invalidate() {
template <typename T>
Status Tensor::GetItemPtr(T **ptr, const std::vector<dsize_t> &index) const {
RETURN_UNEXPECTED_IF_NULL(ptr);
if (type_.IsCompatible<T>()) {
if (data_ == nullptr) {
std::string err = "Data is not allocated yet";
@@ -490,6 +509,7 @@ Status Tensor::GetItemPtr(T **ptr, const std::vector<dsize_t> &index) const {
dsize_t flat_idx;
RETURN_IF_NOT_OK(shape_.ToFlatIndex(index, &flat_idx));
*ptr = reinterpret_cast<T *>(data_ + flat_idx * type_.SizeInBytes());
RETURN_UNEXPECTED_IF_NULL(ptr);
return Status::OK();
} else {
@@ -499,6 +519,8 @@ Status Tensor::GetItemPtr(T **ptr, const std::vector<dsize_t> &index) const {
}
Status Tensor::GetItemPtr(uchar **ptr, const std::vector<dsize_t> &index, offset_t *length) const {
RETURN_UNEXPECTED_IF_NULL(ptr);
RETURN_UNEXPECTED_IF_NULL(length);
if (type_ == DataType::DE_STRING) {
if (data_ == nullptr) {
std::string err = "Data is not allocated yet";
@@ -519,6 +541,8 @@ Status Tensor::GetItemPtr(uchar **ptr, const std::vector<dsize_t> &index, offset
}
Status Tensor::StartAddrOfIndex(std::vector<dsize_t> ind, uchar **start_addr_of_index, TensorShape *remaining) {
RETURN_UNEXPECTED_IF_NULL(start_addr_of_index);
RETURN_UNEXPECTED_IF_NULL(remaining);
if (type() == DataType::DE_STRING) {
RETURN_STATUS_UNEXPECTED("StartAddrOfIndex does not support string tensors yet.");
}
@@ -541,6 +565,7 @@ Status Tensor::StartAddrOfIndex(std::vector<dsize_t> ind, uchar **start_addr_of_
Status Tensor::InsertTensor(const std::vector<dsize_t> &ind, const std::shared_ptr<Tensor> &tensor,
const bool partial_insert) {
RETURN_UNEXPECTED_IF_NULL(tensor);
std::string err_msg;
if (partial_insert) {
err_msg += (ind.size() != 1)
@@ -603,13 +628,14 @@ Status Tensor::ExpandDim(const dsize_t &axis) {
std::vector<dsize_t> Tensor::Strides() const {
std::vector<dsize_t> strides = shape_.Strides();
uint8_t size = type_.SizeInBytes();
std::transform(strides.begin(), strides.end(), strides.begin(), [&size](const auto &c) { return c * size; });
(void)std::transform(strides.begin(), strides.end(), strides.begin(), [&size](const auto &c) { return c * size; });
return strides;
}
#ifdef ENABLE_PYTHON
Status Tensor::GetBufferInfo(Tensor *t, py::buffer_info *out) {
RETURN_UNEXPECTED_IF_NULL(t);
RETURN_UNEXPECTED_IF_NULL(out);
CHECK_FAIL_RETURN_UNEXPECTED(t->type().IsNumeric(), "Cannot use GetBufferInfo on tensor of strings.");
std::string format_desc = t->type().GetPybindFormat();
@@ -622,6 +648,7 @@ Status Tensor::GetBufferInfo(Tensor *t, py::buffer_info *out) {
t->Rank(), /* Number of dimensions */
t->shape().AsVector(), /* Buffer dimensions */
t->Strides());
RETURN_UNEXPECTED_IF_NULL(out);
return Status::OK();
}
#endif
@@ -721,6 +748,7 @@ Status Tensor::from_json_convert(nlohmann::json json_data, TensorShape shape, st
template <typename T>
Status Tensor::GetItemAt(T *o, const std::vector<dsize_t> &index) const {
RETURN_UNEXPECTED_IF_NULL(o);
if (data_ == nullptr) {
RETURN_STATUS_UNEXPECTED("Data is not allocated yet");
}
@@ -794,6 +822,7 @@ Status Tensor::GetDataAsNumpy(py::array *data) {
return Status::OK();
}
Status Tensor::GetDataAsNumpyStrings(py::array *data) {
RETURN_UNEXPECTED_IF_NULL(data);
auto itr = begin<std::string_view>();
uint64_t max_value = 0;
for (; itr != end<std::string_view>(); ++itr) {
@@ -807,7 +836,9 @@ Status Tensor::GetDataAsNumpyStrings(py::array *data) {
max_value = (max_value == 0 ? 1 : max_value);
uint64_t total_size = shape_.NumOfElements() * max_value;
char *tmp_data = reinterpret_cast<char *>(data_allocator_->allocate(total_size));
if (tmp_data == nullptr) RETURN_STATUS_UNEXPECTED("Cannot create temp array.");
if (tmp_data == nullptr) {
RETURN_STATUS_UNEXPECTED("Cannot create temp array.");
}
int ret_code = memset_s(tmp_data, total_size, 0, total_size);
CHECK_FAIL_RETURN_UNEXPECTED(ret_code == 0, "Failed to initialize temp memory");
@@ -820,9 +851,10 @@ Status Tensor::GetDataAsNumpyStrings(py::array *data) {
}
}
auto strides = shape_.Strides();
std::transform(strides.begin(), strides.end(), strides.begin(),
[&max_value](const auto &s) { return s * max_value; });
(void)std::transform(strides.begin(), strides.end(), strides.begin(),
[&max_value](const auto &s) { return s * max_value; });
*data = py::array(py::dtype("S" + std::to_string(max_value)), shape_.AsVector(), strides, tmp_data);
RETURN_UNEXPECTED_IF_NULL(data);
data_allocator_->deallocate(reinterpret_cast<uchar *>(tmp_data));
return Status::OK();
}
@@ -832,6 +864,7 @@ void Tensor::Squeeze() { shape_ = shape_.Squeeze(); }
template <typename T>
Status Tensor::GetUnsignedIntAt(T *o, const std::vector<dsize_t> &index) const {
RETURN_UNEXPECTED_IF_NULL(o);
if (data_ == nullptr) {
RETURN_STATUS_UNEXPECTED("Data is not allocated yet");
}
@@ -873,6 +906,7 @@ Status Tensor::GetUnsignedIntAt(T *o, const std::vector<dsize_t> &index) const {
template <typename T>
Status Tensor::GetSignedIntAt(T *o, const std::vector<dsize_t> &index) const {
RETURN_UNEXPECTED_IF_NULL(o);
if (data_ == nullptr) {
RETURN_STATUS_UNEXPECTED("Data is not allocated yet");
}
@@ -914,6 +948,7 @@ Status Tensor::GetSignedIntAt(T *o, const std::vector<dsize_t> &index) const {
template <typename T>
Status Tensor::GetFloatAt(T *o, const std::vector<dsize_t> &index) const {
RETURN_UNEXPECTED_IF_NULL(o);
if (data_ == nullptr) {
RETURN_STATUS_UNEXPECTED("Data is not allocated yet");
}
@@ -958,6 +993,7 @@ Status Tensor::GetStringAt(dsize_t index, uchar **string_start, offset_t *length
return Status::OK();
}
Status Tensor::CopyLastDimAt(const std::shared_ptr<Tensor> &src, const std::vector<dsize_t> &index) {
RETURN_UNEXPECTED_IF_NULL(src);
CHECK_FAIL_RETURN_UNEXPECTED(src->type() == type_, "Source Tensor has a different type");
CHECK_FAIL_RETURN_UNEXPECTED(index.back() == 0, "Last dim in index should be 0");
@@ -975,6 +1011,7 @@ Status Tensor::CopyLastDimAt(const std::shared_ptr<Tensor> &src, const std::vect
Status Tensor::GetSliceOption(const SliceOption &slice_option, const int32_t &slice_index,
SliceOption *slice_option_ptr) {
RETURN_UNEXPECTED_IF_NULL(slice_option_ptr);
if (slice_option.indices_.empty() && !slice_option.slice_.valid()) {
RETURN_STATUS_UNEXPECTED("Both indices and slices can not be empty.");
}
@@ -983,6 +1020,7 @@ Status Tensor::GetSliceOption(const SliceOption &slice_option, const int32_t &sl
RETURN_STATUS_UNEXPECTED("Both indices and slices can not be given.");
}
CHECK_FAIL_RETURN_UNEXPECTED(shape_.Size() > slice_index, "Invalid shape, size should be greater than the slice index.");
// if slice object was provided, indices should be empty. Generate indices from the slice object.
if (slice_option.indices_.empty()) {
// check if slice is valid
@@ -1010,6 +1048,7 @@ Status Tensor::GetSliceOption(const SliceOption &slice_option, const int32_t &sl
}
Status Tensor::Slice(std::shared_ptr<Tensor> *out, const std::vector<SliceOption> slice_options_) {
RETURN_UNEXPECTED_IF_NULL(out);
std::vector<SliceOption> converted_slice_objects;
CHECK_FAIL_RETURN_UNEXPECTED(slice_options_.size() <= static_cast<size_t>(std::numeric_limits<dsize_t>::max()),
@@ -1046,7 +1085,7 @@ Status Tensor::Slice(std::shared_ptr<Tensor> *out, const std::vector<SliceOption
for (int i = 0; i < shape_.Rank(); i++) {
if (i < slice_len) {
// if it's a slice
if (converted_slice_objects[i].indices_.size() == 0) {
if (converted_slice_objects[i].indices_.size() == 0 && converted_slice_objects[i].slice_.step_ != 0) {
slice_len_ind = (converted_slice_objects[i].slice_.stop_ - converted_slice_objects[i].slice_.start_) /
converted_slice_objects[i].slice_.step_;
if ((converted_slice_objects[i].slice_.stop_ - converted_slice_objects[i].slice_.start_) %
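// Illustrative arithmetic for the slice-length computation above, not part of the patch,
// with assumed example values start_ = 0, stop_ = 5, step_ = 2:
//   slice_len_ind = (5 - 0) / 2 = 2, and (5 - 0) % 2 != 0, which presumably rounds the
//   count up to the 3 selected indices {0, 2, 4}; the added step_ != 0 check guards the
//   division against a zero step.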
@@ -1085,8 +1124,10 @@ Status Tensor::Slice(std::shared_ptr<Tensor> *out, const std::vector<SliceOption
Status Tensor::SliceNumeric(std::shared_ptr<Tensor> *out, const std::vector<std::vector<dsize_t>> &indices,
const TensorShape &shape) {
RETURN_UNEXPECTED_IF_NULL(out);
RETURN_IF_NOT_OK(CreateEmpty(shape, type_, out));
RETURN_UNEXPECTED_IF_NULL(out);
(*out)->GetMutableBuffer();
dsize_t out_index = 0;
std::vector<dsize_t> dim_length = shape_.AsVector();
@@ -1131,6 +1172,7 @@ Status Tensor::SliceNumeric(std::shared_ptr<Tensor> *out, const std::vector<std:
}
Status Tensor::SliceString(std::shared_ptr<Tensor> *out, const std::vector<std::vector<dsize_t>> &indices,
const TensorShape &shape) {
RETURN_UNEXPECTED_IF_NULL(out);
std::vector<dsize_t> dim_length = shape_.AsVector();
std::vector<std::string> strings;
@@ -414,6 +414,10 @@ class Tensor {
/// \param[in] index_vector vector of indices
/// \return std::vector<dsize_t> modified vector of indices
static inline std::vector<dsize_t> HandleNegIndices(std::vector<dsize_t> index_vector, std::vector<dsize_t> length) {
if (length.size() < index_vector.size()) {
MS_LOG(ERROR) << "The size of length should be no less than the size of index_vector";
return {};
}
std::vector<dsize_t> indices(index_vector.size(), 0);
for (int i = 0; i < index_vector.size(); i++) {
indices[i] = HandleNeg(index_vector[i], length[i]);
@@ -780,12 +784,14 @@ inline Tensor::TensorIterator<std::string_view> Tensor::end<std::string_view>()
template <>
inline Status Tensor::CreateFromVector<std::string>(const std::vector<std::string> &items, const TensorShape &shape,
TensorPtr *out) {
RETURN_UNEXPECTED_IF_NULL(out);
CHECK_FAIL_RETURN_UNEXPECTED(
items.size() == shape.NumOfElements(),
"Number of elements in the vector does not match the number of elements of the shape required");
const TensorAlloc *alloc = GlobalContext::Instance()->tensor_allocator();
*out = std::allocate_shared<Tensor>(*alloc, TensorShape({static_cast<dsize_t>(items.size())}),
DataType(DataType::DE_STRING));
CHECK_FAIL_RETURN_UNEXPECTED(out != nullptr, "Allocate memory failed.");
if (items.size() == 0) {
if (shape.known()) {
return (*out)->Reshape(shape);
@@ -835,6 +841,7 @@ inline Status Tensor::CreateFromVector<std::string>(const std::vector<std::strin
/// \return Status code
template <>
inline Status Tensor::CreateScalar<std::string>(const std::string &item, TensorPtr *out) {
RETURN_UNEXPECTED_IF_NULL(out);
return CreateFromVector<std::string>({item}, TensorShape::CreateScalar(), out);
}
} // namespace dataset
@@ -16,6 +16,8 @@
#include <string>
#include <vector>
#include "minddata/dataset/core/tensor_helpers.h"
#include "minddata/dataset/util/log_adapter.h"
#include "minddata/dataset/util/status.h"
namespace mindspore {
namespace dataset {
@@ -23,6 +25,10 @@ namespace dataset {
void IndexGeneratorHelper(int8_t depth, std::vector<dsize_t> *numbers,
const std::vector<mindspore::dataset::SliceOption> &slice_list,
std::vector<std::vector<dsize_t>> *matrix) {
if (numbers == nullptr || matrix == nullptr) {
MS_LOG(ERROR) << "Invalid input pointer, can't be NULL";
return;
}
// for loop changes if its an index instead of a slice object
if (depth > 0) {
int8_t new_depth = depth - 1;
@@ -87,6 +87,7 @@ class TensorRow {
/// \param[out] output TensorRow
template <typename T>
static Status ConvertToTensorRow(const std::vector<T> &o, TensorRow *output) {
RETURN_UNEXPECTED_IF_NULL(output);
DataType data_type = DataType::FromCType<T>();
if (data_type == DataType::DE_UNKNOWN) {
RETURN_STATUS_UNEXPECTED("ConvertToTensorRow: Data type was not recognized.");
@@ -106,6 +107,7 @@ class TensorRow {
/// \param[out] output TensorRow
template <typename T>
static Status ConvertToTensorRow(const T &o, TensorRow *output) {
RETURN_UNEXPECTED_IF_NULL(output);
DataType data_type = DataType::FromCType<T>();
if (data_type == DataType::DE_UNKNOWN) {
RETURN_STATUS_UNEXPECTED("ConvertToTensorRow: Data type was not recognized.");
@@ -125,6 +127,7 @@ class TensorRow {
/// \param[out] o the primitive variable
template <typename T>
static Status ConvertFromTensorRow(const TensorRow &input, T *o) {
RETURN_UNEXPECTED_IF_NULL(o);
DataType data_type = DataType::FromCType<T>();
RETURN_IF_NOT_OK(ValidateTensorRow(input, data_type));
if (input.at(0)->type() != data_type) {
@@ -142,6 +145,7 @@ class TensorRow {
/// \param[out] o vector of primitive variable
template <typename T>
static Status ConvertFromTensorRow(const TensorRow &input, std::vector<T> *o) {
RETURN_UNEXPECTED_IF_NULL(o);
DataType data_type = DataType::FromCType<T>();
RETURN_IF_NOT_OK(ValidateTensorRow(input, data_type));
if (input.at(0)->Rank() != 1)
@@ -40,7 +40,7 @@ bool multi_ok(dsize_t x, dsize_t y) {
}
dsize_t TensorShape::NumOfElements() const {
if (!known()) {
if (!known() && strides_.size() < 1) {
return 0;
}
return strides_[0];
@@ -216,12 +216,9 @@ py::list TensorShape::AsPyList() {
#endif
TensorShape TensorShape::Squeeze() const {
std::vector<dsize_t> new_shape;
for (auto s : AsVector()) {
if (s != 1) {
new_shape.push_back(s);
}
}
std::vector<dsize_t> new_shape(raw_shape_.size());
auto it = std::copy_if(raw_shape_.begin(), raw_shape_.end(), new_shape.begin(), [](auto s) { return s != 1; });
new_shape.resize(std::distance(new_shape.begin(), it));
return TensorShape(new_shape);
}
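// Illustrative usage, not part of the patch: Squeeze() drops every dimension of size 1,
// so an assumed example shape {1, 3, 1, 4} becomes {3, 4}, while a shape with no
// 1-sized dimensions is returned unchanged.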
@@ -230,6 +227,7 @@ std::vector<dsize_t> TensorShape::Strides() const { return std::vector<dsize_t>{
// Name: ToFlatIndex()
// Description: convert a vector style index to number, used to access memory internal use only
Status TensorShape::ToFlatIndex(const std::vector<dsize_t> &index, dsize_t *flat_index) const {
RETURN_UNEXPECTED_IF_NULL(flat_index);
if (index.size() != raw_shape_.size()) {
std::stringstream ss;
ss << "Index size (" << index.size() << ") does not match the shape size (" << raw_shape_.size() << ").";
@@ -101,8 +101,8 @@ Status CacheServerHW::GetNumaNodeInfo() {
};
// Look for name starts with 'node' and followed by digits.
const char kNodeName[] = "node";
while (it->hasNext()) {
auto p = it->next();
while (it->HasNext()) {
auto p = it->Next();
const std::string entry = p.Basename();
const char *name = entry.data();
if (strncmp(name, kNodeName, strlen(kNodeName)) == 0 && isdigit_string(name + strlen(kNodeName))) {
@@ -63,8 +63,8 @@ Status CachePool::DoServiceStop() {
if (!root_.toString().empty()) {
Path spill = GetSpillPath();
auto it = Path::DirIterator::OpenDirectory(&spill);
while (it->hasNext()) {
rc = it->next().Remove();
while (it->HasNext()) {
rc = it->Next().Remove();
if (rc.IsError() && rc2.IsOk()) {
rc2 = rc;
}
@@ -24,6 +24,7 @@ namespace mindspore::dataset {
PullBasedIteratorConsumer::PullBasedIteratorConsumer() { tree_adapter_lite_ = std::make_unique<TreeAdapterLite>(); }
Status PullBasedIteratorConsumer::Init(std::shared_ptr<DatasetNode> root) {
RETURN_UNEXPECTED_IF_NULL(root);
return tree_adapter_lite_->BuildTree(std::move(root));
}
@@ -20,6 +20,7 @@
namespace mindspore::dataset {
Status PythonIteratorConsumer::GetNextAsList(py::list *out) {
RETURN_UNEXPECTED_IF_NULL(out);
std::vector<TensorPtr> row;
{
py::gil_scoped_release gil_release;
@@ -32,6 +33,7 @@ Status PythonIteratorConsumer::GetNextAsList(py::list *out) {
}
Status PythonIteratorConsumer::GetNextAsDict(py::dict *out) {
RETURN_UNEXPECTED_IF_NULL(out);
std::vector<std::pair<std::string, std::shared_ptr<Tensor>>> vec;
Status s;
{
@@ -64,6 +66,8 @@ Status PythonTreeGetters::GetRow(TensorRow *const r) {
return TreeGetters::GetRow(r);
}
Status PythonDatasetSizeGetter::GetRow(const std::shared_ptr<TreeAdapter> &tree_adapter, TensorRow *r) {
RETURN_UNEXPECTED_IF_NULL(tree_adapter);
RETURN_UNEXPECTED_IF_NULL(r);
py::gil_scoped_release gil_release;
return DatasetSizeGetter::GetRow(tree_adapter, r);
}
@@ -13,7 +13,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <algorithm>
#include <memory>
#include <string>
@@ -179,6 +178,8 @@ Status ToDevice::Stop() {
}
Status ToDevice::GetDataInfo(std::vector<DataType> *const types, std::vector<TensorShape> *const shapes) {
RETURN_UNEXPECTED_IF_NULL(types);
RETURN_UNEXPECTED_IF_NULL(shapes);
// tree_.root() must be DeviceQueueOp
std::shared_ptr<DatasetOp> root = std::shared_ptr<DatasetOp>(tree_adapter_->GetRoot());
CHECK_FAIL_RETURN_UNEXPECTED(root != nullptr, "Root is a nullptr.");
@@ -218,8 +219,13 @@ Status SaveToDisk::ValidateParams() {
MS_LOG(ERROR) << err;
RETURN_STATUS_SYNTAX_ERROR(err);
}
auto parent_path = dir.ParentPath();
if (!parent_path.empty() && access(common::SafeCStr(parent_path), R_OK) == -1) {
std::string real_path;
if (Path::RealPath(dir.ParentPath(), real_path).IsError()) {
std::string err_msg = "CreateSaver failed, can not get real dataset path: " + dir.ParentPath();
MS_LOG(ERROR) << err_msg;
RETURN_STATUS_SYNTAX_ERROR(err_msg);
}
if (access(dir.ParentPath().c_str(), R_OK) == -1) {
std::string err_msg = "CreateSaver failed, no access to specified dataset path: " + dataset_path_;
MS_LOG(ERROR) << err_msg;
RETURN_STATUS_SYNTAX_ERROR(err_msg);
@@ -250,15 +256,15 @@ Status SaveToDisk::Save() {
auto mr_header = std::make_shared<mindrecord::ShardHeader>();
auto mr_writer = std::make_unique<mindrecord::ShardWriter>();
std::vector<std::string> blob_fields;
if (mindrecord::SUCCESS != mindrecord::ShardWriter::initialize(&mr_writer, file_names)) {
if (mindrecord::SUCCESS != mindrecord::ShardWriter::Initialize(&mr_writer, file_names)) {
RETURN_STATUS_UNEXPECTED("Error: failed to initialize ShardWriter, please check above `ERROR` level message.");
}
std::unordered_map<std::string, int32_t> column_name_id_map;
for (auto el : tree_adapter_->GetColumnNameMap()) {
std::string column_name = el.first;
std::transform(column_name.begin(), column_name.end(), column_name.begin(),
[](unsigned char c) { return ispunct(c) ? '_' : c; });
(void)std::transform(column_name.begin(), column_name.end(), column_name.begin(),
[](unsigned char c) { return ispunct(c) ? '_' : c; });
column_name_id_map[column_name] = el.second;
}
@@ -281,17 +287,21 @@ Status SaveToDisk::Save() {
RETURN_IF_NOT_OK(FetchMetaFromTensorRow(column_name_id_map, row, &mr_json, &index_fields));
MS_LOG(INFO) << "Schema of saved mindrecord: " << mr_json.dump();
if (mindrecord::SUCCESS !=
mindrecord::ShardHeader::initialize(&mr_header, mr_json, index_fields, blob_fields, mr_schema_id)) {
mindrecord::ShardHeader::Initialize(&mr_header, mr_json, index_fields, blob_fields, mr_schema_id)) {
RETURN_STATUS_UNEXPECTED("Error: failed to initialize ShardHeader.");
}
mr_writer->SetShardHeader(mr_header);
if (mindrecord::SUCCESS != mr_writer->SetShardHeader(mr_header)) {
RETURN_STATUS_UNEXPECTED("Error: failed to set header of ShardWriter.");
}
first_loop = false;
}
// construct data
if (!row.empty()) { // write data
RETURN_IF_NOT_OK(FetchDataFromTensorRow(row, column_name_id_map, &row_raw_data, &row_bin_data));
std::shared_ptr<std::vector<uint8_t>> output_bin_data;
mr_writer->MergeBlobData(blob_fields, row_bin_data, &output_bin_data);
if (mindrecord::SUCCESS != mr_writer->MergeBlobData(blob_fields, row_bin_data, &output_bin_data)) {
RETURN_STATUS_UNEXPECTED("Error: failed to merge blob data of ShardWriter.");
}
std::map<std::uint64_t, std::vector<nlohmann::json>> raw_data;
raw_data.insert(
std::pair<uint64_t, std::vector<nlohmann::json>>(mr_schema_id, std::vector<nlohmann::json>{row_raw_data}));
@@ -299,12 +309,16 @@ Status SaveToDisk::Save() {
if (output_bin_data != nullptr) {
bin_data.emplace_back(*output_bin_data);
}
mr_writer->WriteRawData(raw_data, bin_data);
if (mindrecord::SUCCESS != mr_writer->WriteRawData(raw_data, bin_data)) {
RETURN_STATUS_UNEXPECTED("Error: failed to write raw data to ShardWriter.");
}
}
} while (!row.empty());
mr_writer->Commit();
if (mindrecord::SUCCESS != mindrecord::ShardIndexGenerator::finalize(file_names)) {
if (mindrecord::SUCCESS != mr_writer->Commit()) {
RETURN_STATUS_UNEXPECTED("Error: failed to commit ShardWriter.");
}
if (mindrecord::SUCCESS != mindrecord::ShardIndexGenerator::Finalize(file_names)) {
RETURN_STATUS_UNEXPECTED("Error: failed to finalize ShardIndexGenerator.");
}
return Status::OK();
@@ -407,7 +421,7 @@ Status SaveToDisk::FetchMetaFromTensorRow(const std::unordered_map<std::string,
return Status::OK();
}
static Status ValidateInputParams(nlohmann::json *row_raw_data,
inline Status ValidateInputParams(nlohmann::json *row_raw_data,
std::map<std::string, std::unique_ptr<std::vector<uint8_t>>> *row_bin_data,
const std::unordered_map<std::string, int32_t> &column_name_id_map) {
if (row_raw_data == nullptr) {
@@ -424,6 +438,8 @@ static Status ValidateInputParams(nlohmann::json *row_raw_data,
Status SaveToDisk::FetchFloatData(std::shared_ptr<Tensor> tensor, std::string column_name, nlohmann::json *row_raw_data,
std::unique_ptr<std::vector<uint8_t>> *data_ptr) {
RETURN_UNEXPECTED_IF_NULL(row_raw_data);
RETURN_UNEXPECTED_IF_NULL(data_ptr);
auto column_type = tensor->type();
Status s;
if (column_type == DataType::DE_FLOAT32) {
@@ -442,6 +458,9 @@ Status SaveToDisk::FetchFloatData(std::shared_ptr<Tensor> tensor, std::string co
Status SaveToDisk::FetchItemData(std::shared_ptr<Tensor> tensor, std::string column_name, nlohmann::json *row_raw_data,
std::map<std::string, std::unique_ptr<std::vector<uint8_t>>> *row_bin_data) {
RETURN_UNEXPECTED_IF_NULL(tensor);
RETURN_UNEXPECTED_IF_NULL(row_raw_data);
RETURN_UNEXPECTED_IF_NULL(row_bin_data);
auto column_type = tensor->type();
Status s;
std::unique_ptr<std::vector<uint8_t>> data_ptr;
@@ -492,7 +511,6 @@ Status SaveToDisk::FetchItemData(std::shared_ptr<Tensor> tensor, std::string col
RETURN_IF_NOT_OK(tensor->GetItemAt(&sv, {})); // assume scalar string tensor
std::string ss(sv);
(*row_raw_data)[column_name] = std::move(ss);
return Status::OK();
} else {
RETURN_STATUS_UNEXPECTED("Got unexpected type when casting data.");
}
@@ -506,6 +524,8 @@ Status SaveToDisk::FetchDataFromTensorRow(const TensorRow &row,
const std::unordered_map<std::string, int32_t> &column_name_id_map,
nlohmann::json *row_raw_data,
std::map<std::string, std::unique_ptr<std::vector<uint8_t>>> *row_bin_data) {
RETURN_UNEXPECTED_IF_NULL(row_raw_data);
RETURN_UNEXPECTED_IF_NULL(row_bin_data);
Status s;
s = ValidateInputParams(row_raw_data, row_bin_data, column_name_id_map);
if (s.IsError()) {
@@ -525,9 +545,11 @@ template <typename T, typename S>
Status SaveToDisk::TransformTensor(const unsigned char *src, const TensorShape &shape, const int64_t num_of_elements,
std::unique_ptr<T> *data, std::unique_ptr<std::vector<uint8_t>> *data_ptr,
std::unique_ptr<S> *s, bool need_convert) {
if (nullptr == src) {
RETURN_STATUS_UNEXPECTED("Error: buffer of Tensor is NULL.");
}
RETURN_UNEXPECTED_IF_NULL(src);
RETURN_UNEXPECTED_IF_NULL(data);
RETURN_UNEXPECTED_IF_NULL(data_ptr);
RETURN_UNEXPECTED_IF_NULL(s);
*data_ptr = std::make_unique<std::vector<uint8_t>>(num_of_elements * sizeof(T));
if (need_convert) {
auto tmp_ptr = std::make_unique<std::vector<uint8_t>>(num_of_elements * sizeof(S));
@@ -560,25 +582,32 @@ TreeGetters::TreeGetters() : dataset_size_(-1), init_flag_(false), first_row_obt
}
Status TreeGetters::Init(std::shared_ptr<DatasetNode> d) {
RETURN_UNEXPECTED_IF_NULL(d);
root_ = std::move(d);
return Status::OK();
}
Status TreeGetters::GetRow(TensorRow *row) { return tree_adapter_->GetNext(row); }
Status TreeGetters::GetRow(TensorRow *row) {
RETURN_UNEXPECTED_IF_NULL(row);
return tree_adapter_->GetNext(row);
}
Status TreeGetters::GetOutputTypes(std::vector<DataType> *types) {
RETURN_UNEXPECTED_IF_NULL(types);
RETURN_IF_NOT_OK(GetFirstRowShapeAndType());
*types = first_row_type_;
return Status::OK();
}
Status TreeGetters::GetOutputShapes(std::vector<TensorShape> *shapes) {
RETURN_UNEXPECTED_IF_NULL(shapes);
RETURN_IF_NOT_OK(GetFirstRowShapeAndType());
*shapes = first_row_shape_;
return Status::OK();
}
Status TreeGetters::GetBatchSize(int64_t *batch_size) {
RETURN_UNEXPECTED_IF_NULL(batch_size);
RETURN_IF_NOT_OK(InternalInit());
std::shared_ptr<DatasetOp> root = std::shared_ptr<DatasetOp>(tree_adapter_->GetRoot());
RETURN_UNEXPECTED_IF_NULL(root);
@@ -588,6 +617,7 @@ Status TreeGetters::GetBatchSize(int64_t *batch_size) {
}
Status TreeGetters::GetRepeatCount(int64_t *repeat_count) {
RETURN_UNEXPECTED_IF_NULL(repeat_count);
RETURN_IF_NOT_OK(InternalInit());
std::shared_ptr<DatasetOp> root = std::shared_ptr<DatasetOp>(tree_adapter_->GetRoot());
RETURN_UNEXPECTED_IF_NULL(root);
@@ -596,6 +626,7 @@ Status TreeGetters::GetRepeatCount(int64_t *repeat_count) {
}
Status TreeGetters::GetNumClasses(int64_t *num_classes) {
RETURN_UNEXPECTED_IF_NULL(num_classes);
RETURN_IF_NOT_OK(InternalInit());
std::shared_ptr<DatasetOp> root = std::shared_ptr<DatasetOp>(tree_adapter_->GetRoot());
RETURN_UNEXPECTED_IF_NULL(root);
@@ -604,6 +635,7 @@ Status TreeGetters::GetNumClasses(int64_t *num_classes) {
}
Status TreeGetters::GetColumnNames(std::vector<std::string> *output) {
RETURN_UNEXPECTED_IF_NULL(output);
RETURN_IF_NOT_OK(InternalInit());
std::shared_ptr<DatasetOp> root = std::shared_ptr<DatasetOp>(tree_adapter_->GetRoot());
RETURN_UNEXPECTED_IF_NULL(root);
@@ -620,6 +652,7 @@ Status TreeGetters::GetColumnNames(std::vector<std::string> *output) {
}
Status TreeGetters::GetClassIndexing(std::vector<std::pair<std::string, std::vector<int32_t>>> *output_class_indexing) {
RETURN_UNEXPECTED_IF_NULL(output_class_indexing);
RETURN_IF_NOT_OK(InternalInit());
std::shared_ptr<DatasetOp> root = std::shared_ptr<DatasetOp>(tree_adapter_->GetRoot());
RETURN_UNEXPECTED_IF_NULL(root);
@@ -671,6 +704,7 @@ Status DatasetSizeGetter::Init(std::shared_ptr<DatasetNode> d) {
return Status::OK();
}
Status DatasetSizeGetter::DryRun(std::shared_ptr<DatasetNode> ir_node, int64_t *dataset_size) {
RETURN_UNEXPECTED_IF_NULL(dataset_size);
std::shared_ptr<TreeAdapter> tree_adapter = std::make_shared<TreeAdapter>(TreeAdapter::UsageFlag::kDeGetter);
tree_adapters_.push_back(tree_adapter);
RETURN_IF_NOT_OK(tree_adapter->Compile(ir_node, 1));
@@ -685,6 +719,7 @@ Status DatasetSizeGetter::DryRun(std::shared_ptr<DatasetNode> ir_node, int64_t *
return Status::OK();
}
Status DatasetSizeGetter::GetRow(const std::shared_ptr<TreeAdapter> &tree_adapter, TensorRow *row) {
RETURN_UNEXPECTED_IF_NULL(row);
return tree_adapter->GetNext(row);
}
Status DatasetSizeGetter::Terminate() {
@@ -73,7 +73,7 @@ ColDescriptor::ColDescriptor(const std::string &col_name, DataType col_type, Ten
ColDescriptor::ColDescriptor(const ColDescriptor &in_cd)
: type_(in_cd.type_), rank_(in_cd.rank_), tensor_impl_(in_cd.tensor_impl_), col_name_(in_cd.col_name_) {
// If it has a tensor shape, make a copy of it with our own unique_ptr.
tensor_shape_ = in_cd.hasShape() ? std::make_unique<TensorShape>(in_cd.shape()) : nullptr;
tensor_shape_ = in_cd.HasShape() ? std::make_unique<TensorShape>(in_cd.Shape()) : nullptr;
}
// Assignment overload
@@ -84,7 +84,7 @@ ColDescriptor &ColDescriptor::operator=(const ColDescriptor &in_cd) {
tensor_impl_ = in_cd.tensor_impl_;
col_name_ = in_cd.col_name_;
// If it has a tensor shape, make a copy of it with our own unique_ptr.
tensor_shape_ = in_cd.hasShape() ? std::make_unique<TensorShape>(in_cd.shape()) : nullptr;
tensor_shape_ = in_cd.HasShape() ? std::make_unique<TensorShape>(in_cd.Shape()) : nullptr;
}
return *this;
}
@@ -113,7 +113,7 @@ Status ColDescriptor::MaterializeTensorShape(int32_t num_elements, TensorShape *
// If the shape is not given in this column, then we assume the shape will be: {numElements}
if (tensor_shape_ == nullptr) {
if (this->rank() == 0 && num_elements == 1) {
if (this->Rank() == 0 && num_elements == 1) {
*out_shape = TensorShape::CreateScalar();
return Status::OK();
}
@@ -173,7 +173,7 @@ Status ColDescriptor::MaterializeTensorShape(int32_t num_elements, TensorShape *
}
// getter function for the shape
TensorShape ColDescriptor::shape() const {
TensorShape ColDescriptor::Shape() const {
if (tensor_shape_ != nullptr) {
return *tensor_shape_; // copy construct a shape to return
} else {
@@ -257,7 +257,7 @@ Status DataSchema::ColumnOrderLoad(nlohmann::json column_tree, const std::vector
}
// Internal helper function for parsing shape info and building a vector for the shape construction.
static Status buildShape(const nlohmann::json &shapeVal, std::vector<dsize_t> *outShape) {
static Status BuildShape(const nlohmann::json &shapeVal, std::vector<dsize_t> *outShape) {
if (outShape == nullptr) {
RETURN_STATUS_UNEXPECTED("null output shape");
}
@@ -274,7 +274,8 @@ static Status buildShape(const nlohmann::json &shapeVal, std::vector<dsize_t> *o
Status DataSchema::ColumnLoad(nlohmann::json column_child_tree, const std::string &col_name) {
int32_t rank_value = -1;
TensorImpl t_impl_value = TensorImpl::kFlexible;
std::string name, type_str;
std::string name = "";
std::string type_str = "";
std::vector<dsize_t> tmp_shape = {};
bool shape_field_exists = false;
// Iterate over this column's attributes.
@@ -291,7 +292,7 @@ Status DataSchema::ColumnLoad(nlohmann::json column_child_tree, const std::strin
STR_TO_TENSORIMPL(it_child.value(), t_impl_value);
} else if (it_child.key() == "shape") {
shape_field_exists = true;
RETURN_IF_NOT_OK(buildShape(it_child.value(), &tmp_shape));
RETURN_IF_NOT_OK(BuildShape(it_child.value(), &tmp_shape));
} else {
std::string err_msg = "Unexpected column attribute " + it_child.key() + " for column " + col_name;
RETURN_STATUS_UNEXPECTED(err_msg);
| @@ -324,10 +325,10 @@ Status DataSchema::ColumnLoad(nlohmann::json column_child_tree, const std::strin | |||
| // Create the column descriptor for this column from the data we pulled from the json file | |||
| TensorShape col_shape = TensorShape(tmp_shape); | |||
| if (shape_field_exists) | |||
| (void)this->AddColumn(ColDescriptor(name, DataType(type_str), t_impl_value, rank_value, &col_shape)); | |||
| RETURN_IF_NOT_OK(this->AddColumn(ColDescriptor(name, DataType(type_str), t_impl_value, rank_value, &col_shape))); | |||
| else | |||
| // Create a column descriptor that doesn't have a shape | |||
| (void)this->AddColumn(ColDescriptor(name, DataType(type_str), t_impl_value, rank_value)); | |||
| RETURN_IF_NOT_OK(this->AddColumn(ColDescriptor(name, DataType(type_str), t_impl_value, rank_value))); | |||
| return Status::OK(); | |||
| } | |||
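Two things worth noting here. First, with `(void)` replaced by `RETURN_IF_NOT_OK`, an AddColumn failure (for example, the duplicate-name check further down) now propagates out of ColumnLoad instead of being silently dropped. Second, for context, a column entry that this loader would accept might look like the hypothetical fragment below; the "shape" attribute is the one handed to BuildShape above, while the other key names are illustrative.

    {
      "columns": {
        "label": { "type": "int32", "rank": 1, "shape": [1] }
      }
    }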
| @@ -345,19 +346,30 @@ Status DataSchema::LoadSchemaFile(const std::string &schema_file_path, | |||
| } catch (nlohmann::json::out_of_range &e) { | |||
| num_rows_ = 0; | |||
| } catch (nlohmann::json::exception &e) { | |||
| in.close(); | |||
| RETURN_STATUS_UNEXPECTED("Unable to parse \"numRows\" from schema"); | |||
| } | |||
| nlohmann::json column_tree = js.at("columns"); | |||
| if (column_tree.empty()) { | |||
| in.close(); | |||
| RETURN_STATUS_UNEXPECTED("columns is null"); | |||
| } | |||
| if (columns_to_load.empty()) { | |||
| // Parse the json tree and load the schema's columns in whatever order that the json | |||
| // layout decides | |||
| RETURN_IF_NOT_OK(this->AnyOrderLoad(column_tree)); | |||
| Status rc = this->AnyOrderLoad(column_tree); | |||
| if (rc.IsError()) { | |||
| in.close(); | |||
| return rc; | |||
| } | |||
| } else { | |||
| RETURN_IF_NOT_OK(this->ColumnOrderLoad(column_tree, columns_to_load)); | |||
| Status rc = this->ColumnOrderLoad(column_tree, columns_to_load); | |||
| if (rc.IsError()) { | |||
| in.close(); | |||
| return rc; | |||
| } | |||
| } | |||
| in.close(); | |||
| } catch (const std::exception &err) { | |||
| // Catch any exception and convert to Status return code | |||
| RETURN_STATUS_UNEXPECTED("Schema file failed to load with JSON tools. File is: " + schema_file_path); | |||
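A note on the explicit `in.close()` calls introduced on each early-return path: `std::ifstream` closes its file in the destructor, so these are defensive rather than strictly necessary. The sketch below shows the same early-return behaviour relying purely on RAII (a hypothetical standalone helper, not code from this change).

    #include <fstream>
    #include <string>
    #include <nlohmann/json.hpp>

    // Parse a JSON file; the ifstream closes itself when `in` leaves scope,
    // on the success path and on every early return alike.
    bool LoadJsonFile(const std::string &path, nlohmann::json *out) {
      if (out == nullptr) return false;
      std::ifstream in(path);
      if (!in.is_open()) return false;
      try {
        in >> *out;
      } catch (const nlohmann::json::exception &) {
        return false;
      }
      return true;
    }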
| @@ -394,7 +406,7 @@ Status DataSchema::LoadSchemaString(const std::string &schema_json_string, | |||
| DataSchema::~DataSchema() = default; | |||
| // Getter for the ColDescriptor by index | |||
| const ColDescriptor &DataSchema::column(int32_t idx) const { | |||
| const ColDescriptor &DataSchema::Column(int32_t idx) const { | |||
| MS_ASSERT(idx < static_cast<int>(col_descs_.size())); | |||
| return col_descs_[idx]; | |||
| } | |||
| @@ -411,9 +423,9 @@ void DataSchema::Print(std::ostream &out) const { | |||
| Status DataSchema::AddColumn(const ColDescriptor &cd) { | |||
| // Sanity check there's not a duplicate name before adding the column | |||
| for (auto i = 0; i < col_descs_.size(); ++i) { | |||
| if (col_descs_[i].name() == cd.name()) { | |||
| if (col_descs_[i].Name() == cd.Name()) { | |||
| std::ostringstream ss; | |||
| ss << "column name '" << cd.name() << "' already exists in schema."; | |||
| ss << "column name '" << cd.Name() << "' already exists in schema."; | |||
| std::string err_msg = ss.str(); | |||
| RETURN_STATUS_UNEXPECTED(err_msg); | |||
| } | |||
| @@ -439,11 +451,11 @@ Status DataSchema::GetColumnNameMap(std::unordered_map<std::string, int32_t> *ou | |||
| } | |||
| for (size_t i = 0; i < col_descs_.size(); ++i) { | |||
| if (col_descs_[i].name().empty()) { | |||
| if (col_descs_[i].Name().empty()) { | |||
| return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, | |||
| "Constructing column name map from schema, but found empty column name."); | |||
| } | |||
| (*out_column_name_map)[col_descs_[i].name()] = i; | |||
| (*out_column_name_map)[col_descs_[i].Name()] = i; | |||
| } | |||
| return Status::OK(); | |||
| @@ -81,27 +81,27 @@ class ColDescriptor { | |||
| /// \brief getter function | |||
| /// \return The column's DataType | |||
| DataType type() const { return type_; } | |||
| DataType Type() const { return type_; } | |||
| /// \brief getter function | |||
| /// \return The column's rank | |||
| int32_t rank() const { return rank_; } | |||
| int32_t Rank() const { return rank_; } | |||
| /// \brief getter function | |||
| /// \return The column's name | |||
| std::string name() const { return col_name_; } | |||
| std::string Name() const { return col_name_; } | |||
| /// \brief getter function | |||
| /// \return The column's shape | |||
| TensorShape shape() const; | |||
| TensorShape Shape() const; | |||
| /// \brief getter function | |||
| /// \return T/F if the column has an assigned fixed shape. | |||
| bool hasShape() const { return tensor_shape_ != nullptr; } | |||
| bool HasShape() const { return tensor_shape_ != nullptr; } | |||
| /// \brief getter function | |||
| /// \return The column's tensor implementation type | |||
| TensorImpl tensorImpl() const { return tensor_impl_; } | |||
| TensorImpl GetTensorImpl() const { return tensor_impl_; } | |||
| private: | |||
| DataType type_; // The column's type | |||
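A minimal usage sketch of the renamed getters (assumes the minddata dataset headers and a populated `DataSchema schema` are available; it mirrors the ComputeColMap loops updated throughout this change):

    for (int32_t i = 0; i < schema.NumColumns(); ++i) {
      const ColDescriptor &col = schema.Column(i);
      MS_LOG(INFO) << "column " << col.Name() << ": type " << col.Type().ToString()
                   << ", rank " << col.Rank()
                   << (col.HasShape() ? ", shape " + col.Shape().ToString() : std::string(""));
    }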
| @@ -153,7 +153,7 @@ class DataSchema { | |||
| /// \brief getter | |||
| /// \return The reference to a ColDescriptor to get (const version) | |||
| const ColDescriptor &column(int32_t idx) const; | |||
| const ColDescriptor &Column(int32_t idx) const; | |||
| /// \brief getter | |||
| /// \return The number of columns in the schema | |||
| @@ -163,7 +163,7 @@ class DataSchema { | |||
| /// \brief getter | |||
| /// \return The number of rows read from schema | |||
| int64_t num_rows() const { return num_rows_; } | |||
| int64_t NumRows() const { return num_rows_; } | |||
| static const char DEFAULT_DATA_SCHEMA_FILENAME[]; | |||
| @@ -14,6 +14,7 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "minddata/dataset/engine/dataset_iterator.h" | |||
| #include <algorithm> | |||
| #include <unordered_map> | |||
| #include <utility> | |||
| #include "minddata/dataset/core/data_type.h" | |||
| @@ -49,7 +49,7 @@ class DatasetIterator { | |||
| // @return The string to column id mapping. | |||
| std::unordered_map<std::string, int32_t> GetColumnNameMap() const; | |||
| bool eof_handled() const { return eof_handled_; } | |||
| bool EofHandled() const { return eof_handled_; } | |||
| // Fetches one row of data from the iterator. | |||
| // the base class version simply performs error handling and returns empty row. Actual | |||
| @@ -108,11 +108,11 @@ class ChildIterator { | |||
| std::unordered_map<std::string, int32_t> GetColumnNameMap() const; | |||
| // Return T/F if end of epoch | |||
| bool end_of_epoch() { return end_epoch_; } | |||
| bool EndOfEpoch() { return end_epoch_; } | |||
| // Getter | |||
| // @return T/F if this iterator is completely done after getting an eof | |||
| bool eof_handled() const { return eof_handled_; } | |||
| bool EofHandled() const { return eof_handled_; } | |||
| private: | |||
| DatasetOp *current_op_; // The parent operator. We consume from its children. | |||
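With `eof_handled()` and `end_of_epoch()` renamed, the typical consumer loop looks like the sketch below (mirroring the pattern in BatchOp, FilterOp and BucketBatchByLengthOp further down; member names are taken from those call sites):

    TensorRow row;
    RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&row));
    while (!child_iterator_->EofHandled()) {   // true once an EOF has been consumed
      while (!row.empty()) {                   // an empty row marks the end of an epoch
        // ... consume `row` ...
        RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&row));
      }
      RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&row));  // begin the next epoch
    }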
| @@ -113,6 +113,7 @@ Status BarrierOp::blockCond() { | |||
| // fetches next Barrier row | |||
| Status BarrierOp::getNextTensorRow(TensorRow *new_row) { | |||
| RETURN_UNEXPECTED_IF_NULL(new_row); | |||
| // iterate over all iterators and generate a row | |||
| RETURN_IF_NOT_OK((child_iterator_)->FetchNextTensorRow(new_row)); | |||
| // add each new row to iterator, check if row is empty, if row from iterator is empty return empty row | |||
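Most of the functional changes in this section add `RETURN_UNEXPECTED_IF_NULL` guards on output parameters before they are dereferenced. For readers unfamiliar with the macro, it behaves roughly like the sketch below (illustrative only; the real definition lives in the dataset error-handling headers and also records file and line information):

    #define RETURN_UNEXPECTED_IF_NULL(ptr)                                  \
      do {                                                                  \
        if ((ptr) == nullptr) {                                             \
          RETURN_STATUS_UNEXPECTED("The pointer[" #ptr "] is null.");       \
        }                                                                   \
      } while (false)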
| @@ -122,7 +123,7 @@ Status BarrierOp::getNextTensorRow(TensorRow *new_row) { | |||
| MS_LOG(INFO) << "Barrier operator child iterator produced empty row."; | |||
| clean_up_ = true; | |||
| // If we picked up an eof here, then we are completely done. | |||
| if ((child_iterator_)->eof_handled()) { | |||
| if ((child_iterator_)->EofHandled()) { | |||
| MS_LOG(INFO) << "Barrier operator iterator got EOF."; | |||
| eof_ = true; | |||
| } | |||
| @@ -36,6 +36,7 @@ BatchOp::Builder::Builder(int32_t batch_size) : builder_drop_(false), builder_pa | |||
| } | |||
| Status BatchOp::Builder::Build(std::shared_ptr<BatchOp> *ptr) { | |||
| RETURN_UNEXPECTED_IF_NULL(ptr); | |||
| #ifdef ENABLE_PYTHON | |||
| *ptr = std::make_shared<BatchOp>(builder_batch_size_, builder_drop_, builder_pad_, builder_op_connector_size_, | |||
| builder_num_workers_, builder_in_names_, builder_out_names_, | |||
| @@ -106,7 +107,7 @@ Status BatchOp::operator()() { | |||
| RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&new_row)); | |||
| int32_t cur_batch_size = 0; | |||
| RETURN_IF_NOT_OK(GetBatchSize(&cur_batch_size, CBatchInfo(0, 0, 0))); | |||
| while (child_iterator_->eof_handled() == false) { | |||
| while (child_iterator_->EofHandled() == false) { | |||
| while (new_row.empty() == false) { | |||
| table->emplace_back(new_row); | |||
| // if # of rows is enough to make 1 batch, send it to worker_queue | |||
| @@ -142,7 +143,7 @@ Status BatchOp::operator()() { | |||
| << "reduce memory usage."; | |||
| } | |||
| #endif | |||
| } // end of eof_handled() == false | |||
| } // end of EofHandled() == false | |||
| RETURN_IF_NOT_OK( | |||
| worker_queues_[cnt++ % num_workers_]->EmplaceBack(std::make_pair(nullptr, CBatchInfo(batchCtrl::kEOF)))); | |||
| // EOF received, send quit signal to all workers | |||
| @@ -168,6 +169,8 @@ void BatchOp::Print(std::ostream &out, bool show_all) const { | |||
| } | |||
| Status BatchOp::BatchRows(const std::unique_ptr<TensorQTable> *src, TensorRow *dest, dsize_t batch_size) { | |||
| RETURN_UNEXPECTED_IF_NULL(src); | |||
| RETURN_UNEXPECTED_IF_NULL(dest); | |||
| if ((*src)->size() != batch_size) { | |||
| RETURN_STATUS_UNEXPECTED("[Internal ERROR] Source table size does not match the batch_size."); | |||
| } | |||
| @@ -274,6 +277,8 @@ Status BatchOp::EoeReceived(int32_t) { | |||
| #ifdef ENABLE_PYTHON | |||
| Status BatchOp::MapColumns(std::pair<std::unique_ptr<TensorQTable>, CBatchInfo> *table_pair) { | |||
| RETURN_UNEXPECTED_IF_NULL(table_pair); | |||
| RETURN_UNEXPECTED_IF_NULL(table_pair->first); | |||
| std::unique_ptr<TensorQTable> in_q_table = std::move(table_pair->first); | |||
| size_t num_rows = in_q_table->size(); | |||
| auto out_q_table = std::make_unique<TensorQTable>(num_rows, TensorRow(column_name_id_map_.size(), nullptr)); | |||
| @@ -316,6 +321,7 @@ Status BatchOp::MapColumns(std::pair<std::unique_ptr<TensorQTable>, CBatchInfo> | |||
| #endif | |||
| Status BatchOp::GetBatchSize(int32_t *batch_size, CBatchInfo info) { | |||
| RETURN_UNEXPECTED_IF_NULL(batch_size); | |||
| #ifdef ENABLE_PYTHON | |||
| if (batch_size_func_) { | |||
| RETURN_IF_NOT_OK(InvokeBatchSizeFunc(batch_size, info)); | |||
| @@ -330,6 +336,7 @@ Status BatchOp::GetBatchSize(int32_t *batch_size, CBatchInfo info) { | |||
| #ifdef ENABLE_PYTHON | |||
| Status BatchOp::InvokeBatchSizeFunc(int32_t *batch_size, CBatchInfo info) { | |||
| RETURN_UNEXPECTED_IF_NULL(batch_size); | |||
| { | |||
| // Acquire Python GIL | |||
| py::gil_scoped_acquire gil_acquire; | |||
| @@ -355,6 +362,8 @@ Status BatchOp::InvokeBatchSizeFunc(int32_t *batch_size, CBatchInfo info) { | |||
| } | |||
| Status BatchOp::InvokeBatchMapFunc(TensorTable *input, TensorTable *output, CBatchInfo info) { | |||
| RETURN_UNEXPECTED_IF_NULL(input); | |||
| RETURN_UNEXPECTED_IF_NULL(output); | |||
| { | |||
| // Acquire Python GIL | |||
| py::gil_scoped_acquire gil_acquire; | |||
| @@ -471,6 +480,9 @@ Status BatchOp::UnpackPadInfo(const PadInfo &pad_info, | |||
| const std::unordered_map<std::string, int32_t> &column_name_id_map, | |||
| std::set<int32_t> *pad_cols, std::vector<std::shared_ptr<Tensor>> *pad_vals, | |||
| std::vector<std::vector<dsize_t>> *pad_shapes) { | |||
| RETURN_UNEXPECTED_IF_NULL(pad_cols); | |||
| RETURN_UNEXPECTED_IF_NULL(pad_vals); | |||
| RETURN_UNEXPECTED_IF_NULL(pad_shapes); | |||
| if (pad_info.empty()) { // if pad_info is empty, pad every column automatically | |||
| for (size_t col_id = 0; col_id < column_name_id_map.size(); col_id++) { | |||
| pad_cols->insert(col_id); | |||
| @@ -561,6 +573,7 @@ int64_t BatchOp::GetTreeBatchSize() { | |||
| } | |||
| Status BatchOp::GetNextRowPullMode(TensorRow *const row) { | |||
| RETURN_UNEXPECTED_IF_NULL(row); | |||
| std::unique_ptr<TensorQTable> table = std::make_unique<TensorQTable>(); | |||
| child_iterator_ = std::make_unique<ChildIterator>(this, 0, 0); | |||
| int32_t cur_batch_size = 0; | |||
| @@ -60,7 +60,7 @@ Status BucketBatchByLengthOp::operator()() { | |||
| TensorRow current_row; | |||
| child_iterator_ = std::make_unique<ChildIterator>(this, 0, 0); | |||
| RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(¤t_row)); | |||
| while (!child_iterator_->eof_handled()) { | |||
| while (!child_iterator_->EofHandled()) { | |||
| while (!current_row.empty()) { | |||
| int32_t element_length; | |||
| RETURN_IF_NOT_OK(ObtainElementLength(&element_length, current_row)); | |||
| @@ -99,6 +99,7 @@ Status BucketBatchByLengthOp::operator()() { | |||
| } | |||
| Status BucketBatchByLengthOp::ObtainElementLength(int32_t *out_element_length, TensorRow element) { | |||
| RETURN_UNEXPECTED_IF_NULL(out_element_length); | |||
| // call pyfunc here if given pyfunc, otherwise return 0th dimension of shape of | |||
| // the single column specified in length_dependent_columns_ | |||
| if (element_length_function_) { | |||
| @@ -52,7 +52,7 @@ Status BuildSentencePieceVocabOp::operator()() { | |||
| RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&new_row)); | |||
| bool eoe_warning = false; // give out warning if receive more than 1 eoe | |||
| while (child_iterator_->eof_handled() == false) { | |||
| while (child_iterator_->EofHandled() == false) { | |||
| while (new_row.empty() == false) { | |||
| RETURN_IF_NOT_OK(sentence_queue_->EmplaceBack(new_row)); | |||
| RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&new_row)); | |||
| @@ -107,7 +107,7 @@ Status BuildVocabOp::operator()() { | |||
| } | |||
| } | |||
| bool eoe_warning = false; // give out warning if receive more than 1 eoe | |||
| while (child_iterator_->eof_handled() == false) { | |||
| while (child_iterator_->EofHandled() == false) { | |||
| while (new_row.empty() == false) { | |||
| RETURN_IF_NOT_OK(distributor_queue_->EmplaceBack(new_row)); | |||
| RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&new_row)); | |||
| @@ -593,7 +593,7 @@ Status DeviceQueueOp::SendDataToCPU() { | |||
| MS_LOG(INFO) << "Device queue, sending data to CPU."; | |||
| int64_t total_batch = 0; | |||
| while (!(child_iterator_->eof_handled())) { | |||
| while (!(child_iterator_->EofHandled())) { | |||
| TensorRow curr_row; | |||
| RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&curr_row)); | |||
| @@ -62,7 +62,7 @@ Status FilterOp::operator()() { | |||
| TensorRow new_row; | |||
| RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&new_row)); | |||
| int64_t cnt = 0; | |||
| while (child_iterator_->eof_handled() == false) { | |||
| while (child_iterator_->EofHandled() == false) { | |||
| while (new_row.empty() == false) { | |||
| RETURN_IF_NOT_OK(worker_queues_[cnt % num_workers_]->EmplaceBack(new_row)); | |||
| cnt++; | |||
| @@ -124,7 +124,7 @@ Status ShuffleOp::operator()() { | |||
| RETURN_IF_NOT_OK(InitShuffleBuffer()); | |||
| // This is our main loop exit condition: the iterator has no more data at all. | |||
| if (child_iterator_->eof_handled()) { | |||
| if (child_iterator_->EofHandled()) { | |||
| RETURN_IF_NOT_OK(out_connector_->SendEOF()); | |||
| break; | |||
| } | |||
| @@ -214,7 +214,7 @@ Status ShuffleOp::InitShuffleBuffer() { | |||
| TensorRow new_row; | |||
| RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&new_row)); | |||
| if (child_iterator_->eof_handled()) { | |||
| if (child_iterator_->EofHandled()) { | |||
| MS_LOG(DEBUG) << "Shuffle operator init picked up EOF. No more epochs."; | |||
| RETURN_IF_NOT_OK(out_connector_->SendEOF()); | |||
| return Status::OK(); | |||
| @@ -43,7 +43,7 @@ AlbumOp::AlbumOp(int32_t num_wkrs, std::string file_dir, int32_t queue_size, boo | |||
| curr_row_(0) { | |||
| // Set the column name map (base class field) | |||
| for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) { | |||
| column_name_id_map_[data_schema_->column(i).name()] = i; | |||
| column_name_id_map_[data_schema_->Column(i).Name()] = i; | |||
| } | |||
| io_block_queues_.Init(num_workers_, queue_size); | |||
| } | |||
| @@ -70,8 +70,8 @@ Status AlbumOp::PrescanEntry() { | |||
| } | |||
| MS_LOG(INFO) << "Album folder Path found: " << folder_path_ << "."; | |||
| while (dirItr->hasNext()) { | |||
| Path file = dirItr->next(); | |||
| while (dirItr->HasNext()) { | |||
| Path file = dirItr->Next(); | |||
| if (extensions_.empty() || extensions_.find(file.Extension()) != extensions_.end()) { | |||
| (void)image_rows_.push_back(file.toString().substr(dirname_offset_)); | |||
| } else { | |||
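The `hasNext()`/`next()` → `HasNext()`/`Next()` rename touches every directory walk in the source operators; the loop shape stays the same, roughly as in this sketch (members such as `folder_path_`, `extensions_` and `dirname_offset_` are taken from the AlbumOp code above):

    Path dir(folder_path_);
    std::shared_ptr<Path::DirIterator> itr = Path::DirIterator::OpenDirectory(&dir);
    RETURN_UNEXPECTED_IF_NULL(itr);                    // OpenDirectory can return nullptr
    while (itr->HasNext()) {
      Path file = itr->Next();
      if (extensions_.empty() || extensions_.find(file.Extension()) != extensions_.end()) {
        image_rows_.push_back(file.toString().substr(dirname_offset_));
      }
    }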
| @@ -192,7 +192,7 @@ Status AlbumOp::LoadStringTensor(const nlohmann::json &json_obj, int32_t col_num | |||
| Status AlbumOp::LoadIntArrayTensor(const nlohmann::json &json_obj, int32_t col_num, TensorRow *row) { | |||
| TensorPtr label; | |||
| // consider templating this function to handle all ints | |||
| if (data_schema_->column(col_num).type() == DataType::DE_INT64) { | |||
| if (data_schema_->Column(col_num).Type() == DataType::DE_INT64) { | |||
| std::vector<int64_t> data; | |||
| // Iterate over the integer list and add those values to the output shape tensor | |||
| @@ -201,7 +201,7 @@ Status AlbumOp::LoadIntArrayTensor(const nlohmann::json &json_obj, int32_t col_n | |||
| (void)std::transform(items.begin(), items.end(), std::back_inserter(data), [](it_type j) { return j.value(); }); | |||
| RETURN_IF_NOT_OK(Tensor::CreateFromVector(data, &label)); | |||
| } else if (data_schema_->column(col_num).type() == DataType::DE_INT32) { | |||
| } else if (data_schema_->Column(col_num).Type() == DataType::DE_INT32) { | |||
| std::vector<int32_t> data; | |||
| // Iterate over the integer list and add those values to the output shape tensor | |||
| @@ -212,7 +212,7 @@ Status AlbumOp::LoadIntArrayTensor(const nlohmann::json &json_obj, int32_t col_n | |||
| RETURN_IF_NOT_OK(Tensor::CreateFromVector(data, &label)); | |||
| } else { | |||
| RETURN_STATUS_UNEXPECTED("Invalid data, column type in data_schema is neither int32 nor int64, it is " + | |||
| data_schema_->column(col_num).type().ToString()); | |||
| data_schema_->Column(col_num).Type().ToString()); | |||
| } | |||
| row->push_back(std::move(label)); | |||
| return Status::OK(); | |||
| @@ -221,7 +221,7 @@ Status AlbumOp::LoadIntArrayTensor(const nlohmann::json &json_obj, int32_t col_n | |||
| Status AlbumOp::LoadFloatArrayTensor(const nlohmann::json &json_obj, int32_t col_num, TensorRow *row) { | |||
| TensorPtr float_array; | |||
| // consider templating this function to handle all float types | |||
| if (data_schema_->column(col_num).type() == DataType::DE_FLOAT64) { | |||
| if (data_schema_->Column(col_num).Type() == DataType::DE_FLOAT64) { | |||
| std::vector<double> data; | |||
| // Iterate over the float list and add those values to the output tensor | |||
| @@ -230,7 +230,7 @@ Status AlbumOp::LoadFloatArrayTensor(const nlohmann::json &json_obj, int32_t col | |||
| (void)std::transform(items.begin(), items.end(), std::back_inserter(data), [](it_type j) { return j.value(); }); | |||
| RETURN_IF_NOT_OK(Tensor::CreateFromVector(data, &float_array)); | |||
| } else if (data_schema_->column(col_num).type() == DataType::DE_FLOAT32) { | |||
| } else if (data_schema_->Column(col_num).Type() == DataType::DE_FLOAT32) { | |||
| std::vector<float> data; | |||
| // Iterate over the integer list and add those values to the output shape tensor | |||
| @@ -241,14 +241,15 @@ Status AlbumOp::LoadFloatArrayTensor(const nlohmann::json &json_obj, int32_t col | |||
| RETURN_IF_NOT_OK(Tensor::CreateFromVector(data, &float_array)); | |||
| } else { | |||
| RETURN_STATUS_UNEXPECTED("Invalid data, column type in data_schema is neither float32 nor float64, it is " + | |||
| data_schema_->column(col_num).type().ToString()); | |||
| data_schema_->Column(col_num).Type().ToString()); | |||
| } | |||
| row->push_back(std::move(float_array)); | |||
| return Status::OK(); | |||
| } | |||
| Status AlbumOp::LoadIDTensor(const std::string &file, int32_t col_num, TensorRow *row) { | |||
| if (data_schema_->column(col_num).type() == DataType::DE_STRING) { | |||
| RETURN_UNEXPECTED_IF_NULL(row); | |||
| if (data_schema_->Column(col_num).Type() == DataType::DE_STRING) { | |||
| TensorPtr id; | |||
| RETURN_IF_NOT_OK(Tensor::CreateScalar<std::string>(file, &id)); | |||
| row->push_back(std::move(id)); | |||
| @@ -266,7 +267,7 @@ Status AlbumOp::LoadIDTensor(const std::string &file, int32_t col_num, TensorRow | |||
| Status AlbumOp::LoadEmptyTensor(int32_t col_num, TensorRow *row) { | |||
| // The requested column has no value in the json, so push an empty tensor of the schema's type as a placeholder | |||
| TensorPtr empty_tensor; | |||
| RETURN_IF_NOT_OK(Tensor::CreateEmpty(TensorShape({0}), data_schema_->column(col_num).type(), &empty_tensor)); | |||
| RETURN_IF_NOT_OK(Tensor::CreateEmpty(TensorShape({0}), data_schema_->Column(col_num).Type(), &empty_tensor)); | |||
| row->push_back(std::move(empty_tensor)); | |||
| return Status::OK(); | |||
| } | |||
| @@ -277,11 +278,11 @@ Status AlbumOp::LoadEmptyTensor(int32_t col_num, TensorRow *row) { | |||
| // only be float32, seems like a weird limitation to impose | |||
| Status AlbumOp::LoadFloatTensor(const nlohmann::json &json_obj, int32_t col_num, TensorRow *row) { | |||
| TensorPtr float_tensor; | |||
| if (data_schema_->column(col_num).type() == DataType::DE_FLOAT64) { | |||
| if (data_schema_->Column(col_num).Type() == DataType::DE_FLOAT64) { | |||
| double data = json_obj; | |||
| MS_LOG(INFO) << "double found: " << json_obj << "."; | |||
| RETURN_IF_NOT_OK(Tensor::CreateScalar<double>(data, &float_tensor)); | |||
| } else if (data_schema_->column(col_num).type() == DataType::DE_FLOAT32) { | |||
| } else if (data_schema_->Column(col_num).Type() == DataType::DE_FLOAT32) { | |||
| float data1 = json_obj; | |||
| RETURN_IF_NOT_OK(Tensor::CreateScalar<float>(data1, &float_tensor)); | |||
| MS_LOG(INFO) << "float found: " << json_obj << "."; | |||
| @@ -293,11 +294,11 @@ Status AlbumOp::LoadFloatTensor(const nlohmann::json &json_obj, int32_t col_num, | |||
| // Loads a tensor with int value, we have to cast the value to type specified in the schema. | |||
| Status AlbumOp::LoadIntTensor(const nlohmann::json &json_obj, int32_t col_num, TensorRow *row) { | |||
| TensorPtr int_tensor; | |||
| if (data_schema_->column(col_num).type() == DataType::DE_INT64) { | |||
| if (data_schema_->Column(col_num).Type() == DataType::DE_INT64) { | |||
| int64_t data = json_obj; | |||
| MS_LOG(INFO) << "int64 found: " << json_obj << "."; | |||
| RETURN_IF_NOT_OK(Tensor::CreateScalar<int64_t>(data, &int_tensor)); | |||
| } else if (data_schema_->column(col_num).type() == DataType::DE_INT32) { | |||
| } else if (data_schema_->Column(col_num).Type() == DataType::DE_INT32) { | |||
| int32_t data = json_obj; | |||
| RETURN_IF_NOT_OK(Tensor::CreateScalar<int32_t>(data, &int_tensor)); | |||
| MS_LOG(INFO) << "int32 found: " << json_obj << "."; | |||
| @@ -349,35 +350,35 @@ Status AlbumOp::LoadTensorRow(row_id_type row_id, TensorRow *row) { | |||
| Status AlbumOp::loadColumnData(const std::string &file, int32_t index, nlohmann::json js, TensorRow *row) { | |||
| int32_t i = index; | |||
| // special case: the "id" column is loaded from the file path rather than from the json | |||
| if (data_schema_->column(i).name() == "id") { | |||
| if (data_schema_->Column(i).Name() == "id") { | |||
| // id is internal, special case to load from file | |||
| return LoadIDTensor(file, i, row); | |||
| } | |||
| // check whether the column key exists in the json; if not, push an empty placeholder tensor | |||
| if (js.find(data_schema_->column(i).name()) == js.end()) { | |||
| if (js.find(data_schema_->Column(i).Name()) == js.end()) { | |||
| // key not found, push an empty tensor as a placeholder | |||
| MS_LOG(INFO) << "Pushing empty tensor for column: " << data_schema_->column(i).name() << "."; | |||
| MS_LOG(INFO) << "Pushing empty tensor for column: " << data_schema_->Column(i).Name() << "."; | |||
| return LoadEmptyTensor(i, row); | |||
| } | |||
| nlohmann::json column_value = js.at(data_schema_->column(i).name()); | |||
| MS_LOG(INFO) << "This column is: " << data_schema_->column(i).name() << "."; | |||
| nlohmann::json column_value = js.at(data_schema_->Column(i).Name()); | |||
| MS_LOG(INFO) << "This column is: " << data_schema_->Column(i).Name() << "."; | |||
| bool is_array = column_value.is_array(); | |||
| // load single string | |||
| if (column_value.is_string() && data_schema_->column(i).type() == DataType::DE_STRING) { | |||
| if (column_value.is_string() && data_schema_->Column(i).Type() == DataType::DE_STRING) { | |||
| return LoadStringTensor(column_value, i, row); | |||
| } | |||
| // load string array | |||
| if (is_array && data_schema_->column(i).type() == DataType::DE_STRING) { | |||
| if (is_array && data_schema_->Column(i).Type() == DataType::DE_STRING) { | |||
| return LoadStringArrayTensor(column_value, i, row); | |||
| } | |||
| // load image file | |||
| if (column_value.is_string() && data_schema_->column(i).type() != DataType::DE_STRING) { | |||
| if (column_value.is_string() && data_schema_->Column(i).Type() != DataType::DE_STRING) { | |||
| std::string image_file_path = column_value; | |||
| return LoadImageTensor(image_file_path, i, row); | |||
| } | |||
| // load float value | |||
| bool judge_float = (data_schema_->column(i).type() == DataType::DE_FLOAT32) || | |||
| (data_schema_->column(i).type() == DataType::DE_FLOAT64); | |||
| bool judge_float = (data_schema_->Column(i).Type() == DataType::DE_FLOAT32) || | |||
| (data_schema_->Column(i).Type() == DataType::DE_FLOAT64); | |||
| if (!is_array && judge_float) { | |||
| return LoadFloatTensor(column_value, i, row); | |||
| } | |||
| @@ -387,15 +388,15 @@ Status AlbumOp::loadColumnData(const std::string &file, int32_t index, nlohmann: | |||
| } | |||
| // int value | |||
| if (!is_array && | |||
| (data_schema_->column(i).type() == DataType::DE_INT64 || data_schema_->column(i).type() == DataType::DE_INT32)) { | |||
| (data_schema_->Column(i).Type() == DataType::DE_INT64 || data_schema_->Column(i).Type() == DataType::DE_INT32)) { | |||
| return LoadIntTensor(column_value, i, row); | |||
| } | |||
| // int array | |||
| if (is_array && | |||
| (data_schema_->column(i).type() == DataType::DE_INT64 || data_schema_->column(i).type() == DataType::DE_INT32)) { | |||
| (data_schema_->Column(i).Type() == DataType::DE_INT64 || data_schema_->Column(i).Type() == DataType::DE_INT32)) { | |||
| return LoadIntArrayTensor(column_value, i, row); | |||
| } else { | |||
| MS_LOG(WARNING) << "Value type for column: " << data_schema_->column(i).name() << " is not supported."; | |||
| MS_LOG(WARNING) << "Value type for column: " << data_schema_->Column(i).Name() << " is not supported."; | |||
| return Status::OK(); | |||
| } | |||
| } | |||
| @@ -438,7 +439,7 @@ Status AlbumOp::ComputeColMap() { | |||
| // Set the column name map (base class field) | |||
| if (column_name_id_map_.empty()) { | |||
| for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) { | |||
| column_name_id_map_[data_schema_->column(i).name()] = i; | |||
| column_name_id_map_[data_schema_->Column(i).Name()] = i; | |||
| } | |||
| } else { | |||
| MS_LOG(WARNING) << "Column name map is already set!"; | |||
| @@ -258,7 +258,7 @@ Status CelebAOp::LoadTensorRow(row_id_type row_id, TensorRow *row) { | |||
| } | |||
| RETURN_IF_NOT_OK( | |||
| Tensor::CreateEmpty(TensorShape({1, (uint32_t)image_label.second.size()}), data_schema_->column(1).type(), &label)); | |||
| Tensor::CreateEmpty(TensorShape({1, (uint32_t)image_label.second.size()}), data_schema_->Column(1).Type(), &label)); | |||
| RETURN_IF_NOT_OK(label->Zero()); | |||
| for (uint32_t index = 0; index < image_label.second.size(); index++) { | |||
| if (image_label.second[index] == 1) { | |||
| @@ -294,7 +294,7 @@ Status CelebAOp::ComputeColMap() { | |||
| // Set the column name map (base class field) | |||
| if (column_name_id_map_.empty()) { | |||
| for (int32_t index = 0; index < data_schema_->NumColumns(); index++) { | |||
| column_name_id_map_[data_schema_->column(index).name()] = index; | |||
| column_name_id_map_[data_schema_->Column(index).Name()] = index; | |||
| } | |||
| } else { | |||
| MS_LOG(WARNING) << "Column name map is already set!"; | |||
| @@ -205,8 +205,8 @@ Status CifarOp::GetCifarFiles() { | |||
| Path dir_path(folder_path_); | |||
| auto dirIt = Path::DirIterator::OpenDirectory(&dir_path); | |||
| if (dirIt) { | |||
| while (dirIt->hasNext()) { | |||
| Path file = dirIt->next(); | |||
| while (dirIt->HasNext()) { | |||
| Path file = dirIt->Next(); | |||
| if (file.Extension() == kExtension) { | |||
| cifar_files_.push_back(file.toString()); | |||
| } | |||
| @@ -236,7 +236,7 @@ Status CifarOp::ParseCifarData() { | |||
| std::shared_ptr<Tensor> image_tensor; | |||
| RETURN_IF_NOT_OK(Tensor::CreateEmpty(TensorShape({kCifarImageHeight, kCifarImageWidth, kCifarImageChannel}), | |||
| data_schema_->column(0).type(), &image_tensor)); | |||
| data_schema_->Column(0).Type(), &image_tensor)); | |||
| auto itr = image_tensor->begin<uint8_t>(); | |||
| uint32_t total_pix = kCifarImageHeight * kCifarImageWidth; | |||
| for (uint32_t pix = 0; pix < total_pix; ++pix) { | |||
| @@ -369,7 +369,7 @@ Status CifarOp::ComputeColMap() { | |||
| // set the column name map (base class field) | |||
| if (column_name_id_map_.empty()) { | |||
| for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) { | |||
| column_name_id_map_[data_schema_->column(i).name()] = i; | |||
| column_name_id_map_[data_schema_->Column(i).Name()] = i; | |||
| } | |||
| } else { | |||
| MS_LOG(WARNING) << "Column name map is already set!"; | |||
| @@ -86,7 +86,7 @@ Status CocoOp::LoadTensorRow(row_id_type row_id, TensorRow *trow) { | |||
| } | |||
| std::string kImageFile = image_folder_path_ + std::string("/") + image_id; | |||
| RETURN_IF_NOT_OK(ReadImageToTensor(kImageFile, data_schema_->column(0), &image)); | |||
| RETURN_IF_NOT_OK(ReadImageToTensor(kImageFile, data_schema_->Column(0), &image)); | |||
| auto bboxRow = itr->second; | |||
| std::vector<float> bbox_row; | |||
| @@ -505,7 +505,7 @@ Status CocoOp::ComputeColMap() { | |||
| // Set the column name map (base class field) | |||
| if (column_name_id_map_.empty()) { | |||
| for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) { | |||
| column_name_id_map_[data_schema_->column(i).name()] = i; | |||
| column_name_id_map_[data_schema_->Column(i).Name()] = i; | |||
| } | |||
| } else { | |||
| MS_LOG(WARNING) << "Column name map is already set!"; | |||
| @@ -240,7 +240,7 @@ Status FlickrOp::ComputeColMap() { | |||
| // Set the column name map (base class field) | |||
| if (column_name_id_map_.empty()) { | |||
| for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) { | |||
| column_name_id_map_[data_schema_->column(i).name()] = i; | |||
| column_name_id_map_[data_schema_->Column(i).Name()] = i; | |||
| } | |||
| } else { | |||
| MS_LOG(WARNING) << "Column name map is already set!"; | |||
| @@ -156,8 +156,8 @@ Status ImageFolderOp::PrescanWorkerEntry(int32_t worker_id) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid file, failed to open folder: " + folder_name); | |||
| } | |||
| std::set<std::string> imgs; // use this for ordering | |||
| while (dirItr->hasNext()) { | |||
| Path file = dirItr->next(); | |||
| while (dirItr->HasNext()) { | |||
| Path file = dirItr->Next(); | |||
| if (extensions_.empty() || extensions_.find(file.Extension()) != extensions_.end()) { | |||
| (void)imgs.insert(file.toString().substr(dirname_offset_)); | |||
| } else { | |||
| @@ -182,8 +182,8 @@ Status ImageFolderOp::PrescanWorkerEntry(int32_t worker_id) { | |||
| Status ImageFolderOp::RecursiveWalkFolder(Path *dir) { | |||
| std::shared_ptr<Path::DirIterator> dir_itr = Path::DirIterator::OpenDirectory(dir); | |||
| RETURN_UNEXPECTED_IF_NULL(dir_itr); | |||
| while (dir_itr->hasNext()) { | |||
| Path subdir = dir_itr->next(); | |||
| while (dir_itr->HasNext()) { | |||
| Path subdir = dir_itr->Next(); | |||
| if (subdir.IsDirectory()) { | |||
| if (class_index_.empty() || | |||
| class_index_.find(subdir.toString().substr(dirname_offset_ + 1)) != class_index_.end()) { | |||
| @@ -256,8 +256,8 @@ Status ImageFolderOp::CountRowsAndClasses(const std::string &path, const std::se | |||
| std::queue<std::string> folder_paths; | |||
| std::shared_ptr<Path::DirIterator> dir_itr = Path::DirIterator::OpenDirectory(&dir); | |||
| std::unordered_set<std::string> folder_names; | |||
| while (dir_itr->hasNext()) { | |||
| Path subdir = dir_itr->next(); | |||
| while (dir_itr->HasNext()) { | |||
| Path subdir = dir_itr->Next(); | |||
| if (subdir.IsDirectory()) { | |||
| folder_paths.push(subdir.toString()); | |||
| if (!class_index.empty()) folder_names.insert(subdir.Basename()); | |||
| @@ -283,7 +283,7 @@ Status ImageFolderOp::CountRowsAndClasses(const std::string &path, const std::se | |||
| if (subdir.Exists() == false || dir_itr == nullptr) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid file, failed to open folder: " + subdir.toString()); | |||
| } | |||
| while (dir_itr->hasNext()) { | |||
| while (dir_itr->HasNext()) { | |||
| if (exts.empty() || exts.find(subdir.Extension()) != exts.end()) { | |||
| ++row_cnt; | |||
| } | |||
| @@ -298,7 +298,7 @@ Status ImageFolderOp::ComputeColMap() { | |||
| // Set the column name map (base class field) | |||
| if (column_name_id_map_.empty()) { | |||
| for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) { | |||
| column_name_id_map_[data_schema_->column(i).name()] = i; | |||
| column_name_id_map_[data_schema_->Column(i).Name()] = i; | |||
| } | |||
| } else { | |||
| MS_LOG(WARNING) << "Column name map is already set!"; | |||
| @@ -274,7 +274,7 @@ Status ManifestOp::ComputeColMap() { | |||
| // Set the column name map (base class field) | |||
| if (column_name_id_map_.empty()) { | |||
| for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) { | |||
| column_name_id_map_[data_schema_->column(i).name()] = i; | |||
| column_name_id_map_[data_schema_->Column(i).Name()] = i; | |||
| } | |||
| } else { | |||
| MS_LOG(WARNING) << "Column name map is already set!"; | |||
| @@ -113,7 +113,7 @@ Status MindRecordOp::Init() { | |||
| CHECK_FAIL_RETURN_UNEXPECTED( | |||
| colname_to_ind.find(colname) != colname_to_ind.end(), | |||
| "Invalid data, specified loading column name: " + colname + " does not exist in data file."); | |||
| RETURN_IF_NOT_OK(tmp_schema->AddColumn(data_schema_->column(colname_to_ind[colname]))); | |||
| RETURN_IF_NOT_OK(tmp_schema->AddColumn(data_schema_->Column(colname_to_ind[colname]))); | |||
| } | |||
| data_schema_ = std::move(tmp_schema); | |||
| } | |||
| @@ -271,8 +271,8 @@ Status MindRecordOp::LoadTensorRow(TensorRow *tensor_row, const std::vector<uint | |||
| } | |||
| std::shared_ptr<Tensor> tensor; | |||
| const ColDescriptor &column = data_schema_->column(i_col); | |||
| DataType type = column.type(); | |||
| const ColDescriptor &column = data_schema_->Column(i_col); | |||
| DataType type = column.Type(); | |||
| // Set shape | |||
| CHECK_FAIL_RETURN_UNEXPECTED(column_data_type_size != 0, "Found memory size of column data type is 0."); | |||
| @@ -280,8 +280,8 @@ Status MindRecordOp::LoadTensorRow(TensorRow *tensor_row, const std::vector<uint | |||
| if (type == DataType::DE_STRING) { | |||
| std::string s{data, data + n_bytes}; | |||
| RETURN_IF_NOT_OK(Tensor::CreateScalar(s, &tensor)); | |||
| } else if (column.hasShape()) { | |||
| auto new_shape = TensorShape(column.shape()); | |||
| } else if (column.HasShape()) { | |||
| auto new_shape = TensorShape(column.Shape()); | |||
| // if the numpy is null, create empty tensor shape | |||
| if (num_elements == 0) { | |||
| new_shape = TensorShape({}); | |||
| @@ -180,7 +180,7 @@ Status MnistOp::ReadImageAndLabel(std::ifstream *image_reader, std::ifstream *la | |||
| pixels[m] = (pixels[m] == 0) ? 0 : 255; | |||
| } | |||
| std::shared_ptr<Tensor> image; | |||
| RETURN_IF_NOT_OK(Tensor::CreateFromMemory(img_tensor_shape, data_schema_->column(0).type(), | |||
| RETURN_IF_NOT_OK(Tensor::CreateFromMemory(img_tensor_shape, data_schema_->Column(0).Type(), | |||
| reinterpret_cast<unsigned char *>(pixels), &image)); | |||
| image_label_pairs_.emplace_back(std::make_pair(image, labels_buf[j])); | |||
| image_path_.push_back(image_names_[index]); | |||
| @@ -225,8 +225,8 @@ Status MnistOp::WalkAllFiles() { | |||
| std::string prefix; // empty string, used to match usage = "" (default) or usage == "all" | |||
| if (usage_ == "train" || usage_ == "test") prefix = (usage_ == "test" ? test_prefix : train_prefix); | |||
| if (dir_it != nullptr) { | |||
| while (dir_it->hasNext()) { | |||
| Path file = dir_it->next(); | |||
| while (dir_it->HasNext()) { | |||
| Path file = dir_it->Next(); | |||
| std::string fname = file.Basename(); // name of the mnist file | |||
| if ((fname.find(prefix + "-images") != std::string::npos) && (fname.find(img_ext) != std::string::npos)) { | |||
| image_names_.push_back(file.toString()); | |||
| @@ -307,7 +307,7 @@ Status MnistOp::ComputeColMap() { | |||
| // set the column name map (base class field) | |||
| if (column_name_id_map_.empty()) { | |||
| for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) { | |||
| column_name_id_map_[data_schema_->column(i).name()] = i; | |||
| column_name_id_map_[data_schema_->Column(i).Name()] = i; | |||
| } | |||
| } else { | |||
| MS_LOG(WARNING) << "Column name map is already set!"; | |||
| @@ -267,8 +267,8 @@ Status RandomDataOp::CreateRandomRow(int32_t worker_id, TensorRow *new_row) { | |||
| // Create a tensor for each column, then add the tensor to the row | |||
| for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) { | |||
| const ColDescriptor current_col = data_schema_->column(i); | |||
| std::vector<dsize_t> current_shape = current_col.shape().AsVector(); | |||
| const ColDescriptor current_col = data_schema_->Column(i); | |||
| std::vector<dsize_t> current_shape = current_col.Shape().AsVector(); | |||
| std::unique_ptr<TensorShape> new_shape = nullptr; | |||
| std::unique_ptr<unsigned char[]> buf = nullptr; | |||
| std::shared_ptr<Tensor> new_tensor = nullptr; | |||
| @@ -282,7 +282,7 @@ Status RandomDataOp::CreateRandomRow(int32_t worker_id, TensorRow *new_row) { | |||
| } | |||
| new_shape = std::make_unique<TensorShape>(current_shape); | |||
| int64_t size_in_bytes = new_shape->NumOfElements() * current_col.type().SizeInBytes(); | |||
| int64_t size_in_bytes = new_shape->NumOfElements() * current_col.Type().SizeInBytes(); | |||
| // Generate random bytes of data. This may produce odd values for types like doubles, floats and bools, | |||
| // but the random data op is not too concerned about the physical data itself. | |||
| @@ -296,7 +296,7 @@ Status RandomDataOp::CreateRandomRow(int32_t worker_id, TensorRow *new_row) { | |||
| return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, "Failed to set random bytes for a tensor."); | |||
| } | |||
| RETURN_IF_NOT_OK(Tensor::CreateFromMemory(*new_shape, current_col.type(), buf.get(), &new_tensor)); | |||
| RETURN_IF_NOT_OK(Tensor::CreateFromMemory(*new_shape, current_col.Type(), buf.get(), &new_tensor)); | |||
| // Add this tensor to the tensor row for output | |||
| (*new_row).push_back(std::move(new_tensor)); | |||
| @@ -75,7 +75,7 @@ Status SamplerRT::CreateSamplerTensor(std::shared_ptr<Tensor> *sample_ids, int64 | |||
| col_desc_ = std::make_unique<ColDescriptor>("sampleIds", DataType(DataType::DE_INT64), TensorImpl::kFlexible, 1); | |||
| } | |||
| TensorShape shape(std::vector<dsize_t>(1, num_elements)); | |||
| RETURN_IF_NOT_OK(Tensor::CreateEmpty(shape, col_desc_->type(), sample_ids)); | |||
| RETURN_IF_NOT_OK(Tensor::CreateEmpty(shape, col_desc_->Type(), sample_ids)); | |||
| return Status::OK(); | |||
| } | |||
| @@ -225,7 +225,7 @@ Status TextFileOp::ComputeColMap() { | |||
| // Set the column name mapping (base class field) | |||
| if (column_name_id_map_.empty()) { | |||
| for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) { | |||
| column_name_id_map_[data_schema_->column(i).name()] = i; | |||
| column_name_id_map_[data_schema_->Column(i).Name()] = i; | |||
| } | |||
| } else { | |||
| MS_LOG(WARNING) << "Column name map is already set!"; | |||
| @@ -123,7 +123,7 @@ Status TFReaderOp::Init() { | |||
| } | |||
| if (total_rows_ == 0) { | |||
| total_rows_ = data_schema_->num_rows(); | |||
| total_rows_ = data_schema_->NumRows(); | |||
| } | |||
| if (total_rows_ < 0) { | |||
| RETURN_STATUS_UNEXPECTED( | |||
| @@ -332,12 +332,12 @@ Status TFReaderOp::LoadFile(const std::string &filename, int64_t start_offset, i | |||
| Status TFReaderOp::LoadExample(const dataengine::Example *tf_file, TensorRow *out_row) { | |||
| int32_t num_columns = data_schema_->NumColumns(); | |||
| for (int32_t col = 0; col < num_columns; ++col) { | |||
| const ColDescriptor current_col = data_schema_->column(col); | |||
| const ColDescriptor current_col = data_schema_->Column(col); | |||
| const dataengine::Features &example_features = tf_file->features(); | |||
| const google::protobuf::Map<std::string, dataengine::Feature> &feature_map = example_features.feature(); | |||
| auto iter_column = feature_map.find(current_col.name()); | |||
| auto iter_column = feature_map.find(current_col.Name()); | |||
| if (iter_column == feature_map.end()) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid parameter, column name: " + current_col.name() + " does not exist."); | |||
| RETURN_STATUS_UNEXPECTED("Invalid parameter, column name: " + current_col.Name() + " does not exist."); | |||
| } | |||
| const dataengine::Feature &column_values_list = iter_column->second; | |||
| RETURN_IF_NOT_OK(LoadFeature(out_row, column_values_list, current_col, col)); | |||
| @@ -379,7 +379,7 @@ Status TFReaderOp::LoadFeature(TensorRow *tensor_row, const dataengine::Feature | |||
| // into the tensor | |||
| TensorShape current_shape = TensorShape::CreateUnknownRankShape(); | |||
| RETURN_IF_NOT_OK(current_col.MaterializeTensorShape(num_elements, ¤t_shape)); | |||
| RETURN_IF_NOT_OK(Tensor::CreateFromMemory(current_shape, current_col.type(), data_ptr, &ts)); | |||
| RETURN_IF_NOT_OK(Tensor::CreateFromMemory(current_shape, current_col.Type(), data_ptr, &ts)); | |||
| break; | |||
| } | |||
| case dataengine::Feature::KindCase::kInt64List: { | |||
| @@ -406,10 +406,10 @@ Status TFReaderOp::LoadBytesList(const ColDescriptor ¤t_col, const dataeng | |||
| // kBytesList can map to the following DE types ONLY! | |||
| // DE_UINT8, DE_INT8 | |||
| // Must be single byte type for each element! | |||
| if (current_col.type() != DataType::DE_UINT8 && current_col.type() != DataType::DE_INT8 && | |||
| current_col.type() != DataType::DE_STRING) { | |||
| std::string err_msg = "Invalid data, invalid data type for Tensor at column: " + current_col.name() + | |||
| ", data type should be int8, uint8 or string, but got " + current_col.type().ToString(); | |||
| if (current_col.Type() != DataType::DE_UINT8 && current_col.Type() != DataType::DE_INT8 && | |||
| current_col.Type() != DataType::DE_STRING) { | |||
| std::string err_msg = "Invalid data, invalid data type for Tensor at column: " + current_col.Name() + | |||
| ", data type should be int8, uint8 or string, but got " + current_col.Type().ToString(); | |||
| RETURN_STATUS_UNEXPECTED(err_msg); | |||
| } | |||
| @@ -417,7 +417,7 @@ Status TFReaderOp::LoadBytesList(const ColDescriptor ¤t_col, const dataeng | |||
| *num_elements = bytes_list.value_size(); | |||
| if (current_col.type() == DataType::DE_STRING) { | |||
| if (current_col.Type() == DataType::DE_STRING) { | |||
| TensorShape shape = TensorShape::CreateScalar(); | |||
| RETURN_IF_NOT_OK(current_col.MaterializeTensorShape(*num_elements, &shape)); | |||
| RETURN_IF_NOT_OK(Tensor::CreateFromByteList(bytes_list, shape, tensor)); | |||
| @@ -436,14 +436,14 @@ Status TFReaderOp::LoadBytesList(const ColDescriptor ¤t_col, const dataeng | |||
| int64_t pad_size = max_size; | |||
| // if user provides a shape in the form of [-1, d1, d2, ... , dn], we need to pad to d1 * d2 * ... * dn | |||
| if (current_col.hasShape()) { | |||
| TensorShape cur_shape = current_col.shape(); | |||
| if (current_col.HasShape()) { | |||
| TensorShape cur_shape = current_col.Shape(); | |||
| if (cur_shape.Size() >= 2 && cur_shape[0] == TensorShape::kDimUnknown) { | |||
| int64_t new_pad_size = 1; | |||
| for (int i = 1; i < cur_shape.Size(); ++i) { | |||
| if (cur_shape[i] == TensorShape::kDimUnknown) { | |||
| std::string err_msg = | |||
| "Invalid data, more than one unknown dimension in the shape of column: " + current_col.name(); | |||
| "Invalid data, more than one unknown dimension in the shape of column: " + current_col.Name(); | |||
| RETURN_STATUS_UNEXPECTED(err_msg); | |||
| } | |||
| new_pad_size *= cur_shape[i]; | |||
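To make the pad-size rule concrete, a hypothetical example (numbers are illustrative): with a schema shape of [-1, 4], `cur_shape[0]` is kDimUnknown, the loop multiplies the remaining known dimensions, and the result replaces the batch-wide `max_size` as the per-element pad size.

    // schema shape [-1, 4]  ->  new_pad_size = 1 * 4 = 4  ->  pad_size = 4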
| @@ -451,7 +451,7 @@ Status TFReaderOp::LoadBytesList(const ColDescriptor ¤t_col, const dataeng | |||
| pad_size = new_pad_size; | |||
| } else { | |||
| if (cur_shape.known() && cur_shape.NumOfElements() != max_size) { | |||
| std::string err_msg = "Shape in schema's column '" + current_col.name() + "' is incorrect." + | |||
| std::string err_msg = "Shape in schema's column '" + current_col.Name() + "' is incorrect." + | |||
| "\nshape received: " + cur_shape.ToString() + | |||
| "\ntotal elements in shape received: " + std::to_string(cur_shape.NumOfElements()) + | |||
| "\nexpected total elements in shape: " + std::to_string(max_size); | |||
| @@ -463,7 +463,7 @@ Status TFReaderOp::LoadBytesList(const ColDescriptor ¤t_col, const dataeng | |||
| // know how many elements there are and the total bytes, create tensor here: | |||
| TensorShape current_shape = TensorShape::CreateScalar(); | |||
| RETURN_IF_NOT_OK(current_col.MaterializeTensorShape((*num_elements) * pad_size, ¤t_shape)); | |||
| RETURN_IF_NOT_OK(Tensor::CreateFromByteList(bytes_list, current_shape, current_col.type(), pad_size, tensor)); | |||
| RETURN_IF_NOT_OK(Tensor::CreateFromByteList(bytes_list, current_shape, current_col.Type(), pad_size, tensor)); | |||
| return Status::OK(); | |||
| } | |||
| @@ -472,9 +472,9 @@ Status TFReaderOp::LoadFloatList(const ColDescriptor ¤t_col, const dataeng | |||
| int32_t *num_elements, std::unique_ptr<float[]> *float_array) { | |||
| // KFloatList can only map to DE types: | |||
| // DE_FLOAT32 | |||
| if (current_col.type() != DataType::DE_FLOAT32) { | |||
| std::string err_msg = "Invalid data, invalid data type for Tensor at column: " + current_col.name() + | |||
| ", data type should be string, but got " + current_col.type().ToString(); | |||
| if (current_col.Type() != DataType::DE_FLOAT32) { | |||
| std::string err_msg = "Invalid data, invalid data type for Tensor at column: " + current_col.Name() + | |||
| ", data type should be string, but got " + current_col.Type().ToString(); | |||
| RETURN_STATUS_UNEXPECTED(err_msg); | |||
| } | |||
| @@ -494,26 +494,26 @@ Status TFReaderOp::LoadFloatList(const ColDescriptor ¤t_col, const dataeng | |||
| // Determines which template type to use and calls LoadIntList | |||
| Status TFReaderOp::LoadIntListSwitch(const ColDescriptor ¤t_col, const dataengine::Feature &column_values_list, | |||
| int32_t *num_elements, std::shared_ptr<Tensor> *tensor) { | |||
| if (current_col.type() == DataType::DE_UINT64) { | |||
| if (current_col.Type() == DataType::DE_UINT64) { | |||
| RETURN_IF_NOT_OK(LoadIntList<uint64_t>(current_col, column_values_list, num_elements, tensor)); | |||
| } else if (current_col.type() == DataType::DE_INT64) { | |||
| } else if (current_col.Type() == DataType::DE_INT64) { | |||
| RETURN_IF_NOT_OK(LoadIntList<int64_t>(current_col, column_values_list, num_elements, tensor)); | |||
| } else if (current_col.type() == DataType::DE_UINT32) { | |||
| } else if (current_col.Type() == DataType::DE_UINT32) { | |||
| RETURN_IF_NOT_OK(LoadIntList<uint32_t>(current_col, column_values_list, num_elements, tensor)); | |||
| } else if (current_col.type() == DataType::DE_INT32) { | |||
| } else if (current_col.Type() == DataType::DE_INT32) { | |||
| RETURN_IF_NOT_OK(LoadIntList<int32_t>(current_col, column_values_list, num_elements, tensor)); | |||
| } else if (current_col.type() == DataType::DE_UINT16) { | |||
| } else if (current_col.Type() == DataType::DE_UINT16) { | |||
| RETURN_IF_NOT_OK(LoadIntList<uint16_t>(current_col, column_values_list, num_elements, tensor)); | |||
| } else if (current_col.type() == DataType::DE_INT16) { | |||
| } else if (current_col.Type() == DataType::DE_INT16) { | |||
| RETURN_IF_NOT_OK(LoadIntList<int16_t>(current_col, column_values_list, num_elements, tensor)); | |||
| } else if (current_col.type() == DataType::DE_UINT8) { | |||
| } else if (current_col.Type() == DataType::DE_UINT8) { | |||
| RETURN_IF_NOT_OK(LoadIntList<uint8_t>(current_col, column_values_list, num_elements, tensor)); | |||
| } else if (current_col.type() == DataType::DE_INT8) { | |||
| } else if (current_col.Type() == DataType::DE_INT8) { | |||
| RETURN_IF_NOT_OK(LoadIntList<int8_t>(current_col, column_values_list, num_elements, tensor)); | |||
| } else { | |||
| std::string err_msg = "Invalid data, invalid datatype for Tensor at column: " + current_col.name() + | |||
| std::string err_msg = "Invalid data, invalid datatype for Tensor at column: " + current_col.Name() + | |||
| ", data type should be uint64, int64, uint32, int32, uint16, int16, uint8 or int8" + | |||
| ", but got " + current_col.type().ToString(); | |||
| ", but got " + current_col.Type().ToString(); | |||
| RETURN_STATUS_UNEXPECTED(err_msg); | |||
| } | |||
| @@ -525,9 +525,9 @@ Status TFReaderOp::LoadIntListSwitch(const ColDescriptor ¤t_col, const dat | |||
| template <typename T> | |||
| Status TFReaderOp::LoadIntList(const ColDescriptor ¤t_col, const dataengine::Feature &column_values_list, | |||
| int32_t *num_elements, std::shared_ptr<Tensor> *tensor) { | |||
| if (!(current_col.type().IsInt())) { | |||
| std::string err_msg = "Invalid data, invalid data type for Tensor at column: " + current_col.name() + | |||
| ", data type should be int, but got " + current_col.type().ToString(); | |||
| if (!(current_col.Type().IsInt())) { | |||
| std::string err_msg = "Invalid data, invalid data type for Tensor at column: " + current_col.Name() + | |||
| ", data type should be int, but got " + current_col.Type().ToString(); | |||
| RETURN_STATUS_UNEXPECTED(err_msg); | |||
| } | |||
| @@ -540,7 +540,7 @@ Status TFReaderOp::LoadIntList(const ColDescriptor ¤t_col, const dataengin | |||
| // know how many elements there are, create tensor here: | |||
| TensorShape current_shape = TensorShape::CreateUnknownRankShape(); | |||
| RETURN_IF_NOT_OK(current_col.MaterializeTensorShape(*num_elements, ¤t_shape)); | |||
| RETURN_IF_NOT_OK(Tensor::CreateEmpty(current_shape, current_col.type(), tensor)); | |||
| RETURN_IF_NOT_OK(Tensor::CreateEmpty(current_shape, current_col.Type(), tensor)); | |||
| int64_t i = 0; | |||
| auto it = (*tensor)->begin<T>(); | |||
| @@ -719,7 +719,7 @@ Status TFReaderOp::ComputeColMap() { | |||
| // Construct the column name map for this operator (base class field) | |||
| if (column_name_id_map_.empty()) { | |||
| for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) { | |||
| column_name_id_map_[data_schema_->column(i).name()] = i; | |||
| column_name_id_map_[data_schema_->Column(i).Name()] = i; | |||
| } | |||
| } else { | |||
| MS_LOG(WARNING) << "Column name map is already set!"; | |||
| @@ -83,8 +83,8 @@ Status VOCOp::LoadTensorRow(row_id_type row_id, TensorRow *trow) { | |||
| std::shared_ptr<Tensor> image, target; | |||
| const std::string kTargetFile = | |||
| folder_path_ + std::string(kSegmentationClassFolder) + image_id + std::string(kSegmentationExtension); | |||
| RETURN_IF_NOT_OK(ReadImageToTensor(kImageFile, data_schema_->column(0), &image)); | |||
| RETURN_IF_NOT_OK(ReadImageToTensor(kTargetFile, data_schema_->column(1), &target)); | |||
| RETURN_IF_NOT_OK(ReadImageToTensor(kImageFile, data_schema_->Column(0), &image)); | |||
| RETURN_IF_NOT_OK(ReadImageToTensor(kTargetFile, data_schema_->Column(1), &target)); | |||
| (*trow) = TensorRow(row_id, {std::move(image), std::move(target)}); | |||
| path_list = {kImageFile, kTargetFile}; | |||
| } else if (task_type_ == TaskType::Detection) { | |||
| @@ -92,7 +92,7 @@ Status VOCOp::LoadTensorRow(row_id_type row_id, TensorRow *trow) { | |||
| TensorRow annotation; | |||
| const std::string kAnnotationFile = | |||
| folder_path_ + std::string(kAnnotationsFolder) + image_id + std::string(kAnnotationExtension); | |||
| RETURN_IF_NOT_OK(ReadImageToTensor(kImageFile, data_schema_->column(0), &image)); | |||
| RETURN_IF_NOT_OK(ReadImageToTensor(kImageFile, data_schema_->Column(0), &image)); | |||
| RETURN_IF_NOT_OK(ReadAnnotationToTensor(kAnnotationFile, &annotation)); | |||
| trow->setId(row_id); | |||
| trow->push_back(std::move(image)); | |||
| @@ -326,7 +326,7 @@ Status VOCOp::ComputeColMap() { | |||
| // Set the column name map (base class field) | |||
| if (column_name_id_map_.empty()) { | |||
| for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) { | |||
| column_name_id_map_[data_schema_->column(i).name()] = i; | |||
| column_name_id_map_[data_schema_->Column(i).Name()] = i; | |||
| } | |||
| } else { | |||
| MS_LOG(WARNING) << "Column name map is already set!"; | |||
| @@ -62,6 +62,7 @@ ExecutionTree::~ExecutionTree() { | |||
| // provides it with a link to the tree. A node cannot form any relationships (parent/child) with | |||
| // other nodes unless they are associated with the same tree. | |||
| Status ExecutionTree::AssociateNode(const std::shared_ptr<DatasetOp> &op) { | |||
| RETURN_UNEXPECTED_IF_NULL(op); | |||
| // If we are already a part of the tree, no-op | |||
| if (op->tree_ == this) { | |||
| return Status::OK(); | |||
| @@ -88,6 +89,7 @@ Status ExecutionTree::AssociateNode(const std::shared_ptr<DatasetOp> &op) { | |||
| // Sets the root node of the tree | |||
| Status ExecutionTree::AssignRoot(const std::shared_ptr<DatasetOp> &op) { | |||
| RETURN_UNEXPECTED_IF_NULL(op); | |||
| // Tree must be in building state before we can assign root to it | |||
| if (tree_state_ != kDeTStateBuilding) { | |||
| std::string err_msg = | |||
| @@ -121,6 +123,9 @@ void ExecutionTree::Print(std::ostream &out, const std::shared_ptr<DatasetOp> &o | |||
| // A helper functions for doing the recursive printing | |||
| void ExecutionTree::PrintNode(std::ostream &out, const std::shared_ptr<DatasetOp> &dataset_op, std::string indent, | |||
| bool last, bool detailed) const { | |||
| if (dataset_op == nullptr) { | |||
| return; | |||
| } | |||
| // Decide which printer to use based on detailed arg. | |||
| if (!detailed) { | |||
| out << indent << "+- " << *dataset_op; | |||
| @@ -41,6 +41,7 @@ GraphDataImpl::GraphDataImpl(std::string dataset_file, int32_t num_workers, bool | |||
| GraphDataImpl::~GraphDataImpl() {} | |||
| Status GraphDataImpl::GetAllNodes(NodeType node_type, std::shared_ptr<Tensor> *out) { | |||
| RETURN_UNEXPECTED_IF_NULL(out); | |||
| auto itr = node_type_map_.find(node_type); | |||
| if (itr == node_type_map_.end()) { | |||
| std::string err_msg = "Invalid node type:" + std::to_string(node_type); | |||
| @@ -54,6 +55,7 @@ Status GraphDataImpl::GetAllNodes(NodeType node_type, std::shared_ptr<Tensor> *o | |||
| template <typename T> | |||
| Status GraphDataImpl::CreateTensorByVector(const std::vector<std::vector<T>> &data, DataType type, | |||
| std::shared_ptr<Tensor> *out) { | |||
| RETURN_UNEXPECTED_IF_NULL(out); | |||
| if (!type.IsCompatible<T>()) { | |||
| RETURN_STATUS_UNEXPECTED("Data type not compatible"); | |||
| } | |||
| @@ -96,6 +98,7 @@ Status GraphDataImpl::ComplementVector(std::vector<std::vector<T>> *data, size_t | |||
| } | |||
| Status GraphDataImpl::GetAllEdges(EdgeType edge_type, std::shared_ptr<Tensor> *out) { | |||
| RETURN_UNEXPECTED_IF_NULL(out); | |||
| auto itr = edge_type_map_.find(edge_type); | |||
| if (itr == edge_type_map_.end()) { | |||
| std::string err_msg = "Invalid edge type:" + std::to_string(edge_type); | |||
| @@ -110,6 +113,7 @@ Status GraphDataImpl::GetNodesFromEdges(const std::vector<EdgeIdType> &edge_list | |||
| if (edge_list.empty()) { | |||
| RETURN_STATUS_UNEXPECTED("Input edge_list is empty"); | |||
| } | |||
| RETURN_UNEXPECTED_IF_NULL(out); | |||
| std::vector<std::vector<NodeIdType>> node_list; | |||
| node_list.reserve(edge_list.size()); | |||
| @@ -156,6 +160,7 @@ Status GraphDataImpl::GetAllNeighbors(const std::vector<NodeIdType> &node_list, | |||
| const OutputFormat &format, std::shared_ptr<Tensor> *out) { | |||
| CHECK_FAIL_RETURN_UNEXPECTED(!node_list.empty(), "Input node_list is empty."); | |||
| RETURN_IF_NOT_OK(CheckNeighborType(neighbor_type)); | |||
| RETURN_UNEXPECTED_IF_NULL(out); | |||
| std::vector<std::vector<NodeIdType>> neighbors; | |||
| @@ -251,6 +256,7 @@ Status GraphDataImpl::GetSampledNeighbors(const std::vector<NodeIdType> &node_li | |||
| for (const auto &type : neighbor_types) { | |||
| RETURN_IF_NOT_OK(CheckNeighborType(type)); | |||
| } | |||
| RETURN_UNEXPECTED_IF_NULL(out); | |||
| std::vector<std::vector<NodeIdType>> neighbors_vec(node_list.size()); | |||
| for (size_t node_idx = 0; node_idx < node_list.size(); ++node_idx) { | |||
| std::shared_ptr<Node> input_node; | |||
| @@ -285,6 +291,7 @@ Status GraphDataImpl::NegativeSample(const std::vector<NodeIdType> &data, const | |||
| size_t *start_index, const std::unordered_set<NodeIdType> &exclude_data, | |||
| int32_t samples_num, std::vector<NodeIdType> *out_samples) { | |||
| CHECK_FAIL_RETURN_UNEXPECTED(!data.empty(), "Input data is empty."); | |||
| RETURN_UNEXPECTED_IF_NULL(start_index); | |||
| size_t index = *start_index; | |||
| for (size_t i = index; i < shuffled_ids.size(); ++i) { | |||
| ++index; | |||
| @@ -305,6 +312,7 @@ Status GraphDataImpl::GetNegSampledNeighbors(const std::vector<NodeIdType> &node | |||
| CHECK_FAIL_RETURN_UNEXPECTED(!node_list.empty(), "Input node_list is empty."); | |||
| RETURN_IF_NOT_OK(CheckSamplesNum(samples_num)); | |||
| RETURN_IF_NOT_OK(CheckNeighborType(neg_neighbor_type)); | |||
| RETURN_UNEXPECTED_IF_NULL(out); | |||
| const std::vector<NodeIdType> &all_nodes = node_type_map_[neg_neighbor_type]; | |||
| std::vector<NodeIdType> shuffled_id(all_nodes.size()); | |||
| @@ -321,9 +329,9 @@ Status GraphDataImpl::GetNegSampledNeighbors(const std::vector<NodeIdType> &node | |||
| std::vector<NodeIdType> neighbors; | |||
| RETURN_IF_NOT_OK(node->GetAllNeighbors(neg_neighbor_type, &neighbors)); | |||
| std::unordered_set<NodeIdType> exclude_nodes; | |||
| std::transform(neighbors.begin(), neighbors.end(), | |||
| std::insert_iterator<std::unordered_set<NodeIdType>>(exclude_nodes, exclude_nodes.begin()), | |||
| [](const NodeIdType node) { return node; }); | |||
| (void)std::transform(neighbors.begin(), neighbors.end(), | |||
| std::insert_iterator<std::unordered_set<NodeIdType>>(exclude_nodes, exclude_nodes.begin()), | |||
| [](const NodeIdType node) { return node; }); | |||
| neg_neighbors_vec[node_idx].emplace_back(node->id()); | |||
| if (all_nodes.size() > exclude_nodes.size()) { | |||
| while (neg_neighbors_vec[node_idx].size() < samples_num + 1) { | |||
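Several hunks also prepend `(void)` to `std::transform` calls. `std::transform` returns an iterator one past the last element written; when that value is intentionally unused, the explicit cast documents the intent and silences unused-result warnings from static analysis. A small standalone sketch of the same pattern used above to build the exclusion set:

```cpp
#include <algorithm>
#include <iostream>
#include <iterator>
#include <unordered_set>
#include <vector>

int main() {
  std::vector<int> neighbors = {3, 1, 4, 1, 5};
  std::unordered_set<int> exclude_nodes;

  // The returned insert iterator is not needed, so it is cast to void.
  (void)std::transform(neighbors.begin(), neighbors.end(),
                       std::inserter(exclude_nodes, exclude_nodes.begin()),
                       [](int id) { return id; });

  std::cout << "unique neighbors: " << exclude_nodes.size() << '\n';  // prints 4
  return 0;
}
```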
| @@ -355,6 +363,7 @@ Status GraphDataImpl::GetNegSampledNeighbors(const std::vector<NodeIdType> &node | |||
| Status GraphDataImpl::RandomWalk(const std::vector<NodeIdType> &node_list, const std::vector<NodeType> &meta_path, | |||
| float step_home_param, float step_away_param, NodeIdType default_node, | |||
| std::shared_ptr<Tensor> *out) { | |||
| RETURN_UNEXPECTED_IF_NULL(out); | |||
| RETURN_IF_NOT_OK(random_walk_.Build(node_list, meta_path, step_home_param, step_away_param, default_node)); | |||
| std::vector<std::vector<NodeIdType>> walks; | |||
| RETURN_IF_NOT_OK(random_walk_.SimulateWalk(&walks)); | |||
| @@ -363,6 +372,7 @@ Status GraphDataImpl::RandomWalk(const std::vector<NodeIdType> &node_list, const | |||
| } | |||
| Status GraphDataImpl::GetNodeDefaultFeature(FeatureType feature_type, std::shared_ptr<Feature> *out_feature) { | |||
| RETURN_UNEXPECTED_IF_NULL(out_feature); | |||
| auto itr = default_node_feature_map_.find(feature_type); | |||
| if (itr == default_node_feature_map_.end()) { | |||
| std::string err_msg = "Invalid feature type:" + std::to_string(feature_type); | |||
| @@ -374,6 +384,7 @@ Status GraphDataImpl::GetNodeDefaultFeature(FeatureType feature_type, std::share | |||
| } | |||
| Status GraphDataImpl::GetEdgeDefaultFeature(FeatureType feature_type, std::shared_ptr<Feature> *out_feature) { | |||
| RETURN_UNEXPECTED_IF_NULL(out_feature); | |||
| auto itr = default_edge_feature_map_.find(feature_type); | |||
| if (itr == default_edge_feature_map_.end()) { | |||
| std::string err_msg = "Invalid feature type:" + std::to_string(feature_type); | |||
| @@ -390,6 +401,7 @@ Status GraphDataImpl::GetNodeFeature(const std::shared_ptr<Tensor> &nodes, | |||
| RETURN_STATUS_UNEXPECTED("Input nodes is empty"); | |||
| } | |||
| CHECK_FAIL_RETURN_UNEXPECTED(!feature_types.empty(), "Input feature_types is empty"); | |||
| RETURN_UNEXPECTED_IF_NULL(out); | |||
| TensorRow tensors; | |||
| for (const auto &f_type : feature_types) { | |||
| std::shared_ptr<Feature> default_feature; | |||
| @@ -436,6 +448,7 @@ Status GraphDataImpl::GetNodeFeatureSharedMemory(const std::shared_ptr<Tensor> & | |||
| if (!nodes || nodes->Size() == 0) { | |||
| RETURN_STATUS_UNEXPECTED("Input nodes is empty"); | |||
| } | |||
| RETURN_UNEXPECTED_IF_NULL(out); | |||
| TensorShape shape = nodes->shape().AppendDim(2); | |||
| std::shared_ptr<Tensor> fea_tensor; | |||
| RETURN_IF_NOT_OK(Tensor::CreateEmpty(shape, DataType(DataType::DE_INT64), &fea_tensor)); | |||
| @@ -478,6 +491,7 @@ Status GraphDataImpl::GetEdgeFeature(const std::shared_ptr<Tensor> &edges, | |||
| RETURN_STATUS_UNEXPECTED("Input edges is empty"); | |||
| } | |||
| CHECK_FAIL_RETURN_UNEXPECTED(!feature_types.empty(), "Input feature_types is empty"); | |||
| RETURN_UNEXPECTED_IF_NULL(out); | |||
| TensorRow tensors; | |||
| for (const auto &f_type : feature_types) { | |||
| std::shared_ptr<Feature> default_feature; | |||
| @@ -520,6 +534,7 @@ Status GraphDataImpl::GetEdgeFeatureSharedMemory(const std::shared_ptr<Tensor> & | |||
| if (!edges || edges->Size() == 0) { | |||
| RETURN_STATUS_UNEXPECTED("Input edges is empty"); | |||
| } | |||
| RETURN_UNEXPECTED_IF_NULL(out); | |||
| TensorShape shape = edges->shape().AppendDim(2); | |||
| std::shared_ptr<Tensor> fea_tensor; | |||
| RETURN_IF_NOT_OK(Tensor::CreateEmpty(shape, DataType(DataType::DE_INT64), &fea_tensor)); | |||
| @@ -554,14 +569,15 @@ Status GraphDataImpl::Init() { | |||
| } | |||
| Status GraphDataImpl::GetMetaInfo(MetaInfo *meta_info) { | |||
| RETURN_UNEXPECTED_IF_NULL(meta_info); | |||
| meta_info->node_type.resize(node_type_map_.size()); | |||
| std::transform(node_type_map_.begin(), node_type_map_.end(), meta_info->node_type.begin(), | |||
| [](auto itr) { return itr.first; }); | |||
| (void)std::transform(node_type_map_.begin(), node_type_map_.end(), meta_info->node_type.begin(), | |||
| [](auto itr) { return itr.first; }); | |||
| std::sort(meta_info->node_type.begin(), meta_info->node_type.end()); | |||
| meta_info->edge_type.resize(edge_type_map_.size()); | |||
| std::transform(edge_type_map_.begin(), edge_type_map_.end(), meta_info->edge_type.begin(), | |||
| [](auto itr) { return itr.first; }); | |||
| (void)std::transform(edge_type_map_.begin(), edge_type_map_.end(), meta_info->edge_type.begin(), | |||
| [](auto itr) { return itr.first; }); | |||
| std::sort(meta_info->edge_type.begin(), meta_info->edge_type.end()); | |||
| for (const auto &node : node_type_map_) { | |||
| @@ -594,6 +610,7 @@ Status GraphDataImpl::GetMetaInfo(MetaInfo *meta_info) { | |||
| #ifdef ENABLE_PYTHON | |||
| Status GraphDataImpl::GraphInfo(py::dict *out) { | |||
| RETURN_UNEXPECTED_IF_NULL(out); | |||
| MetaInfo meta_info; | |||
| RETURN_IF_NOT_OK(GetMetaInfo(&meta_info)); | |||
| (*out)["node_type"] = py::cast(meta_info.node_type); | |||
| @@ -616,6 +633,7 @@ Status GraphDataImpl::LoadNodeAndEdge() { | |||
| } | |||
| Status GraphDataImpl::GetNodeByNodeId(NodeIdType id, std::shared_ptr<Node> *node) { | |||
| RETURN_UNEXPECTED_IF_NULL(node); | |||
| auto itr = node_id_map_.find(id); | |||
| if (itr == node_id_map_.end()) { | |||
| std::string err_msg = "Invalid node id:" + std::to_string(id); | |||
| @@ -627,6 +645,7 @@ Status GraphDataImpl::GetNodeByNodeId(NodeIdType id, std::shared_ptr<Node> *node | |||
| } | |||
| Status GraphDataImpl::GetEdgeByEdgeId(EdgeIdType id, std::shared_ptr<Edge> *edge) { | |||
| RETURN_UNEXPECTED_IF_NULL(edge); | |||
| auto itr = edge_id_map_.find(id); | |||
| if (itr == edge_id_map_.end()) { | |||
| std::string err_msg = "Invalid edge id:" + std::to_string(id); | |||
| @@ -682,6 +701,7 @@ Status GraphDataImpl::RandomWalkBase::Build(const std::vector<NodeIdType> &node_ | |||
| } | |||
| Status GraphDataImpl::RandomWalkBase::Node2vecWalk(const NodeIdType &start_node, std::vector<NodeIdType> *walk_path) { | |||
| RETURN_UNEXPECTED_IF_NULL(walk_path); | |||
| // Simulate a random walk starting from start node. | |||
| auto walk = std::vector<NodeIdType>(1, start_node); // walk is a vector | |||

| // walk simulate | |||
| @@ -722,6 +742,7 @@ Status GraphDataImpl::RandomWalkBase::Node2vecWalk(const NodeIdType &start_node, | |||
| } | |||
| Status GraphDataImpl::RandomWalkBase::SimulateWalk(std::vector<std::vector<NodeIdType>> *walks) { | |||
| RETURN_UNEXPECTED_IF_NULL(walks); | |||
| for (int32_t i = 0; i < num_walks_; ++i) { | |||
| for (const auto &node : node_list_) { | |||
| std::vector<NodeIdType> walk; | |||
| @@ -734,6 +755,7 @@ Status GraphDataImpl::RandomWalkBase::SimulateWalk(std::vector<std::vector<NodeI | |||
| Status GraphDataImpl::RandomWalkBase::GetNodeProbability(const NodeIdType &node_id, const NodeType &node_type, | |||
| std::shared_ptr<StochasticIndex> *node_probability) { | |||
| RETURN_UNEXPECTED_IF_NULL(node_probability); | |||
| // Generate alias nodes | |||
| std::shared_ptr<Node> node; | |||
| RETURN_IF_NOT_OK(graph_->GetNodeByNodeId(node_id, &node)); | |||
| @@ -749,6 +771,7 @@ Status GraphDataImpl::RandomWalkBase::GetNodeProbability(const NodeIdType &node_ | |||
| Status GraphDataImpl::RandomWalkBase::GetEdgeProbability(const NodeIdType &src, const NodeIdType &dst, | |||
| uint32_t meta_path_index, | |||
| std::shared_ptr<StochasticIndex> *edge_probability) { | |||
| RETURN_UNEXPECTED_IF_NULL(edge_probability); | |||
| // Get the alias edge setup lists for a given edge. | |||
| std::shared_ptr<Node> src_node; | |||
| RETURN_IF_NOT_OK(graph_->GetNodeByNodeId(src, &src_node)); | |||
| @@ -760,6 +783,8 @@ Status GraphDataImpl::RandomWalkBase::GetEdgeProbability(const NodeIdType &src, | |||
| std::vector<NodeIdType> dst_neighbors; | |||
| RETURN_IF_NOT_OK(dst_node->GetAllNeighbors(meta_path_[meta_path_index + 1], &dst_neighbors, true)); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(step_home_param_ != 0, "Invalid data, step home parameter can't be zero."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(step_away_param_ != 0, "Invalid data, step away parameter can't be zero."); | |||
| std::sort(dst_neighbors.begin(), dst_neighbors.end()); | |||
| std::vector<float> non_normalized_probability; | |||
| for (const auto &dst_nbr : dst_neighbors) { | |||
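The two checks added above guard the divisions by the node2vec parameters: `step_home_param_` acts as the return parameter p and `step_away_param_` as the in-out parameter q. A hedged sketch of the standard node2vec weighting that code like this computes for each candidate neighbor, assuming unit edge weights (the helper below is illustrative, not the MindSpore implementation):

```cpp
#include <algorithm>
#include <vector>

// Unnormalized node2vec transition weight for stepping from dst to nbr,
// given that the walk arrived at dst from src (unit edge weights assumed).
float TransitionWeight(int src, int nbr, const std::vector<int> &src_neighbors,
                       float p /* step_home_param */, float q /* step_away_param */) {
  if (nbr == src) {
    return 1.0f / p;  // step back to the previous node
  }
  if (std::find(src_neighbors.begin(), src_neighbors.end(), nbr) != src_neighbors.end()) {
    return 1.0f;      // nbr is at distance 1 from src
  }
  return 1.0f / q;    // nbr moves the walk further away from src
}
```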
| @@ -17,6 +17,8 @@ | |||
| #include "minddata/dataset/engine/gnn/graph_shared_memory.h" | |||
| #include <string> | |||
| #include "debug/common.h" | |||
| #include "utils/ms_utils.h" | |||
| #include "minddata/dataset/util/log_adapter.h" | |||
| namespace mindspore { | |||
| @@ -51,7 +53,9 @@ GraphSharedMemory::~GraphSharedMemory() { | |||
| Status GraphSharedMemory::CreateSharedMemory() { | |||
| if (memory_key_ == -1) { | |||
| // ftok to generate unique key | |||
| memory_key_ = ftok(mr_file_.data(), kGnnSharedMemoryId); | |||
| auto realpath = Common::GetRealPath(mr_file_); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(realpath.has_value(), "Get real path failed, path=" + mr_file_); | |||
| memory_key_ = ftok(common::SafeCStr(realpath.value()), kGnnSharedMemoryId); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(memory_key_ != -1, "Failed to get key of shared memory. file_name:" + mr_file_); | |||
| std::stringstream stream; | |||
| stream << std::hex << memory_key_; | |||
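`ftok` derives a System V IPC key from the identity (device and inode) of an existing file, which is why the patch resolves `mr_file_` to a real path first: a missing or unresolvable path makes `ftok` return -1. A minimal Linux sketch of the same sequence with plain POSIX calls; the file name and project id below are arbitrary examples.

```cpp
#include <sys/ipc.h>
#include <sys/shm.h>
#include <climits>
#include <cstdio>
#include <cstdlib>

int main() {
  // The file must already exist; 65 is an arbitrary nonzero project id.
  char real_path[PATH_MAX];
  if (realpath("/tmp/graph_data.mindrecord", real_path) == nullptr) {
    std::perror("realpath");
    return 1;
  }
  key_t key = ftok(real_path, 65);
  if (key == -1) {
    std::perror("ftok");
    return 1;
  }
  // Create (or attach to) a 4 KB shared-memory segment keyed by that file.
  int shmid = shmget(key, 4096, IPC_CREAT | 0600);
  if (shmid == -1) {
    std::perror("shmget");
    return 1;
  }
  std::printf("shared memory id: %d (key 0x%x)\n", shmid, static_cast<unsigned>(key));
  return 0;
}
```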
| @@ -89,6 +93,7 @@ Status GraphSharedMemory::DeleteSharedMemory() { | |||
| Status GraphSharedMemory::SharedMemoryImpl(const int &shmflg) { | |||
| // shmget returns an identifier in shmid | |||
| CHECK_FAIL_RETURN_UNEXPECTED(memory_size_ >= 0, "Invalid memory size, should not be negative."); | |||
| int shmid = shmget(memory_key_, memory_size_, shmflg); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(shmid != -1, "Failed to get shared memory. key=0x" + memory_key_str_); | |||
| @@ -103,6 +108,7 @@ Status GraphSharedMemory::SharedMemoryImpl(const int &shmflg) { | |||
| Status GraphSharedMemory::InsertData(const uint8_t *data, int64_t len, int64_t *offset) { | |||
| CHECK_FAIL_RETURN_UNEXPECTED(data, "Input data is nullptr."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(len > 0, "Input len is invalid."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(offset, "Input offset is nullptr."); | |||
| std::lock_guard<std::mutex> lck(mutex_); | |||
| CHECK_FAIL_RETURN_UNEXPECTED((memory_size_ - memory_offset_ >= len), | |||
| @@ -46,6 +46,7 @@ class GpuItemConnector : public Connector<std::vector<device::DataItemGpu>> { | |||
| } | |||
| Status Pop(int32_t worker_id, std::vector<device::DataItemGpu> *result) noexcept override { | |||
| RETURN_UNEXPECTED_IF_NULL(result); | |||
| { | |||
| MS_ASSERT(worker_id < num_consumers_); | |||
| std::unique_lock<std::mutex> lock(m_); | |||
| @@ -30,6 +30,7 @@ namespace dataset { | |||
| // Helper function to compute a default shuffle size | |||
| Status ComputeShuffleSize(int64_t num_files, int64_t num_devices, int64_t num_rows, int64_t total_rows, | |||
| int64_t *shuffle_size) { | |||
| RETURN_UNEXPECTED_IF_NULL(shuffle_size); | |||
| const int64_t average_files_multiplier = 4; | |||
| const int64_t shuffle_max = 10000; | |||
| int64_t avg_rows_per_file = 0; | |||
| @@ -59,6 +60,7 @@ Status ComputeShuffleSize(int64_t num_files, int64_t num_devices, int64_t num_ro | |||
| // Helper function to inject a shuffle operator over top of current operator being built | |||
| Status AddShuffleOp(int64_t num_files, int64_t num_devices, int64_t num_rows, int64_t total_rows, | |||
| int32_t connector_que_size, std::shared_ptr<DatasetOp> *shuffle_op) { | |||
| RETURN_UNEXPECTED_IF_NULL(shuffle_op); | |||
| int64_t shuffle_size = 0; | |||
| RETURN_IF_NOT_OK(ComputeShuffleSize(num_files, num_devices, num_rows, total_rows, &shuffle_size)); | |||
| MS_LOG(INFO) << "Dataset::AddShuffleOp - num_rows: " << num_rows << ", shuffle_size: " << shuffle_size; | |||
| @@ -59,6 +59,7 @@ void MapNode::Print(std::ostream &out) const { | |||
| } | |||
| Status MapNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ops) { | |||
| RETURN_UNEXPECTED_IF_NULL(node_ops); | |||
| std::vector<std::shared_ptr<TensorOp>> tensor_ops; | |||
| // Build tensorOp from tensorOperation vector | |||
| @@ -131,12 +132,16 @@ Status MapNode::ValidateParams() { | |||
| // Visitor accepting method for IRNodePass | |||
| Status MapNode::Accept(IRNodePass *const p, bool *const modified) { | |||
| RETURN_UNEXPECTED_IF_NULL(p); | |||
| RETURN_UNEXPECTED_IF_NULL(modified); | |||
| // Downcast shared pointer then call visitor | |||
| return p->Visit(shared_from_base<MapNode>(), modified); | |||
| } | |||
| // Visitor accepting method for IRNodePass | |||
| Status MapNode::AcceptAfter(IRNodePass *const p, bool *const modified) { | |||
| RETURN_UNEXPECTED_IF_NULL(p); | |||
| RETURN_UNEXPECTED_IF_NULL(modified); | |||
| // Downcast shared pointer then call visitor | |||
| return p->VisitAfter(shared_from_base<MapNode>(), modified); | |||
| } | |||
| @@ -147,6 +152,7 @@ void MapNode::setOperations(const std::vector<std::shared_ptr<TensorOperation>> | |||
| std::vector<std::shared_ptr<TensorOperation>> MapNode::operations() { return operations_; } | |||
| Status MapNode::to_json(nlohmann::json *out_json) { | |||
| RETURN_UNEXPECTED_IF_NULL(out_json); | |||
| nlohmann::json args; | |||
| args["num_parallel_workers"] = num_workers_; | |||
| args["input_columns"] = input_columns_; | |||
| @@ -160,6 +166,7 @@ Status MapNode::to_json(nlohmann::json *out_json) { | |||
| std::vector<nlohmann::json> ops; | |||
| std::vector<int32_t> cbs; | |||
| for (auto op : operations_) { | |||
| RETURN_UNEXPECTED_IF_NULL(op); | |||
| nlohmann::json op_args; | |||
| RETURN_IF_NOT_OK(op->to_json(&op_args)); | |||
| if (op->Name() == "PyFuncOp") { | |||
| @@ -172,8 +179,8 @@ Status MapNode::to_json(nlohmann::json *out_json) { | |||
| } | |||
| } | |||
| args["operations"] = ops; | |||
| std::transform(callbacks_.begin(), callbacks_.end(), std::back_inserter(cbs), | |||
| [](std::shared_ptr<DSCallback> cb) -> int32_t { return cb->step_size(); }); | |||
| (void)std::transform(callbacks_.begin(), callbacks_.end(), std::back_inserter(cbs), | |||
| [](std::shared_ptr<DSCallback> cb) -> int32_t { return cb != nullptr ? cb->step_size() : 0; }); | |||
| args["callback"] = cbs; | |||
| *out_json = args; | |||
| return Status::OK(); | |||
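For reference, the JSON object assembled by `MapNode::to_json` carries the keys visible in this hunk (`num_parallel_workers`, `input_columns`, `operations`, `callback`, ...). A tiny nlohmann::json sketch of building a comparable object; the field values and the operation entry are made up for illustration only.

```cpp
#include <nlohmann/json.hpp>
#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

int main() {
  nlohmann::json args;
  args["num_parallel_workers"] = 8;
  args["input_columns"] = std::vector<std::string>{"image"};

  std::vector<nlohmann::json> ops;
  nlohmann::json op_args;
  op_args["tensor_op_name"] = "Decode";  // illustrative entry, not the real schema
  ops.push_back(op_args);
  args["operations"] = ops;

  args["callback"] = std::vector<int32_t>{};  // empty when no callbacks are registered
  std::cout << args.dump(2) << '\n';
  return 0;
}
```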
| @@ -106,8 +106,8 @@ Status AlbumNode::GetDatasetSize(const std::shared_ptr<DatasetSizeGetter> &size_ | |||
| } | |||
| std::set<std::string> extensions = {".json", ".JSON"}; | |||
| while (dirItr->hasNext()) { | |||
| Path file = dirItr->next(); | |||
| while (dirItr->HasNext()) { | |||
| Path file = dirItr->Next(); | |||
| if (extensions.empty() || extensions.find(file.Extension()) != extensions.end()) { | |||
| num_rows += 1; | |||
| } | |||
| @@ -73,9 +73,9 @@ Status GeneratorNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ | |||
| RETURN_IF_NOT_OK(data_schema->LoadSchemaString(schema_json_string, {})); | |||
| for (int32_t i = 0; i < data_schema->NumColumns(); i++) { | |||
| ColDescriptor col = data_schema->column(i); | |||
| column_names_.push_back(col.name()); | |||
| column_types_.push_back((col.type())); | |||
| ColDescriptor col = data_schema->Column(i); | |||
| column_names_.push_back(col.Name()); | |||
| column_types_.push_back((col.Type())); | |||
| } | |||
| } | |||
| std::shared_ptr<SamplerRT> sampler_rt = nullptr; | |||
| @@ -131,7 +131,7 @@ Status RandomNode::GetDatasetSize(const std::shared_ptr<DatasetSizeGetter> &size | |||
| *dataset_size = dataset_size_; | |||
| return Status::OK(); | |||
| } | |||
| int64_t num_rows = total_rows_ != 0 ? total_rows_ : data_schema_->num_rows(); | |||
| int64_t num_rows = total_rows_ != 0 ? total_rows_ : data_schema_->NumRows(); | |||
| *dataset_size = num_rows; | |||
| dataset_size_ = *dataset_size; | |||
| return Status::OK(); | |||
| @@ -22,6 +22,7 @@ | |||
| #include <utility> | |||
| #include <vector> | |||
| #include "debug/common.h" | |||
| #include "minddata/dataset/engine/datasetops/source/tf_reader_op.h" | |||
| #include "minddata/dataset/engine/jagged_connector.h" | |||
| #include "minddata/dataset/engine/opt/pass.h" | |||
| @@ -58,13 +59,9 @@ Status TFRecordNode::ValidateParams() { | |||
| } | |||
| for (const auto &f : dataset_files_) { | |||
| Path dataset_file(f); | |||
| if (!dataset_file.Exists()) { | |||
| std::string err_msg = "TFRecordNode: dataset file: [" + f + "] is invalid or does not exist."; | |||
| MS_LOG(ERROR) << err_msg; | |||
| return Status(StatusCode::kMDSyntaxError, __LINE__, __FILE__, err_msg); | |||
| } | |||
| auto realpath = Common::GetRealPath(f); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(realpath.has_value(), | |||
| "TFRecordNode: dataset file: [" + f + "] is invalid or does not exist."); | |||
| } | |||
| if (num_samples_ < 0) { | |||
| @@ -107,6 +104,7 @@ Status TFRecordNode::ValidateParams() { | |||
| // Function to build TFRecordNode | |||
| Status TFRecordNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ops) { | |||
| RETURN_UNEXPECTED_IF_NULL(node_ops); | |||
| // Sort the datasets file in a lexicographical order | |||
| std::vector<std::string> sorted_dir_files = dataset_files_; | |||
| std::sort(sorted_dir_files.begin(), sorted_dir_files.end()); | |||
| @@ -165,6 +163,8 @@ Status TFRecordNode::GetShardId(int32_t *const shard_id) { | |||
| // Get Dataset size | |||
| Status TFRecordNode::GetDatasetSize(const std::shared_ptr<DatasetSizeGetter> &size_getter, bool estimate, | |||
| int64_t *dataset_size) { | |||
| RETURN_UNEXPECTED_IF_NULL(size_getter); | |||
| RETURN_UNEXPECTED_IF_NULL(dataset_size); | |||
| if (dataset_size_ > 0) { | |||
| *dataset_size = dataset_size_; | |||
| return Status::OK(); | |||
| @@ -189,6 +189,7 @@ Status TFRecordNode::GetDatasetSize(const std::shared_ptr<DatasetSizeGetter> &si | |||
| // Get the file list of the specific shard ID | |||
| Status TFRecordNode::GetShardFileList(std::vector<std::string> *shard_filenames) { | |||
| RETURN_UNEXPECTED_IF_NULL(shard_filenames); | |||
| if (!shard_filenames->empty()) { | |||
| RETURN_STATUS_UNEXPECTED("The initial file list must be empty."); | |||
| } | |||
| @@ -201,6 +202,7 @@ Status TFRecordNode::GetShardFileList(std::vector<std::string> *shard_filenames) | |||
| } | |||
| Status TFRecordNode::to_json(nlohmann::json *out_json) { | |||
| RETURN_UNEXPECTED_IF_NULL(out_json); | |||
| nlohmann::json args; | |||
| args["num_parallel_workers"] = num_workers_; | |||
| args["dataset_files"] = dataset_files_; | |||
| @@ -262,6 +264,7 @@ Status TFRecordNode::from_json(nlohmann::json json_obj, std::shared_ptr<DatasetN | |||
| // inherit this sampler from the leaf, providing sampling support from the caching layer. | |||
| // That is why we setup the sampler for a leaf node that does not use sampling. | |||
| Status TFRecordNode::SetupSamplerForCache(std::shared_ptr<SamplerObj> *sampler) { | |||
| RETURN_UNEXPECTED_IF_NULL(sampler); | |||
| bool shuffle_files = (shuffle_ == ShuffleMode::kGlobal || shuffle_ == ShuffleMode::kFiles); | |||
| *sampler = SelectSampler(num_samples_, shuffle_files, num_shards_, shard_id_); | |||
| return Status::OK(); | |||
| @@ -281,12 +284,16 @@ Status TFRecordNode::MakeSimpleProducer() { | |||
| // Visitor accepting method for IRNodePass | |||
| Status TFRecordNode::Accept(IRNodePass *p, bool *const modified) { | |||
| RETURN_UNEXPECTED_IF_NULL(p); | |||
| RETURN_UNEXPECTED_IF_NULL(modified); | |||
| // Downcast shared pointer then call visitor | |||
| return p->Visit(shared_from_base<TFRecordNode>(), modified); | |||
| } | |||
| // Visitor accepting method for IRNodePass | |||
| Status TFRecordNode::AcceptAfter(IRNodePass *const p, bool *const modified) { | |||
| RETURN_UNEXPECTED_IF_NULL(p); | |||
| RETURN_UNEXPECTED_IF_NULL(modified); | |||
| // Downcast shared pointer then call visitor | |||
| return p->VisitAfter(shared_from_base<TFRecordNode>(), modified); | |||
| } | |||
| @@ -43,6 +43,7 @@ class JaggedConnector : public Connector<TensorRow> { | |||
| } | |||
| Status Pop(int32_t worker_id, TensorRow *result) noexcept override { | |||
| RETURN_UNEXPECTED_IF_NULL(result); | |||
| { | |||
| MS_ASSERT(worker_id < num_consumers_); | |||
| std::unique_lock<std::mutex> lock(m_); | |||
| @@ -53,7 +54,7 @@ class JaggedConnector : public Connector<TensorRow> { | |||
| } | |||
| RETURN_IF_NOT_OK(queues_[pop_from_]->PopFront(result)); | |||
| if (result->eoe()) { | |||
| if (result != nullptr && result->eoe()) { | |||
| is_queue_finished_[pop_from_] = true; | |||
| } | |||
| @@ -32,12 +32,14 @@ namespace mindspore { | |||
| namespace dataset { | |||
| Status TensorOpFusionPass::Visit(std::shared_ptr<MapNode> node, bool *const modified) { | |||
| RETURN_UNEXPECTED_IF_NULL(node); | |||
| RETURN_UNEXPECTED_IF_NULL(modified); | |||
| std::vector<std::shared_ptr<TensorOperation>> ops = node->operations(); | |||
| // start temporary code, to deal with pre-built TensorOperation | |||
| std::vector<std::string> pattern = {kDecodeOp, kRandomCropAndResizeOp}; | |||
| auto itr = std::search(ops.begin(), ops.end(), pattern.begin(), pattern.end(), | |||
| [](auto op, const std::string &nm) { return op->Name() == nm; }); | |||
| [](auto op, const std::string &nm) { return op != nullptr ? op->Name() == nm : false; }); | |||
| if (itr != ops.end()) { | |||
| MS_LOG(WARNING) << "Fusing pre-build Decode and RandomCropResize into one pre-build."; | |||
| auto fused_op = dynamic_cast<RandomCropAndResizeOp *>((*(itr + 1))->Build().get()); | |||
| @@ -52,7 +54,7 @@ Status TensorOpFusionPass::Visit(std::shared_ptr<MapNode> node, bool *const modi | |||
| // logic below is for non-prebuilt TensorOperation | |||
| pattern = {vision::kDecodeOperation, vision::kRandomResizedCropOperation}; | |||
| itr = std::search(ops.begin(), ops.end(), pattern.begin(), pattern.end(), | |||
| [](auto op, const std::string &nm) { return op->Name() == nm; }); | |||
| [](auto op, const std::string &nm) { return op != nullptr ? op->Name() == nm : false; }); | |||
| // return here if no pattern is found | |||
| RETURN_OK_IF_TRUE(itr == ops.end()); | |||
| @@ -27,6 +27,8 @@ namespace dataset { | |||
| // this will become the RootNode:DatasetNode when it is turned on | |||
| Status AutoWorkerPass::RunOnTree(std::shared_ptr<DatasetNode> root_ir, bool *const modified) { | |||
| RETURN_UNEXPECTED_IF_NULL(root_ir); | |||
| RETURN_UNEXPECTED_IF_NULL(modified); | |||
| uint8_t config = GlobalContext::config_manager()->get_auto_worker_config(); | |||
| OpWeightPass pass(kOpWeightConfigs[config < kOpWeightConfigs.size() ? config : 0]); | |||
| @@ -46,6 +48,8 @@ Status AutoWorkerPass::RunOnTree(std::shared_ptr<DatasetNode> root_ir, bool *con | |||
| // get the maximum weight of all the ops, this value is used to ensure the ratio of num_workers between ops | |||
| float max_weight = 0; | |||
| for (const auto &p : pass.weight_profile_) max_weight = std::max(max_weight, p.second); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(max_weight != 0, "Internal error, divide by zero is not allowed."); | |||
| RETURN_IF_NOT_OK(pass.Run(root_ir, modified)); | |||
| constexpr size_t max_num_ops = 3; | |||
| if (pass.parallel_ops_.size() > max_num_ops) { | |||
| @@ -53,6 +57,7 @@ Status AutoWorkerPass::RunOnTree(std::shared_ptr<DatasetNode> root_ir, bool *con | |||
| << "1 batch and 1 map. AutoNumWorker may not be optimal for usage on complex pipelines."; | |||
| } | |||
| CHECK_FAIL_RETURN_UNEXPECTED(pass.weight_sum_ != 0, "Internal error, divide by zero is not allowed."); | |||
| for (auto &p : pass.parallel_ops_) { | |||
| // get the num worker via the weight ratio | |||
| int32_t num_workers = std::ceil((thread_cnt_ * p.second) / (pass.weight_sum_ * num_shards)); | |||
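The checks on `max_weight` and `weight_sum_` protect the proportional split visible at the end of this hunk, where each parallel op receives a share of the thread budget in proportion to its weight. A worked example with made-up weights (the real values come from `kOpWeightConfigs`):

```cpp
#include <cmath>
#include <iostream>
#include <map>
#include <string>

int main() {
  // Illustrative weights only; the actual profile is selected from kOpWeightConfigs.
  std::map<std::string, float> weights = {{"LeafOp", 1.0f}, {"MapOp", 2.0f}, {"BatchOp", 1.0f}};
  float weight_sum = 0.0f;
  for (const auto &p : weights) weight_sum += p.second;  // 4.0

  int thread_cnt = 12, num_shards = 1;
  for (const auto &p : weights) {
    int num_workers = static_cast<int>(std::ceil((thread_cnt * p.second) / (weight_sum * num_shards)));
    std::cout << p.first << " -> " << num_workers << " workers\n";  // MapOp gets 6, the others 3
  }
  return 0;
}
```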
| @@ -33,6 +33,8 @@ RepeatPass::RepeatPass() | |||
| // Identifies the subtree below this node as being in a repeated path of the tree. | |||
| Status RepeatPass::Visit(std::shared_ptr<RepeatNode> node, bool *const modified) { | |||
| RETURN_UNEXPECTED_IF_NULL(node); | |||
| RETURN_UNEXPECTED_IF_NULL(modified); | |||
| // If this is an infinite repeat under infinite repeat/epoch, adjust current num_repeats_. | |||
| // Otherwise, after multiplication it would become positive and this repeat wouldn't run infinitely. | |||
| if (node->Count() == DatasetOp::kInfiniteRepeat && num_repeats_ < 0) { | |||
| @@ -56,6 +58,8 @@ Status RepeatPass::Visit(std::shared_ptr<RepeatNode> node, bool *const modified) | |||
| // Identifies the subtree below this node as being in a repeated path of the tree. | |||
| Status RepeatPass::Visit(std::shared_ptr<EpochCtrlNode> node, bool *const modified) { | |||
| RETURN_UNEXPECTED_IF_NULL(node); | |||
| RETURN_UNEXPECTED_IF_NULL(modified); | |||
| // Get the total number of epochs from the EpochCtrlOp parameter | |||
| num_epochs_ = node->Count(); | |||
| // Every node below this EpochCtrlOp should be repeated for num_epochs_ times. | |||
| @@ -69,6 +73,8 @@ Status RepeatPass::Visit(std::shared_ptr<EpochCtrlNode> node, bool *const modifi | |||
| #ifndef ENABLE_ANDROID | |||
| // Identifies the subtree below this node as being in a cache merge path | |||
| Status RepeatPass::Visit(std::shared_ptr<CacheMergeNode> node, bool *const modified) { | |||
| RETURN_UNEXPECTED_IF_NULL(node); | |||
| RETURN_UNEXPECTED_IF_NULL(modified); | |||
| // Turn on the flag that we're under a merge op | |||
| is_merge_ = true; | |||
| return Status::OK(); | |||
| @@ -76,6 +82,8 @@ Status RepeatPass::Visit(std::shared_ptr<CacheMergeNode> node, bool *const modif | |||
| // Identifies the subtree below this node as being cached | |||
| Status RepeatPass::Visit(std::shared_ptr<CacheNode> node, bool *const modified) { | |||
| RETURN_UNEXPECTED_IF_NULL(node); | |||
| RETURN_UNEXPECTED_IF_NULL(modified); | |||
| // Turn on the flag that we're under a merge op | |||
| is_cached_ = true; | |||
| return Status::OK(); | |||
| @@ -84,6 +92,8 @@ Status RepeatPass::Visit(std::shared_ptr<CacheNode> node, bool *const modified) | |||
| // Hooks up any identified eoe nodes under this repeat. | |||
| Status RepeatPass::VisitAfter(std::shared_ptr<RepeatNode> node, bool *const modified) { | |||
| RETURN_UNEXPECTED_IF_NULL(node); | |||
| RETURN_UNEXPECTED_IF_NULL(modified); | |||
| // We are a repeat op in the descendant tree of a merge op, then we take the saved lookup up | |||
| // and set its total repeats. It is important that the op is removed from the save area, | |||
| // because the merge op above us may also take action on it later for a different case when | |||
| @@ -103,12 +113,16 @@ Status RepeatPass::VisitAfter(std::shared_ptr<RepeatNode> node, bool *const modi | |||
| // The total repeats of nodes above this Repeat(n) have nothing to do with this RepeatOp's parameter n. | |||
| // But num_repeats_ has been multiplied by n during this Repeat(n)'s PreRunOnNode, | |||
| // so we divide num_repeats_ by n to be able to correctly set total repeats for nodes above this RepeatOp. | |||
| CHECK_FAIL_RETURN_UNEXPECTED(node->Count() != 0, "Invalid data, repeat count can't be 0."); | |||
| num_repeats_ /= node->Count(); | |||
| return Status::OK(); | |||
| } | |||
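The comment above describes the bookkeeping that makes the added zero-check necessary: descending through Repeat(n) multiplies `num_repeats_` by n, and the post-visit divides it back, so a zero count would both corrupt the state and divide by zero. A short worked example of the same arithmetic, standalone rather than the actual pass:

```cpp
#include <cassert>

int main() {
  int num_epochs = 2;
  int num_repeats = num_epochs;  // state when the walk reaches a Repeat(3) node

  int repeat_count = 3;
  num_repeats *= repeat_count;   // pre-visit: ops under Repeat(3) run 6 times in total
  // ... descendants get SetTotalRepeats(6) while walking the subtree ...

  assert(repeat_count != 0);     // the guard added in this hunk
  num_repeats /= repeat_count;   // post-visit: ops above the repeat run 2 times again
  assert(num_repeats == num_epochs);
  return 0;
}
```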
| // Hooks up any identified eoe nodes under this repeat. | |||
| Status RepeatPass::VisitAfter(std::shared_ptr<EpochCtrlNode> node, bool *const modified) { | |||
| RETURN_UNEXPECTED_IF_NULL(node); | |||
| RETURN_UNEXPECTED_IF_NULL(modified); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(node->Count() != 0, "Invalid data, epoch count can't be 0."); | |||
| node->SetTotalRepeats(num_repeats_); | |||
| node->SetNumEpochs(num_epochs_); | |||
| // We finish the walk of this EpochCtrl's descendent nodes. | |||
| @@ -119,6 +133,8 @@ Status RepeatPass::VisitAfter(std::shared_ptr<EpochCtrlNode> node, bool *const m | |||
| // All operators have a flag that might be set related to the repeat and any leaf nodes need to be set up | |||
| // for use with a controlling repeat above it. | |||
| Status RepeatPass::VisitAfter(std::shared_ptr<DatasetNode> node, bool *const modified) { | |||
| RETURN_UNEXPECTED_IF_NULL(node); | |||
| RETURN_UNEXPECTED_IF_NULL(modified); | |||
| // If we are under a cache op, then save ourselves to the cached op stack. | |||
| if (is_cached_) { | |||
| AddToCachedNodeStack(node); | |||
| @@ -132,6 +148,8 @@ Status RepeatPass::VisitAfter(std::shared_ptr<DatasetNode> node, bool *const mod | |||
| #ifndef ENABLE_ANDROID | |||
| // CacheOp removes previous leaf ops and replaces them with itself | |||
| Status RepeatPass::VisitAfter(std::shared_ptr<CacheNode> node, bool *const modified) { | |||
| RETURN_UNEXPECTED_IF_NULL(node); | |||
| RETURN_UNEXPECTED_IF_NULL(modified); | |||
| is_cached_ = false; | |||
| // if we are a cache within a repeat path of the tree, then adjust the total repeats and total epochs for cached ops. | |||
| @@ -153,6 +171,8 @@ Status RepeatPass::VisitAfter(std::shared_ptr<CacheNode> node, bool *const modif | |||
| // Turns off the tracking for operations under merge op | |||
| Status RepeatPass::VisitAfter(std::shared_ptr<CacheMergeNode> node, bool *const modified) { | |||
| RETURN_UNEXPECTED_IF_NULL(node); | |||
| RETURN_UNEXPECTED_IF_NULL(modified); | |||
| // If there was not any repeat in the merge cache miss leg, then the cache_lookup | |||
| // would not have been consumed yet. In that case, we need to set its total repeats for it. | |||
| if (cache_lookup_) { | |||
| @@ -168,6 +188,8 @@ Status RepeatPass::VisitAfter(std::shared_ptr<CacheMergeNode> node, bool *const | |||
| // Saves the lookup up in case it needs to be referenced by a repeat | |||
| Status RepeatPass::VisitAfter(std::shared_ptr<CacheLookupNode> node, bool *const modified) { | |||
| RETURN_UNEXPECTED_IF_NULL(node); | |||
| RETURN_UNEXPECTED_IF_NULL(modified); | |||
| if (!node->IsLeaf()) { | |||
| // By definition, the CacheLookup must be a leaf op. Make that clear here. | |||
| RETURN_STATUS_UNEXPECTED("CacheLookupOp must be a leaf node!"); | |||
| @@ -185,6 +207,8 @@ Status RepeatPass::VisitAfter(std::shared_ptr<CacheLookupNode> node, bool *const | |||
| #endif | |||
| Status RepeatPass::VisitAfter(std::shared_ptr<TransferNode> node, bool *const modified) { | |||
| RETURN_UNEXPECTED_IF_NULL(node); | |||
| RETURN_UNEXPECTED_IF_NULL(modified); | |||
| // Set total repeats and total epochs for the TransferNode | |||
| node->SetTotalRepeats(num_epochs_); | |||
| node->SetNumEpochs(num_epochs_); | |||
| @@ -192,7 +216,12 @@ Status RepeatPass::VisitAfter(std::shared_ptr<TransferNode> node, bool *const mo | |||
| } | |||
| // Adds an operator to the cached operator stack save area | |||
| void RepeatPass::AddToCachedNodeStack(const std::shared_ptr<DatasetNode> &node) { cached_node_stacks_.push(node); } | |||
| void RepeatPass::AddToCachedNodeStack(const std::shared_ptr<DatasetNode> &node) { | |||
| if (node == nullptr) { | |||
| return; | |||
| } | |||
| cached_node_stacks_.push(node); | |||
| } | |||
| // Pops an operator from the cached operator stack save area | |||
| std::shared_ptr<DatasetNode> RepeatPass::PopFromCachedNodeStack() { | |||
| @@ -29,6 +29,10 @@ EpochCtrlPass::InjectionFinder::InjectionFinder(std::shared_ptr<DatasetNode> nod | |||
| // Performs finder work for BuildVocabOp that has special rules about epoch control injection | |||
| Status EpochCtrlPass::InjectionFinder::Visit(std::shared_ptr<RootNode> node, bool *const modified) { | |||
| RETURN_UNEXPECTED_IF_NULL(node); | |||
| RETURN_UNEXPECTED_IF_NULL(modified); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(node->Children().size() > 0, | |||
| "Invalid data, the node of child should greater than zero."); | |||
| // The injection is at the child of the root node | |||
| injection_point_ = node->Children()[0]; | |||
| num_epochs_ = node->num_epochs(); | |||
| @@ -37,6 +41,8 @@ Status EpochCtrlPass::InjectionFinder::Visit(std::shared_ptr<RootNode> node, boo | |||
| // Performs finder work for BuildVocabOp that has special rules about epoch control injection | |||
| Status EpochCtrlPass::InjectionFinder::Visit(std::shared_ptr<BuildVocabNode> node, bool *const modified) { | |||
| RETURN_UNEXPECTED_IF_NULL(node); | |||
| RETURN_UNEXPECTED_IF_NULL(modified); | |||
| injection_point_ = nullptr; | |||
| return Status::OK(); | |||
| } | |||
| @@ -44,12 +50,18 @@ Status EpochCtrlPass::InjectionFinder::Visit(std::shared_ptr<BuildVocabNode> nod | |||
| #ifndef ENABLE_ANDROID | |||
| // Performs finder work for BuildSentencePieceVocabNode that has special rules about epoch control injection | |||
| Status EpochCtrlPass::InjectionFinder::Visit(std::shared_ptr<BuildSentenceVocabNode> node, bool *const modified) { | |||
| RETURN_UNEXPECTED_IF_NULL(node); | |||
| RETURN_UNEXPECTED_IF_NULL(modified); | |||
| injection_point_ = nullptr; | |||
| return Status::OK(); | |||
| } | |||
| #endif | |||
| Status EpochCtrlPass::InjectionFinder::VisitAfter(std::shared_ptr<TransferNode> node, bool *const modified) { | |||
| RETURN_UNEXPECTED_IF_NULL(node); | |||
| RETURN_UNEXPECTED_IF_NULL(modified); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(node->Children().size() > 0, | |||
| "Invalid data, the node of child should greater than zero."); | |||
| // Assumption: There is only one TransferNode in a pipeline. This assumption is not validated here. | |||
| // Move the injection point to the child of this node. | |||
| injection_point_ = node->Children()[0]; | |||
| @@ -61,6 +73,8 @@ EpochCtrlPass::EpochCtrlPass() {} | |||
| // Runs an injection pass to inject in operators needed at the pre pass stage | |||
| Status EpochCtrlPass::RunOnTree(std::shared_ptr<DatasetNode> root_ir, bool *const modified) { | |||
| RETURN_UNEXPECTED_IF_NULL(root_ir); | |||
| RETURN_UNEXPECTED_IF_NULL(modified); | |||
| MS_LOG(INFO) << "Pre pass: Injection pass started."; | |||
| // First, run the finder to perform any injection info before we can go ahead to drive the op injection work. | |||
| @@ -53,8 +53,8 @@ json ConnectorSize::ParseOpInfo(const DatasetOp &node, const std::vector<int32_t | |||
| auto children = node.Children(); | |||
| std::vector<int32_t> children_id; | |||
| std::transform(children.begin(), children.end(), std::back_inserter(children_id), | |||
| [](std::shared_ptr<DatasetOp> op) -> int32_t { return op->id(); }); | |||
| (void)std::transform(children.begin(), children.end(), std::back_inserter(children_id), | |||
| [](const std::shared_ptr<DatasetOp> &op) -> int32_t { return op->id(); }); | |||
| if (!children_id.empty()) { | |||
| json_node["children"] = children_id; | |||
| } | |||
| @@ -29,6 +29,9 @@ namespace dataset { | |||
| // temporary helper | |||
| int ConnectorThroughput::InitNodes() { | |||
| if (tree_ == nullptr) { | |||
| return 0; | |||
| } | |||
| auto it = (*tree_).begin(); | |||
| return it.NumNodes(); | |||
| } | |||
| @@ -43,15 +46,16 @@ Status ConnectorThroughput::Sample() { | |||
| out_row_count_row[col] = cur_out_rows_count; | |||
| auto sz = timestamps_.size(); | |||
| cur_time = std::chrono::steady_clock::now(); | |||
| double dt = 0; | |||
| double data_time = 0; | |||
| if (sz > 1) { | |||
| auto _dt = std::chrono::duration_cast<std::chrono::microseconds>(timestamps_[0][sz - 1] - timestamps_[0][sz - 2]); | |||
| dt = std::chrono::duration<double>(_dt).count(); | |||
| auto full_time = | |||
| std::chrono::duration_cast<std::chrono::microseconds>(timestamps_[0][sz - 1] - timestamps_[0][sz - 2]); | |||
| data_time = std::chrono::duration<double>(full_time).count(); | |||
| } | |||
| auto prev_out_rows_count = out_row_count_table_[col][out_row_count_table_.size() - 1]; | |||
| if (dt != 0) { | |||
| if (data_time != 0) { | |||
| const int32_t multiplier = 1000; | |||
| auto thr = (cur_out_rows_count - prev_out_rows_count) / (multiplier * dt); | |||
| auto thr = (cur_out_rows_count - prev_out_rows_count) / (multiplier * data_time); | |||
| throughput_row[col] = thr; | |||
| } else { | |||
| throughput_row[col] = 0; | |||
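The renamed `data_time` holds the elapsed time between the two most recent samples (a `std::chrono::duration<double>`, i.e. seconds, built from the microsecond difference), and throughput is the row-count delta divided by that interval times a fixed multiplier. A small self-contained chrono sketch of the same measurement, reporting plain rows per second rather than the scaled unit used above:

```cpp
#include <chrono>
#include <iostream>
#include <thread>

int main() {
  using clock = std::chrono::steady_clock;
  long long prev_rows = 0;
  long long cur_rows = 0;

  auto t0 = clock::now();
  for (int i = 0; i < 100000; ++i) ++cur_rows;                        // stand-in for pipeline work
  std::this_thread::sleep_for(std::chrono::milliseconds(50));
  auto t1 = clock::now();

  double elapsed_s = std::chrono::duration<double>(t1 - t0).count();  // seconds as double
  if (elapsed_s > 0) {
    double rows_per_second = static_cast<double>(cur_rows - prev_rows) / elapsed_s;
    std::cout << rows_per_second << " rows/s\n";
  }
  return 0;
}
```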
| @@ -70,7 +74,7 @@ json ConnectorThroughput::ParseOpInfo(const DatasetOp &node, const std::vector<d | |||
| auto children = node.Children(); | |||
| std::vector<int32_t> children_id; | |||
| std::transform(children.begin(), children.end(), std::back_inserter(children_id), | |||
| [](std::shared_ptr<DatasetOp> op) -> int32_t { return op->id(); }); | |||
| [](const std::shared_ptr<DatasetOp> &op) -> int32_t { return op ? op->id() : 0; }); | |||
| json json_node; | |||
| json_node["op_id"] = node.id(); | |||
| json_node["op_type"] = node.Name(); | |||
| @@ -100,8 +104,10 @@ Status ConnectorThroughput::SaveToFile() { | |||
| int col = 0; | |||
| for (auto &node : *tree_) { | |||
| std::vector<double> throughput; | |||
| for (auto i = 0; i < throughput_.size(); i++) { | |||
| throughput.push_back(throughput_[col][i]); | |||
| if (throughput_.size() > col) { | |||
| for (auto i = 0; i < throughput_[col].size(); i++) { | |||
| throughput.push_back(throughput_[col][i]); | |||
| } | |||
| } | |||
| if (!path.Exists()) { | |||
| @@ -18,9 +18,9 @@ | |||
| #if !defined(_WIN32) && !defined(_WIN64) && !defined(__ANDROID__) && !defined(ANDROID) && !defined(__APPLE__) | |||
| #include <sys/syscall.h> | |||
| #endif | |||
| #include <algorithm> | |||
| #include <cmath> | |||
| #include <cstdio> | |||
| #include <algorithm> | |||
| #include <fstream> | |||
| #include <memory> | |||
| #include <string> | |||
| @@ -33,8 +33,8 @@ | |||
| using json = nlohmann::json; | |||
| namespace mindspore { | |||
| namespace dataset { | |||
| bool BaseCpu::fetched_all_process_shared = false; | |||
| std::unordered_map<int32_t, std::vector<pid_t>> BaseCpu::op_process_shared = {}; | |||
| bool BaseCpu::fetched_all_process_shared_ = false; | |||
| std::unordered_map<int32_t, std::vector<pid_t>> BaseCpu::op_process_shared_ = {}; | |||
| #if !defined(_WIN32) && !defined(_WIN64) && !defined(__ANDROID__) && !defined(ANDROID) && !defined(__APPLE__) | |||
| #define USING_LINUX | |||
| @@ -46,8 +46,8 @@ BaseCpu::BaseCpu() { | |||
| pre_cpu_stat_.io_stat_ = 0; | |||
| pre_cpu_stat_.idle_stat_ = 0; | |||
| pre_cpu_stat_.total_stat_ = 0; | |||
| fetched_all_process = false; | |||
| pre_fetched_state = false; | |||
| fetched_all_process_ = false; | |||
| pre_fetched_state_ = false; | |||
| cpu_processor_num_ = 0; | |||
| } | |||
| @@ -157,6 +157,7 @@ Status DeviceCpu::Collect(const ExecutionTree *tree) { | |||
| return Status::OK(); | |||
| } | |||
| Status DeviceCpu::Analyze(std::string *name, double *utilization, std::string *extra_message) { | |||
| RETURN_UNEXPECTED_IF_NULL(name); | |||
| name->clear(); | |||
| name->append("device_info"); | |||
| int total_samples = cpu_util_.size(); | |||
| @@ -221,6 +222,7 @@ Status DeviceCpu::SaveToFile(const std::string &file_path) { | |||
| Status OperatorCpu::ParseCpuInfo(int32_t op_id, int64_t thread_id, | |||
| std::unordered_map<int32_t, std::unordered_map<int64_t, CpuOpStat>> *op_stat) { | |||
| RETURN_UNEXPECTED_IF_NULL(op_stat); | |||
| pid_t pid = 0; | |||
| #if defined(USING_LINUX) | |||
| pid = syscall(SYS_getpid); | |||
| @@ -257,11 +259,12 @@ Status OperatorCpu::ParseCpuInfo(int32_t op_id, int64_t thread_id, | |||
| } | |||
| Status OperatorCpu::Collect(const ExecutionTree *tree) { | |||
| RETURN_UNEXPECTED_IF_NULL(tree); | |||
| if (first_collect_) { | |||
| for (auto iter = tree->begin(); iter != tree->end(); ++iter) { | |||
| id_count_++; | |||
| op_name[iter->id()] = iter->NameWithID(); | |||
| op_parallel_workers[iter->id()] = iter->num_workers(); | |||
| op_name_[iter->id()] = iter->NameWithID(); | |||
| op_parallel_workers_[iter->id()] = iter->num_workers(); | |||
| } | |||
| #if defined(USING_LINUX) | |||
| cpu_processor_num_ = get_nprocs_conf(); | |||
| @@ -269,34 +272,34 @@ Status OperatorCpu::Collect(const ExecutionTree *tree) { | |||
| } | |||
| // Obtain the op and thread mapping | |||
| op_thread.clear(); | |||
| op_thread_.clear(); | |||
| List<Task> allTasks = tree->AllTasks()->GetTask(); | |||
| for (auto &task1 : allTasks) { | |||
| int32_t op_id = task1.get_operator_id(); | |||
| op_thread[op_id].emplace_back(task1.get_linux_id()); | |||
| op_thread_[op_id].emplace_back(task1.get_linux_id()); | |||
| } | |||
| // add process id into op_thread | |||
| if (!fetched_all_process) { | |||
| if (!fetched_all_process_) { | |||
| { | |||
| py::gil_scoped_acquire gil_acquire; | |||
| py::module ds = py::module::import("mindspore.dataset.engine.datasets"); | |||
| py::tuple process_info = ds.attr("_get_operator_process")(); | |||
| py::dict sub_process = py::reinterpret_borrow<py::dict>(process_info[0]); | |||
| fetched_all_process = py::reinterpret_borrow<py::bool_>(process_info[1]); | |||
| fetched_all_process_ = py::reinterpret_borrow<py::bool_>(process_info[1]); | |||
| // parse dict value | |||
| op_process = toIntMap(sub_process); | |||
| BaseCpu::op_process_shared = op_process; | |||
| BaseCpu::fetched_all_process_shared = fetched_all_process; | |||
| op_process_ = toIntMap(sub_process); | |||
| BaseCpu::op_process_shared_ = op_process_; | |||
| BaseCpu::fetched_all_process_shared_ = fetched_all_process_; | |||
| } | |||
| // judge whether there is device_que operator, if so operator id may need increase by one, temp use directly | |||
| for (auto item : op_process) { | |||
| for (auto item : op_process_) { | |||
| if (!item.second.empty()) { | |||
| if (op_thread.find(item.first) != op_thread.end()) { | |||
| op_thread[item.first].insert(op_thread[item.first].end(), item.second.begin(), item.second.end()); | |||
| if (op_thread_.find(item.first) != op_thread_.end()) { | |||
| op_thread_[item.first].insert(op_thread_[item.first].end(), item.second.begin(), item.second.end()); | |||
| } else { | |||
| op_thread[item.first] = item.second; | |||
| op_thread_[item.first] = item.second; | |||
| } | |||
| } | |||
| } | |||
| @@ -310,16 +313,15 @@ Status OperatorCpu::Collect(const ExecutionTree *tree) { | |||
| if (!first_collect_) { | |||
| // obtain all the op id in current tasks | |||
| std::vector<int32_t> total_op_id; | |||
| for (auto iter = op_thread.begin(); iter != op_thread.end(); iter++) { | |||
| total_op_id.emplace_back(iter->first); | |||
| } | |||
| (void)std::transform(op_thread_.begin(), op_thread_.end(), std::back_inserter(total_op_id), | |||
| [](const auto &iter) { return iter.first; }); | |||
| // iter all the op, and obtain the CPU utilization of each operator | |||
| for (auto op_id = -1; op_id < id_count_; op_id++) { | |||
| float user_util = 0, sys_util = 0; | |||
| auto iter = std::find(total_op_id.begin(), total_op_id.end(), op_id); | |||
| if (iter != total_op_id.end()) { | |||
| for (auto thread_id : op_thread[op_id]) { | |||
| for (auto thread_id : op_thread_[op_id]) { | |||
| if (ParseCpuInfo(op_id, thread_id, &op_stat_) == Status::OK()) { | |||
| user_util += (op_stat_[op_id][thread_id].user_stat_ - pre_op_stat_[op_id][thread_id].user_stat_) * 1.0 / | |||
| (total_stat_ - pre_total_stat_) * 100; | |||
| @@ -329,7 +331,7 @@ Status OperatorCpu::Collect(const ExecutionTree *tree) { | |||
| } | |||
| } | |||
| CpuOpUtil info; | |||
| info.op_id = op_id; | |||
| info.op_id_ = op_id; | |||
| info.sys_utilization_ = sys_util; | |||
| info.user_utilization_ = user_util; | |||
| cpu_step_util_.emplace_back(info); | |||
| @@ -337,10 +339,10 @@ Status OperatorCpu::Collect(const ExecutionTree *tree) { | |||
| cpu_op_util_.emplace_back(cpu_step_util_); | |||
| } else { | |||
| // mainly obtain the init CPU execute time in first collect | |||
| for (auto iter = op_thread.begin(); iter != op_thread.end(); iter++) { | |||
| int32_t op_id = iter->first; | |||
| for (auto thread_id_ : iter->second) { | |||
| // ignore errors in the first collect | |||
| for (const auto &iter : op_thread_) { | |||
| int32_t op_id = iter.first; | |||
| for (auto thread_id_ : iter.second) { | |||
| // ParseCpuInfo may fail when the CPU data is not ready yet, but we still collect the next thread's CPU info | |||
| (void)ParseCpuInfo(op_id, thread_id_, &op_stat_); | |||
| } | |||
| } | |||
| @@ -355,6 +357,8 @@ Status OperatorCpu::Collect(const ExecutionTree *tree) { | |||
| } | |||
| Status OperatorCpu::Analyze(std::string *name, double *utilization, std::string *extra_message) { | |||
| RETURN_UNEXPECTED_IF_NULL(name); | |||
| RETURN_UNEXPECTED_IF_NULL(extra_message); | |||
| int total_samples = cpu_op_util_.size(); | |||
| // Only analyze the middle half of the samples | |||
| @@ -374,15 +378,15 @@ Status OperatorCpu::Analyze(std::string *name, double *utilization, std::string | |||
| sum += cpu_op_util_[i][index].sys_utilization_; | |||
| } | |||
| if ((end_analyze - start_analyze) > 0) { | |||
| op_util = 1.0 * sum * cpu_processor_num_ / (op_parallel_workers[op_id] * (end_analyze - start_analyze)); | |||
| op_util = 1.0 * sum * cpu_processor_num_ / (op_parallel_workers_[op_id] * (end_analyze - start_analyze)); | |||
| } | |||
| if (op_util > *utilization) { | |||
| *utilization = op_util; | |||
| name->clear(); | |||
| name->append(op_name[op_id]); | |||
| (void)name->append(op_name_[op_id]); | |||
| } | |||
| extra_message->append(op_name[op_id] + " utiliization per thread: " + std::to_string(op_util) + "% (" + | |||
| std::to_string(op_parallel_workers[op_id]) + " parallel_workers); "); | |||
| (void)extra_message->append(op_name_[op_id] + " utilization per thread: " + std::to_string(op_util) + "% (" + | |||
| std::to_string(op_parallel_workers_[op_id]) + " parallel_workers); "); | |||
| } | |||
| return Status::OK(); | |||
| } | |||
| @@ -428,24 +432,24 @@ Status ProcessCpu::ParseCpuInfo() { | |||
| uint64_t total_stat_; | |||
| RETURN_IF_NOT_OK(GetTotalCpuTime(&total_stat_)); | |||
| if (!pre_fetched_state) { | |||
| process_id.clear(); | |||
| if (!pre_fetched_state_) { | |||
| process_id_.clear(); | |||
| pid_t main_pid = 0; | |||
| #if defined(USING_LINUX) | |||
| main_pid = syscall(SYS_getpid); | |||
| #endif | |||
| process_id.emplace_back(main_pid); | |||
| op_process = BaseCpu::op_process_shared; | |||
| fetched_all_process = BaseCpu::fetched_all_process_shared; | |||
| for (auto item : op_process) { | |||
| for (auto id : item.second) { | |||
| process_id.emplace_back(id); | |||
| process_id_.emplace_back(main_pid); | |||
| op_process_ = BaseCpu::op_process_shared_; | |||
| fetched_all_process_ = BaseCpu::fetched_all_process_shared_; | |||
| for (const auto &item : op_process_) { | |||
| for (const auto &id : item.second) { | |||
| process_id_.emplace_back(id); | |||
| } | |||
| } | |||
| } | |||
| float user_util = 0, sys_util = 0; | |||
| for (auto pid : process_id) { | |||
| for (const auto &pid : process_id_) { | |||
| std::string stat_path = "/proc/" + std::to_string(pid) + "/stat"; | |||
| std::ifstream file(stat_path); | |||
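Per-process CPU usage here is sampled from `/proc/<pid>/stat`; the 14th and 15th space-separated fields are the cumulative user-mode (`utime`) and kernel-mode (`stime`) times of the process in clock ticks. A minimal Linux-only sketch of reading those two fields for the current process; parsing resumes after the closing parenthesis so a command name containing spaces cannot shift the fields.

```cpp
#include <unistd.h>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>

int main() {
  std::ifstream file("/proc/" + std::to_string(getpid()) + "/stat");
  std::string line;
  std::getline(file, line);

  // Layout: pid (comm) state ppid ... utime=field 14, stime=field 15.
  std::istringstream rest(line.substr(line.rfind(')') + 2));  // resume at field 3 (state)
  std::string token;
  unsigned long long utime = 0, stime = 0;
  for (int field = 3; field <= 15 && (rest >> token); ++field) {
    if (field == 14) utime = std::stoull(token);
    if (field == 15) stime = std::stoull(token);
  }
  std::cout << "utime=" << utime << " ticks, stime=" << stime << " ticks\n";
  return 0;
}
```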
| @@ -479,11 +483,12 @@ Status ProcessCpu::ParseCpuInfo() { | |||
| } | |||
| pre_total_stat_ = total_stat_; | |||
| first_collect_ = false; | |||
| pre_fetched_state = fetched_all_process; | |||
| pre_fetched_state_ = fetched_all_process_; | |||
| return Status::OK(); | |||
| } | |||
| Status ProcessCpu::Collect(const ExecutionTree *tree) { | |||
| RETURN_UNEXPECTED_IF_NULL(tree); | |||
| if (first_collect_) { | |||
| #if defined(USING_LINUX) | |||
| cpu_processor_num_ = get_nprocs_conf(); | |||
| @@ -495,6 +500,9 @@ Status ProcessCpu::Collect(const ExecutionTree *tree) { | |||
| } | |||
| Status ProcessCpu::Analyze(std::string *name, double *utilization, std::string *extra_message) { | |||
| RETURN_UNEXPECTED_IF_NULL(name); | |||
| RETURN_UNEXPECTED_IF_NULL(utilization); | |||
| RETURN_UNEXPECTED_IF_NULL(extra_message); | |||
| name->clear(); | |||
| name->append("process_info"); | |||
| int total_samples = process_util_.size(); | |||
| @@ -49,7 +49,7 @@ typedef struct CpuInfo_s { | |||
| typedef struct CpuOpInfo_s { | |||
| float user_utilization_; | |||
| float sys_utilization_; | |||
| int32_t op_id; | |||
| int32_t op_id_; | |||
| } CpuOpUtil; | |||
| // CPU utilization of process | |||
| @@ -78,11 +78,11 @@ class BaseCpu { | |||
| protected: | |||
| std::vector<CpuUtil> cpu_util_; | |||
| CpuStat pre_cpu_stat_; | |||
| static bool fetched_all_process_shared; | |||
| static std::unordered_map<int32_t, std::vector<pid_t>> op_process_shared; | |||
| bool fetched_all_process; | |||
| bool pre_fetched_state; | |||
| std::unordered_map<int32_t, std::vector<pid_t>> op_process; | |||
| static bool fetched_all_process_shared_; | |||
| static std::unordered_map<int32_t, std::vector<pid_t>> op_process_shared_; | |||
| bool fetched_all_process_; | |||
| bool pre_fetched_state_; | |||
| std::unordered_map<int32_t, std::vector<pid_t>> op_process_; | |||
| int32_t cpu_processor_num_; | |||
| }; | |||
| @@ -136,9 +136,9 @@ class OperatorCpu : public BaseCpu { | |||
| bool first_collect_; | |||
| // Store the id and its corresponding threads. | |||
| std::unordered_map<int32_t, std::vector<pid_t>> op_thread; | |||
| std::unordered_map<int32_t, std::string> op_name; | |||
| std::unordered_map<int32_t, int32_t> op_parallel_workers; | |||
| std::unordered_map<int32_t, std::vector<pid_t>> op_thread_; | |||
| std::unordered_map<int32_t, std::string> op_name_; | |||
| std::unordered_map<int32_t, int32_t> op_parallel_workers_; | |||
| std::unordered_map<int32_t, std::unordered_map<int64_t, CpuOpStat>> pre_op_stat_; | |||
| uint64_t pre_total_stat_; | |||
| int32_t id_count_; | |||
| @@ -161,7 +161,7 @@ class ProcessCpu : public BaseCpu { | |||
| std::vector<CpuProcessUtil> process_util_; | |||
| uint64_t pre_total_stat_; | |||
| std::unordered_map<int64_t, CpuOpStat> pre_process_stat_; | |||
| std::vector<pid_t> process_id; | |||
| std::vector<pid_t> process_id_; | |||
| }; | |||
| // Sampling CPU information | |||
| @@ -52,7 +52,9 @@ class PerfData { | |||
| void AddSample(const T &row) { | |||
| auto i = 0; | |||
| for (const auto &e : row) { | |||
| data_[i++].push_back(e); | |||
| if (data_.size() > i) { | |||
| data_[i++].push_back(e); | |||
| } | |||
| } | |||
| counter_++; | |||
| } | |||
| @@ -62,7 +64,9 @@ class PerfData { | |||
| auto Row(dsize_t idx) { | |||
| std::vector<V> row(n_cols_); | |||
| for (auto i = 0; i < n_cols_; i++) { | |||
| row[i] = data_[i][idx]; | |||
| if (data_.size() > i && data_[i].size() > idx) { | |||
| row[i] = data_[i][idx]; | |||
| } | |||
| } | |||
| return row; | |||
| } | |||
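Both PerfData hunks replace unchecked indexing into the column-major `data_` store with explicit size guards, keeping the sampling path exception-free. A hypothetical alternative is `std::vector::at`, which throws on out-of-range access instead of skipping it; the tiny sketch below contrasts the two styles.

```cpp
#include <cstddef>
#include <iostream>
#include <stdexcept>
#include <vector>

int main() {
  std::vector<std::vector<double>> data(2);  // two columns of samples
  data[0] = {1.0, 2.0};
  data[1] = {3.0};

  std::size_t col = 1, idx = 1;

  // Guard style (as in the patch): silently skip when the index is out of range.
  if (data.size() > col && data[col].size() > idx) {
    std::cout << data[col][idx] << '\n';
  }

  // at() style: out-of-range access raises std::out_of_range instead.
  try {
    std::cout << data.at(col).at(idx) << '\n';
  } catch (const std::out_of_range &e) {
    std::cout << "out of range: " << e.what() << '\n';
  }
  return 0;
}
```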
| @@ -51,6 +51,7 @@ Status Tracing::SaveToFile() { | |||
| } | |||
| Status Sampling::ReadJson(nlohmann::json *output) { | |||
| RETURN_UNEXPECTED_IF_NULL(output); | |||
| Path path = Path(file_path_); | |||
| if (path.Exists()) { | |||
| MS_LOG(DEBUG) << file_path_ << " exists"; | |||
| @@ -25,6 +25,8 @@ std::map<std::string, Status (*)(nlohmann::json json_obj, std::shared_ptr<Tensor | |||
| Serdes::func_ptr_ = Serdes::InitializeFuncPtr(); | |||
| Status Serdes::SaveToJSON(std::shared_ptr<DatasetNode> node, const std::string &filename, nlohmann::json *out_json) { | |||
| RETURN_UNEXPECTED_IF_NULL(node); | |||
| RETURN_UNEXPECTED_IF_NULL(out_json); | |||
| // Dump attributes of current node to json string | |||
| nlohmann::json args; | |||
| RETURN_IF_NOT_OK(node->to_json(&args)); | |||
| @@ -48,6 +48,7 @@ TreeAdapter::TreeAdapter(UsageFlag usage) : usage_(usage), launched_(false), tre | |||
| } | |||
| Status TreeAdapter::PrePass(std::shared_ptr<DatasetNode> ir) { | |||
| RETURN_UNEXPECTED_IF_NULL(ir); | |||
| // Vector of actions in pre-pass phase | |||
| std::vector<std::unique_ptr<IRPass>> actions; | |||
| @@ -73,6 +74,7 @@ Status TreeAdapter::PrePass(std::shared_ptr<DatasetNode> ir) { | |||
| } | |||
| Status TreeAdapter::Optimize(std::shared_ptr<DatasetNode> ir) { | |||
| RETURN_UNEXPECTED_IF_NULL(ir); | |||
| // Vector of optimizations | |||
| std::vector<std::unique_ptr<IRNodePass>> optimizations; | |||
| MS_LOG(INFO) << "Running optimization pass loops"; | |||
| @@ -89,6 +91,7 @@ Status TreeAdapter::Optimize(std::shared_ptr<DatasetNode> ir) { | |||
| } | |||
| Status TreeAdapter::PostPass(std::shared_ptr<DatasetNode> ir) { | |||
| RETURN_UNEXPECTED_IF_NULL(ir); | |||
| // Vector of actions in post-pass phase | |||
| std::vector<std::unique_ptr<IRPass>> actions; | |||
| MS_LOG(INFO) << "Running post pass loops."; | |||
| @@ -118,6 +121,9 @@ Status TreeAdapter::PostPass(std::shared_ptr<DatasetNode> ir) { | |||
| } | |||
| Status TreeAdapter::BuildExecutionTreeRecur(std::shared_ptr<DatasetNode> ir, std::shared_ptr<DatasetOp> *const op) { | |||
| RETURN_UNEXPECTED_IF_NULL(ir); | |||
| RETURN_UNEXPECTED_IF_NULL(op); | |||
| RETURN_UNEXPECTED_IF_NULL(tree_); | |||
| // Build the DatasetOp ExecutionTree from the optimized IR tree | |||
| std::vector<std::shared_ptr<DatasetOp>> ops; | |||
| RETURN_IF_NOT_OK(ir->Build(&ops)); | |||
| @@ -133,7 +139,7 @@ Status TreeAdapter::BuildExecutionTreeRecur(std::shared_ptr<DatasetNode> ir, std | |||
| } | |||
| // Build the children of IR, once they return, add the return value to *op | |||
| for (std::shared_ptr<DatasetNode> child_ir : ir->Children()) { | |||
| for (const std::shared_ptr<DatasetNode> &child_ir : ir->Children()) { | |||
| std::shared_ptr<DatasetOp> child_op; | |||
| RETURN_IF_NOT_OK(BuildExecutionTreeRecur(child_ir, &child_op)); | |||
| RETURN_IF_NOT_OK(ops.back()->AddChild(child_op)); // append children to the last of ops | |||
| @@ -143,6 +149,7 @@ Status TreeAdapter::BuildExecutionTreeRecur(std::shared_ptr<DatasetNode> ir, std | |||
| } | |||
| Status TreeAdapter::Build(std::shared_ptr<DatasetNode> root_ir) { | |||
| RETURN_UNEXPECTED_IF_NULL(root_ir); | |||
| // This will evolve in the long run | |||
| tree_ = std::make_unique<ExecutionTree>(); | |||
| // disable profiling if this is only a getter pass | |||
| @@ -22,6 +22,8 @@ namespace dataset { | |||
| TreeAdapterLite::TreeAdapterLite() : root_(nullptr) { tree_ = std::make_unique<ExecutionTree>(); } | |||
| Status TreeAdapterLite::BuildExecutionTreeRecur(std::shared_ptr<DatasetNode> ir, std::shared_ptr<DatasetOp> *const op) { | |||
| RETURN_UNEXPECTED_IF_NULL(ir); | |||
| RETURN_UNEXPECTED_IF_NULL(op); | |||
| // Build the DatasetOp ExecutionTree from the optimized IR tree | |||
| std::vector<std::shared_ptr<DatasetOp>> ops; | |||
| RETURN_IF_NOT_OK(ir->Build(&ops)); | |||
| @@ -41,7 +43,7 @@ Status TreeAdapterLite::BuildExecutionTreeRecur(std::shared_ptr<DatasetNode> ir, | |||
| } | |||
| // Build the children of IR, once they return, add the return value to *op | |||
| for (std::shared_ptr<DatasetNode> child_ir : ir->Children()) { | |||
| for (const std::shared_ptr<DatasetNode> &child_ir : ir->Children()) { | |||
| std::shared_ptr<DatasetOp> child_op; | |||
| RETURN_IF_NOT_OK(BuildExecutionTreeRecur(child_ir, &child_op)); | |||
| RETURN_IF_NOT_OK(ops.back()->AddChild(child_op)); // append children to the last of ops | |||
| @@ -60,6 +62,7 @@ Status TreeAdapterLite::BuildTree(std::shared_ptr<DatasetNode> root_ir) { | |||
| Status TreeAdapterLite::GetNextRow(TensorRow *const row) { | |||
| RETURN_UNEXPECTED_IF_NULL(root_); | |||
| RETURN_UNEXPECTED_IF_NULL(row); | |||
| RETURN_IF_NOT_OK(root_->GetNextRowPullMode(row)); | |||
| return Status::OK(); | |||
| } | |||
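Note: the RETURN_UNEXPECTED_IF_NULL guards added throughout these TreeAdapter/TreeAdapterLite hunks all rely on the same early-return idiom. A rough sketch of how such a guard is commonly written follows (the real macro lives in minddata/dataset/util/status.h; this expansion is an assumption for illustration, not the actual definition):

// Illustrative only: return an error Status instead of dereferencing a null pointer.
#define RETURN_UNEXPECTED_IF_NULL(_ptr)                              \
  do {                                                               \
    if ((_ptr) == nullptr) {                                         \
      RETURN_STATUS_UNEXPECTED("The pointer[" #_ptr "] is null.");   \
    }                                                                \
  } while (false)

Placed at the top of a function, the guard turns a would-be crash into a recoverable Status error.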
| @@ -19,6 +19,7 @@ | |||
| #include <memory> | |||
| #include <string> | |||
| #include <utility> | |||
| #include <vector> | |||
| #include "minddata/dataset/core/device_tensor.h" | |||
| #include "minddata/dataset/core/device_resource.h" | |||
| @@ -30,7 +31,8 @@ namespace mindspore { | |||
| namespace dataset { | |||
| class DvppNormalizeOp : public TensorOp { | |||
| public: | |||
| explicit DvppNormalizeOp(std::vector<float> mean, std::vector<float> std) : mean_(mean), std_(std) {} | |||
| explicit DvppNormalizeOp(std::vector<float> mean, std::vector<float> std) | |||
| : mean_(std::move(mean)), std_(std::move(std)) {} | |||
| ~DvppNormalizeOp() = default; | |||
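Note: this constructor (like the SolarizeOp and UniformAugOp constructors further down) switches to the take-by-value-then-move idiom, so an rvalue argument is moved rather than deep-copied. A minimal, self-contained illustration, not tied to the actual classes:

#include <utility>
#include <vector>

class NormLike {
 public:
  // Taking the vector by value lets callers hand over an rvalue without a copy;
  // std::move then transfers the buffer into the member.
  explicit NormLike(std::vector<float> mean) : mean_(std::move(mean)) {}

 private:
  std::vector<float> mean_;
};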
| @@ -18,7 +18,7 @@ | |||
| #ifndef ENABLE_DVPP_INTERFACE | |||
| #define ENABLE_DVPP_INTERFACE | |||
| #endif | |||
| #include <stdio.h> | |||
| #include <cstdio> | |||
| #include <iostream> | |||
| #include <memory> | |||
| #include <vector> | |||
| @@ -13,13 +13,14 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "minddata/dataset/kernels/image/dvpp/utils/MDAclProcess.h" | |||
| #include <thread> | |||
| #include <sys/stat.h> | |||
| #include <sys/time.h> | |||
| #include "minddata/dataset/include/dataset/constants.h" | |||
| #include "minddata/dataset/core/tensor_shape.h" | |||
| #include "minddata/dataset/kernels/image/image_utils.h" | |||
| #include "MDAclProcess.h" | |||
| #include <sys/time.h> | |||
| #include <thread> | |||
| #include <sys/stat.h> | |||
| namespace { | |||
| const int BUFFER_SIZE = 2048; | |||
| @@ -17,25 +17,25 @@ | |||
| #define MDACLMANAGER_H | |||
| #include <climits> | |||
| #include <string> | |||
| #include <string.h> | |||
| #include <cstdio> | |||
| #include <map> | |||
| #include <iostream> | |||
| #include <memory> | |||
| #include <unistd.h> | |||
| #include <string> | |||
| #include <sys/stat.h> | |||
| #include <sys/types.h> | |||
| #include "acl/acl.h" | |||
| #include "CommonDataType.h" | |||
| #include "minddata/dataset/core/tensor_shape.h" | |||
| #include "minddata/dataset/core/data_type.h" | |||
| #include "minddata/dataset/kernels/image/dvpp/utils/CommonDataType.h" | |||
| #include "minddata/dataset/kernels/image/dvpp/utils/DvppCommon.h" | |||
| #include "minddata/dataset/kernels/image/dvpp/utils/ErrorCode.h" | |||
| #include "mindspore/ccsrc/minddata/dataset/core/device_tensor.h" | |||
| #include "mindspore/ccsrc/minddata/dataset/core/tensor.h" | |||
| #include "mindspore/core/utils/log_adapter.h" | |||
| #include "mindspore/ccsrc/minddata/dataset/util/status.h" | |||
| #include "ErrorCode.h" | |||
| #include "DvppCommon.h" | |||
| #include <stdio.h> | |||
| #include <unistd.h> | |||
| #include <sys/stat.h> | |||
| #include <sys/types.h> | |||
| mode_t SetFileDefaultUmask(); | |||
| @@ -16,17 +16,18 @@ | |||
| #ifndef RESOURCEMANAGER_H | |||
| #define RESOURCEMANAGER_H | |||
| #include <vector> | |||
| #include <set> | |||
| #include <climits> | |||
| #include <cstring> | |||
| #include <climits> | |||
| #include <unordered_map> | |||
| #include <mutex> | |||
| #include "CommonDataType.h" | |||
| #include "ErrorCode.h" | |||
| #include <set> | |||
| #include <sys/stat.h> | |||
| #include <unordered_map> | |||
| #include <vector> | |||
| #include "mindspore/core/utils/log_adapter.h" | |||
| #include "mindspore/ccsrc/cxx_api/graph/acl/acl_env_guard.h" | |||
| #include "minddata/dataset/kernels/image/dvpp/utils/CommonDataType.h" | |||
| #include "minddata/dataset/kernels/image/dvpp/utils/ErrorCode.h" | |||
| enum ModelLoadMethod { | |||
| LOAD_FROM_FILE = 0, // Loading from file, memory of model and weights are managed by ACL | |||
| @@ -48,7 +48,7 @@ static void GetSobelKernel(float *kernel, int flag, int ksize, double scale) { | |||
| buffer[0] = 1, buffer[1] = -2, buffer[2] = 1; | |||
| } | |||
| } else { | |||
| int old, now; | |||
| float old, now; | |||
| buffer[0] = 1; | |||
| for (int i = 0; i < ksize; i++) { | |||
| buffer[i + 1] = 0; | |||
| @@ -571,9 +571,8 @@ bool ConvertTo(const LiteMat &src, LiteMat &dst, double scale) { | |||
| if (dst.IsEmpty()) { | |||
| dst.Init(src.width_, src.height_, src.channel_, LDataType::FLOAT32); | |||
| } else if (src.width_ != dst.width_ || src.height_ != dst.height_ || src.channel_ != dst.channel_) { | |||
| return false; | |||
| } else if (dst.data_type_ != LDataType::FLOAT32) { | |||
| } else if (src.width_ != dst.width_ || src.height_ != dst.height_ || src.channel_ != dst.channel_ || | |||
| dst.data_type_ != LDataType::FLOAT32) { | |||
| return false; | |||
| } | |||
| @@ -662,24 +661,16 @@ bool Crop(const LiteMat &src, LiteMat &dst, int x, int y, int w, int h) { | |||
| } | |||
| static bool CheckZero(const std::vector<float> &vs) { | |||
| for (int i = 0; i < vs.size(); i++) { | |||
| if (Equal(vs[i], 0.0f)) { | |||
| return true; | |||
| } | |||
| } | |||
| return false; | |||
| return std::any_of(vs.begin(), vs.end(), [](const float &v) { return Equal(v, 0.0f); }); | |||
| } | |||
| static bool CheckZero(const std::vector<size_t> &vs) { | |||
| for (int i = 0; i < vs.size(); i++) { | |||
| if (vs[i] == 0) return true; | |||
| } | |||
| return false; | |||
| return std::any_of(vs.begin(), vs.end(), [](const size_t &v) { return v == 0; }); | |||
| } | |||
| static bool CheckMeanAndStd(const LiteMat &src, LiteMat &dst, int channel, const std::vector<float> &mean, | |||
| const std::vector<float> &std) { | |||
| if (mean.size() == 0 && std.size() == 0) { | |||
| if (mean.empty() && std.empty()) { | |||
| return false; | |||
| } | |||
| if (src.data_type_ != LDataType::FLOAT32) { | |||
| @@ -935,8 +926,8 @@ bool Merge(const std::vector<LiteMat> &mv, LiteMat &dst) { | |||
| LDataType data_type = mv[0].data_type_; | |||
| // The arrays in list must be single-channel | |||
| for (int i = 0; i < mv.size(); i++) { | |||
| if (mv[i].channel_ != 1) return false; | |||
| if (std::any_of(mv.begin(), mv.end(), [](const LiteMat &m) { return m.channel_ != 1; })) { | |||
| return false; | |||
| } | |||
| for (int i = 1; i < mv.size(); i++) { | |||
| @@ -998,7 +989,7 @@ bool Pad(const LiteMat &src, LiteMat &dst, int top, int bottom, int left, int ri | |||
| return true; | |||
| } | |||
| std::vector<std::vector<float>> GetDefaultBoxes(BoxesConfig config) { | |||
| std::vector<std::vector<float>> GetDefaultBoxes(const BoxesConfig config) { | |||
| size_t size = config.num_default.size(); | |||
| if (size <= 1 || config.feature_size.size() != size || config.steps.size() != size || | |||
| config.aspect_rations.size() != size) { | |||
| @@ -1116,6 +1107,7 @@ std::vector<int> ApplyNms(const std::vector<std::vector<float>> &all_boxes, std: | |||
| } | |||
| } | |||
| std::vector<int> new_order; | |||
| new_order.reserve(inds.size()); | |||
| for (int k = 0; k < inds.size(); k++) { | |||
| new_order.push_back(order[inds[k]]); | |||
| } | |||
| @@ -283,9 +283,7 @@ void LiteMat::Release() { | |||
| if (data_ptr_) { | |||
| AlignFree(data_ptr_); | |||
| } | |||
| if (ref_count_) { | |||
| delete[] ref_count_; | |||
| } | |||
| delete[] ref_count_; | |||
| } | |||
| data_ptr_ = nullptr; | |||
| elem_size_ = 0; | |||
| @@ -293,7 +291,7 @@ void LiteMat::Release() { | |||
| height_ = 0; | |||
| channel_ = 0; | |||
| c_step_ = 0; | |||
| ref_count_ = 0; | |||
| ref_count_ = nullptr; | |||
| size_ = 0; | |||
| setSteps(0, 0, 0); | |||
| } | |||
| @@ -418,7 +416,7 @@ inline void SubtractImpl(const uint32_t *src0, const uint32_t *src1, uint32_t *d | |||
| } | |||
| inline bool CheckSubstract(const LiteMat &src_a, const LiteMat &src_b, LiteMat *dst) { | |||
| if (dst == NULL) { | |||
| if (dst == nullptr) { | |||
| return false; | |||
| } | |||
| @@ -426,10 +424,7 @@ inline bool CheckSubstract(const LiteMat &src_a, const LiteMat &src_b, LiteMat * | |||
| return false; | |||
| } | |||
| if (src_a.data_type_ != src_b.data_type_) { | |||
| return false; | |||
| } | |||
| return true; | |||
| return src_a.data_type_ == src_b.data_type_; | |||
| } | |||
| bool Subtract(const LiteMat &src_a, const LiteMat &src_b, LiteMat *dst) { | |||
| @@ -585,7 +580,7 @@ inline void DivideImpl(const uint32_t *src0, const uint32_t *src1, uint32_t *dst | |||
| } | |||
| inline bool CheckDivide(const LiteMat &src_a, const LiteMat &src_b, LiteMat *dst) { | |||
| if (dst == NULL) { | |||
| if (dst == nullptr) { | |||
| return false; | |||
| } | |||
| @@ -593,10 +588,7 @@ inline bool CheckDivide(const LiteMat &src_a, const LiteMat &src_b, LiteMat *dst | |||
| return false; | |||
| } | |||
| if (src_a.data_type_ != src_b.data_type_) { | |||
| return false; | |||
| } | |||
| return true; | |||
| return src_a.data_type_ == src_b.data_type_; | |||
| } | |||
| bool Divide(const LiteMat &src_a, const LiteMat &src_b, LiteMat *dst) { | |||
| @@ -693,7 +685,7 @@ inline void MultiplyImpl(const uint32_t *src0, const uint32_t *src1, uint32_t *d | |||
| } | |||
| inline bool CheckMultiply(const LiteMat &src_a, const LiteMat &src_b, LiteMat *dst) { | |||
| if (dst == NULL) { | |||
| if (dst == nullptr) { | |||
| return false; | |||
| } | |||
| @@ -701,10 +693,7 @@ inline bool CheckMultiply(const LiteMat &src_a, const LiteMat &src_b, LiteMat *d | |||
| return false; | |||
| } | |||
| if (src_a.data_type_ != src_b.data_type_) { | |||
| return false; | |||
| } | |||
| return true; | |||
| return src_a.data_type_ == src_b.data_type_; | |||
| } | |||
| bool Multiply(const LiteMat &src_a, const LiteMat &src_b, LiteMat *dst) { | |||
| @@ -166,15 +166,9 @@ class LDataType { | |||
| ~LDataType() = default; | |||
| inline Type Value() const { return type_; } | |||
| inline bool operator==(const LDataType &ps) const { | |||
| if (this->type_ == ps.type_) return true; | |||
| return false; | |||
| } | |||
| inline bool operator==(const LDataType &ps) const { return this->type_ == ps.type_; } | |||
| inline bool operator!=(const LDataType &ps) const { | |||
| if (this->type_ != ps.type_) return true; | |||
| return false; | |||
| } | |||
| inline bool operator!=(const LDataType &ps) const { return this->type_ != ps.type_; } | |||
| uint8_t SizeInBytes() const { | |||
| if (type_ < LDataType::NUM_OF_TYPES) | |||
| @@ -381,11 +381,9 @@ bool WarpAffineBilinear(const LiteMat &src, LiteMat &dst, const LiteMat &M, int | |||
| } | |||
| if (dst.IsEmpty()) { | |||
| (void)dst.Init(dst_w, dst_h, src.channel_, LDataType::UINT8); | |||
| } else if (dst.height_ != dst_h || dst.width_ != dst_w || dst.channel_ != src.channel_) { | |||
| return false; | |||
| } else if (dst.data_type_ != LDataType::UINT8) { | |||
| } else if (dst.height_ != dst_h || dst.width_ != dst_w || dst.channel_ != src.channel_ || | |||
| dst.data_type_ != LDataType::UINT8) { | |||
| return false; | |||
| } else { | |||
| } | |||
| double IM[6]; | |||
| @@ -182,6 +182,8 @@ Status JpegCropAndDecode(const std::shared_ptr<Tensor> &input, std::shared_ptr<T | |||
| } catch (std::runtime_error &e) { | |||
| return DestroyDecompressAndReturnError(e.what()); | |||
| } | |||
| CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int32_t>::max() - crop_w) > crop_x, "invalid crop width"); | |||
| CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int32_t>::max() - crop_h) > crop_y, "invalid crop height"); | |||
| if (crop_x == 0 && crop_y == 0 && crop_w == 0 && crop_h == 0) { | |||
| crop_w = cinfo.output_width; | |||
| crop_h = cinfo.output_height; | |||
| @@ -190,6 +192,7 @@ Status JpegCropAndDecode(const std::shared_ptr<Tensor> &input, std::shared_ptr<T | |||
| return DestroyDecompressAndReturnError("Decode: invalid crop size"); | |||
| } | |||
| const int mcu_size = cinfo.min_DCT_scaled_size; | |||
| CHECK_FAIL_RETURN_UNEXPECTED(mcu_size != 0, "Invalid data."); | |||
| unsigned int crop_x_aligned = (crop_x / mcu_size) * mcu_size; | |||
| unsigned int crop_w_aligned = crop_w + crop_x - crop_x_aligned; | |||
| try { | |||
| @@ -206,8 +209,13 @@ Status JpegCropAndDecode(const std::shared_ptr<Tensor> &input, std::shared_ptr<T | |||
| RETURN_IF_NOT_OK(Tensor::CreateEmpty(ts, DataType(DataType::DE_UINT8), &output_tensor)); | |||
| const int buffer_size = output_tensor->SizeInBytes(); | |||
| JSAMPLE *buffer = reinterpret_cast<JSAMPLE *>(&(*output_tensor->begin<uint8_t>())); | |||
| // stride refers to output tensor, which has 3 components at most | |||
| CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int32_t>::max() - skipped_scanlines) > crop_h, | |||
| "Invalid crop height."); | |||
| const int max_scanlines_to_read = skipped_scanlines + crop_h; | |||
| // stride refers to output tensor, which has 3 components at most | |||
| CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int32_t>::max() / crop_w) > kOutNumComponents, | |||
| "Invalid crop width."); | |||
| const int stride = crop_w * kOutNumComponents; | |||
| // offset is calculated for scanlines read from the image, therefore | |||
| // has the same number of components as the image | |||
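Note: the CHECK_FAIL_RETURN_UNEXPECTED calls added in this hunk and the Crop/Pad hunks below all guard against signed int32 overflow before a sum or product is formed. A minimal standalone sketch of the pattern (the function names here are illustrative, not from the source):

#include <cstdint>
#include <limits>

// True only if a + b fits in int32_t (a and b assumed non-negative),
// mirroring the (max - a) > b checks in the diff.
bool AddFitsInt32(int32_t a, int32_t b) {
  return b < std::numeric_limits<int32_t>::max() - a;
}

// True only if a * b fits in int32_t (a > 0, b >= 0 assumed),
// mirroring the (max / a) > b check used for the stride.
bool MulFitsInt32(int32_t a, int32_t b) {
  return b < std::numeric_limits<int32_t>::max() / a;
}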
| @@ -246,6 +254,8 @@ Status Crop(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *outpu | |||
| RETURN_STATUS_UNEXPECTED("Crop: image datatype is not float32 or uint8"); | |||
| } | |||
| CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int32_t>::max() - y) > h, "Invalid crop height."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int32_t>::max() - x) > w, "Invalid crop width."); | |||
| // account for integer overflow | |||
| if (y < 0 || (y + h) > input->shape()[0] || (y + h) < 0) { | |||
| RETURN_STATUS_UNEXPECTED( | |||
| @@ -410,7 +420,10 @@ Status Resize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *out | |||
| Status ResizePreserve(const TensorRow &inputs, int32_t height, int32_t width, int32_t img_orientation, | |||
| TensorRow *outputs) { | |||
| outputs->resize(3); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(inputs.size() > 0, | |||
| "Invalid input, should greater than 0, but got " + std::to_string(inputs.size())); | |||
| std::shared_ptr<Tensor> input = inputs[0]; | |||
| CHECK_FAIL_RETURN_UNEXPECTED(input->shape().Size() >= 3, "Invalid input shape, should be greater than 3 dimensions."); | |||
| LiteMat lite_mat_src(input->shape()[1], input->shape()[0], input->shape()[2], | |||
| const_cast<void *>(reinterpret_cast<const void *>(input->GetBuffer())), | |||
| GetLiteCVDataType(input->type())); | |||
| @@ -537,7 +550,15 @@ Status Pad(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output | |||
| std::shared_ptr<Tensor> output_tensor; | |||
| CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int32_t>::max() - lite_mat_rgb.width_) > pad_left, | |||
| "Invalid pad width."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int32_t>::max() - lite_mat_rgb.width_ + pad_left) > pad_right, | |||
| "Invalid pad width."); | |||
| int pad_width = lite_mat_rgb.width_ + pad_left + pad_right; | |||
| CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int32_t>::max() - lite_mat_rgb.height_) > pad_top, | |||
| "Invalid pad height."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int32_t>::max() - lite_mat_rgb.height_ + pad_top) > pad_bottom, | |||
| "Invalid pad height."); | |||
| int pad_height = lite_mat_rgb.height_ + pad_top + pad_bottom; | |||
| TensorShape new_shape = TensorShape({pad_height, pad_width, input->shape()[2]}); | |||
| RETURN_IF_NOT_OK(Tensor::CreateEmpty(new_shape, input->type(), &output_tensor)); | |||
| @@ -721,11 +742,13 @@ Status Affine(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *out | |||
| } | |||
| int height = 0; | |||
| int width = 0; | |||
| CHECK_FAIL_RETURN_UNEXPECTED(mat.size() <= 6, "Invalid mat shape."); | |||
| double M[6] = {}; | |||
| for (int i = 0; i < mat.size(); i++) { | |||
| M[i] = static_cast<double>(mat[i]); | |||
| } | |||
| CHECK_FAIL_RETURN_UNEXPECTED(input->shape().Size() >= 3, "Invalid input shape, should be 3."); | |||
| LiteMat lite_mat_rgb(input->shape()[1], input->shape()[0], input->shape()[2], | |||
| const_cast<void *>(reinterpret_cast<const void *>(input->GetBuffer())), | |||
| GetLiteCVDataType(input->type())); | |||
| @@ -22,7 +22,7 @@ | |||
| namespace mindspore { | |||
| namespace dataset { | |||
| const int32_t ResizePreserveAROp::kDefImgorientation = 0; | |||
| const int32_t ResizePreserveAROp::kDefImgOrientation = 0; | |||
| ResizePreserveAROp::ResizePreserveAROp(int32_t height, int32_t width, int32_t img_orientation) | |||
| : height_(height), width_(width), img_orientation_(img_orientation) {} | |||
| @@ -34,9 +34,9 @@ namespace dataset { | |||
| class ResizePreserveAROp : public TensorOp { | |||
| public: | |||
| // Default values, also used by python_bindings.cc | |||
| static const int32_t kDefImgorientation; | |||
| static const int32_t kDefImgOrientation; | |||
| ResizePreserveAROp(int32_t height, int32_t width, int32_t img_orientation = kDefImgorientation); | |||
| ResizePreserveAROp(int32_t height, int32_t width, int32_t img_orientation = kDefImgOrientation); | |||
| ~ResizePreserveAROp() override = default; | |||
| @@ -35,9 +35,9 @@ Status ResizeWithBBoxOp::Compute(const TensorRow &input, TensorRow *output) { | |||
| int32_t input_w = input[0]->shape()[1]; | |||
| output->resize(2); | |||
| (*output)[1] = std::move(input[1]); // move boxes over to output | |||
| (*output)[1] = input[1]; // copy boxes over to output | |||
| std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(std::move(input[0])); | |||
| std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input[0]); | |||
| RETURN_IF_NOT_OK(ResizeOp::Compute(std::static_pointer_cast<Tensor>(input_cv), &(*output)[0])); | |||
| @@ -29,7 +29,7 @@ namespace mindspore { | |||
| namespace dataset { | |||
| class RgbaToBgrOp : public TensorOp { | |||
| public: | |||
| RgbaToBgrOp() {} | |||
| RgbaToBgrOp() = default; | |||
| ~RgbaToBgrOp() override = default; | |||
| @@ -29,7 +29,7 @@ namespace mindspore { | |||
| namespace dataset { | |||
| class RgbaToRgbOp : public TensorOp { | |||
| public: | |||
| RgbaToRgbOp() {} | |||
| RgbaToRgbOp() = default; | |||
| ~RgbaToRgbOp() override = default; | |||
| @@ -42,9 +42,10 @@ Status SharpnessOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_pt | |||
| /// 1, 5, 1, | |||
| /// 1, 1, 1 | |||
| float filterSum = 13.0; | |||
| const float filterMid = 5.0; | |||
| const float filterSum = 13.0; | |||
| cv::Mat filter = cv::Mat(3, 3, CV_32F, cv::Scalar::all(1.0 / filterSum)); | |||
| filter.at<float>(1, 1) = 5.0 / filterSum; | |||
| filter.at<float>(1, 1) = filterMid / filterSum; | |||
| /// applying filter on channels | |||
| cv::Mat result = cv::Mat(); | |||
| @@ -57,7 +57,7 @@ Status SoftDvppDecodeRandomCropResizeJpegOp::Compute(const std::shared_ptr<Tenso | |||
| SoftDpCropInfo crop_info; | |||
| RETURN_IF_NOT_OK(GetCropInfo(input, &crop_info)); | |||
| try { | |||
| unsigned char *buffer = const_cast<unsigned char *>(input->GetBuffer()); | |||
| auto buffer = const_cast<unsigned char *>(input->GetBuffer()); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(buffer != nullptr, | |||
| "SoftDvppDecodeRandomCropResizeJpeg: the input image buffer is empty."); | |||
| SoftDpProcsessInfo info; | |||
| @@ -21,9 +21,9 @@ | |||
| #include <random> | |||
| #include <string> | |||
| #include "./utils/external_soft_dp.h" | |||
| #include "minddata/dataset/core/tensor.h" | |||
| #include "minddata/dataset/kernels/image/random_crop_and_resize_op.h" | |||
| #include "minddata/dataset/kernels/image/soft_dvpp/utils/external_soft_dp.h" | |||
| #include "minddata/dataset/util/status.h" | |||
| namespace mindspore { | |||
| @@ -32,7 +32,7 @@ class SoftDvppDecodeResizeJpegOp : public TensorOp { | |||
| : target_height_(target_height), target_width_(target_width) {} | |||
| /// \brief Destructor | |||
| ~SoftDvppDecodeResizeJpegOp() = default; | |||
| ~SoftDvppDecodeResizeJpegOp() override = default; | |||
| Status Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) override; | |||
| Status OutputShape(const std::vector<TensorShape> &inputs, std::vector<TensorShape> &outputs) override; | |||
| @@ -17,7 +17,7 @@ | |||
| #ifndef EXTERNAL_SOFTDP_H | |||
| #define EXTERNAL_SOFTDP_H | |||
| #include <stdint.h> | |||
| #include <cstdint> | |||
| struct SoftDpProcsessInfo { | |||
| uint8_t *input_buffer; // input buffer | |||
| @@ -44,11 +44,10 @@ uint32_t DecodeAndResizeJpeg(SoftDpProcsessInfo *soft_dp_process_info) { | |||
| } | |||
| // use vpc interface to resize and convert RGB, give user output buf and output size. | |||
| SoftDpCropInfo crop; | |||
| crop.left = 0; | |||
| crop.right = vpc_input_info.real_width - 1; | |||
| crop.up = 0; | |||
| crop.down = vpc_input_info.real_height - 1; | |||
| auto crop = SoftDpCropInfo{.left = 0, | |||
| .right = static_cast<uint32_t>(vpc_input_info.real_width - 1), | |||
| .up = 0, | |||
| .down = static_cast<uint32_t>(vpc_input_info.real_height - 1)}; | |||
| VpcInfo output; | |||
| output.addr = soft_dp_process_info->output_buffer; | |||
| @@ -17,8 +17,8 @@ | |||
| #ifndef SOFT_DP_H | |||
| #define SOFT_DP_H | |||
| #include <stdint.h> | |||
| #include "./external_soft_dp.h" | |||
| #include <cstdint> | |||
| #include "minddata/dataset/kernels/image/soft_dvpp/utils/external_soft_dp.h" | |||
| enum JpegdToVpcFormat { | |||
| INPUT_VPC_UNKNOWN = -1, | |||
| @@ -25,11 +25,10 @@ | |||
| #define DP_EVENT 0x10000 | |||
| #define DP_DEBUG_LEVEL (DP_EVENT | DP_ERR | DP_WARNING | DP_INFO | DP_DEBUG) | |||
| #include <vector> | |||
| #include <string> | |||
| #if defined(DVPP_UTST) || defined(DEBUG) | |||
| #include <stdio.h> | |||
| #include <string> | |||
| #include <vector> | |||
| #define DP_LOG(model, level, format, ...) \ | |||
| do { \ | |||
| @@ -67,6 +66,8 @@ | |||
| #include <securec.h> | |||
| #include <cstdio> | |||
| #include <vector> | |||
| #include <string> | |||
| #include "glog/logging.h" | |||
| template <typename... Args> | |||
| @@ -48,9 +48,5 @@ bool IsDirectory(const std::string &path) { | |||
| return false; | |||
| } | |||
| if (S_ISDIR(buf.st_mode)) { | |||
| return true; | |||
| } else { | |||
| return false; | |||
| } | |||
| return S_ISDIR(buf.st_mode); | |||
| } | |||
| @@ -40,11 +40,7 @@ T1 AlignDown(T1 num, T2 align) { | |||
| template <typename T> | |||
| bool IsInTheScope(T num, T left_point, T right_point) { | |||
| if (num >= left_point && num <= right_point) { | |||
| return true; | |||
| } | |||
| return false; | |||
| return num >= left_point && num <= right_point; | |||
| } | |||
| template <typename T> | |||
| @@ -109,19 +109,19 @@ int32_t SoftVpc::CheckParamter() { | |||
| uint32_t out_width = out_width_; | |||
| uint32_t out_height = out_height_; | |||
| bool flag = (out_width * 32 >= crop_width) ? true : false; // A maximum of 32x zoom-out | |||
| bool flag = (out_width * 32 >= crop_width); // A maximum of 32x zoom-out | |||
| VPC_CHECK_COND_FAIL_PRINT_RETURN(flag, dpFail, | |||
| "Max reduction multiple is 32. Please check left(%u), right(%u), out_width(%u).", | |||
| left_, right_, out_width); // Up to 16x magnification | |||
| flag = (crop_width * 16 >= out_width) ? true : false; | |||
| flag = (crop_width * 16 >= out_width); | |||
| VPC_CHECK_COND_FAIL_PRINT_RETURN(flag, dpFail, | |||
| "Max magnification is 16. Please check left(%u), right(%u), out_width(%u).", left_, | |||
| right_, out_width); | |||
| flag = (out_height * 32 >= crop_height) ? true : false; // A maximum of 32x zoom-out | |||
| flag = (out_height * 32 >= crop_height); // A maximum of 32x zoom-out | |||
| VPC_CHECK_COND_FAIL_PRINT_RETURN(flag, dpFail, | |||
| "Max reduction multiple is 32. Please check up(%u), down(%u), out_height(%u).", up_, | |||
| down_, out_height); | |||
| flag = (crop_height * 16 >= out_height) ? true : false; // Up to 16x magnification | |||
| flag = (crop_height * 16 >= out_height); // Up to 16x magnification | |||
| VPC_CHECK_COND_FAIL_PRINT_RETURN( | |||
| flag, dpFail, "Max magnification is 16. Please check up(%u), down(%u), out_height(%u).", up_, down_, out_height); | |||
| return dpSucc; | |||
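Note: the boolean simplifications in this hunk keep the original semantics; the four checks bound the crop-to-output ratio to at most a 32x reduction and a 16x magnification in each dimension. A compact restatement of that constraint (illustrative only, not code from the source):

// Width (and, analogously, height) must satisfy:
//   out * 32 >= crop   -> at most 32x zoom-out
//   crop * 16 >= out   -> at most 16x zoom-in
bool ScaleRatioOk(uint32_t crop, uint32_t out) {
  return (out * 32 >= crop) && (crop * 16 >= out);
}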
| @@ -34,7 +34,7 @@ class SoftVpc { | |||
| public: | |||
| SoftVpc(); | |||
| ~SoftVpc() {} | |||
| ~SoftVpc() = default; | |||
| /* | |||
| * @brief : vpc Cropping and Scaling APIs. | |||
| @@ -75,7 +75,7 @@ void GetParaSet(std::string str_line, int32_t *flag_ctl, int32_t *flag_tap, YuvW | |||
| // taps_4, the second character in the square brackets is the start address of the array block. | |||
| if ((*flag_ctl - initBracketNum) % arrTypeNum == 2) { | |||
| while (1) { | |||
| while (true) { | |||
| ss >> yuv_scaler_paraset->scale[cnt].taps_4[index->first_index++]; | |||
| if (ss.fail()) { // read failed. | |||
| index->first_index = index->first_index - 1; | |||
| @@ -94,7 +94,7 @@ void GetParaSet(std::string str_line, int32_t *flag_ctl, int32_t *flag_tap, YuvW | |||
| // taps_6 | |||
| if ((*flag_ctl - initBracketNum) % arrTypeNum == 0) { | |||
| while (1) { | |||
| while (true) { | |||
| ss >> yuv_scaler_paraset->scale[cnt].taps_6[index->second_index++]; | |||
| if (ss.fail()) { // read failed. | |||
| index->second_index = index->second_index - 1; | |||
| @@ -115,7 +115,6 @@ void GetParaSet(std::string str_line, int32_t *flag_ctl, int32_t *flag_tap, YuvW | |||
| } | |||
| int32_t CheckParamater(std::pair<bool, std::string> rlt, uint32_t i) { | |||
| int32_t ret = dpSucc; | |||
| if (rlt.first == false) { | |||
| API_LOGE("Get real path failed. index = %u", i); | |||
| return dpFail; | |||
| @@ -126,7 +125,7 @@ int32_t CheckParamater(std::pair<bool, std::string> rlt, uint32_t i) { | |||
| return dpFail; | |||
| } | |||
| return ret; | |||
| return dpSucc; | |||
| } | |||
| // Read the parameter set file and skip the comments in the file. | |||
| @@ -177,7 +176,7 @@ int32_t ParseFileToVar(const std::string *para_set_name, uint32_t yuv_scaler_par | |||
| } | |||
| // Calculate the number of "{" and check the location of the data. | |||
| if (str_line.find("{") != std::string::npos) { | |||
| if (str_line.find('{') != std::string::npos) { | |||
| flag_ctl++; | |||
| flag_tap = 1; | |||
| } | |||
| @@ -19,6 +19,7 @@ | |||
| #include <memory> | |||
| #include <string> | |||
| #include <utility> | |||
| #include <vector> | |||
| #include "minddata/dataset/core/tensor.h" | |||
| @@ -29,9 +30,9 @@ namespace mindspore { | |||
| namespace dataset { | |||
| class SolarizeOp : public TensorOp { | |||
| public: | |||
| explicit SolarizeOp(std::vector<uint8_t> threshold = {0, 255}) : threshold_(threshold) {} | |||
| explicit SolarizeOp(std::vector<uint8_t> threshold = {0, 255}) : threshold_(std::move(threshold)) {} | |||
| ~SolarizeOp() = default; | |||
| ~SolarizeOp() override = default; | |||
| Status Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) override; | |||
| @@ -30,7 +30,7 @@ namespace dataset { | |||
| class SwapRedBlueOp : public TensorOp { | |||
| public: | |||
| /// \brief Constructor | |||
| SwapRedBlueOp() {} | |||
| SwapRedBlueOp() = default; | |||
| SwapRedBlueOp(const SwapRedBlueOp &rhs) = default; | |||
| @@ -22,7 +22,7 @@ namespace dataset { | |||
| const int UniformAugOp::kDefNumOps = 2; | |||
| UniformAugOp::UniformAugOp(std::vector<std::shared_ptr<TensorOp>> op_list, int32_t num_ops) | |||
| : tensor_op_list_(op_list), num_ops_(num_ops) { | |||
| : tensor_op_list_(std::move(op_list)), num_ops_(num_ops) { | |||
| rnd_.seed(GetSeed()); | |||
| } | |||