From 7c6dbcbe0131fc670a579384f7f271fb77d16b4b Mon Sep 17 00:00:00 2001 From: YangLuo Date: Sat, 5 Dec 2020 12:16:18 +0800 Subject: [PATCH] c++ minddata eager plus ms eager --- .../ccsrc/minddata/dataset/CMakeLists.txt | 11 ++ .../ccsrc/minddata/dataset/api/CMakeLists.txt | 34 ++-- .../minddata/dataset/api/minddata_eager.cc | 154 ++++++++++++++++++ .../minddata/dataset/include/constants.h | 104 ++++++++++++ .../minddata/dataset/include/minddata_eager.h | 62 +++++++ .../minddata/dataset/include/transforms.h | 4 +- .../ccsrc/minddata/dataset/include/vision.h | 4 +- tests/cxx_st/CMakeLists.txt | 5 +- tests/cxx_st/dataset/test_de.cc | 49 ++++++ tests/cxx_st/runtest.sh | 43 +++++ 10 files changed, 454 insertions(+), 16 deletions(-) create mode 100644 mindspore/ccsrc/minddata/dataset/api/minddata_eager.cc create mode 100644 mindspore/ccsrc/minddata/dataset/include/constants.h create mode 100644 mindspore/ccsrc/minddata/dataset/include/minddata_eager.h create mode 100644 tests/cxx_st/dataset/test_de.cc create mode 100755 tests/cxx_st/runtest.sh diff --git a/mindspore/ccsrc/minddata/dataset/CMakeLists.txt b/mindspore/ccsrc/minddata/dataset/CMakeLists.txt index 87df0fa6bf..a19c57d196 100644 --- a/mindspore/ccsrc/minddata/dataset/CMakeLists.txt +++ b/mindspore/ccsrc/minddata/dataset/CMakeLists.txt @@ -41,6 +41,7 @@ include_directories(${CMAKE_SOURCE_DIR}/mindspore/ccsrc/runtime/device/ascend/pl include_directories(${CMAKE_BINARY_DIR}) # for protobuf generated .h +include_directories(${CMAKE_SOURCE_DIR}/mindspore) include_directories(${CMAKE_SOURCE_DIR}/mindspore/ccsrc/minddata/mindrecord/include) include_directories(${CMAKE_SOURCE_DIR}/mindspore/ccsrc/minddata/dataset/include) include_directories(${CMAKE_SOURCE_DIR}/mindspore/ccsrc/minddata/dataset/kernels/image) @@ -204,6 +205,16 @@ else () endif () endif () +add_dependencies(_c_dataengine mindspore_shared_lib) +if (${CMAKE_SYSTEM_NAME} MATCHES "Windows") + set(MINDSPORE_LINK_OBJECT 
${CMAKE_BINARY_DIR}/mindspore/ccsrc/cxx_api/CMakeFiles/mindspore_shared_lib.dir/objects.a) + target_link_libraries(_c_dataengine PRIVATE mindspore_shared_lib ${MINDSPORE_LINK_OBJECT}) +else() + if (ENABLE_ACL) + target_link_libraries(_c_dataengine PRIVATE mindspore_shared_lib) + endif () +endif() + if (USE_GLOG) target_link_libraries(_c_dataengine PRIVATE mindspore::glog) else () diff --git a/mindspore/ccsrc/minddata/dataset/api/CMakeLists.txt b/mindspore/ccsrc/minddata/dataset/api/CMakeLists.txt index 3cda25e994..e24fbd6df7 100644 --- a/mindspore/ccsrc/minddata/dataset/api/CMakeLists.txt +++ b/mindspore/ccsrc/minddata/dataset/api/CMakeLists.txt @@ -58,14 +58,28 @@ if (APPLE) vision.cc ) else() - add_library(cpp-API OBJECT - config.cc - datasets.cc - execute.cc - iterator.cc - transforms.cc - samplers.cc - text.cc - vision.cc - ) + if (ENABLE_ACL) + add_library(cpp-API OBJECT + config.cc + datasets.cc + execute.cc + iterator.cc + minddata_eager.cc + transforms.cc + samplers.cc + text.cc + vision.cc + ) + else() + add_library(cpp-API OBJECT + config.cc + datasets.cc + execute.cc + iterator.cc + transforms.cc + samplers.cc + text.cc + vision.cc + ) + endif() endif() diff --git a/mindspore/ccsrc/minddata/dataset/api/minddata_eager.cc b/mindspore/ccsrc/minddata/dataset/api/minddata_eager.cc new file mode 100644 index 0000000000..13b9abea98 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/api/minddata_eager.cc @@ -0,0 +1,154 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include "minddata/dataset/include/minddata_eager.h" +#include "minddata/dataset/include/vision.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/path.h" + +namespace mindspore { +namespace api { + +MindDataEager::MindDataEager(std::vector> ops) : ops_(ops) {} + +// Helper function to convert Type from DE to MS (types not listed below map to kMsUnknown) +DataType ToMSType(dataset::DataType type) { + switch (dataset::DataType::Type(type)) { + case dataset::DataType::DE_BOOL: + return DataType::kMsBool; + case dataset::DataType::DE_UINT8: + return DataType::kMsUint8; + case dataset::DataType::DE_INT32: + return DataType::kMsInt32; + case dataset::DataType::DE_INT64: + return DataType::kMsInt64; + case dataset::DataType::DE_FLOAT32: + return DataType::kMsFloat32; + default: + return DataType::kMsUnknown; + } +} + +// Helper function to convert Type from MS to DE (types not listed below map to DE_UNKNOWN) +dataset::DataType ToDEType(DataType type) { + switch (type) { + case DataType::kMsBool: + return dataset::DataType(dataset::DataType::DE_BOOL); + case DataType::kMsUint8: + return dataset::DataType(dataset::DataType::DE_UINT8); + case DataType::kMsInt32: + return dataset::DataType(dataset::DataType::DE_INT32); + case DataType::kMsInt64: + return dataset::DataType(dataset::DataType::DE_INT64); + case DataType::kMsFloat32: + return dataset::DataType(dataset::DataType::DE_FLOAT32); + default: + return dataset::DataType(dataset::DataType::DE_UNKNOWN); + } +} + +Status MindDataEager::LoadImageFromDir(const std::string &image_dir, std::vector> *images) { + // Check that the target path exists and is a directory + dataset::Path image_dir_(image_dir); + if (!image_dir_.Exists() || !image_dir_.IsDirectory()) { + std::string err_msg = "Target directory: " + image_dir + " does not exist or not a dir."; + MS_LOG(ERROR) << err_msg; + return Status(StatusCode::FAILED, err_msg); + } +
if (access(image_dir_.toString().c_str(), R_OK) == -1) { + std::string err_msg = "No access to target directory: " + image_dir; + MS_LOG(ERROR) << err_msg; + return Status(StatusCode::FAILED, err_msg); + } + + // Start reading images and constructing tensors. NOTE(review): results of OpenDirectory and CreateFromFile are used unchecked + auto path_itr = dataset::Path::DirIterator::OpenDirectory(&image_dir_); + while (path_itr->hasNext()) { + dataset::Path file = path_itr->next(); + std::shared_ptr image; + dataset::Tensor::CreateFromFile(file.toString(), &image); + + std::shared_ptr ms_image = std::make_shared("image", DataType(kMsUint8), image->shape().AsVector(), + image->GetBuffer(), image->SizeInBytes()); + images->push_back(ms_image); + } + + // Fail if the output vector is still empty + if (images->empty()) { + std::string err_msg = "No images found in target directory: " + image_dir; + MS_LOG(ERROR) << err_msg; + return Status(StatusCode::FAILED, err_msg); + } + + return Status(StatusCode::SUCCESS); +} + +std::shared_ptr MindDataEager::operator()(std::shared_ptr input) { + // Validate ops + if (ops_.empty()) { + MS_LOG(ERROR) << "Input TensorOperation should be provided"; + return nullptr; + } + for (int32_t i = 0; i < ops_.size(); i++) { + if (ops_[i] == nullptr) { + MS_LOG(ERROR) << "Input TensorOperation[" << i << "] is invalid or null"; + return nullptr; + } + } + // Validate input tensor + if (input == nullptr) { + MS_LOG(ERROR) << "Input Tensor should not be null"; + return nullptr; + } + + // Start applying transforms in ops. NOTE(review): the Status returned by CreateFromMemory is discarded + std::shared_ptr de_input; + dataset::Tensor::CreateFromMemory(dataset::TensorShape(input->Shape()), ToDEType(input->DataType()), + (const uchar *)(input->Data()), &de_input); + + for (int32_t i = 0; i < ops_.size(); i++) { + // Build runtime op and run + std::shared_ptr de_output; + std::shared_ptr transform = ops_[i]->Build(); + dataset::Status rc = transform->Compute(de_input, &de_output); + + // Check whether execution failed + if (rc.IsError()) { + MS_LOG(ERROR) << "Operation execution failed : " << rc.ToString(); + return
nullptr; + } + + // Output of this op feeds the next transform + de_input = std::move(de_output); + } + + // Convert DETensor to Tensor. NOTE(review): the name literal below is spelled "transfomed"; confirm whether "transformed" was intended + if (!de_input->HasData()) { + MS_LOG(ERROR) << "Apply transform failed, output tensor has no data"; + return nullptr; + } + std::shared_ptr output = + std::make_shared("transfomed", ToMSType(de_input->type()), de_input->shape().AsVector(), + de_input->GetBuffer(), de_input->SizeInBytes()); + return output; +} + +} // namespace api +} // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/include/constants.h b/mindspore/ccsrc/minddata/dataset/include/constants.h new file mode 100644 index 0000000000..0e03df5c50 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/include/constants.h @@ -0,0 +1,104 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ +#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_CONSTANTS_H_ +#define MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_CONSTANTS_H_ + +#include +#include +#include + +namespace mindspore { +namespace dataset { +// Various type defines for convenience +using uchar = unsigned char; +using dsize_t = int64_t; + +// Target devices to perform map operation +enum class MapTargetDevice { kCpu, kGpu, kDvpp }; + +// Possible dataset types for holding the data and client type +enum class DatasetType { kUnknown, kArrow, kTf }; + +// Possible flavours of Tensor implementations +enum class TensorImpl { kNone, kFlexible, kCv, kNP }; + +// Possible values for shuffle +enum class ShuffleMode { kFalse = 0, kFiles = 1, kGlobal = 2 }; + +// Possible values for Border types +enum class BorderType { kConstant = 0, kEdge = 1, kReflect = 2, kSymmetric = 3 }; + +// Possible values for Image format types in a batch +enum class ImageBatchFormat { kNHWC = 0, kNCHW = 1 }; + +// Possible values for Image format types +enum class ImageFormat { HWC = 0, CHW = 1, HW = 2 }; + +// Possible interpolation modes +enum class InterpolationMode { kLinear = 0, kNearestNeighbour = 1, kCubic = 2, kArea = 3 }; + +// Possible JiebaMode modes +enum class JiebaMode { kMix = 0, kMp = 1, kHmm = 2 }; + +// Possible values for SPieceTokenizerOutType +enum class SPieceTokenizerOutType { kString = 0, kInt = 1 }; + +// Possible values for SPieceTokenizerLoadType +enum class SPieceTokenizerLoadType { kFile = 0, kModel = 1 }; + +// Possible values for NormalizeForm +enum class NormalizeForm { + kNone = 0, + kNfc, + kNfkc, + kNfd, + kNfkd, +}; + +// convenience functions for 32bit int bitmask +inline bool BitTest(uint32_t bits, uint32_t bitMask) { return (bits & bitMask) == bitMask; } + +inline void BitSet(uint32_t *bits, uint32_t bitMask) { *bits |= bitMask; } + +inline void BitClear(uint32_t *bits, uint32_t bitMask) { *bits &= (~bitMask); } + +constexpr int32_t kDeMaxDim = std::numeric_limits::max(); // 2147483647 or 2^31 - 1
+constexpr int32_t kDeMaxRank = std::numeric_limits::max(); +constexpr int64_t kDeMaxFreq = std::numeric_limits::max(); // 9223372036854775807 or 2^63 - 1 +constexpr int64_t kDeMaxTopk = std::numeric_limits::max(); + +constexpr uint32_t kCfgRowsPerBuffer = 1; +constexpr uint32_t kCfgParallelWorkers = 4; +constexpr uint32_t kCfgWorkerConnectorSize = 16; +constexpr uint32_t kCfgOpConnectorSize = 16; +constexpr uint32_t kCfgDefaultSeed = std::mt19937::default_seed; +constexpr uint32_t kCfgMonitorSamplingInterval = 10; +constexpr uint32_t kCfgCallbackTimeout = 60; // timeout value for callback in seconds +constexpr int32_t kCfgDefaultCachePort = 50052; +constexpr char kCfgDefaultCacheHost[] = "127.0.0.1"; +constexpr int32_t kDftPrefetchSize = 20; +constexpr int32_t kDftNumConnections = 12; + +// Invalid OpenCV type should not be from 0 to 7 (opencv4/opencv2/core/hal/interface.h) +constexpr uint8_t kCVInvalidType = 255; + +using connection_id_type = uint64_t; +using session_id_type = uint32_t; +using row_id_type = int64_t; +} // namespace dataset +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_CONSTANTS_H_ diff --git a/mindspore/ccsrc/minddata/dataset/include/minddata_eager.h b/mindspore/ccsrc/minddata/dataset/include/minddata_eager.h new file mode 100644 index 0000000000..9f20d7f60a --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/include/minddata_eager.h @@ -0,0 +1,62 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_MINDDATA_EAGER_H_ +#define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_MINDDATA_EAGER_H_ + +#include +#include +#include + +#include "include/api/status.h" +#include "include/api/types.h" +#include "minddata/dataset/include/transforms.h" +#include "minddata/dataset/include/vision.h" + +namespace mindspore { +namespace api { + +// Class to run tensor operations in eager mode +class MindDataEager { + public: + /// \brief Constructor + MindDataEager() = default; + + /// \brief Constructor + /// \param[in] ops Transforms to be applied + explicit MindDataEager(std::vector> ops); + + /// \brief Destructor + ~MindDataEager() = default; + + /// \brief Function to read images from local directory + /// \param[in] image_dir Target directory which contains images + /// \param[out] images Vector of image Tensor + /// \return FAILED status if the directory is missing, unreadable or yields no images, SUCCESS otherwise + static Status LoadImageFromDir(const std::string &image_dir, std::vector> *images); + + /// \brief Callable function to execute the TensorOperation in eager mode + /// \param[in] input Tensor to be transformed + /// \return Output tensor, nullptr if validation or Compute fails + std::shared_ptr operator()(std::shared_ptr input); + + private: + std::vector> ops_; +}; + +} // namespace api +} // namespace mindspore +#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_MINDDATA_EAGER_H_ diff --git a/mindspore/ccsrc/minddata/dataset/include/transforms.h b/mindspore/ccsrc/minddata/dataset/include/transforms.h index 208976fc52..d3e4538ba1 100644 --- a/mindspore/ccsrc/minddata/dataset/include/transforms.h +++ b/mindspore/ccsrc/minddata/dataset/include/transforms.h @@ -20,8 +20,8 @@ #include #include #include -#include "minddata/dataset/core/constants.h" -#include "minddata/dataset/util/status.h" +#include "minddata/dataset/include/constants.h" +#include
"minddata/dataset/include/status.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/minddata/dataset/include/vision.h b/mindspore/ccsrc/minddata/dataset/include/vision.h index edc5977623..1e4527166c 100644 --- a/mindspore/ccsrc/minddata/dataset/include/vision.h +++ b/mindspore/ccsrc/minddata/dataset/include/vision.h @@ -23,10 +23,10 @@ #include #include -#include "minddata/dataset/core/constants.h" +#include "minddata/dataset/include/constants.h" #include "minddata/dataset/include/transforms.h" #include "minddata/dataset/include/vision_lite.h" -#include "minddata/dataset/util/status.h" +#include "minddata/dataset/include/status.h" namespace mindspore { namespace dataset { diff --git a/tests/cxx_st/CMakeLists.txt b/tests/cxx_st/CMakeLists.txt index 982445eacd..32d8c749a0 100644 --- a/tests/cxx_st/CMakeLists.txt +++ b/tests/cxx_st/CMakeLists.txt @@ -1,11 +1,12 @@ include_directories(${PYTHON_INCLUDE_DIRS}) include_directories(${MS_CCSRC_PATH}) -include_directories(${CMAKE_SOURCE_DIR}/mindspore/core) include_directories(${CMAKE_CURRENT_SOURCE_DIR}) include_directories(${CMAKE_CURRENT_SOURCE_DIR}/stub/runtime/) +include_directories(${CMAKE_SOURCE_DIR}/mindspore/ccsrc) +include_directories(${CMAKE_SOURCE_DIR}/mindspore/core) include_directories(${CMAKE_BINARY_DIR}) include_directories(${CUDA_INCLUDE_DIRS}) file(GLOB_RECURSE CXX_ST_SRC RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *.cc) add_executable(st_tests ${CXX_ST_SRC}) -target_link_libraries(st_tests PRIVATE mindspore_shared_lib mindspore::gtest) +target_link_libraries(st_tests PRIVATE mindspore_shared_lib _c_dataengine mindspore::gtest) diff --git a/tests/cxx_st/dataset/test_de.cc b/tests/cxx_st/dataset/test_de.cc new file mode 100644 index 0000000000..2ae83c0e04 --- /dev/null +++ b/tests/cxx_st/dataset/test_de.cc @@ -0,0 +1,49 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in 
compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include +#include "common/common_test.h" +#include "include/api/types.h" +#include "minddata/dataset/include/minddata_eager.h" +#include "minddata/dataset/include/vision.h" +#include "minddata/dataset/kernels/tensor_op.h" + +using namespace mindspore::api; +using namespace mindspore::dataset::vision; + +class TestDE : public ST::Common { + public: + TestDE() {} +}; + +TEST_F(TestDE, Test1) { + std::vector> images; + MindDataEager::LoadImageFromDir("/home/eager/apple", &images); + + MindDataEager Compose({Decode(), + Resize({224, 224}), + Normalize({0.485 * 255, 0.456 * 255, 0.406 * 255}, {0.229 * 255, 0.224 * 255, 0.225 * 255}), + HWC2CHW()}); + + for (auto &img : images) { + img = Compose(img); + } + + ASSERT_EQ(images[0]->Shape().size(), 3); + ASSERT_EQ(images[0]->Shape()[0], 3); + ASSERT_EQ(images[0]->Shape()[1], 224); + ASSERT_EQ(images[0]->Shape()[2], 224); +} diff --git a/tests/cxx_st/runtest.sh b/tests/cxx_st/runtest.sh new file mode 100755 index 0000000000..31bfc85100 --- /dev/null +++ b/tests/cxx_st/runtest.sh @@ -0,0 +1,43 @@ +#!/bin/bash +# Copyright 2019 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +set -e +BASEPATH=$(cd "$(dirname $0)"; pwd) +PROJECT_PATH=${BASEPATH}/../.. +if [ $BUILD_PATH ];then + echo "BUILD_PATH = $BUILD_PATH" +else + BUILD_PATH=${PROJECT_PATH}/build + echo "BUILD_PATH = $BUILD_PATH" +fi + +cd ${BUILD_PATH}/mindspore/tests/cxx_st2 + +export LD_LIBRARY_PATH=${BUILD_PATH}/mindspore/googletest/googlemock/gtest:${PROJECT_PATH}/mindspore:${PROJECT_PATH}/mindspore/lib:$LD_LIBRARY_PATH +export PYTHONPATH=${PROJECT_PATH}/tests/ut/cpp/python_input:$PYTHONPATH:${PROJECT_PATH} +export GLOG_v=2 +export GC_COLLECT_IN_CELL=1 + + +if [ $# -gt 0 ]; then + ./st_tests --gtest_filter=$1 +else + ./st_tests +fi +RET=$? +cd - + +exit ${RET}