Merge pull request !31 from changzherui/syn-codetags/v0.3.0-alpha
| @@ -105,11 +105,11 @@ When reporting issues, refer to this format: | |||
| * If it is a new feature that needs lots of design details, a design proposal should also be submitted. | |||
| * After reaching consensus in the issue discussions and design proposal reviews, complete the development on the forked repo and submit a PR. | |||
| * No PR is permitted to be merged until it receives **2+ LGTM** from approvers. Please NOTICE that an approver is NOT allowed to add *LGTM* to his own PR. | |||
| * After PR is sufficiently discussed, it will get merged, abondoned or rejected depending on the outcome of the discussion. | |||
| * After PR is sufficiently discussed, it will get merged, abandoned or rejected depending on the outcome of the discussion. | |||
| **PRs advisory:** | |||
| - Any irrelevant changes should be avoided. | |||
| - Make sure your commit history is ordered. | |||
| - Always keep your branch up to date with the master branch. | |||
| - For bug-fix PRs, make sure all related issues are linked. | |||
| - For bug-fix PRs, make sure all related issues are linked. | |||
| @@ -129,7 +129,7 @@ Check out how MindSpore Open Governance [works](https://gitee.com/mindspore/comm | |||
| - [MindSpore Slack](https://join.slack.com/t/mindspore/shared_invite/enQtOTcwMTIxMDI3NjM0LTNkMWM2MzI5NjIyZWU5ZWQ5M2EwMTQ5MWNiYzMxOGM4OWFhZjI4M2E5OGI2YTg3ODU1ODE2Njg1MThiNWI3YmQ) - Communication platform for developers. | |||
| - IRC channel at `#mindspore` (only for meeting minutes logging purpose) | |||
| - Video Conferencing: meet.jit.si | |||
| - Mailing-list: https://mailweb.mindspore.cn/postorius/lists | |||
| - Mailing-list: https://mailweb.mindspore.cn/postorius/lists | |||
| ## Contributing | |||
| @@ -70,4 +70,4 @@ | |||
| * [MindSpore Official Website](https://www.mindspore.cn/) | |||
| * [MindInsight Visualization Debugging and Optimization](https://gitee.com/mindspore/mindinsight) | |||
| * [MindArmour Model Security Hardening Package](https://gitee.com/mindspore/mindarmour) | |||
| * [GraphEngine Computational Graph Engine](https://gitee.com/mindspore/graphengine) | |||
| * [GraphEngine Computational Graph Engine](https://gitee.com/mindspore/graphengine) | |||
| @@ -368,7 +368,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | |||
| THE SOFTWARE. | |||
| Software: MKL-DNN 1.1.2 | |||
| Software: oneDNN 1.1.2 | |||
| Copyright (c) 2009-2018 The MathJax Consortium | |||
| Copyright 2018 Intel Corporation | |||
| Copyright 2019 Intel Corporation | |||
| @@ -26,7 +26,7 @@ usage() | |||
| echo "Usage:" | |||
| echo "bash build.sh [-d] [-r] [-v] [-c on|off] [-t on|off] [-g on|off] [-h] [-s] [-b ge|cpu] [-m infer|train] \\" | |||
| echo " [-a on|off] [-g on|off] [-p on|off] [-i] [-L] [-R] [-D on|off] [-j[n]] [-e gpu|d|cpu] \\" | |||
| echo " [-P on|off] [-z] [-M on|off] [-V 9.2|10.1] [-I] [-K]" | |||
| echo " [-P on|off] [-z [on|off]] [-M on|off] [-V 9.2|10.1] [-I] [-K]" | |||
| echo "" | |||
| echo "Options:" | |||
| echo " -d Debug mode" | |||
| @@ -50,8 +50,8 @@ usage() | |||
| echo " -P Enable dump anf graph to file in ProtoBuffer format, default on" | |||
| echo " -Q Enable dump end to end, default off" | |||
| echo " -D Enable dumping of function graph ir, default on" | |||
| echo " -z Compile dataset & mindrecord, default off" | |||
| echo " -M Enable MPI and NCCL for GPU training, default off" | |||
| echo " -z Compile dataset & mindrecord, default on" | |||
| echo " -M Enable MPI and NCCL for GPU training, default on" | |||
| echo " -V Specify the minimum required cuda version, default CUDA 9.2" | |||
| echo " -I Compile predict, default off" | |||
| echo " -K Compile with AKG, default off" | |||
| @@ -88,8 +88,8 @@ checkopts() | |||
| ENABLE_DUMP2PROTO="on" | |||
| ENABLE_DUMPE2E="off" | |||
| ENABLE_DUMP_IR="on" | |||
| COMPILE_MINDDATA="off" | |||
| ENABLE_MPI="off" | |||
| COMPILE_MINDDATA="on" | |||
| ENABLE_MPI="on" | |||
| CUDA_VERSION="9.2" | |||
| COMPILE_PREDICT="off" | |||
| USE_GLOG="on" | |||
| @@ -177,7 +177,7 @@ checkopts() | |||
| if [[ "X$OPTARG" == "Xgpu" ]]; then | |||
| ENABLE_GPU="on" | |||
| ENABLE_CPU="on" | |||
| elif [[ "X$OPTARG" == "Xd" ]]; then | |||
| elif [[ "X$OPTARG" == "Xd" || "X$OPTARG" == "Xascend" ]]; then | |||
| ENABLE_D="on" | |||
| ENABLE_CPU="on" | |||
| elif [[ "X$OPTARG" == "Xcpu" ]]; then | |||
| @@ -216,7 +216,17 @@ checkopts() | |||
| echo "enable dump function graph ir" | |||
| ;; | |||
| z) | |||
| COMPILE_MINDDATA="on" | |||
| eval ARG=\$\{$OPTIND\} | |||
| if [[ -n $ARG && $ARG != -* ]]; then | |||
| OPTARG=$ARG | |||
| check_on_off $OPTARG z | |||
| OPTIND=$((OPTIND + 1)) | |||
| else | |||
| OPTARG="" | |||
| fi | |||
| if [[ "X$OPTARG" == "Xoff" ]]; then | |||
| COMPILE_MINDDATA="off" | |||
| fi | |||
| ;; | |||
| I) | |||
| COMPILE_PREDICT="on" | |||
| @@ -452,8 +462,10 @@ if [[ "X$INC_BUILD" = "Xoff" ]]; then | |||
| bash "${PROJECT_PATH}/package.sh" ge | |||
| elif [[ "X$ENABLE_GPU" = "Xon" ]]; then | |||
| bash "${PROJECT_PATH}/package.sh" ms gpu | |||
| elif [[ "X$ENABLE_D" = "Xon" ]] || [[ "X$ENABLE_CPU" = "Xon" ]]; then | |||
| bash "${PROJECT_PATH}/package.sh" ms | |||
| elif [[ "X$ENABLE_D" = "Xon" ]]; then | |||
| bash "${PROJECT_PATH}/package.sh" ms ascend | |||
| elif [[ "X$ENABLE_CPU" = "Xon" ]]; then | |||
| bash "${PROJECT_PATH}/package.sh" ms cpu | |||
| else | |||
| bash "${PROJECT_PATH}/package.sh" debug | |||
| fi | |||
| @@ -39,7 +39,11 @@ elseif (DEFINED ENV{D_LINK_PATH}) | |||
| find_library(resource libresource.so ${GE_LIB_PATH}) | |||
| else() | |||
| # Ascend mode | |||
| set(ASCEND_PATH /usr/local/Ascend) | |||
| if(DEFINED ENV{ASCEND_CUSTOM_PATH}) | |||
| set(ASCEND_PATH $ENV{ASCEND_CUSTOM_PATH}) | |||
| else() | |||
| set(ASCEND_PATH /usr/local/Ascend) | |||
| endif() | |||
| set(ASCEND_DRIVER_PATH ${ASCEND_PATH}/driver/lib64/common) | |||
| set(ASCEND_RUNTIME_PATH ${ASCEND_PATH}/fwkacllib/lib64) | |||
| find_library(c_sec libc_sec.so ${ASCEND_DRIVER_PATH}) | |||
| @@ -1,11 +1,11 @@ | |||
| set(mkl_dnn_CXXFLAGS "-D_FORTIFY_SOURCE=2 -O2") | |||
| set(mkl_dnn_CFLAGS "-D_FORTIFY_SOURCE=2 -O2") | |||
| mindspore_add_pkg(mkl_dnn | |||
| VER 1.1.1 | |||
| set(onednn_CXXFLAGS "-D_FORTIFY_SOURCE=2 -O2") | |||
| set(onednn_CFLAGS "-D_FORTIFY_SOURCE=2 -O2") | |||
| mindspore_add_pkg(onednn | |||
| VER 1.1.2 | |||
| LIBS dnnl mkldnn | |||
| URL https://github.com/intel/mkl-dnn/archive/v1.1.1.tar.gz | |||
| MD5 d6a422b00459600bdc22242590953f38 | |||
| URL https://github.com/oneapi-src/oneDNN/archive/v1.1.2.tar.gz | |||
| MD5 ab40d52230f3ad1d7a6f06ce0f6bc17a | |||
| CMAKE_OPTION -DDNNL_ARCH_OPT_FLAGS='' -DDNNL_CPU_RUNTIME='SEQ' -DDNNL_BUILD_EXAMPLES=OFF -DDNNL_BUILD_TESTS=OFF) | |||
| include_directories(${mkl_dnn_INC}) | |||
| add_library(mindspore::dnnl ALIAS mkl_dnn::dnnl) | |||
| add_library(mindspore::mkldnn ALIAS mkl_dnn::mkldnn) | |||
| include_directories(${onednn_INC}) | |||
| add_library(mindspore::dnnl ALIAS onednn::dnnl) | |||
| add_library(mindspore::mkldnn ALIAS onednn::mkldnn) | |||
| @@ -29,11 +29,11 @@ if (ENABLE_GPU) | |||
| include(${CMAKE_SOURCE_DIR}/cmake/external_libs/dmlc_core.cmake) | |||
| include(${CMAKE_SOURCE_DIR}/cmake/external_libs/rang.cmake) | |||
| include(${CMAKE_SOURCE_DIR}/cmake/external_libs/tvm_gpu.cmake) | |||
| endif() | |||
| if (ENABLE_MPI) | |||
| include(${CMAKE_SOURCE_DIR}/cmake/external_libs/nccl.cmake) | |||
| include(${CMAKE_SOURCE_DIR}/cmake/external_libs/ompi.cmake) | |||
| if (ENABLE_MPI) | |||
| include(${CMAKE_SOURCE_DIR}/cmake/external_libs/nccl.cmake) | |||
| include(${CMAKE_SOURCE_DIR}/cmake/external_libs/ompi.cmake) | |||
| endif() | |||
| endif() | |||
| if (ENABLE_GE) | |||
| @@ -40,6 +40,8 @@ else() | |||
| set(JOBS 8) | |||
| if (${JOBS} GREATER ${N}) | |||
| set(THNUM ${N}) | |||
| else() | |||
| set(THNUM ${JOBS}) | |||
| endif() | |||
| endif () | |||
| message("set make thread num: ${THNUM}") | |||
| @@ -67,7 +67,7 @@ if __name__ == '__main__': | |||
| parser.add_argument("--distribute", type=bool, default=False, help="Run distribute, default is false.") | |||
| parser.add_argument("--device_id", type=int, default=0, help="Device id, default is 0.") | |||
| parser.add_argument("--device_num", type=int, default=1, help="Use device nums, default is 1.") | |||
| parser.add_argument("--mode", type=str, default="graph", help="Run graph mode or feed mode, default is graph") | |||
| parser.add_argument("--mode", type=str, default="sink", help="Run sink mode or not, default is sink") | |||
| parser.add_argument("--epoch_size", type=int, default=10, help="Epoch size, default is 10") | |||
| parser.add_argument("--batch_size", type=int, default=32, help="Batch size, default is 32.") | |||
| parser.add_argument("--checkpoint_path", type=str, default="", help="Checkpoint file path") | |||
| @@ -150,8 +150,8 @@ if __name__ == '__main__': | |||
| model = Model(net) | |||
| dataset_sink_mode = False | |||
| if args_opt.mode == "graph": | |||
| print("In graph mode, one epoch return a loss.") | |||
| if args_opt.mode == "sink": | |||
| print("In sink mode, one epoch return a loss.") | |||
| dataset_sink_mode = True | |||
| print("Start train YOLOv3, the first epoch will be slower because of the graph compilation.") | |||
| model.train(args_opt.epoch_size, dataset, callbacks=callback, dataset_sink_mode=dataset_sink_mode) | |||
| @@ -132,6 +132,7 @@ file(GLOB_RECURSE MINDSPORE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} | |||
| "kernel/kash/*.cc" | |||
| "device/kernel_info.cc" | |||
| "device/kernel_runtime.cc" | |||
| "device/memory_manager.cc" | |||
| "device/kernel_runtime_manager.cc" | |||
| "device/convert_tensor_utils.cc" | |||
| "pre_activate/common/*.cc" | |||
| @@ -295,7 +296,11 @@ if(ENABLE_D) | |||
| endif() | |||
| else() | |||
| MESSAGE("use system default lib") | |||
| set(ASCEND_PATH /usr/local/Ascend) | |||
| if(DEFINED ENV{ASCEND_CUSTOM_PATH}) | |||
| set(ASCEND_PATH $ENV{ASCEND_CUSTOM_PATH}) | |||
| else() | |||
| set(ASCEND_PATH /usr/local/Ascend) | |||
| endif() | |||
| set(ASCEND_DRIVER_PATH ${ASCEND_PATH}/driver/lib64/common) | |||
| set(ASCEND_RUNTIME_PATH ${ASCEND_PATH}/fwkacllib/lib64) | |||
| endif() | |||
| @@ -499,7 +504,11 @@ add_dependencies(add_ms_lib _c_expression) | |||
| if (NOT ENABLE_GE) | |||
| if (ENABLE_D) | |||
| set(ASCEND_PATH /usr/local/Ascend) | |||
| if(DEFINED ENV{ASCEND_CUSTOM_PATH}) | |||
| set(ASCEND_PATH $ENV{ASCEND_CUSTOM_PATH}) | |||
| else() | |||
| set(ASCEND_PATH /usr/local/Ascend) | |||
| endif() | |||
| set(ASCEND_DRIVER_PATH ${ASCEND_PATH}/driver/lib64/common) | |||
| add_custom_target(add_ge_lib ALL | |||
| COMMAND cp ${MS_CCSRC_BUILD_PATH}/../../graphengine/src/common/graph/libgraph.so ${MS_LIB_PATH} | |||
| @@ -542,7 +551,7 @@ endif() | |||
| if (ENABLE_CPU) | |||
| add_custom_target(add_cpu_lib ALL | |||
| COMMAND cp ${mkl_dnn_LIBPATH}/libdnnl.so.1.1 ${MS_LIB_PATH}/libdnnl.so.1 | |||
| COMMAND cp ${onednn_LIBPATH}/libdnnl.so.1.1 ${MS_LIB_PATH}/libdnnl.so.1 | |||
| ) | |||
| add_dependencies(add_cpu_lib add_ms_lib) | |||
| endif() | |||
| @@ -17,8 +17,6 @@ if (ENABLE_TDTQUE) | |||
| message(STATUS "TDT queue is enabled") | |||
| endif () | |||
| add_definitions(-D ENABLE_MINDRECORD) | |||
| # code coverage | |||
| # option(ENABLE_COVERAGE "Enable code coverage report" OFF) | |||
| # if (ENABLE_COVERAGE) | |||
| @@ -23,17 +23,14 @@ | |||
| #include "dataset/engine/datasetops/source/image_folder_op.h" | |||
| #include "dataset/engine/datasetops/source/mnist_op.h" | |||
| #include "dataset/engine/datasetops/source/voc_op.h" | |||
| #include "dataset/util/make_unique.h" | |||
| #include "dataset/core/tensor.h" | |||
| #include "dataset/engine/dataset_iterator.h" | |||
| #include "dataset/engine/datasetops/source/manifest_op.h" | |||
| #include "dataset/engine/datasetops/source/cifar_op.h" | |||
| #include "dataset/engine/datasetops/source/celeba_op.h" | |||
| #ifdef ENABLE_MINDRECORD | |||
| #include "./shard_category.h" | |||
| #include "./shard_sample.h" | |||
| #include "./shard_shuffle.h" | |||
| #endif | |||
| #include "mindrecord/include/shard_category.h" | |||
| #include "mindrecord/include/shard_sample.h" | |||
| #include "mindrecord/include/shard_shuffle.h" | |||
| #include "dataset/util/random.h" | |||
| #include "dataset/util/status.h" | |||
| @@ -46,9 +43,7 @@ using pFunction = Status (DEPipeline::*)(const py::dict &, std::shared_ptr<Datas | |||
| static std::unordered_map<uint32_t, pFunction> g_parse_op_func_ = {{kStorage, &DEPipeline::ParseStorageOp}, | |||
| {kShuffle, &DEPipeline::ParseShuffleOp}, | |||
| #ifdef ENABLE_MINDRECORD | |||
| {kMindrecord, &DEPipeline::ParseMindRecordOp}, | |||
| #endif | |||
| {kMap, &DEPipeline::ParseMapOp}, | |||
| {kBatch, &DEPipeline::ParseBatchOp}, | |||
| {kRepeat, &DEPipeline::ParseRepeatOp}, | |||
| @@ -123,7 +118,7 @@ Status DEPipeline::AssignRootNode(const DsOpPtr &dataset_op) { return (tree_->As | |||
| Status DEPipeline::LaunchTreeExec() { | |||
| RETURN_IF_NOT_OK(tree_->Prepare()); | |||
| RETURN_IF_NOT_OK(tree_->Launch()); | |||
| iterator_ = make_unique<DatasetIterator>(tree_); | |||
| iterator_ = std::make_unique<DatasetIterator>(tree_); | |||
| if (iterator_ == nullptr) RETURN_STATUS_UNEXPECTED("Cannot create an Iterator."); | |||
| return Status::OK(); | |||
| } | |||
| @@ -311,7 +306,7 @@ Status DEPipeline::ParseStorageOp(const py::dict &args, std::shared_ptr<DatasetO | |||
| if (!args["schema"].is_none()) { | |||
| (void)builder->SetSchemaFile(ToString(args["schema"])); | |||
| } else if (!args["schema_json_string"].is_none()) { | |||
| std::unique_ptr<DataSchema> schema = make_unique<DataSchema>(); | |||
| std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>(); | |||
| std::string s = ToString(args["schema_json_string"]); | |||
| RETURN_IF_NOT_OK(schema->LoadSchemaString(s, std::vector<std::string>())); | |||
| (void)builder->SetNumRows(schema->num_rows()); | |||
| @@ -364,7 +359,6 @@ Status DEPipeline::ParseShuffleOp(const py::dict &args, std::shared_ptr<DatasetO | |||
| return Status::OK(); | |||
| } | |||
| #ifdef ENABLE_MINDRECORD | |||
| Status DEPipeline::CheckMindRecordPartitionInfo(const py::dict &args, std::vector<int> *in_partitions) { | |||
| if (args["partitions"].is_none()) { | |||
| std::string err_msg = "Error: partitions is not set (None)"; | |||
| @@ -450,7 +444,6 @@ Status DEPipeline::ParseMindRecordOp(const py::dict &args, std::shared_ptr<Datas | |||
| *ptr = op; | |||
| return Status::OK(); | |||
| } | |||
| #endif | |||
| Status DEPipeline::ParseMapOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr) { | |||
| std::shared_ptr<MapOp::Builder> builder = std::make_shared<MapOp::Builder>(); | |||
| @@ -689,7 +682,7 @@ Status DEPipeline::ParseTFReaderOp(const py::dict &args, std::shared_ptr<Dataset | |||
| } | |||
| } | |||
| if (schema_exists) { | |||
| std::unique_ptr<DataSchema> schema = make_unique<DataSchema>(); | |||
| std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>(); | |||
| if (args.contains("schema_file_path")) { | |||
| RETURN_IF_NOT_OK(schema->LoadSchemaFile(ToString(args["schema_file_path"]), columns_to_load)); | |||
| } else { | |||
| @@ -38,9 +38,7 @@ using DsOpPtr = std::shared_ptr<DatasetOp>; | |||
| enum OpName { | |||
| kStorage = 0, | |||
| kShuffle, | |||
| #ifdef ENABLE_MINDRECORD | |||
| kMindrecord, | |||
| #endif | |||
| kBatch, | |||
| kCache, | |||
| kRepeat, | |||
| @@ -101,11 +99,9 @@ class DEPipeline { | |||
| Status ParseShuffleOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr); | |||
| #ifdef ENABLE_MINDRECORD | |||
| Status CheckMindRecordPartitionInfo(const py::dict &args, std::vector<int> *ptr); | |||
| Status ParseMindRecordOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr); | |||
| #endif | |||
| Status ParseMapOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr); | |||
| @@ -44,9 +44,7 @@ | |||
| #include "dataset/engine/datasetops/source/io_block.h" | |||
| #include "dataset/engine/datasetops/source/mnist_op.h" | |||
| #include "dataset/engine/datasetops/source/manifest_op.h" | |||
| #ifdef ENABLE_MINDRECORD | |||
| #include "dataset/engine/datasetops/source/mindrecord_op.h" | |||
| #endif | |||
| #include "dataset/engine/datasetops/source/sampler/distributed_sampler.h" | |||
| #include "dataset/engine/datasetops/source/sampler/pk_sampler.h" | |||
| #include "dataset/engine/datasetops/source/sampler/random_sampler.h" | |||
| @@ -146,14 +144,12 @@ void bindDatasetOps(py::module *m) { | |||
| return py::make_tuple(count, num_classes); | |||
| }); | |||
| #ifdef ENABLE_MINDRECORD | |||
| (void)py::class_<MindRecordOp, DatasetOp, std::shared_ptr<MindRecordOp>>(*m, "MindRecordOp") | |||
| .def_static("get_num_rows", [](const std::string &path) { | |||
| int64_t count = 0; | |||
| THROW_IF_ERROR(MindRecordOp::CountTotalRows(path, &count)); | |||
| return count; | |||
| }); | |||
| #endif | |||
| (void)py::class_<ManifestOp, DatasetOp, std::shared_ptr<ManifestOp>>(*m, "ManifestOp") | |||
| .def_static("get_num_rows_and_classes", | |||
| @@ -424,9 +420,7 @@ PYBIND11_MODULE(_c_dataengine, m) { | |||
| .value("STORAGE", OpName::kStorage) | |||
| .value("SHUFFLE", OpName::kShuffle) | |||
| .value("BATCH", OpName::kBatch) | |||
| #ifdef ENABLE_MINDRECORD | |||
| .value("MINDRECORD", OpName::kMindrecord) | |||
| #endif | |||
| .value("CACHE", OpName::kCache) | |||
| .value("REPEAT", OpName::kRepeat) | |||
| .value("TAKE", OpName::kTake) | |||
| @@ -55,9 +55,9 @@ Status GlobalContext::Init() { | |||
| // For testing we can use Dummy pool instead | |||
| // Create some tensor allocators for the different types and hook them into the pool. | |||
| tensor_allocator_ = mindspore::make_unique<Allocator<Tensor>>(mem_pool_); | |||
| cv_tensor_allocator_ = mindspore::make_unique<Allocator<CVTensor>>(mem_pool_); | |||
| int_allocator_ = mindspore::make_unique<IntAlloc>(mem_pool_); | |||
| tensor_allocator_ = std::make_unique<Allocator<Tensor>>(mem_pool_); | |||
| cv_tensor_allocator_ = std::make_unique<Allocator<CVTensor>>(mem_pool_); | |||
| int_allocator_ = std::make_unique<IntAlloc>(mem_pool_); | |||
| return Status::OK(); | |||
| } | |||
| @@ -28,7 +28,6 @@ | |||
| #include "dataset/core/global_context.h" | |||
| #include "dataset/core/pybind_support.h" | |||
| #include "dataset/core/tensor_shape.h" | |||
| #include "dataset/util/make_unique.h" | |||
| namespace py = pybind11; | |||
| namespace mindspore { | |||
| @@ -53,7 +52,7 @@ namespace dataset { | |||
| Tensor::Tensor(const TensorShape &shape, const DataType &type) : shape_(shape), type_(type), data_(nullptr) { | |||
| // grab the mem pool from global context and create the allocator for char data area | |||
| std::shared_ptr<MemoryPool> global_pool = GlobalContext::Instance()->mem_pool(); | |||
| data_allocator_ = mindspore::make_unique<Allocator<unsigned char>>(global_pool); | |||
| data_allocator_ = std::make_unique<Allocator<unsigned char>>(global_pool); | |||
| } | |||
| Tensor::Tensor(const TensorShape &shape, const DataType &type, const unsigned char *data) : Tensor(shape, type) { | |||
| @@ -137,7 +136,7 @@ Status Tensor::CreateTensor(std::shared_ptr<Tensor> *ptr, py::array arr) { | |||
| if ((*ptr)->type_ == DataType::DE_UNKNOWN) RETURN_STATUS_UNEXPECTED("Invalid data type."); | |||
| std::shared_ptr<MemoryPool> global_pool = GlobalContext::Instance()->mem_pool(); | |||
| (*ptr)->data_allocator_ = mindspore::make_unique<Allocator<unsigned char>>(global_pool); | |||
| (*ptr)->data_allocator_ = std::make_unique<Allocator<unsigned char>>(global_pool); | |||
| static_cast<void>((*ptr)->StartAddr()); | |||
| int64_t byte_size = (*ptr)->SizeInBytes(); | |||
| unsigned char *data = static_cast<unsigned char *>(arr.request().ptr); | |||
| @@ -40,7 +40,7 @@ Status DataBuffer::CreateDataBuffer( | |||
| case DatasetType::kTf: { | |||
| // This type of buffer is for TF record data. | |||
| // Allocate derived class version for a TF buffers | |||
| new_data_buffer = mindspore::make_unique<TFBuffer>(id, kDeBFlagNone, storage_client); | |||
| new_data_buffer = std::make_unique<TFBuffer>(id, kDeBFlagNone, storage_client); | |||
| break; | |||
| } | |||
| default: { | |||
| @@ -26,8 +26,8 @@ | |||
| #include "common/utils.h" | |||
| #include "dataset/util/status.h" | |||
| #include "dataset/core/tensor_shape.h" | |||
| #include "dataset/util/make_unique.h" | |||
| #include "utils/log_adapter.h" | |||
| #include "dataset/util/de_error.h" | |||
| namespace mindspore { | |||
| namespace dataset { | |||
| @@ -58,7 +58,7 @@ ColDescriptor::ColDescriptor(const std::string &col_name, DataType col_type, Ten | |||
| // our shape. Otherwise, set our shape to be empty. | |||
| if (in_shape != nullptr) { | |||
| // Create a shape and copy construct it into our column's shape. | |||
| tensor_shape_ = mindspore::make_unique<TensorShape>(*in_shape); | |||
| tensor_shape_ = std::make_unique<TensorShape>(*in_shape); | |||
| } else { | |||
| tensor_shape_ = nullptr; | |||
| } | |||
| @@ -75,7 +75,7 @@ ColDescriptor::ColDescriptor(const std::string &col_name, DataType col_type, Ten | |||
| ColDescriptor::ColDescriptor(const ColDescriptor &in_cd) | |||
| : type_(in_cd.type_), rank_(in_cd.rank_), tensor_impl_(in_cd.tensor_impl_), col_name_(in_cd.col_name_) { | |||
| // If it has a tensor shape, make a copy of it with our own unique_ptr. | |||
| tensor_shape_ = in_cd.hasShape() ? mindspore::make_unique<TensorShape>(in_cd.shape()) : nullptr; | |||
| tensor_shape_ = in_cd.hasShape() ? std::make_unique<TensorShape>(in_cd.shape()) : nullptr; | |||
| } | |||
| // Assignment overload | |||
| @@ -86,7 +86,7 @@ ColDescriptor &ColDescriptor::operator=(const ColDescriptor &in_cd) { | |||
| tensor_impl_ = in_cd.tensor_impl_; | |||
| col_name_ = in_cd.col_name_; | |||
| // If it has a tensor shape, make a copy of it with our own unique_ptr. | |||
| tensor_shape_ = in_cd.hasShape() ? mindspore::make_unique<TensorShape>(in_cd.shape()) : nullptr; | |||
| tensor_shape_ = in_cd.hasShape() ? std::make_unique<TensorShape>(in_cd.shape()) : nullptr; | |||
| } | |||
| return *this; | |||
| } | |||
| @@ -59,8 +59,8 @@ Status BatchOp::operator()() { | |||
| TaskManager::FindMe()->Post(); | |||
| int32_t epoch_num = 0, batch_num = 0, cnt = 0; | |||
| TensorRow new_row; | |||
| std::unique_ptr<TensorQTable> table = make_unique<TensorQTable>(); | |||
| child_iterator_ = mindspore::make_unique<ChildIterator>(this, 0, 0); | |||
| std::unique_ptr<TensorQTable> table = std::make_unique<TensorQTable>(); | |||
| child_iterator_ = std::make_unique<ChildIterator>(this, 0, 0); | |||
| RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&new_row)); | |||
| column_name_map_ = child_iterator_->col_name_id_map(); | |||
| int32_t cur_batch_size = 0; | |||
| @@ -72,7 +72,7 @@ Status BatchOp::operator()() { | |||
| if (table->size() == static_cast<size_t>(cur_batch_size)) { | |||
| RETURN_IF_NOT_OK(worker_queues_[cnt++ % num_workers_]->EmplaceBack( | |||
| std::make_pair(std::move(table), CBatchInfo(epoch_num, batch_num++, cnt - epoch_num)))); | |||
| table = make_unique<TensorQTable>(); | |||
| table = std::make_unique<TensorQTable>(); | |||
| RETURN_IF_NOT_OK(GetBatchSize(&cur_batch_size, CBatchInfo(epoch_num, batch_num, cnt - epoch_num))); | |||
| } | |||
| RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&new_row)); | |||
| @@ -82,7 +82,7 @@ Status BatchOp::operator()() { | |||
| RETURN_IF_NOT_OK(worker_queues_[cnt++ % num_workers_]->EmplaceBack( | |||
| std::make_pair(std::move(table), CBatchInfo(epoch_num, batch_num++, cnt - epoch_num)))); | |||
| } | |||
| table = make_unique<TensorQTable>(); // this drops when drop == true | |||
| table = std::make_unique<TensorQTable>(); // this drops when drop == true | |||
| // end of the current epoch, batch_num should start from 0 again | |||
| batch_num = 0; | |||
| epoch_num++; | |||
| @@ -153,9 +153,9 @@ Status BatchOp::WorkerEntry(int32_t workerId) { | |||
| RETURN_IF_NOT_OK(worker_queues_[workerId]->PopFront(&table_pair)); | |||
| while (table_pair.second.ctrl_ != batchCtrl::kQuit) { | |||
| if (table_pair.second.ctrl_ == batchCtrl::kEOE) { | |||
| RETURN_IF_NOT_OK(out_connector_->Add(workerId, make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOE))); | |||
| RETURN_IF_NOT_OK(out_connector_->Add(workerId, std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOE))); | |||
| } else if (table_pair.second.ctrl_ == batchCtrl::kEOF) { | |||
| RETURN_IF_NOT_OK(out_connector_->Add(workerId, make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOF))); | |||
| RETURN_IF_NOT_OK(out_connector_->Add(workerId, std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOF))); | |||
| } else if (table_pair.second.ctrl_ == batchCtrl::kNoCtrl) { | |||
| std::unique_ptr<DataBuffer> db = nullptr; | |||
| RETURN_IF_NOT_OK(MakeBatchedBuffer(std::move(table_pair), &db)); | |||
| @@ -170,8 +170,8 @@ Status BatchOp::MakeBatchedBuffer(std::pair<std::unique_ptr<TensorQTable>, CBatc | |||
| std::unique_ptr<DataBuffer> *db) { | |||
| RETURN_UNEXPECTED_IF_NULL(table_pair.first); | |||
| if (!input_column_names_.empty()) RETURN_IF_NOT_OK(MapColumns(&table_pair)); // pass it through pyfunc | |||
| (*db) = make_unique<DataBuffer>(table_pair.second.batch_num_, DataBuffer::kDeBFlagNone); | |||
| std::unique_ptr<TensorQTable> dest_table = make_unique<TensorQTable>(); | |||
| (*db) = std::make_unique<DataBuffer>(table_pair.second.batch_num_, DataBuffer::kDeBFlagNone); | |||
| std::unique_ptr<TensorQTable> dest_table = std::make_unique<TensorQTable>(); | |||
| RETURN_IF_NOT_OK(BatchRows(&table_pair.first, &dest_table, table_pair.first->size())); | |||
| (*db)->set_tensor_table(std::move(dest_table)); | |||
| (*db)->set_column_name_map(column_name_map_); | |||
| @@ -80,9 +80,9 @@ void DatasetOp::CreateConnector(int32_t num_producers, int32_t num_consumers) { | |||
| MS_LOG(INFO) << "Creating connector in tree operator: " << operator_id_ << ". Producer: " << num_producers | |||
| << ". Consumer: " << num_consumers << "."; | |||
| if (oc_queue_size_ > 0) { | |||
| out_connector_ = mindspore::make_unique<DbConnector>(num_producers, // The number of producers | |||
| num_consumers, // Only one consumer (the training App) | |||
| oc_queue_size_); | |||
| out_connector_ = std::make_unique<DbConnector>(num_producers, // The number of producers | |||
| num_consumers, // Only one consumer (the training App) | |||
| oc_queue_size_); | |||
| } else { | |||
| // Some op's may choose not to have an output connector | |||
| MS_LOG(INFO) << "Bypassed connector creation for tree operator: " << operator_id_ << "."; | |||
| @@ -149,7 +149,7 @@ Status DatasetOp::GetNextInput(std::unique_ptr<DataBuffer> *p_buffer, int32_t wo | |||
| // The base class implementation simply flows the eoe message to output. Derived classes | |||
| // may override if they need to perform special eoe handling. | |||
| Status DatasetOp::EoeReceived(int32_t worker_id) { | |||
| std::unique_ptr<DataBuffer> eoe_buffer = mindspore::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOE); | |||
| std::unique_ptr<DataBuffer> eoe_buffer = std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOE); | |||
| return (out_connector_->Add(static_cast<int>(worker_id), std::move(eoe_buffer))); | |||
| } | |||
| @@ -157,7 +157,7 @@ Status DatasetOp::EoeReceived(int32_t worker_id) { | |||
| // The base class implementation simply flows the eof message to output. Derived classes | |||
| // may override if they need to perform special eof handling. | |||
| Status DatasetOp::EofReceived(int32_t worker_id) { | |||
| std::unique_ptr<DataBuffer> eof_buffer = mindspore::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOF); | |||
| std::unique_ptr<DataBuffer> eof_buffer = std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOF); | |||
| return (out_connector_->Add(static_cast<int>(worker_id), std::move(eof_buffer))); | |||
| } | |||
| @@ -225,7 +225,7 @@ Status DeviceQueueOp::SendDataToCPU() { | |||
| MS_LOG(INFO) << "Device queue, sending data to CPU."; | |||
| int64_t total_batch = 0; | |||
| std::unique_ptr<ChildIterator> child_iterator = mindspore::make_unique<ChildIterator>(this, 0, 0); | |||
| std::unique_ptr<ChildIterator> child_iterator = std::make_unique<ChildIterator>(this, 0, 0); | |||
| while (!(child_iterator->eof_handled())) { | |||
| TensorRow curr_row; | |||
| RETURN_IF_NOT_OK(child_iterator->FetchNextTensorRow(&curr_row)); | |||
| @@ -179,7 +179,7 @@ Status MapOp::WorkerEntry(int32_t worker_id) { | |||
| RETURN_IF_NOT_OK(WorkerEntryInit(in_buffer.get(), &keep_input_columns, &to_process_indices, &final_col_name_id_map, | |||
| &input_columns, &output_columns)); | |||
| std::unique_ptr<TensorQTable> new_tensor_table(mindspore::make_unique<TensorQTable>()); | |||
| std::unique_ptr<TensorQTable> new_tensor_table(std::make_unique<TensorQTable>()); | |||
| // Perform the compute function of TensorOp(s) and store the result in new_tensor_table. | |||
| RETURN_IF_NOT_OK(WorkerCompute(in_buffer.get(), to_process_indices, new_tensor_table.get(), keep_input_columns, | |||
| &input_columns, &output_columns)); | |||
| @@ -48,7 +48,7 @@ Status ParallelOp::CreateWorkerConnector(int32_t worker_connector_size) { | |||
| // Instantiate the worker connector. This is the internal connector, not the operators | |||
| // output connector. It has single master consuming from it (num producers is 1), and the number | |||
| // of workers is the defined count from the op. | |||
| worker_connector_ = mindspore::make_unique<DbConnector>(num_workers_, num_producers_, worker_connector_size); | |||
| worker_connector_ = std::make_unique<DbConnector>(num_workers_, num_producers_, worker_connector_size); | |||
| return Status::OK(); | |||
| } | |||
| @@ -79,7 +79,7 @@ Status ProjectOp::Project(std::unique_ptr<DataBuffer> *data_buffer) { | |||
| new_column_name_mapping[current_column] = i; | |||
| projected_column_indices.push_back(column_name_mapping[current_column]); | |||
| } | |||
| std::unique_ptr<TensorQTable> new_tensor_table = mindspore::make_unique<TensorQTable>(); | |||
| std::unique_ptr<TensorQTable> new_tensor_table = std::make_unique<TensorQTable>(); | |||
| while ((*data_buffer)->NumRows() > 0) { | |||
| TensorRow current_row; | |||
| RETURN_IF_NOT_OK((*data_buffer)->PopRow(¤t_row)); | |||
| @@ -84,13 +84,13 @@ Status RenameOp::operator()() { | |||
| // we got eoe, now try again until we get eof | |||
| MS_LOG(INFO) << "Rename operator EOE Received."; | |||
| RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(mindspore::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOE)))); | |||
| RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOE)))); | |||
| MS_LOG(DEBUG) << "Rename operator fetching buffer after EOE."; | |||
| RETURN_IF_NOT_OK(GetNextInput(&curr_buffer)); | |||
| } // end of while eof loop | |||
| MS_LOG(INFO) << "Rename opeerator EOF Received."; | |||
| RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(mindspore::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOF)))); | |||
| RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOF)))); | |||
| return Status::OK(); | |||
| } | |||
| @@ -70,7 +70,7 @@ ShuffleOp::ShuffleOp(int32_t shuffle_size, uint32_t shuffle_seed, int32_t op_con | |||
| rng_(shuffle_seed), | |||
| buffer_counter_(0), | |||
| rows_per_buffer_(rows_per_buffer), | |||
| shuffle_buffer_(mindspore::make_unique<TensorTable>()), | |||
| shuffle_buffer_(std::make_unique<TensorTable>()), | |||
| shuffle_last_row_idx_(0), | |||
| shuffle_buffer_state_(kShuffleStateInit) {} | |||
| @@ -90,7 +90,7 @@ Status ShuffleOp::SelfReset() { | |||
| shuffle_seed_ = distribution(random_device); | |||
| rng_ = std::mt19937_64(shuffle_seed_); | |||
| } | |||
| shuffle_buffer_ = mindspore::make_unique<TensorTable>(); | |||
| shuffle_buffer_ = std::make_unique<TensorTable>(); | |||
| buffer_counter_ = 0; | |||
| shuffle_last_row_idx_ = 0; | |||
| shuffle_buffer_state_ = kShuffleStateInit; | |||
| @@ -142,7 +142,7 @@ Status ShuffleOp::operator()() { | |||
| // Create the child iterator to fetch our data from. | |||
| int32_t worker_id = 0; | |||
| int32_t child_idx = 0; | |||
| child_iterator_ = mindspore::make_unique<ChildIterator>(this, worker_id, child_idx); | |||
| child_iterator_ = std::make_unique<ChildIterator>(this, worker_id, child_idx); | |||
| // Main operator loop | |||
| while (true) { | |||
| @@ -161,7 +161,7 @@ Status ShuffleOp::operator()() { | |||
| // Step 1) | |||
| // Create an output tensor table if one is not created yet. | |||
| if (!new_buffer_table) { | |||
| new_buffer_table = mindspore::make_unique<TensorQTable>(); | |||
| new_buffer_table = std::make_unique<TensorQTable>(); | |||
| } | |||
| // Step 2) | |||
| @@ -176,7 +176,7 @@ Status ShuffleOp::operator()() { | |||
| // and send this buffer on its way up the pipeline. Special case is if this is the | |||
| // last row then we also send it. | |||
| if (new_buffer_table->size() == rows_per_buffer_ || shuffle_last_row_idx_ == 0) { | |||
| auto new_buffer = mindspore::make_unique<DataBuffer>(buffer_counter_, DataBuffer::kDeBFlagNone); | |||
| auto new_buffer = std::make_unique<DataBuffer>(buffer_counter_, DataBuffer::kDeBFlagNone); | |||
| new_buffer->set_tensor_table(std::move(new_buffer_table)); | |||
| new_buffer->set_column_name_map(column_name_map_); | |||
| buffer_counter_++; | |||
| @@ -218,7 +218,7 @@ Status ShuffleOp::operator()() { | |||
| // Since we overloaded eoeReceived function, we are responsible to flow the EOE up the | |||
| // pipeline manually now that we are done draining the shuffle buffer | |||
| MS_LOG(INFO) << "Shuffle operator sending EOE."; | |||
| auto eoe_buffer = mindspore::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOE); | |||
| auto eoe_buffer = std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOE); | |||
| RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(eoe_buffer))); | |||
| // Do not wait for any reset to be flown down from operators above us. | |||
| @@ -40,7 +40,7 @@ Status CelebAOp::Builder::Build(std::shared_ptr<CelebAOp> *op) { | |||
| builder_sampler_ = std::make_shared<SequentialSampler>(); | |||
| } | |||
| builder_schema_ = make_unique<DataSchema>(); | |||
| builder_schema_ = std::make_unique<DataSchema>(); | |||
| RETURN_IF_NOT_OK( | |||
| builder_schema_->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1))); | |||
| // label is like this:0 1 0 0 1...... | |||
| @@ -83,7 +83,7 @@ CelebAOp::CelebAOp(int32_t num_workers, int32_t rows_per_buffer, const std::stri | |||
| col_name_map_[data_schema_->column(index).name()] = index; | |||
| } | |||
| attr_info_queue_ = make_unique<Queue<std::vector<std::string>>>(queue_size); | |||
| attr_info_queue_ = std::make_unique<Queue<std::vector<std::string>>>(queue_size); | |||
| io_block_queues_.Init(num_workers_, queue_size); | |||
| } | |||
| @@ -311,7 +311,7 @@ Status CelebAOp::AddIOBlock(std::unique_ptr<DataBuffer> *data_buffer) { | |||
| row_count++; | |||
| if (row_count % rows_per_buffer_ == 0) { | |||
| RETURN_IF_NOT_OK(io_block_queues_[buff_count++ % num_workers_]->Add( | |||
| make_unique<IOBlock>(IOBlock(keys, IOBlock::kDeIoBlockNone)))); | |||
| std::make_unique<IOBlock>(IOBlock(keys, IOBlock::kDeIoBlockNone)))); | |||
| keys.clear(); | |||
| } | |||
| } | |||
| @@ -320,21 +320,21 @@ Status CelebAOp::AddIOBlock(std::unique_ptr<DataBuffer> *data_buffer) { | |||
| if (!keys.empty()) { | |||
| RETURN_IF_NOT_OK(io_block_queues_[(buff_count++) % num_workers_]->Add( | |||
| make_unique<IOBlock>(IOBlock(keys, IOBlock::kDeIoBlockNone)))); | |||
| std::make_unique<IOBlock>(IOBlock(keys, IOBlock::kDeIoBlockNone)))); | |||
| } | |||
| if (!BitTest(op_ctrl_flags_, kDeOpRepeated) || BitTest(op_ctrl_flags_, kDeOpLastRepeat)) { | |||
| RETURN_IF_NOT_OK( | |||
| io_block_queues_[(buff_count++) % num_workers_]->Add(make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEoe))); | |||
| io_block_queues_[(buff_count++) % num_workers_]->Add(std::make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEoe))); | |||
| RETURN_IF_NOT_OK( | |||
| io_block_queues_[(buff_count++) % num_workers_]->Add(make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEof))); | |||
| io_block_queues_[(buff_count++) % num_workers_]->Add(std::make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEof))); | |||
| for (int32_t i = 0; i < num_workers_; i++) { | |||
| RETURN_IF_NOT_OK( | |||
| io_block_queues_[i]->Add(std::move(make_unique<IOBlock>(std::vector<int64_t>(), IOBlock::kDeIoBlockNone)))); | |||
| io_block_queues_[i]->Add(std::make_unique<IOBlock>(std::vector<int64_t>(), IOBlock::kDeIoBlockNone))); | |||
| } | |||
| return Status::OK(); | |||
| } else { // not the last repeat. Acquire lock, sleeps master thread, wait for the wake-up from reset | |||
| RETURN_IF_NOT_OK( | |||
| io_block_queues_[(buff_count++) % num_workers_]->Add(make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEoe))); | |||
| io_block_queues_[(buff_count++) % num_workers_]->Add(std::make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEoe))); | |||
| RETURN_IF_NOT_OK(wp_.Wait()); // Master thread goes to sleep after it has made all the IOBlocks | |||
| wp_.Clear(); | |||
| RETURN_IF_NOT_OK(sampler_->GetNextBuffer(data_buffer)); | |||
| @@ -349,17 +349,17 @@ Status CelebAOp::WorkerEntry(int32_t worker_id) { | |||
| RETURN_IF_NOT_OK(io_block_queues_[worker_id]->PopFront(&io_block)); | |||
| while (io_block != nullptr) { | |||
| if (io_block->eoe() == true) { | |||
| RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::move(make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOE)))); | |||
| RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOE))); | |||
| buffer_id = worker_id; | |||
| } else if (io_block->eof() == true) { | |||
| RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::move(make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOF)))); | |||
| RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOF))); | |||
| } else { | |||
| std::vector<int64_t> keys; | |||
| RETURN_IF_NOT_OK(io_block->GetKeys(&keys)); | |||
| if (keys.empty()) { | |||
| return Status::OK(); // empty key is a quit signal for workers | |||
| } | |||
| std::unique_ptr<DataBuffer> db = make_unique<DataBuffer>(buffer_id, DataBuffer::kDeBFlagNone); | |||
| std::unique_ptr<DataBuffer> db = std::make_unique<DataBuffer>(buffer_id, DataBuffer::kDeBFlagNone); | |||
| RETURN_IF_NOT_OK(LoadBuffer(keys, &db)); | |||
| RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::move(db))); | |||
| buffer_id += num_workers_; | |||
| @@ -370,7 +370,7 @@ Status CelebAOp::WorkerEntry(int32_t worker_id) { | |||
| } | |||
| Status CelebAOp::LoadBuffer(const std::vector<int64_t> &keys, std::unique_ptr<DataBuffer> *db) { | |||
| std::unique_ptr<TensorQTable> deq = make_unique<TensorQTable>(); | |||
| std::unique_ptr<TensorQTable> deq = std::make_unique<TensorQTable>(); | |||
| for (const auto &key : keys) { | |||
| TensorRow row; | |||
| RETURN_IF_NOT_OK(LoadTensorRow(image_labels_vec_[key], &row)); | |||
| @@ -47,7 +47,7 @@ Status CifarOp::Builder::Build(std::shared_ptr<CifarOp> *ptr) { | |||
| if (sampler_ == nullptr) { | |||
| sampler_ = std::make_shared<SequentialSampler>(); | |||
| } | |||
| schema_ = make_unique<DataSchema>(); | |||
| schema_ = std::make_unique<DataSchema>(); | |||
| TensorShape scalar = TensorShape::CreateScalar(); | |||
| RETURN_IF_NOT_OK(schema_->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1))); | |||
| if (cifar_type_ == kCifar10) { | |||
| @@ -91,7 +91,7 @@ CifarOp::CifarOp(CifarType type, int32_t num_works, int32_t rows_per_buf, const | |||
| col_name_map_[data_schema_->column(i).name()] = i; | |||
| } | |||
| constexpr uint64_t kUtilQueueSize = 512; | |||
| cifar_raw_data_block_ = make_unique<Queue<std::vector<unsigned char>>>(kUtilQueueSize); | |||
| cifar_raw_data_block_ = std::make_unique<Queue<std::vector<unsigned char>>>(kUtilQueueSize); | |||
| io_block_queues_.Init(num_workers_, queue_size); | |||
| } | |||
| @@ -114,7 +114,7 @@ Status CifarOp::operator()() { | |||
| if (row_cnt_ >= num_samples_) break; // enough row read, break for loop | |||
| if (row_cnt_ % rows_per_buffer_ == 0) { | |||
| RETURN_IF_NOT_OK(io_block_queues_[buf_cnt_++ % num_workers_]->Add( | |||
| make_unique<IOBlock>(IOBlock(keys, IOBlock::kDeIoBlockNone)))); | |||
| std::make_unique<IOBlock>(IOBlock(keys, IOBlock::kDeIoBlockNone)))); | |||
| keys.clear(); | |||
| } | |||
| } | |||
| @@ -122,21 +122,21 @@ Status CifarOp::operator()() { | |||
| } | |||
| if (keys.empty() == false) { | |||
| RETURN_IF_NOT_OK(io_block_queues_[(buf_cnt_++) % num_workers_]->Add( | |||
| make_unique<IOBlock>(IOBlock(keys, IOBlock::kDeIoBlockNone)))); | |||
| std::make_unique<IOBlock>(IOBlock(keys, IOBlock::kDeIoBlockNone)))); | |||
| } | |||
| if (!BitTest(op_ctrl_flags_, kDeOpRepeated) || BitTest(op_ctrl_flags_, kDeOpLastRepeat)) { | |||
| RETURN_IF_NOT_OK( | |||
| io_block_queues_[(buf_cnt_++) % num_workers_]->Add(make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEoe))); | |||
| io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEoe))); | |||
| RETURN_IF_NOT_OK( | |||
| io_block_queues_[(buf_cnt_++) % num_workers_]->Add(make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEof))); | |||
| io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEof))); | |||
| for (int32_t i = 0; i < num_workers_; i++) { | |||
| RETURN_IF_NOT_OK( | |||
| io_block_queues_[i]->Add(make_unique<IOBlock>(std::vector<int64_t>(), IOBlock::kDeIoBlockNone))); | |||
| io_block_queues_[i]->Add(std::make_unique<IOBlock>(std::vector<int64_t>(), IOBlock::kDeIoBlockNone))); | |||
| } | |||
| return Status::OK(); | |||
| } else { // not the last repeat. Acquire lock, sleeps master thread, wait for the wake-up from reset | |||
| RETURN_IF_NOT_OK( | |||
| io_block_queues_[(buf_cnt_++) % num_workers_]->Add(make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEoe))); | |||
| io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEoe))); | |||
| RETURN_IF_NOT_OK(wp_.Wait()); // Master thread goes to sleep after it has made all the IOBlocks | |||
| wp_.Clear(); | |||
| RETURN_IF_NOT_OK(sampler_->GetNextBuffer(&sampler_buffer)); | |||
| @@ -169,17 +169,17 @@ Status CifarOp::WorkerEntry(int32_t worker_id) { | |||
| RETURN_IF_NOT_OK(io_block_queues_[worker_id]->PopFront(&io_block)); | |||
| while (io_block != nullptr) { | |||
| if (io_block->eoe() == true) { | |||
| RETURN_IF_NOT_OK(out_connector_->Add(worker_id, make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOE))); | |||
| RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOE))); | |||
| buffer_id = worker_id; | |||
| } else if (io_block->eof() == true) { | |||
| RETURN_IF_NOT_OK(out_connector_->Add(worker_id, make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOF))); | |||
| RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOF))); | |||
| } else { | |||
| std::vector<int64_t> keys; | |||
| RETURN_IF_NOT_OK(io_block->GetKeys(&keys)); | |||
| if (keys.empty() == true) { | |||
| return Status::OK(); // empty key is a quit signal for workers | |||
| } | |||
| std::unique_ptr<DataBuffer> db = make_unique<DataBuffer>(buffer_id, DataBuffer::kDeBFlagNone); | |||
| std::unique_ptr<DataBuffer> db = std::make_unique<DataBuffer>(buffer_id, DataBuffer::kDeBFlagNone); | |||
| RETURN_IF_NOT_OK(LoadBuffer(keys, &db)); | |||
| RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::move(db))); | |||
| buffer_id += num_workers_; | |||
| @@ -213,7 +213,7 @@ Status CifarOp::LoadTensorRow(uint64_t index, TensorRow *trow) { | |||
| // Looping over LoadTensorRow to make 1 DataBuffer. 1 function call produces 1 buffer | |||
| Status CifarOp::LoadBuffer(const std::vector<int64_t> &keys, std::unique_ptr<DataBuffer> *db) { | |||
| std::unique_ptr<TensorQTable> deq = make_unique<TensorQTable>(); | |||
| std::unique_ptr<TensorQTable> deq = std::make_unique<TensorQTable>(); | |||
| for (const int64_t &key : keys) { | |||
| TensorRow trow; | |||
| RETURN_IF_NOT_OK(LoadTensorRow(key, &trow)); | |||
| @@ -173,9 +173,9 @@ Status GeneratorOp::operator()() { | |||
| bool eof = false; | |||
| while (!eof) { | |||
| // Create new buffer each iteration | |||
| fetched_buffer = mindspore::make_unique<DataBuffer>(buffer_id_++, DataBuffer::kDeBFlagNone); | |||
| fetched_buffer = std::make_unique<DataBuffer>(buffer_id_++, DataBuffer::kDeBFlagNone); | |||
| fetched_buffer->set_column_name_map(column_names_map_); | |||
| std::unique_ptr<TensorQTable> fetched_table = mindspore::make_unique<TensorQTable>(); | |||
| std::unique_ptr<TensorQTable> fetched_table = std::make_unique<TensorQTable>(); | |||
| bool eoe = false; | |||
| { | |||
| py::gil_scoped_acquire gil_acquire; | |||
| @@ -201,12 +201,12 @@ Status GeneratorOp::operator()() { | |||
| if (eoe) { | |||
| // Push out EOE upon StopIteration exception from generator | |||
| MS_LOG(INFO) << "Generator operator sends out EOE."; | |||
| std::unique_ptr<DataBuffer> eoe_buffer = mindspore::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOE); | |||
| std::unique_ptr<DataBuffer> eoe_buffer = std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOE); | |||
| RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(eoe_buffer))); | |||
| if (!BitTest(op_ctrl_flags_, kDeOpRepeated) || BitTest(op_ctrl_flags_, kDeOpLastRepeat)) { | |||
| // If last repeat or not repeated, push out EOF and exit master loop | |||
| MS_LOG(INFO) << "Generator operator sends out EOF."; | |||
| std::unique_ptr<DataBuffer> eof_buffer = mindspore::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOF); | |||
| std::unique_ptr<DataBuffer> eof_buffer = std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOF); | |||
| RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(eof_buffer))); | |||
| MS_LOG(INFO) << "Generator operator main execution loop complete."; | |||
| eof = true; | |||
| @@ -39,7 +39,7 @@ Status ImageFolderOp::Builder::Build(std::shared_ptr<ImageFolderOp> *ptr) { | |||
| if (builder_sampler_ == nullptr) { | |||
| builder_sampler_ = std::make_shared<SequentialSampler>(); | |||
| } | |||
| builder_schema_ = make_unique<DataSchema>(); | |||
| builder_schema_ = std::make_unique<DataSchema>(); | |||
| TensorShape scalar = TensorShape::CreateScalar(); | |||
| RETURN_IF_NOT_OK( | |||
| builder_schema_->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1))); | |||
| @@ -82,8 +82,8 @@ ImageFolderOp::ImageFolderOp(int32_t num_wkrs, int32_t rows_per_buffer, std::str | |||
| for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) { | |||
| col_name_map_[data_schema_->column(i).name()] = i; | |||
| } | |||
| folder_name_queue_ = make_unique<Queue<std::string>>(num_wkrs * queue_size); | |||
| image_name_queue_ = make_unique<Queue<FolderImagesPair>>(num_wkrs * queue_size); | |||
| folder_name_queue_ = std::make_unique<Queue<std::string>>(num_wkrs * queue_size); | |||
| image_name_queue_ = std::make_unique<Queue<FolderImagesPair>>(num_wkrs * queue_size); | |||
| io_block_queues_.Init(num_workers_, queue_size); | |||
| } | |||
| @@ -143,7 +143,7 @@ Status ImageFolderOp::operator()() { | |||
| row_cnt_++; | |||
| if (row_cnt_ % rows_per_buffer_ == 0) { | |||
| RETURN_IF_NOT_OK( | |||
| io_block_queues_[buf_cnt_++ % num_workers_]->Add(make_unique<IOBlock>(keys, IOBlock::kDeIoBlockNone))); | |||
| io_block_queues_[buf_cnt_++ % num_workers_]->Add(std::make_unique<IOBlock>(keys, IOBlock::kDeIoBlockNone))); | |||
| keys.clear(); | |||
| } | |||
| } | |||
| @@ -151,21 +151,21 @@ Status ImageFolderOp::operator()() { | |||
| } | |||
| if (keys.empty() == false) { | |||
| RETURN_IF_NOT_OK( | |||
| io_block_queues_[(buf_cnt_++) % num_workers_]->Add(make_unique<IOBlock>(keys, IOBlock::kDeIoBlockNone))); | |||
| io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::make_unique<IOBlock>(keys, IOBlock::kDeIoBlockNone))); | |||
| } | |||
| if (!BitTest(op_ctrl_flags_, kDeOpRepeated) || BitTest(op_ctrl_flags_, kDeOpLastRepeat)) { | |||
| std::unique_ptr<IOBlock> eoe_block = make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEoe); | |||
| std::unique_ptr<IOBlock> eof_block = make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEof); | |||
| std::unique_ptr<IOBlock> eoe_block = std::make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEoe); | |||
| std::unique_ptr<IOBlock> eof_block = std::make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEof); | |||
| RETURN_IF_NOT_OK(io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::move(eoe_block))); | |||
| RETURN_IF_NOT_OK(io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::move(eof_block))); | |||
| for (int32_t i = 0; i < num_workers_; ++i) { | |||
| RETURN_IF_NOT_OK( | |||
| io_block_queues_[i]->Add(make_unique<IOBlock>(std::vector<int64_t>(), IOBlock::kDeIoBlockNone))); | |||
| io_block_queues_[i]->Add(std::make_unique<IOBlock>(std::vector<int64_t>(), IOBlock::kDeIoBlockNone))); | |||
| } | |||
| return Status::OK(); | |||
| } else { // not the last repeat. Sleep master thread, wait for the wake-up from reset | |||
| RETURN_IF_NOT_OK( | |||
| io_block_queues_[(buf_cnt_++) % num_workers_]->Add(make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEoe))); | |||
| io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEoe))); | |||
| RETURN_IF_NOT_OK(wp_.Wait()); // Master thread goes to sleep after it has made all the IOBlocks | |||
| wp_.Clear(); | |||
| RETURN_IF_NOT_OK(sampler_->GetNextBuffer(&sampler_buffer)); | |||
| @@ -182,15 +182,15 @@ Status ImageFolderOp::WorkerEntry(int32_t worker_id) { | |||
| RETURN_IF_NOT_OK(io_block_queues_[worker_id]->PopFront(&io_block)); | |||
| while (io_block != nullptr) { | |||
| if (io_block->eoe() == true) { | |||
| RETURN_IF_NOT_OK(out_connector_->Add(worker_id, make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOE))); | |||
| RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOE))); | |||
| buffer_id = worker_id; | |||
| } else if (io_block->eof() == true) { | |||
| RETURN_IF_NOT_OK(out_connector_->Add(worker_id, make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOF))); | |||
| RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOF))); | |||
| } else { | |||
| std::vector<int64_t> keys; | |||
| RETURN_IF_NOT_OK(io_block->GetKeys(&keys)); | |||
| if (keys.empty() == true) return Status::OK(); // empty key is a quit signal for workers | |||
| std::unique_ptr<DataBuffer> db = make_unique<DataBuffer>(buffer_id, DataBuffer::kDeBFlagNone); | |||
| std::unique_ptr<DataBuffer> db = std::make_unique<DataBuffer>(buffer_id, DataBuffer::kDeBFlagNone); | |||
| RETURN_IF_NOT_OK(LoadBuffer(keys, &db)); | |||
| RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::move(db))); | |||
| buffer_id += num_workers_; | |||
| @@ -231,7 +231,7 @@ Status ImageFolderOp::LoadTensorRow(ImageLabelPair pairPtr, TensorRow *trow) { | |||
| // Looping over LoadTensorRow to make 1 DataBuffer. 1 function call produces 1 buffer | |||
| Status ImageFolderOp::LoadBuffer(const std::vector<int64_t> &keys, std::unique_ptr<DataBuffer> *db) { | |||
| std::unique_ptr<TensorQTable> deq = make_unique<TensorQTable>(); | |||
| std::unique_ptr<TensorQTable> deq = std::make_unique<TensorQTable>(); | |||
| TensorRow trow; | |||
| for (const int64_t &key : keys) { | |||
| RETURN_IF_NOT_OK(this->LoadTensorRow(image_label_pairs_[key], &trow)); | |||
| @@ -40,7 +40,7 @@ Status ManifestOp::Builder::Build(std::shared_ptr<ManifestOp> *ptr) { | |||
| if (builder_sampler_ == nullptr) { | |||
| builder_sampler_ = std::make_shared<SequentialSampler>(); | |||
| } | |||
| builder_schema_ = make_unique<DataSchema>(); | |||
| builder_schema_ = std::make_unique<DataSchema>(); | |||
| RETURN_IF_NOT_OK( | |||
| builder_schema_->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1))); | |||
| RETURN_IF_NOT_OK( | |||
| @@ -105,7 +105,7 @@ Status ManifestOp::AddIoBlock(std::unique_ptr<DataBuffer> *sampler_buffer) { | |||
| row_cnt_++; | |||
| if (row_cnt_ % rows_per_buffer_ == 0) { | |||
| RETURN_IF_NOT_OK(io_block_queues_[buf_cnt_++ % num_workers_]->Add( | |||
| make_unique<IOBlock>(IOBlock(keys, IOBlock::kDeIoBlockNone)))); | |||
| std::make_unique<IOBlock>(IOBlock(keys, IOBlock::kDeIoBlockNone)))); | |||
| keys.clear(); | |||
| } | |||
| } | |||
| @@ -113,21 +113,21 @@ Status ManifestOp::AddIoBlock(std::unique_ptr<DataBuffer> *sampler_buffer) { | |||
| } | |||
| if (keys.empty() == false) { | |||
| RETURN_IF_NOT_OK(io_block_queues_[(buf_cnt_++) % num_workers_]->Add( | |||
| make_unique<IOBlock>(IOBlock(keys, IOBlock::kDeIoBlockNone)))); | |||
| std::make_unique<IOBlock>(IOBlock(keys, IOBlock::kDeIoBlockNone)))); | |||
| } | |||
| if (!BitTest(op_ctrl_flags_, kDeOpRepeated) || BitTest(op_ctrl_flags_, kDeOpLastRepeat)) { | |||
| RETURN_IF_NOT_OK( | |||
| io_block_queues_[(buf_cnt_++) % num_workers_]->Add(make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEoe))); | |||
| io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEoe))); | |||
| RETURN_IF_NOT_OK( | |||
| io_block_queues_[(buf_cnt_++) % num_workers_]->Add(make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEof))); | |||
| io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEof))); | |||
| for (int32_t i = 0; i < num_workers_; i++) { | |||
| RETURN_IF_NOT_OK( | |||
| io_block_queues_[i]->Add(make_unique<IOBlock>(std::vector<int64_t>(), IOBlock::kDeIoBlockNone))); | |||
| io_block_queues_[i]->Add(std::make_unique<IOBlock>(std::vector<int64_t>(), IOBlock::kDeIoBlockNone))); | |||
| } | |||
| return Status::OK(); | |||
| } else { | |||
| RETURN_IF_NOT_OK( | |||
| io_block_queues_[(buf_cnt_++) % num_workers_]->Add(make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEoe))); | |||
| io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEoe))); | |||
| RETURN_IF_NOT_OK(wp_.Wait()); // Master thread goes to sleep after it has made all the IOBlocks | |||
| wp_.Clear(); | |||
| RETURN_IF_NOT_OK(sampler_->GetNextBuffer(sampler_buffer)); | |||
| @@ -160,17 +160,17 @@ Status ManifestOp::WorkerEntry(int32_t worker_id) { | |||
| RETURN_IF_NOT_OK(io_block_queues_[worker_id]->PopFront(&io_block)); | |||
| while (io_block != nullptr) { | |||
| if (io_block->eoe() == true) { | |||
| RETURN_IF_NOT_OK(out_connector_->Add(worker_id, make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOE))); | |||
| RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOE))); | |||
| buffer_id = worker_id; | |||
| } else if (io_block->eof() == true) { | |||
| RETURN_IF_NOT_OK(out_connector_->Add(worker_id, make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOF))); | |||
| RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOF))); | |||
| } else { | |||
| std::vector<int64_t> keys; | |||
| RETURN_IF_NOT_OK(io_block->GetKeys(&keys)); | |||
| if (keys.empty()) { | |||
| return Status::OK(); // empty key is a quit signal for workers | |||
| } | |||
| std::unique_ptr<DataBuffer> db = make_unique<DataBuffer>(buffer_id, DataBuffer::kDeBFlagNone); | |||
| std::unique_ptr<DataBuffer> db = std::make_unique<DataBuffer>(buffer_id, DataBuffer::kDeBFlagNone); | |||
| RETURN_IF_NOT_OK(LoadBuffer(keys, &db)); | |||
| RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::move(db))); | |||
| buffer_id += num_workers_; | |||
| @@ -227,7 +227,7 @@ Status ManifestOp::LoadTensorRow(const std::pair<std::string, std::vector<std::s | |||
| // Looping over LoadTensorRow to make 1 DataBuffer. 1 function call produces 1 buffer | |||
| Status ManifestOp::LoadBuffer(const std::vector<int64_t> &keys, std::unique_ptr<DataBuffer> *db) { | |||
| std::unique_ptr<TensorQTable> deq = make_unique<TensorQTable>(); | |||
| std::unique_ptr<TensorQTable> deq = std::make_unique<TensorQTable>(); | |||
| for (const auto &key : keys) { | |||
| TensorRow trow; | |||
| RETURN_IF_NOT_OK(LoadTensorRow(image_labelname_[static_cast<size_t>(key)], &trow)); | |||
| @@ -13,8 +13,6 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifdef ENABLE_MINDRECORD | |||
| #include "dataset/engine/datasetops/source/mindrecord_op.h" | |||
| #include <algorithm> | |||
| @@ -30,7 +28,6 @@ | |||
| #include "dataset/engine/datasetops/dataset_op.h" | |||
| #include "dataset/engine/db_connector.h" | |||
| #include "dataset/engine/execution_tree.h" | |||
| #include "dataset/util/make_unique.h" | |||
| #include "utils/log_adapter.h" | |||
| namespace mindspore { | |||
| @@ -96,18 +93,19 @@ MindRecordOp::MindRecordOp(int32_t num_mind_record_workers, int32_t rows_per_buf | |||
| io_blk_queues_.Init(num_workers_, op_connector_queue_size); | |||
| if (!block_reader_) return; | |||
| for (int32_t i = 0; i < num_workers_; ++i) { | |||
| block_buffer_.emplace_back(make_unique<std::vector<ShardTuple>>(std::vector<ShardTuple>{})); | |||
| block_buffer_.emplace_back(std::make_unique<std::vector<ShardTuple>>(std::vector<ShardTuple>{})); | |||
| } | |||
| } | |||
| // Private helper method to encapsulate some common construction/reset tasks | |||
| Status MindRecordOp::Init() { | |||
| shard_reader_ = mindspore::make_unique<ShardReader>(); | |||
| shard_reader_ = std::make_unique<ShardReader>(); | |||
| auto rc = shard_reader_->Open(dataset_file_, num_mind_record_workers_, columns_to_load_, operators_, block_reader_); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(rc != MSRStatus::FAILED, "MindRecordOp init failed."); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(rc != MSRStatus::FAILED, | |||
| "MindRecordOp init failed. Error message: " + ErrnoToMessage(rc)); | |||
| data_schema_ = mindspore::make_unique<DataSchema>(); | |||
| data_schema_ = std::make_unique<DataSchema>(); | |||
| std::vector<std::shared_ptr<Schema>> schema_vec = shard_reader_->get_shard_header()->get_schemas(); | |||
| // check whether schema exists, if so use the first one | |||
| @@ -144,7 +142,7 @@ Status MindRecordOp::Init() { | |||
| } | |||
| if (!load_all_cols) { | |||
| std::unique_ptr<DataSchema> tmp_schema = make_unique<DataSchema>(); | |||
| std::unique_ptr<DataSchema> tmp_schema = std::make_unique<DataSchema>(); | |||
| for (std::string colname : columns_to_load_) { | |||
| CHECK_FAIL_RETURN_UNEXPECTED(colname_to_ind.find(colname) != colname_to_ind.end(), colname + ": doesn't exist"); | |||
| RETURN_IF_NOT_OK(tmp_schema->AddColumn(data_schema_->column(colname_to_ind[colname]))); | |||
| @@ -298,7 +296,7 @@ Status MindRecordOp::LoadFloat(TensorShape *new_shape, std::unique_ptr<T[]> *arr | |||
| RETURN_IF_NOT_OK(GetFloat(&value, columns_json[column_name], use_double)); | |||
| *new_shape = TensorShape::CreateScalar(); | |||
| *array_data = mindspore::make_unique<T[]>(1); | |||
| *array_data = std::make_unique<T[]>(1); | |||
| (*array_data)[0] = value; | |||
| } else { | |||
| if (column.hasShape()) { | |||
| @@ -309,7 +307,7 @@ Status MindRecordOp::LoadFloat(TensorShape *new_shape, std::unique_ptr<T[]> *arr | |||
| } | |||
| int idx = 0; | |||
| *array_data = mindspore::make_unique<T[]>(new_shape->NumOfElements()); | |||
| *array_data = std::make_unique<T[]>(new_shape->NumOfElements()); | |||
| for (auto &element : columns_json[column_name]) { | |||
| T value = 0; | |||
| RETURN_IF_NOT_OK(GetFloat(&value, element, use_double)); | |||
| @@ -350,7 +348,7 @@ Status MindRecordOp::LoadInt(TensorShape *new_shape, std::unique_ptr<T[]> *array | |||
| RETURN_IF_NOT_OK(GetInt(&value, columns_json[column_name])); | |||
| *new_shape = TensorShape::CreateScalar(); | |||
| *array_data = mindspore::make_unique<T[]>(1); | |||
| *array_data = std::make_unique<T[]>(1); | |||
| (*array_data)[0] = value; | |||
| } else { | |||
| if (column.hasShape()) { | |||
| @@ -361,7 +359,7 @@ Status MindRecordOp::LoadInt(TensorShape *new_shape, std::unique_ptr<T[]> *array | |||
| } | |||
| int idx = 0; | |||
| *array_data = mindspore::make_unique<T[]>(new_shape->NumOfElements()); | |||
| *array_data = std::make_unique<T[]>(new_shape->NumOfElements()); | |||
| for (auto &element : columns_json[column_name]) { | |||
| T value = 0; | |||
| RETURN_IF_NOT_OK(GetInt(&value, element)); | |||
| @@ -431,12 +429,14 @@ Status MindRecordOp::WorkerEntry(int32_t worker_id) { | |||
| RETURN_IF_NOT_OK(io_blk_queues_[worker_id]->PopFront(&io_block)); | |||
| while (io_block != nullptr) { | |||
| if (io_block->eoe() == true) { | |||
| RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::move(make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOE)))); | |||
| RETURN_IF_NOT_OK( | |||
| out_connector_->Add(worker_id, std::move(std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOE)))); | |||
| RETURN_IF_NOT_OK(io_blk_queues_[worker_id]->PopFront(&io_block)); | |||
| continue; | |||
| } | |||
| if (io_block->eof() == true) { | |||
| RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::move(make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOF)))); | |||
| RETURN_IF_NOT_OK( | |||
| out_connector_->Add(worker_id, std::move(std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOF)))); | |||
| RETURN_IF_NOT_OK(io_blk_queues_[worker_id]->PopFront(&io_block)); | |||
| continue; | |||
| } | |||
| @@ -486,9 +486,9 @@ Status MindRecordOp::WorkerEntry(int32_t worker_id) { | |||
| Status MindRecordOp::GetBufferFromReader(std::unique_ptr<DataBuffer> *fetched_buffer, int64_t buffer_id, | |||
| int32_t worker_id) { | |||
| *fetched_buffer = mindspore::make_unique<DataBuffer>(buffer_id, DataBuffer::kDeBFlagNone); | |||
| *fetched_buffer = std::make_unique<DataBuffer>(buffer_id, DataBuffer::kDeBFlagNone); | |||
| (*fetched_buffer)->set_column_name_map(column_name_mapping_); | |||
| std::unique_ptr<TensorQTable> tensor_table = mindspore::make_unique<TensorQTable>(); | |||
| std::unique_ptr<TensorQTable> tensor_table = std::make_unique<TensorQTable>(); | |||
| for (int32_t i = 0; i < rows_per_buffer_; ++i) { | |||
| ShardTuple tupled_buffer; | |||
| if (block_reader_) { | |||
| @@ -597,22 +597,22 @@ Status MindRecordOp::operator()() { | |||
| for (int32_t i = 0; i < buffers_needed_; ++i) { | |||
| if (block_reader_) RETURN_IF_NOT_OK(FetchBlockBuffer(i)); | |||
| std::vector<int64_t> keys(1, i); | |||
| RETURN_IF_NOT_OK( | |||
| io_blk_queues_[buf_cnt_++ % num_workers_]->Add(make_unique<IOBlock>(IOBlock(keys, IOBlock::kDeIoBlockNone)))); | |||
| RETURN_IF_NOT_OK(io_blk_queues_[buf_cnt_++ % num_workers_]->Add( | |||
| std::make_unique<IOBlock>(IOBlock(keys, IOBlock::kDeIoBlockNone)))); | |||
| } | |||
| if (!BitTest(op_ctrl_flags_, kDeOpRepeated) || BitTest(op_ctrl_flags_, kDeOpLastRepeat)) { | |||
| RETURN_IF_NOT_OK( | |||
| io_blk_queues_[(buf_cnt_++) % num_workers_]->Add(make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEoe))); | |||
| io_blk_queues_[(buf_cnt_++) % num_workers_]->Add(std::make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEoe))); | |||
| RETURN_IF_NOT_OK( | |||
| io_blk_queues_[(buf_cnt_++) % num_workers_]->Add(make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEof))); | |||
| io_blk_queues_[(buf_cnt_++) % num_workers_]->Add(std::make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEof))); | |||
| for (int32_t i = 0; i < num_workers_; i++) { | |||
| RETURN_IF_NOT_OK( | |||
| io_blk_queues_[i]->Add(std::move(make_unique<IOBlock>(std::vector<int64_t>(), IOBlock::kDeIoBlockNone)))); | |||
| RETURN_IF_NOT_OK(io_blk_queues_[i]->Add( | |||
| std::move(std::make_unique<IOBlock>(std::vector<int64_t>(), IOBlock::kDeIoBlockNone)))); | |||
| } | |||
| return Status::OK(); | |||
| } else { // not the last repeat. Acquire lock, sleeps master thread, wait for the wake-up from reset | |||
| RETURN_IF_NOT_OK( | |||
| io_blk_queues_[(buf_cnt_++) % num_workers_]->Add(make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEoe))); | |||
| io_blk_queues_[(buf_cnt_++) % num_workers_]->Add(std::make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEoe))); | |||
| // reset our buffer count and go to loop again. | |||
| RETURN_IF_NOT_OK(shard_reader_wait_post_.Wait()); | |||
| @@ -656,7 +656,7 @@ Status MindRecordOp::LaunchThreadAndInitOp() { | |||
| } | |||
| Status MindRecordOp::CountTotalRows(const std::string dataset_path, int64_t *count) { | |||
| std::unique_ptr<ShardReader> shard_reader = mindspore::make_unique<ShardReader>(); | |||
| std::unique_ptr<ShardReader> shard_reader = std::make_unique<ShardReader>(); | |||
| MSRStatus rc = shard_reader->CountTotalRows(dataset_path, count); | |||
| if (rc == MSRStatus::FAILED) { | |||
| RETURN_STATUS_UNEXPECTED("MindRecordOp count total rows failed."); | |||
| @@ -665,4 +665,3 @@ Status MindRecordOp::CountTotalRows(const std::string dataset_path, int64_t *cou | |||
| } | |||
| } // namespace dataset | |||
| } // namespace mindspore | |||
| #endif | |||
| @@ -15,7 +15,6 @@ | |||
| */ | |||
| #ifndef DATASET_ENGINE_DATASETOPS_SOURCE_MINDRECORD_OP_H_ | |||
| #define DATASET_ENGINE_DATASETOPS_SOURCE_MINDRECORD_OP_H_ | |||
| #ifdef ENABLE_MINDRECORD | |||
| #pragma once | |||
| #include <cstdint> | |||
| @@ -33,6 +32,7 @@ | |||
| #include "dataset/engine/datasetops/source/io_block.h" | |||
| #include "dataset/util/queue.h" | |||
| #include "dataset/util/status.h" | |||
| #include "mindrecord/include/shard_error.h" | |||
| #include "mindrecord/include/shard_reader.h" | |||
| #include "mindrecord/include/common/shard_utils.h" | |||
| #include "dataset/util/wait_post.h" | |||
| @@ -276,5 +276,4 @@ class MindRecordOp : public ParallelOp { | |||
| }; | |||
| } // namespace dataset | |||
| } // namespace mindspore | |||
| #endif | |||
| #endif // DATASET_ENGINE_DATASETOPS_SOURCE_MINDRECORD_OP_H_ | |||
| @@ -43,7 +43,7 @@ Status MnistOp::Builder::Build(std::shared_ptr<MnistOp> *ptr) { | |||
| if (builder_sampler_ == nullptr) { | |||
| builder_sampler_ = std::make_shared<SequentialSampler>(); | |||
| } | |||
| builder_schema_ = make_unique<DataSchema>(); | |||
| builder_schema_ = std::make_unique<DataSchema>(); | |||
| RETURN_IF_NOT_OK( | |||
| builder_schema_->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kCv, 1))); | |||
| TensorShape scalar = TensorShape::CreateScalar(); | |||
| @@ -89,7 +89,7 @@ Status MnistOp::TraversalSampleIds(const std::shared_ptr<Tensor> &sample_ids, st | |||
| row_cnt_++; | |||
| if (row_cnt_ % rows_per_buffer_ == 0) { | |||
| RETURN_IF_NOT_OK(io_block_queues_[buf_cnt_++ % num_workers_]->Add( | |||
| make_unique<IOBlock>(IOBlock(*keys, IOBlock::kDeIoBlockNone)))); | |||
| std::make_unique<IOBlock>(IOBlock(*keys, IOBlock::kDeIoBlockNone)))); | |||
| keys->clear(); | |||
| } | |||
| } | |||
| @@ -115,21 +115,21 @@ Status MnistOp::operator()() { | |||
| } | |||
| if (keys.empty() == false) { | |||
| RETURN_IF_NOT_OK(io_block_queues_[(buf_cnt_++) % num_workers_]->Add( | |||
| make_unique<IOBlock>(IOBlock(keys, IOBlock::kDeIoBlockNone)))); | |||
| std::make_unique<IOBlock>(IOBlock(keys, IOBlock::kDeIoBlockNone)))); | |||
| } | |||
| if (!BitTest(op_ctrl_flags_, kDeOpRepeated) || BitTest(op_ctrl_flags_, kDeOpLastRepeat)) { | |||
| RETURN_IF_NOT_OK( | |||
| io_block_queues_[(buf_cnt_++) % num_workers_]->Add(make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEoe))); | |||
| io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEoe))); | |||
| RETURN_IF_NOT_OK( | |||
| io_block_queues_[(buf_cnt_++) % num_workers_]->Add(make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEof))); | |||
| io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEof))); | |||
| for (int32_t i = 0; i < num_workers_; ++i) { | |||
| RETURN_IF_NOT_OK( | |||
| io_block_queues_[i]->Add(make_unique<IOBlock>(std::vector<int64_t>(), IOBlock::kDeIoBlockNone))); | |||
| io_block_queues_[i]->Add(std::make_unique<IOBlock>(std::vector<int64_t>(), IOBlock::kDeIoBlockNone))); | |||
| } | |||
| return Status::OK(); | |||
| } else { | |||
| RETURN_IF_NOT_OK( | |||
| io_block_queues_[(buf_cnt_++) % num_workers_]->Add(make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEoe))); | |||
| io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEoe))); | |||
| RETURN_IF_NOT_OK(wp_.Wait()); // Master thread goes to sleep after it has made all the IOBlocks | |||
| wp_.Clear(); | |||
| RETURN_IF_NOT_OK(sampler_->GetNextBuffer(&sampler_buffer)); | |||
| @@ -145,15 +145,15 @@ Status MnistOp::WorkerEntry(int32_t worker_id) { | |||
| RETURN_IF_NOT_OK(io_block_queues_[worker_id]->PopFront(&iOBlock)); | |||
| while (iOBlock != nullptr) { | |||
| if (iOBlock->eoe() == true) { | |||
| RETURN_IF_NOT_OK(out_connector_->Add(worker_id, make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOE))); | |||
| RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOE))); | |||
| buffer_id = worker_id; | |||
| } else if (iOBlock->eof() == true) { | |||
| RETURN_IF_NOT_OK(out_connector_->Add(worker_id, make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOF))); | |||
| RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOF))); | |||
| } else { | |||
| std::vector<int64_t> keys; | |||
| RETURN_IF_NOT_OK(iOBlock->GetKeys(&keys)); | |||
| if (keys.empty() == true) return Status::OK(); // empty key is a quit signal for workers | |||
| std::unique_ptr<DataBuffer> db = make_unique<DataBuffer>(buffer_id, DataBuffer::kDeBFlagNone); | |||
| std::unique_ptr<DataBuffer> db = std::make_unique<DataBuffer>(buffer_id, DataBuffer::kDeBFlagNone); | |||
| RETURN_IF_NOT_OK(LoadBuffer(keys, &db)); | |||
| RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::move(db))); | |||
| buffer_id += num_workers_; | |||
| @@ -178,7 +178,7 @@ Status MnistOp::LoadTensorRow(const MnistLabelPair &mnist_pair, TensorRow *trow) | |||
| // Looping over LoadTensorRow to make 1 DataBuffer. 1 function call produces 1 buffer | |||
| Status MnistOp::LoadBuffer(const std::vector<int64_t> &keys, std::unique_ptr<DataBuffer> *db) { | |||
| std::unique_ptr<TensorQTable> deq = make_unique<TensorQTable>(); | |||
| std::unique_ptr<TensorQTable> deq = std::make_unique<TensorQTable>(); | |||
| TensorRow trow; | |||
| for (const int64_t &key : keys) { | |||
| RETURN_IF_NOT_OK(this->LoadTensorRow(image_label_pairs_[key], &trow)); | |||
| @@ -309,8 +309,8 @@ Status MnistOp::ReadImageAndLabel(std::ifstream *image_reader, std::ifstream *la | |||
| CHECK_FAIL_RETURN_UNEXPECTED((num_images == num_labels), "num_images != num_labels"); | |||
| // The image size of the Mnist dataset is fixed at [28,28] | |||
| int64_t size = kMnistImageRows * kMnistImageCols; | |||
| auto images_buf = mindspore::make_unique<char[]>(size * num_images); | |||
| auto labels_buf = mindspore::make_unique<char[]>(num_images); | |||
| auto images_buf = std::make_unique<char[]>(size * num_images); | |||
| auto labels_buf = std::make_unique<char[]>(num_images); | |||
| if (images_buf == nullptr || labels_buf == nullptr) { | |||
| std::string err_msg = "Fail to allocate memory for MNIST Buffer."; | |||
| MS_LOG(ERROR) << err_msg.c_str(); | |||
| @@ -52,9 +52,9 @@ Status DistributedSampler::GetNextBuffer(std::unique_ptr<DataBuffer> *out_buffer | |||
| if (cnt_ > samples_per_buffer_) { | |||
| RETURN_STATUS_UNEXPECTED("Distributed Sampler Error"); | |||
| } else if (cnt_ == samples_per_buffer_) { | |||
| (*out_buffer) = mindspore::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOE); | |||
| (*out_buffer) = std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOE); | |||
| } else { | |||
| (*out_buffer) = mindspore::make_unique<DataBuffer>(cnt_, DataBuffer::kDeBFlagNone); | |||
| (*out_buffer) = std::make_unique<DataBuffer>(cnt_, DataBuffer::kDeBFlagNone); | |||
| std::shared_ptr<Tensor> sample_ids; | |||
| RETURN_IF_NOT_OK(CreateSamplerTensor(&sample_ids, samples_per_buffer_)); | |||
| int64_t *id_ptr = reinterpret_cast<int64_t *>(sample_ids->StartAddr()); | |||
| @@ -63,7 +63,7 @@ Status DistributedSampler::GetNextBuffer(std::unique_ptr<DataBuffer> *out_buffer | |||
| *(id_ptr++) = shuffle_ ? shuffle_vec_[static_cast<size_t>(next_id)] : next_id; | |||
| } | |||
| TensorRow row(1, sample_ids); | |||
| (*out_buffer)->set_tensor_table(make_unique<TensorQTable>(1, row)); | |||
| (*out_buffer)->set_tensor_table(std::make_unique<TensorQTable>(1, row)); | |||
| } | |||
| return Status::OK(); | |||
| } | |||
| @@ -53,9 +53,9 @@ Status PKSampler::GetNextBuffer(std::unique_ptr<DataBuffer> *out_buffer) { | |||
| if (next_id_ > num_pk_samples_ || num_pk_samples_ == 0) { | |||
| RETURN_STATUS_UNEXPECTED("Index out of bound in PKSampler"); | |||
| } else if (next_id_ == num_pk_samples_) { | |||
| (*out_buffer) = mindspore::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOE); | |||
| (*out_buffer) = std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOE); | |||
| } else { | |||
| (*out_buffer) = mindspore::make_unique<DataBuffer>(next_id_, DataBuffer::kDeBFlagNone); | |||
| (*out_buffer) = std::make_unique<DataBuffer>(next_id_, DataBuffer::kDeBFlagNone); | |||
| std::shared_ptr<Tensor> sample_ids; | |||
| int64_t last_id = | |||
| (samples_per_buffer_ + next_id_ > num_pk_samples_) ? num_pk_samples_ : samples_per_buffer_ + next_id_; | |||
| @@ -68,7 +68,7 @@ Status PKSampler::GetNextBuffer(std::unique_ptr<DataBuffer> *out_buffer) { | |||
| *(id_ptr++) = samples[rnd_ind]; | |||
| } | |||
| TensorRow row(1, sample_ids); | |||
| (*out_buffer)->set_tensor_table(make_unique<TensorQTable>(1, row)); | |||
| (*out_buffer)->set_tensor_table(std::make_unique<TensorQTable>(1, row)); | |||
| } | |||
| return Status::OK(); | |||
| } | |||
| @@ -32,9 +32,9 @@ Status RandomSampler::GetNextBuffer(std::unique_ptr<DataBuffer> *out_buffer) { | |||
| if (next_id_ > num_samples_) { | |||
| RETURN_STATUS_UNEXPECTED("RandomSampler Internal Error"); | |||
| } else if (next_id_ == num_samples_) { | |||
| (*out_buffer) = make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOE); | |||
| (*out_buffer) = std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOE); | |||
| } else { | |||
| (*out_buffer) = make_unique<DataBuffer>(next_id_, DataBuffer::kDeBFlagNone); | |||
| (*out_buffer) = std::make_unique<DataBuffer>(next_id_, DataBuffer::kDeBFlagNone); | |||
| std::shared_ptr<Tensor> sampleIds; | |||
| int64_t last_id = samples_per_buffer_ + next_id_ > num_samples_ ? num_samples_ : samples_per_buffer_ + next_id_; | |||
| RETURN_IF_NOT_OK(CreateSamplerTensor(&sampleIds, last_id - next_id_)); | |||
| @@ -44,7 +44,7 @@ Status RandomSampler::GetNextBuffer(std::unique_ptr<DataBuffer> *out_buffer) { | |||
| } | |||
| next_id_ = last_id; | |||
| TensorRow row(1, sampleIds); | |||
| (*out_buffer)->set_tensor_table(make_unique<TensorQTable>(1, row)); | |||
| (*out_buffer)->set_tensor_table(std::make_unique<TensorQTable>(1, row)); | |||
| } | |||
| return Status::OK(); | |||
| } | |||
| @@ -61,7 +61,7 @@ Status RandomSampler::Init(const RandomAccessOp *op) { | |||
| } | |||
| std::shuffle(shuffled_ids_.begin(), shuffled_ids_.end(), rnd_); | |||
| } else { | |||
| dist = make_unique<std::uniform_int_distribution<int64_t>>(0, num_rows_ - 1); | |||
| dist = std::make_unique<std::uniform_int_distribution<int64_t>>(0, num_rows_ - 1); | |||
| } | |||
| rnd_.seed(seed_++); | |||
| return Status::OK(); | |||
| @@ -35,7 +35,7 @@ Status Sampler::CreateSamplerTensor(std::shared_ptr<Tensor> *sample_ids, int64_t | |||
| } | |||
| if (col_desc_ == nullptr) { | |||
| // a ColDescriptor for Tensor that holds SampleIds | |||
| col_desc_ = make_unique<ColDescriptor>("sampleIds", DataType(DataType::DE_INT64), TensorImpl::kFlexible, 1); | |||
| col_desc_ = std::make_unique<ColDescriptor>("sampleIds", DataType(DataType::DE_INT64), TensorImpl::kFlexible, 1); | |||
| } | |||
| TensorShape shape(std::vector<dsize_t>(1, num_elements)); | |||
| RETURN_IF_NOT_OK(Tensor::CreateTensor(sample_ids, col_desc_->tensorImpl(), shape, col_desc_->type())); | |||
| @@ -27,7 +27,6 @@ | |||
| #include "dataset/engine/data_buffer.h" | |||
| #include "dataset/engine/data_schema.h" | |||
| #include "dataset/engine/datasetops/dataset_op.h" | |||
| #include "dataset/util/make_unique.h" | |||
| namespace mindspore { | |||
| namespace dataset { | |||
| @@ -25,9 +25,9 @@ Status SequentialSampler::GetNextBuffer(std::unique_ptr<DataBuffer> *out_buffer) | |||
| if (next_id_ > num_samples_) { | |||
| RETURN_STATUS_UNEXPECTED("Sequential Sampler Internal Error"); | |||
| } else if (next_id_ == num_samples_) { | |||
| (*out_buffer) = make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOE); | |||
| (*out_buffer) = std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOE); | |||
| } else { | |||
| (*out_buffer) = make_unique<DataBuffer>(next_id_, DataBuffer::kDeBFlagNone); | |||
| (*out_buffer) = std::make_unique<DataBuffer>(next_id_, DataBuffer::kDeBFlagNone); | |||
| std::shared_ptr<Tensor> sampleIds; | |||
| int64_t lastId = (samples_per_buffer_ + next_id_ > num_samples_) ? num_samples_ : samples_per_buffer_ + next_id_; | |||
| RETURN_IF_NOT_OK(CreateSamplerTensor(&sampleIds, lastId - next_id_)); | |||
| @@ -36,7 +36,7 @@ Status SequentialSampler::GetNextBuffer(std::unique_ptr<DataBuffer> *out_buffer) | |||
| *(idPtr++) = next_id_++; | |||
| } | |||
| TensorRow row(1, sampleIds); | |||
| (*out_buffer)->set_tensor_table(make_unique<TensorQTable>(1, row)); | |||
| (*out_buffer)->set_tensor_table(std::make_unique<TensorQTable>(1, row)); | |||
| } | |||
| return Status::OK(); | |||
| } | |||
| @@ -64,9 +64,9 @@ Status SubsetRandomSampler::Reset() { | |||
| Status SubsetRandomSampler::GetNextBuffer(std::unique_ptr<DataBuffer> *out_buffer) { | |||
| // All samples have been drawn | |||
| if (sample_id_ == indices_.size()) { | |||
| (*out_buffer) = make_unique<DataBuffer>(buffer_id_++, DataBuffer::kDeBFlagEOE); | |||
| (*out_buffer) = std::make_unique<DataBuffer>(buffer_id_++, DataBuffer::kDeBFlagEOE); | |||
| } else { | |||
| (*out_buffer) = make_unique<DataBuffer>(buffer_id_++, DataBuffer::kDeBFlagNone); | |||
| (*out_buffer) = std::make_unique<DataBuffer>(buffer_id_++, DataBuffer::kDeBFlagNone); | |||
| std::shared_ptr<Tensor> outputIds; | |||
| int64_t last_id = sample_id_ + samples_per_buffer_; | |||
| @@ -92,7 +92,7 @@ Status SubsetRandomSampler::GetNextBuffer(std::unique_ptr<DataBuffer> *out_buffe | |||
| } | |||
| // Create a TensorTable from that single tensor and push into DataBuffer | |||
| (*out_buffer)->set_tensor_table(make_unique<TensorQTable>(1, TensorRow(1, outputIds))); | |||
| (*out_buffer)->set_tensor_table(std::make_unique<TensorQTable>(1, TensorRow(1, outputIds))); | |||
| } | |||
| return Status::OK(); | |||
| @@ -46,10 +46,10 @@ Status WeightedRandomSampler::Init(const RandomAccessOp *op) { | |||
| CHECK_FAIL_RETURN_UNEXPECTED(num_samples_ > 0 && samples_per_buffer_ > 0, "Fail to init WeightedRandomSampler"); | |||
| if (!replacement_) { | |||
| exp_dist_ = mindspore::make_unique<std::exponential_distribution<>>(1); | |||
| exp_dist_ = std::make_unique<std::exponential_distribution<>>(1); | |||
| InitOnePassSampling(); | |||
| } else { | |||
| discrete_dist_ = mindspore::make_unique<std::discrete_distribution<int64_t>>(weights_.begin(), weights_.end()); | |||
| discrete_dist_ = std::make_unique<std::discrete_distribution<int64_t>>(weights_.begin(), weights_.end()); | |||
| } | |||
| return Status::OK(); | |||
| @@ -96,9 +96,9 @@ Status WeightedRandomSampler::GetNextBuffer(std::unique_ptr<DataBuffer> *out_buf | |||
| } | |||
| if (sample_id_ == num_samples_) { | |||
| (*out_buffer) = make_unique<DataBuffer>(buffer_id_++, DataBuffer::kDeBFlagEOE); | |||
| (*out_buffer) = std::make_unique<DataBuffer>(buffer_id_++, DataBuffer::kDeBFlagEOE); | |||
| } else { | |||
| (*out_buffer) = make_unique<DataBuffer>(buffer_id_++, DataBuffer::kDeBFlagNone); | |||
| (*out_buffer) = std::make_unique<DataBuffer>(buffer_id_++, DataBuffer::kDeBFlagNone); | |||
| std::shared_ptr<Tensor> outputIds; | |||
| int64_t last_id = sample_id_ + samples_per_buffer_; | |||
| @@ -132,7 +132,7 @@ Status WeightedRandomSampler::GetNextBuffer(std::unique_ptr<DataBuffer> *out_buf | |||
| } | |||
| // Create a TensorTable from that single tensor and push into DataBuffer | |||
| (*out_buffer)->set_tensor_table(make_unique<TensorQTable>(1, TensorRow(1, outputIds))); | |||
| (*out_buffer)->set_tensor_table(std::make_unique<TensorQTable>(1, TensorRow(1, outputIds))); | |||
| } | |||
| return Status::OK(); | |||
| @@ -24,7 +24,6 @@ | |||
| #include "dataset/engine/datasetops/source/storage_client.h" | |||
| #include "dataset/engine/datasetops/source/storage_op.h" | |||
| #include "dataset/engine/datasetops/source/tf_client.h" | |||
| #include "dataset/util/make_unique.h" | |||
| #include "dataset/util/status.h" | |||
| namespace mindspore { | |||
| @@ -57,7 +56,7 @@ static Status CreateStorageClientSwitch( | |||
| case DatasetType::kTf: { | |||
| // Construct the derived class TFClient, stored as base class StorageClient | |||
| store_op->set_rows_per_buffer(32); | |||
| *out_client = mindspore::make_unique<TFClient>(std::move(schema), store_op); | |||
| *out_client = std::make_unique<TFClient>(std::move(schema), store_op); | |||
| break; | |||
| } | |||
| case DatasetType::kUnknown: | |||
| @@ -83,7 +82,7 @@ Status StorageClient::CreateStorageClient( | |||
| std::shared_ptr<StorageClient> *out_client) { // Out: the created storage client | |||
| // Make a new schema first. This only assigns the dataset type. It does not | |||
| // create the columns yet. | |||
| auto new_schema = mindspore::make_unique<DataSchema>(); | |||
| auto new_schema = std::make_unique<DataSchema>(); | |||
| RETURN_IF_NOT_OK(new_schema->LoadDatasetType(dataset_schema_path)); | |||
| RETURN_IF_NOT_OK(CreateStorageClientSwitch(std::move(new_schema), store_op, out_client)); | |||
| return Status::OK(); | |||
| @@ -99,7 +98,7 @@ Status StorageClient::CreateStorageClient( | |||
| std::shared_ptr<StorageClient> *out_client) { // Out: the created storage client | |||
| // The dataset type is passed in by the user. Create an empty schema with only | |||
| // only the dataset type filled in and then create the client with it. | |||
| auto new_schema = mindspore::make_unique<DataSchema>(); | |||
| auto new_schema = std::make_unique<DataSchema>(); | |||
| new_schema->set_dataset_type(in_type); | |||
| RETURN_IF_NOT_OK(CreateStorageClientSwitch(std::move(new_schema), store_op, out_client)); | |||
| return Status::OK(); | |||
| @@ -147,7 +146,7 @@ Status StorageClient::AssignDatasetLayout(uint32_t num_rows, // In: Th | |||
| // The current schema was just an empty one with only the dataset field populated. | |||
| // Let's copy construct a new one that will be a copy of the input schema (releasing the old | |||
| // one) and then set the number of rows that the user requested. | |||
| data_schema_ = mindspore::make_unique<DataSchema>(schema); | |||
| data_schema_ = std::make_unique<DataSchema>(schema); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(num_rows <= MAX_INTEGER_INT32, "numRows exceeds the boundary numRows>2147483647"); | |||
| num_rows_in_dataset_ = num_rows; | |||
| @@ -303,7 +303,7 @@ Status StorageOp::init() { | |||
| // For simplicity, we'll make both of them 3 so they are the same size. | |||
| int32_t action_queue_size = (buffers_needed / num_workers_) + 1; | |||
| for (int32_t i = 0; i < num_workers_; ++i) { | |||
| auto new_queue = mindspore::make_unique<Queue<int32_t>>(action_queue_size); | |||
| auto new_queue = std::make_unique<Queue<int32_t>>(action_queue_size); | |||
| action_queue_.push_back(std::move(new_queue)); | |||
| } | |||
| } | |||
| @@ -483,10 +483,10 @@ Status StorageOp::operator()() { | |||
| // Post the control message to tell the workers to stop waiting on action queue | |||
| // because we are done! | |||
| RETURN_IF_NOT_OK(this->PostEndOfData()); | |||
| std::unique_ptr<DataBuffer> eoeBuffer = mindspore::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOE); | |||
| std::unique_ptr<DataBuffer> eoeBuffer = std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOE); | |||
| RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(eoeBuffer))); | |||
| MS_LOG(INFO) << "StorageOp master: Flow end-of-data eof message."; | |||
| std::unique_ptr<DataBuffer> eofBuffer = mindspore::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOF); | |||
| std::unique_ptr<DataBuffer> eofBuffer = std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOF); | |||
| RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(eofBuffer))); | |||
| MS_LOG(INFO) << "StorageOp master: Main execution loop complete."; | |||
| done = true; // while loop exit | |||
| @@ -496,7 +496,7 @@ Status StorageOp::operator()() { | |||
| // RepeatOp above us somewhere in the tree will re-init us with the data to fetch again | |||
| // once it gets the end-of-epoch message. | |||
| MS_LOG(INFO) << "StorageOp master: Flow end-of-epoch eoe message."; | |||
| std::unique_ptr<DataBuffer> eoe_buffer = mindspore::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOE); | |||
| std::unique_ptr<DataBuffer> eoe_buffer = std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOE); | |||
| RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(eoe_buffer))); | |||
| // reset our buffer count and go to loop again. | |||
| @@ -27,7 +27,6 @@ | |||
| #include "dataset/core/data_type.h" | |||
| #include "dataset/engine/datasetops/source/storage_client.h" | |||
| #include "dataset/engine/data_schema.h" | |||
| #include "dataset/util/make_unique.h" | |||
| namespace mindspore { | |||
| namespace dataset { | |||
| @@ -72,7 +71,7 @@ Status TFBuffer::Load() { | |||
| } | |||
| // Construct the Tensor table for this buffer. | |||
| tensor_table_ = mindspore::make_unique<TensorQTable>(); | |||
| tensor_table_ = std::make_unique<TensorQTable>(); | |||
| // At each position in the tensor table, instantiate the shared pointer to it's Tensor. | |||
| uint32_t row = 0; | |||
| @@ -272,7 +271,7 @@ Status TFBuffer::LoadFloatList(const ColDescriptor ¤t_col, const dataengin | |||
| // Identify how many values we have and then create a local array of these | |||
| // to deserialize into | |||
| *num_elements = float_list.value_size(); | |||
| *float_array = mindspore::make_unique<float[]>(*num_elements); | |||
| *float_array = std::make_unique<float[]>(*num_elements); | |||
| for (int i = 0; i < float_list.value_size(); i++) { | |||
| (*float_array)[i] = float_list.value(i); | |||
| } | |||
| @@ -294,7 +293,7 @@ Status TFBuffer::LoadIntList(const ColDescriptor ¤t_col, const dataengine: | |||
| // Identify how many values we have and then create a local array of these | |||
| // to deserialize into | |||
| *num_elements = int64_list.value_size(); | |||
| *int_array = mindspore::make_unique<int64_t[]>(*num_elements); | |||
| *int_array = std::make_unique<int64_t[]>(*num_elements); | |||
| for (int i = 0; i < int64_list.value_size(); i++) { | |||
| (*int_array)[i] = int64_list.value(i); | |||
| } | |||
| @@ -36,7 +36,6 @@ | |||
| #include "dataset/engine/db_connector.h" | |||
| #include "dataset/engine/execution_tree.h" | |||
| #include "dataset/engine/jagged_connector.h" | |||
| #include "dataset/util/make_unique.h" | |||
| #include "dataset/util/path.h" | |||
| #include "dataset/util/queue.h" | |||
| #include "dataset/util/random.h" | |||
| @@ -54,7 +53,7 @@ TFReaderOp::Builder::Builder() | |||
| builder_op_connector_size_ = config_manager->op_connector_size(); | |||
| builder_rows_per_buffer_ = config_manager->rows_per_buffer(); | |||
| builder_shuffle_files_ = false; | |||
| builder_data_schema_ = make_unique<DataSchema>(); | |||
| builder_data_schema_ = std::make_unique<DataSchema>(); | |||
| } | |||
| Status TFReaderOp::Builder::ValidateInputs() const { | |||
| @@ -103,7 +102,7 @@ TFReaderOp::TFReaderOp(int32_t num_workers, int32_t worker_connector_size, int64 | |||
| finished_reading_dataset_(false), | |||
| shuffle_files_(shuffle_files), | |||
| data_schema_(std::move(data_schema)), | |||
| filename_index_(make_unique<StringIndex>()), | |||
| filename_index_(std::make_unique<StringIndex>()), | |||
| load_io_block_queue_(true), | |||
| load_jagged_connector_(true), | |||
| num_rows_(0), | |||
| @@ -129,7 +128,7 @@ Status TFReaderOp::Init() { | |||
| // parallel op base. | |||
| RETURN_IF_NOT_OK(ParallelOp::CreateWorkerConnector(worker_connector_size_)); | |||
| jagged_buffer_connector_ = mindspore::make_unique<JaggedConnector>(num_workers_, 1, worker_connector_size_); | |||
| jagged_buffer_connector_ = std::make_unique<JaggedConnector>(num_workers_, 1, worker_connector_size_); | |||
| // temporary: make size large enough to hold all files + EOE to avoid hangs | |||
| int32_t safe_queue_size = static_cast<int32_t>(std::ceil(dataset_files_list_.size() / num_workers_)) + 1; | |||
| @@ -229,7 +228,7 @@ Status TFReaderOp::operator()() { | |||
| } | |||
| // all workers finished reading for this epoch, and we have read all the data from all workers | |||
| std::unique_ptr<DataBuffer> eoe_buffer = mindspore::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOE); | |||
| std::unique_ptr<DataBuffer> eoe_buffer = std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOE); | |||
| RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(eoe_buffer))); | |||
| if (!BitTest(op_ctrl_flags_, kDeOpRepeated) || BitTest(op_ctrl_flags_, kDeOpLastRepeat)) { | |||
| @@ -241,7 +240,7 @@ Status TFReaderOp::operator()() { | |||
| } | |||
| } | |||
| std::unique_ptr<DataBuffer> eof_buffer = mindspore::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOF); | |||
| std::unique_ptr<DataBuffer> eof_buffer = std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOF); | |||
| RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(eof_buffer))); | |||
| RETURN_IF_NOT_OK(PostEndOfData()); | |||
| @@ -274,7 +273,7 @@ Status TFReaderOp::WorkerEntry(int32_t worker_id) { | |||
| MS_LOG(INFO) << "TFReader operator worker " << worker_id << " loaded file " << filename << "."; | |||
| } | |||
| } else { | |||
| std::unique_ptr<DataBuffer> eoe_buffer = mindspore::make_unique<DataBuffer>(1, DataBuffer::kDeBFlagEOE); | |||
| std::unique_ptr<DataBuffer> eoe_buffer = std::make_unique<DataBuffer>(1, DataBuffer::kDeBFlagEOE); | |||
| RETURN_IF_NOT_OK(jagged_buffer_connector_->Add(worker_id, std::move(eoe_buffer))); | |||
| } | |||
| @@ -288,7 +287,7 @@ Status TFReaderOp::WorkerEntry(int32_t worker_id) { | |||
| // When the worker pops this control indicator, it will shut itself down gracefully. | |||
| Status TFReaderOp::PostEndOfData() { | |||
| for (int i = 0; i < num_workers_; ++i) { | |||
| std::unique_ptr<FilenameBlock> eof = mindspore::make_unique<FilenameBlock>(IOBlock::kDeIoBlockFlagEof); | |||
| std::unique_ptr<FilenameBlock> eof = std::make_unique<FilenameBlock>(IOBlock::kDeIoBlockFlagEof); | |||
| RETURN_IF_NOT_OK(PushIoBlockQueue(i, std::move(eof))); | |||
| } | |||
| @@ -299,7 +298,7 @@ Status TFReaderOp::PostEndOfData() { | |||
| // pops this control indicator, it will wait until the next epoch starts and then resume execution. | |||
| Status TFReaderOp::PostEndOfEpoch(int32_t queue_index) { | |||
| for (int i = 0; i < num_workers_; ++i) { | |||
| std::unique_ptr<FilenameBlock> eoe = mindspore::make_unique<FilenameBlock>(IOBlock::kDeIoBlockFlagEoe); | |||
| std::unique_ptr<FilenameBlock> eoe = std::make_unique<FilenameBlock>(IOBlock::kDeIoBlockFlagEoe); | |||
| RETURN_IF_NOT_OK(PushIoBlockQueue((queue_index + i) % num_workers_, std::move(eoe))); | |||
| } | |||
| @@ -358,7 +357,7 @@ Status TFReaderOp::FillIOBlockShuffle(const std::vector<int64_t> &i_keys) { | |||
| } | |||
| if (!equal_rows_per_shard_) { | |||
| if (key_index++ % num_devices_ == device_id_) { | |||
| auto ioBlock = make_unique<FilenameBlock>(*it, kInvalidOffset, kInvalidOffset, IOBlock::kDeIoBlockNone); | |||
| auto ioBlock = std::make_unique<FilenameBlock>(*it, kInvalidOffset, kInvalidOffset, IOBlock::kDeIoBlockNone); | |||
| RETURN_IF_NOT_OK(PushIoBlockQueue(queue_index, std::move(ioBlock))); | |||
| queue_index = (queue_index + 1) % num_workers_; | |||
| } | |||
| @@ -367,7 +366,7 @@ Status TFReaderOp::FillIOBlockShuffle(const std::vector<int64_t> &i_keys) { | |||
| auto file_it = filename_index_->Search(*it); | |||
| std::string file_name = file_it.value(); | |||
| if (NeedPushFileToblockQueue(file_name, &start_offset, &end_offset, pre_count)) { | |||
| auto ioBlock = make_unique<FilenameBlock>(*it, start_offset, end_offset, IOBlock::kDeIoBlockNone); | |||
| auto ioBlock = std::make_unique<FilenameBlock>(*it, start_offset, end_offset, IOBlock::kDeIoBlockNone); | |||
| RETURN_IF_NOT_OK(PushIoBlockQueue(queue_index, std::move(ioBlock))); | |||
| MS_LOG(DEBUG) << "File name " << *it << " start offset " << start_offset << " end_offset " << end_offset; | |||
| queue_index = (queue_index + 1) % num_workers_; | |||
| @@ -404,14 +403,15 @@ Status TFReaderOp::FillIOBlockNoShuffle() { | |||
| } | |||
| if (!equal_rows_per_shard_) { | |||
| if (key_index++ % num_devices_ == device_id_) { | |||
| auto ioBlock = make_unique<FilenameBlock>(it.key(), kInvalidOffset, kInvalidOffset, IOBlock::kDeIoBlockNone); | |||
| auto ioBlock = | |||
| std::make_unique<FilenameBlock>(it.key(), kInvalidOffset, kInvalidOffset, IOBlock::kDeIoBlockNone); | |||
| RETURN_IF_NOT_OK(PushIoBlockQueue(queue_index, std::move(ioBlock))); | |||
| queue_index = (queue_index + 1) % num_workers_; | |||
| } | |||
| } else { | |||
| std::string file_name = it.value(); | |||
| if (NeedPushFileToblockQueue(file_name, &start_offset, &end_offset, pre_count)) { | |||
| auto ioBlock = make_unique<FilenameBlock>(it.key(), start_offset, end_offset, IOBlock::kDeIoBlockNone); | |||
| auto ioBlock = std::make_unique<FilenameBlock>(it.key(), start_offset, end_offset, IOBlock::kDeIoBlockNone); | |||
| RETURN_IF_NOT_OK(PushIoBlockQueue(queue_index, std::move(ioBlock))); | |||
| queue_index = (queue_index + 1) % num_workers_; | |||
| } | |||
| @@ -490,14 +490,13 @@ Status TFReaderOp::LoadFile(const std::string &filename, const int64_t start_off | |||
| int64_t rows_read = 0; | |||
| int64_t rows_total = 0; | |||
| std::unique_ptr<DataBuffer> current_buffer = | |||
| mindspore::make_unique<DataBuffer>(0, DataBuffer::BufferFlags::kDeBFlagNone); | |||
| std::unique_ptr<DataBuffer> current_buffer = std::make_unique<DataBuffer>(0, DataBuffer::BufferFlags::kDeBFlagNone); | |||
| std::unordered_map<std::string, int32_t> column_name_map; | |||
| for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) { | |||
| column_name_map[data_schema_->column(i).name()] = i; | |||
| } | |||
| current_buffer->set_column_name_map(column_name_map); | |||
| std::unique_ptr<TensorQTable> new_tensor_table = make_unique<TensorQTable>(); | |||
| std::unique_ptr<TensorQTable> new_tensor_table = std::make_unique<TensorQTable>(); | |||
| while (reader.peek() != EOF) { | |||
| if (!load_jagged_connector_) { | |||
| @@ -532,9 +531,9 @@ Status TFReaderOp::LoadFile(const std::string &filename, const int64_t start_off | |||
| current_buffer->set_tensor_table(std::move(new_tensor_table)); | |||
| RETURN_IF_NOT_OK(jagged_buffer_connector_->Add(worker_id, std::move(current_buffer))); | |||
| current_buffer = make_unique<DataBuffer>(0, DataBuffer::BufferFlags::kDeBFlagNone); | |||
| current_buffer = std::make_unique<DataBuffer>(0, DataBuffer::BufferFlags::kDeBFlagNone); | |||
| current_buffer->set_column_name_map(column_name_map); | |||
| new_tensor_table = make_unique<TensorQTable>(); | |||
| new_tensor_table = std::make_unique<TensorQTable>(); | |||
| rows_read = 0; | |||
| } | |||
| } | |||
| @@ -742,7 +741,7 @@ Status TFReaderOp::LoadFloatList(const ColDescriptor ¤t_col, const dataeng | |||
| // Identify how many values we have and then create a local array of these | |||
| // to deserialize into | |||
| *num_elements = float_list.value_size(); | |||
| *float_array = mindspore::make_unique<float[]>(*num_elements); | |||
| *float_array = std::make_unique<float[]>(*num_elements); | |||
| for (int i = 0; i < float_list.value_size(); ++i) { | |||
| (*float_array)[i] = float_list.value(i); | |||
| } | |||
| @@ -38,7 +38,7 @@ Status VOCOp::Builder::Build(std::shared_ptr<VOCOp> *ptr) { | |||
| if (builder_sampler_ == nullptr) { | |||
| builder_sampler_ = std::make_shared<SequentialSampler>(); | |||
| } | |||
| builder_schema_ = make_unique<DataSchema>(); | |||
| builder_schema_ = std::make_unique<DataSchema>(); | |||
| RETURN_IF_NOT_OK( | |||
| builder_schema_->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1))); | |||
| RETURN_IF_NOT_OK( | |||
| @@ -85,7 +85,7 @@ Status VOCOp::TraverseSampleIds(const std::shared_ptr<Tensor> &sample_ids, std:: | |||
| row_cnt_++; | |||
| if (row_cnt_ % rows_per_buffer_ == 0) { | |||
| RETURN_IF_NOT_OK(io_block_queues_[buf_cnt_++ % num_workers_]->Add( | |||
| make_unique<IOBlock>(IOBlock(*keys, IOBlock::kDeIoBlockNone)))); | |||
| std::make_unique<IOBlock>(IOBlock(*keys, IOBlock::kDeIoBlockNone)))); | |||
| keys->clear(); | |||
| } | |||
| } | |||
| @@ -110,21 +110,21 @@ Status VOCOp::operator()() { | |||
| } | |||
| if (keys.empty() == false) { | |||
| RETURN_IF_NOT_OK(io_block_queues_[(buf_cnt_++) % num_workers_]->Add( | |||
| make_unique<IOBlock>(IOBlock(keys, IOBlock::kDeIoBlockNone)))); | |||
| std::make_unique<IOBlock>(IOBlock(keys, IOBlock::kDeIoBlockNone)))); | |||
| } | |||
| if (!BitTest(op_ctrl_flags_, kDeOpRepeated) || BitTest(op_ctrl_flags_, kDeOpLastRepeat)) { | |||
| std::unique_ptr<IOBlock> eoe_block = make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEoe); | |||
| std::unique_ptr<IOBlock> eof_block = make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEof); | |||
| std::unique_ptr<IOBlock> eoe_block = std::make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEoe); | |||
| std::unique_ptr<IOBlock> eof_block = std::make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEof); | |||
| RETURN_IF_NOT_OK(io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::move(eoe_block))); | |||
| RETURN_IF_NOT_OK(io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::move(eof_block))); | |||
| for (int32_t i = 0; i < num_workers_; i++) { | |||
| RETURN_IF_NOT_OK( | |||
| io_block_queues_[i]->Add(make_unique<IOBlock>(std::vector<int64_t>(), IOBlock::kDeIoBlockNone))); | |||
| io_block_queues_[i]->Add(std::make_unique<IOBlock>(std::vector<int64_t>(), IOBlock::kDeIoBlockNone))); | |||
| } | |||
| return Status::OK(); | |||
| } else { | |||
| RETURN_IF_NOT_OK( | |||
| io_block_queues_[(buf_cnt_++) % num_workers_]->Add(make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEoe))); | |||
| io_block_queues_[(buf_cnt_++) % num_workers_]->Add(std::make_unique<IOBlock>(IOBlock::kDeIoBlockFlagEoe))); | |||
| RETURN_IF_NOT_OK(wp_.Wait()); | |||
| wp_.Clear(); | |||
| RETURN_IF_NOT_OK(sampler_->GetNextBuffer(&sampler_buffer)); | |||
| @@ -164,7 +164,7 @@ Status VOCOp::LoadTensorRow(const std::string &image_id, TensorRow *trow) { | |||
| } | |||
| Status VOCOp::LoadBuffer(const std::vector<int64_t> &keys, std::unique_ptr<DataBuffer> *db) { | |||
| std::unique_ptr<TensorQTable> deq = make_unique<TensorQTable>(); | |||
| std::unique_ptr<TensorQTable> deq = std::make_unique<TensorQTable>(); | |||
| TensorRow trow; | |||
| for (const uint64_t &key : keys) { | |||
| RETURN_IF_NOT_OK(this->LoadTensorRow(image_ids_[key], &trow)); | |||
| @@ -182,15 +182,15 @@ Status VOCOp::WorkerEntry(int32_t worker_id) { | |||
| RETURN_IF_NOT_OK(io_block_queues_[worker_id]->PopFront(&io_block)); | |||
| while (io_block != nullptr) { | |||
| if (io_block->eoe() == true) { | |||
| RETURN_IF_NOT_OK(out_connector_->Add(worker_id, make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOE))); | |||
| RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOE))); | |||
| buffer_id = worker_id; | |||
| } else if (io_block->eof() == true) { | |||
| RETURN_IF_NOT_OK(out_connector_->Add(worker_id, (make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOF)))); | |||
| RETURN_IF_NOT_OK(out_connector_->Add(worker_id, (std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOF)))); | |||
| } else { | |||
| std::vector<int64_t> keys; | |||
| RETURN_IF_NOT_OK(io_block->GetKeys(&keys)); | |||
| if (keys.empty() == true) return Status::OK(); | |||
| std::unique_ptr<DataBuffer> db = make_unique<DataBuffer>(buffer_id, DataBuffer::kDeBFlagNone); | |||
| std::unique_ptr<DataBuffer> db = std::make_unique<DataBuffer>(buffer_id, DataBuffer::kDeBFlagNone); | |||
| RETURN_IF_NOT_OK(LoadBuffer(keys, &db)); | |||
| RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::move(db))); | |||
| buffer_id += num_workers_; | |||
| @@ -65,13 +65,13 @@ Status ZipOp::operator()() { | |||
| // initialize the iterators | |||
| for (int32_t i = 0; i < children_num_; ++i) { | |||
| // magic number 0 since Zip is not a parallel Op | |||
| child_iterators_.push_back(mindspore::make_unique<ChildIterator>(this, 0, i)); | |||
| child_iterators_.push_back(std::make_unique<ChildIterator>(this, 0, i)); | |||
| } | |||
| // Loop until eof is true | |||
| while (!eof_) { | |||
| // Create tensor table and prepare it by fetching and packing the first zipped row into it. | |||
| std::unique_ptr<TensorQTable> curr_table = mindspore::make_unique<TensorQTable>(); | |||
| std::unique_ptr<TensorQTable> curr_table = std::make_unique<TensorQTable>(); | |||
| RETURN_IF_NOT_OK(prepare(curr_table.get())); | |||
| // If an eof got picked up during the above prepare, then we're done | |||
| @@ -81,7 +81,7 @@ Status ZipOp::operator()() { | |||
| while (!draining_) { | |||
| // 1. If a previous loop iteration sent the current table out, then create a new one. | |||
| if (curr_table == nullptr) { | |||
| curr_table = mindspore::make_unique<TensorQTable>(); | |||
| curr_table = std::make_unique<TensorQTable>(); | |||
| } | |||
| // 2 fill the table. Note: draining mode might get turned on if any of the child inputs were done | |||
| @@ -89,8 +89,7 @@ Status ZipOp::operator()() { | |||
| // 3 create and update buffer and send it to the out connector | |||
| if (!curr_table->empty()) { | |||
| std::unique_ptr<DataBuffer> curr_buffer = | |||
| mindspore::make_unique<DataBuffer>(buffer_id_, DataBuffer::kDeBFlagNone); | |||
| std::unique_ptr<DataBuffer> curr_buffer = std::make_unique<DataBuffer>(buffer_id_, DataBuffer::kDeBFlagNone); | |||
| curr_buffer->set_tensor_table(std::move(curr_table)); | |||
| curr_buffer->set_column_name_map(col_name_id_map_); | |||
| MS_LOG(DEBUG) << "Zip operator finished one buffer, pushing, rows " << curr_buffer->NumRows() << ", cols " | |||
| @@ -105,15 +104,14 @@ Status ZipOp::operator()() { | |||
| MS_LOG(DEBUG) << "Zip operator is now draining child inputs."; | |||
| RETURN_IF_NOT_OK(drainPipeline()); | |||
| // Now that we have drained child inputs, send the eoe up. | |||
| RETURN_IF_NOT_OK( | |||
| out_connector_->Add(0, std::move(mindspore::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOE)))); | |||
| RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOE)))); | |||
| } | |||
| } | |||
| // 5 handle eof | |||
| // propagate eof here. | |||
| MS_LOG(INFO) << "Zip operator got EOF, propagating."; | |||
| RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(mindspore::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOF)))); | |||
| RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOF)))); | |||
| return Status::OK(); | |||
| } | |||
| @@ -65,7 +65,7 @@ class DbConnector : public Connector<std::unique_ptr<DataBuffer>> { | |||
| RETURN_IF_NOT_OK(cv_.Wait(&lk, [this, worker_id]() { return expect_consumer_ == worker_id; })); | |||
| // Once an EOF message is encountered this flag will be set and we can return early. | |||
| if (end_of_file_) { | |||
| *result = mindspore::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOF); | |||
| *result = std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOF); | |||
| } else { | |||
| RETURN_IF_NOT_OK(queues_[pop_from_]->PopFront(result)); | |||
| if (*result == nullptr) { | |||
| @@ -24,7 +24,7 @@ namespace mindspore { | |||
| namespace dataset { | |||
| // Constructor | |||
| ExecutionTree::ExecutionTree() : id_count_(0) { | |||
| tg_ = mindspore::make_unique<TaskGroup>(); | |||
| tg_ = std::make_unique<TaskGroup>(); | |||
| tree_state_ = kDeTStateInit; | |||
| prepare_flags_ = kDePrepNone; | |||
| } | |||
| @@ -24,7 +24,6 @@ | |||
| #include "dataset/core/cv_tensor.h" | |||
| #include "dataset/core/tensor.h" | |||
| #include "dataset/core/tensor_shape.h" | |||
| #include "dataset/util/make_unique.h" | |||
| #include "dataset/util/random.h" | |||
| #define MAX_INT_PRECISION 16777216 // float int precision is 16777216 | |||
| @@ -376,7 +375,7 @@ Status HwcToChw(std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *output) | |||
| int width = input_cv->shape()[1]; | |||
| int num_channels = input_cv->shape()[2]; | |||
| auto output_cv = mindspore::make_unique<CVTensor>(TensorShape{num_channels, height, width}, input_cv->type()); | |||
| auto output_cv = std::make_unique<CVTensor>(TensorShape{num_channels, height, width}, input_cv->type()); | |||
| for (int i = 0; i < num_channels; ++i) { | |||
| cv::Mat mat; | |||
| RETURN_IF_NOT_OK(output_cv->Mat({i}, &mat)); | |||
| @@ -84,18 +84,8 @@ Status Resize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *out | |||
| // Returns Decoded image | |||
| // Supported images: | |||
| // - Windows bitmaps - \*.bmp, \*.dib (always supported) | |||
| // - JPEG files - \*.jpeg, \*.jpg, \*.jpe (see the *Note* section) | |||
| // - JPEG 2000 files - \*.jp2 (see the *Note* section) | |||
| // - Portable Network Graphics - \*.png (see the *Note* section) | |||
| // - WebP - \*.webp (see the *Note* section) | |||
| // - Portable image format - \*.pbm, \*.pgm, \*.ppm \*.pxm, \*.pnm (always supported) | |||
| // - PFM files - \*.pfm (see the *Note* section) | |||
| // - Sun rasters - \*.sr, \*.ras (always supported) | |||
| // - TIFF files - \*.tiff, \*.tif (see the *Note* section) | |||
| // - OpenEXR Image files - \*.exr (see the *Note* section) | |||
| // - Radiance HDR - \*.hdr, \*.pic (always supported) | |||
| // - Raster and Vector geospatial data supported by GDAL (see the *Note* section) | |||
| // BMP JPEG JPG PNG TIFF | |||
| // supported by opencv, if user need more image analysis capabilities, please compile opencv particularlly. | |||
| // @param input: CVTensor containing the not decoded image 1D bytes | |||
| // @param output: Decoded image Tensor of shape <H,W,C> and type DE_UINT8. Pixel order is RGB | |||
| Status Decode(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output); | |||
| @@ -20,7 +20,6 @@ | |||
| #include "dataset/core/tensor.h" | |||
| #include "dataset/kernels/tensor_op.h" | |||
| #include "dataset/util/make_unique.h" | |||
| #include "dataset/util/status.h" | |||
| namespace mindspore { | |||
| @@ -16,7 +16,6 @@ | |||
| #include "dataset/util/arena.h" | |||
| #include <unistd.h> | |||
| #include <utility> | |||
| #include "dataset/util/make_unique.h" | |||
| #include "dataset/util/system_pool.h" | |||
| #include "dataset/util/de_error.h" | |||
| #include "./securec.h" | |||
| @@ -18,10 +18,8 @@ | |||
| #include <algorithm> | |||
| #include <limits> | |||
| #include <utility> | |||
| #include "./securec.h" | |||
| #include "dataset/util/make_unique.h" | |||
| #include "dataset/util/de_error.h" | |||
| #include "dataset/util/system_pool.h" | |||
| namespace mindspore { | |||
| @@ -16,6 +16,13 @@ | |||
| #ifndef DATASET_UTIL_DE_ERROR_H_ | |||
| #define DATASET_UTIL_DE_ERROR_H_ | |||
| #ifdef DEBUG | |||
| #include <cassert> | |||
| #define DS_ASSERT(f) assert(f) | |||
| #else | |||
| #define DS_ASSERT(f) ((void)0) | |||
| #endif | |||
| #include <map> | |||
| #include "utils/error_code.h" | |||
| @@ -18,8 +18,7 @@ | |||
| #include <iostream> | |||
| #include <iterator> | |||
| #include "dataset/util/make_unique.h" | |||
| #include "dataset/util/de_error.h" | |||
| namespace mindspore { | |||
| namespace dataset { | |||
| @@ -14,6 +14,7 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "dataset/util/lock.h" | |||
| #include "dataset/util/de_error.h" | |||
| namespace mindspore { | |||
| namespace dataset { | |||
| @@ -19,7 +19,6 @@ | |||
| #include <atomic> | |||
| #include <condition_variable> | |||
| #include <mutex> | |||
| #include "dataset/util/make_unique.h" | |||
| namespace mindspore { | |||
| namespace dataset { | |||
| @@ -1,37 +0,0 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef DATASET_UTIL_MAKE_UNIQUE_H_ | |||
| #define DATASET_UTIL_MAKE_UNIQUE_H_ | |||
| #ifdef DEBUG | |||
| #include <cassert> | |||
| #define DS_ASSERT(f) assert(f) | |||
| #else | |||
| #define DS_ASSERT(f) ((void)0) | |||
| #endif | |||
| #include <memory> | |||
| #include <type_traits> | |||
| #include <utility> | |||
| #include "dataset/util/de_error.h" | |||
| #include "utils/log_adapter.h" | |||
| namespace mindspore { | |||
| using std::make_unique; | |||
| } // namespace mindspore | |||
| #endif // DATASET_UTIL_MAKE_UNIQUE_H_ | |||
| @@ -212,7 +212,7 @@ class QueueList { | |||
| void Init(int num_queues, int capacity) { | |||
| queue_list_.reserve(num_queues); | |||
| for (int i = 0; i < num_queues; i++) { | |||
| queue_list_.emplace_back(mindspore::make_unique<Queue<T>>(capacity)); | |||
| queue_list_.emplace_back(std::make_unique<Queue<T>>(capacity)); | |||
| } | |||
| } | |||
| @@ -27,7 +27,6 @@ | |||
| #include <string> | |||
| #include <thread> | |||
| #include "dataset/util/de_error.h" | |||
| #include "dataset/util/make_unique.h" | |||
| #include "dataset/util/intrp_resource.h" | |||
| #include "dataset/util/list.h" | |||
| #include "dataset/util/memory_pool.h" | |||
| @@ -262,8 +262,8 @@ AscendDeviceAddress::~AscendDeviceAddress() { | |||
| if (ptr_ == nullptr) { | |||
| return; | |||
| } | |||
| if (mem_dynamic_alloc_) { | |||
| AscendMemoryAllocator::GetInstance().FreeTensorMem(ptr_); | |||
| if (from_mem_pool_) { | |||
| AscendMemoryPool::GetInstance().FreeTensorMem(ptr_); | |||
| ptr_ = nullptr; | |||
| } | |||
| } | |||
| @@ -21,7 +21,7 @@ | |||
| #include <vector> | |||
| #include <memory> | |||
| #include "device/device_address.h" | |||
| #include "device/ascend/ascend_memory_allocator.h" | |||
| #include "device/ascend/ascend_memory_pool.h" | |||
| #include "ir/dtype.h" | |||
| namespace mindspore { | |||
| @@ -29,7 +29,7 @@ | |||
| #include "hccl/hcom.h" | |||
| #include "runtime/context.h" | |||
| #include "device/ascend/ascend_stream_assign.h" | |||
| #include "device/ascend/ascend_memory_allocator.h" | |||
| #include "device/ascend/ascend_memory_pool.h" | |||
| #include "framework/ge_runtime/model_runner.h" | |||
| #include "device/ascend/tasksink/task_generator.h" | |||
| #include "session/anf_runtime_algorithm.h" | |||
| @@ -37,6 +37,7 @@ | |||
| #include "kernel/tbe/tbe_utils.h" | |||
| #include "kernel/tbe/tbe_python_funcs.h" | |||
| #include "pre_activate/mem_reuse/mem_reuse_checker.h" | |||
| #include "device/ascend/ascend_memory_manager.h" | |||
| using mindspore::device::ascend::ProfilingManager; | |||
| using mindspore::device::ascend::ProfilingUtils; | |||
| @@ -47,8 +48,6 @@ using std::vector; | |||
| namespace mindspore { | |||
| namespace device { | |||
| namespace ascend { | |||
| static const uint64_t ASCEND_MEM_SIZE = 20; | |||
| static const uint64_t ASCEND_MEM_SIZE_BYTE = (ASCEND_MEM_SIZE << 30); | |||
| static const size_t PRAMATER_OUTPUT_INDEX = 0; | |||
| AscendKernelRuntime::~AscendKernelRuntime() { graph_model_map_.clear(); } | |||
| @@ -86,7 +85,8 @@ void AscendKernelRuntime::ReleaseDeviceRes() { | |||
| MS_EXCEPTION(DeviceProcessError) << "rtSetDevice, ret[" << static_cast<int>(ret) << "]"; | |||
| } | |||
| FreeDeviceMemory(); | |||
| MS_EXCEPTION_IF_NULL(mem_manager_); | |||
| mem_manager_->FreeDeviceMemory(); | |||
| (void)DestroyHccl(); | |||
| (void)ResetDevice(); | |||
| (void)ProfilingManager::GetInstance().StopProfiling(); | |||
| @@ -109,11 +109,9 @@ bool AscendKernelRuntime::Init() { | |||
| if (!ret) { | |||
| return ret; | |||
| } | |||
| ret = MallocDeviceMemory(); | |||
| if (!ret) { | |||
| return ret; | |||
| } | |||
| mem_manager_ = std::make_shared<AscendMemoryManager>(); | |||
| MS_EXCEPTION_IF_NULL(mem_manager_); | |||
| mem_manager_->MallocDeviceMemory(); | |||
| ret = ProfilingManager::GetInstance().StartupProfiling(device_id_); | |||
| if (!ret) { | |||
| @@ -239,13 +237,6 @@ DeviceAddressPtr AscendKernelRuntime::CreateDeviceAddress(void *device_ptr, size | |||
| return std::make_shared<AscendDeviceAddress>(device_ptr, device_size, format, type_id); | |||
| } | |||
| void AscendKernelRuntime::MallocOpMemory(const DeviceAddressPtr address, size_t size, int) { | |||
| auto device_ptr = AscendMemoryAllocator::GetInstance().AllocTensorMem(size); | |||
| MS_EXCEPTION_IF_NULL(device_ptr); | |||
| address->ptr_ = device_ptr; | |||
| address->mem_dynamic_alloc_ = true; | |||
| } | |||
| bool AscendKernelRuntime::GenTask(const session::KernelGraph *graph) { | |||
| auto context_ptr = MsContext::GetInstance(); | |||
| MS_EXCEPTION_IF_NULL(context_ptr); | |||
| @@ -474,42 +465,6 @@ bool AscendKernelRuntime::DestroyHccl() { | |||
| context_ptr->set_enable_hccl(false); | |||
| return true; | |||
| } | |||
| bool AscendKernelRuntime::MallocDeviceMemory() { | |||
| device_mem_size_ = ASCEND_MEM_SIZE_BYTE; | |||
| static_mem_offset_ = FloatToSize(device_mem_size_ * GRAPH_INIT_ASCEND_MEM_RATIO); | |||
| auto ret = rtMalloc(reinterpret_cast<void **>(&device_mem_base_), static_mem_offset_, RT_MEMORY_HBM); | |||
| if (ret != RT_ERROR_NONE) { | |||
| MS_EXCEPTION(DeviceProcessError) << "rtMalloc mem size[" << static_mem_offset_ << "] fail, ret[" << ret << "]"; | |||
| } | |||
| device_mem_pool_size_ = FloatToSize(device_mem_size_ * (1 - GRAPH_INIT_ASCEND_MEM_RATIO)); | |||
| ret = rtMalloc(reinterpret_cast<void **>(&device_mem_pool_base_), device_mem_pool_size_, RT_MEMORY_HBM); | |||
| if (ret != RT_ERROR_NONE) { | |||
| MS_EXCEPTION(DeviceProcessError) << "rtMalloc mem size[" << device_mem_pool_size_ << "] fail, ret[" << ret << "]"; | |||
| } | |||
| AscendMemoryAllocator::GetInstance().set_device_mem_pool_base(device_mem_pool_base_); | |||
| AscendMemoryAllocator::GetInstance().set_device_mem_pool_size(device_mem_pool_size_); | |||
| return true; | |||
| } | |||
| void AscendKernelRuntime::FreeDeviceMemory() { | |||
| if (device_mem_base_ != nullptr) { | |||
| auto ret = rtFree(device_mem_base_); | |||
| if (ret != RT_ERROR_NONE) { | |||
| MS_LOG(ERROR) << "rtFree mem size[" << device_mem_size_ << "] fail, ret[" << ret << "]"; | |||
| } | |||
| device_mem_base_ = nullptr; | |||
| } | |||
| if (device_mem_pool_base_ != nullptr) { | |||
| auto ret = rtFree(device_mem_pool_base_); | |||
| if (ret != RT_ERROR_NONE) { | |||
| MS_LOG(ERROR) << "rtFree mem size[" << device_mem_pool_size_ << "] fail, ret[" << ret << "]"; | |||
| } | |||
| device_mem_pool_base_ = nullptr; | |||
| } | |||
| } | |||
| void AscendKernelRuntime::FreeHostMemory() { dynamic_mem_offset_ = 0; } | |||
| } // namespace ascend | |||
| } // namespace device | |||
| } // namespace mindspore | |||
| @@ -39,13 +39,11 @@ class AscendKernelRuntime : public KernelRuntime { | |||
| bool GenTask(const session::KernelGraph *graph) override; | |||
| bool RunTask(const session::KernelGraph *graph) override; | |||
| bool LoadTask(const session::KernelGraph *graph) override; | |||
| void FreeHostMemory() override; | |||
| protected: | |||
| DeviceAddressPtr CreateDeviceAddress(void *device_ptr, size_t device_size, const string &format, | |||
| TypeId type_id) override; | |||
| bool SyncStream() override; | |||
| void MallocOpMemory(const DeviceAddressPtr address, size_t size, int flag) override; | |||
| private: | |||
| bool InitDevice(); | |||
| @@ -53,8 +51,7 @@ class AscendKernelRuntime : public KernelRuntime { | |||
| bool HcclInit(); | |||
| bool NeedDestroyHccl(); | |||
| bool DestroyHccl(); | |||
| bool MallocDeviceMemory(); | |||
| void FreeDeviceMemory(); | |||
| void ClearGraphModelMap(); | |||
| void ReleaseDeviceRes() override; | |||
| uint32_t GetGraphModelId(const session::KernelGraph *kernel_graph); | |||
| @@ -0,0 +1,67 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "device/ascend/ascend_memory_manager.h" | |||
| #include "device/ascend/ascend_memory_pool.h" | |||
| #include "utils/context/ms_context.h" | |||
| #include "runtime/mem.h" | |||
| namespace mindspore { | |||
| namespace device { | |||
| namespace ascend { | |||
| const uint64_t kAscendDeviceMemGB = 20; | |||
| const uint64_t kAscendMemPoolGB = 5; | |||
| const uint64_t kAscendDeviceMemSize = (kAscendDeviceMemGB << 30); | |||
| const uint64_t kAscendMemPoolSize = (kAscendMemPoolGB << 30); | |||
| void AscendMemoryManager::MallocDeviceMemory() { | |||
| device_mem_size_ = kAscendDeviceMemSize; | |||
| static_mem_offset_ = device_mem_size_; | |||
| auto ret = rtMalloc(reinterpret_cast<void **>(&device_mem_base_), static_mem_offset_, RT_MEMORY_HBM); | |||
| if (ret != RT_ERROR_NONE) { | |||
| MS_EXCEPTION(DeviceProcessError) << "rtMalloc mem size[" << static_mem_offset_ << "] fail, ret[" << ret << "]"; | |||
| } | |||
| device_mem_pool_size_ = kAscendMemPoolSize; | |||
| ret = rtMalloc(reinterpret_cast<void **>(&device_mem_pool_base_), device_mem_pool_size_, RT_MEMORY_HBM); | |||
| if (ret != RT_ERROR_NONE) { | |||
| MS_EXCEPTION(DeviceProcessError) << "rtMalloc mem size[" << device_mem_pool_size_ << "] fail, ret[" << ret << "]"; | |||
| } | |||
| AscendMemoryPool::GetInstance().set_device_mem_pool_base(device_mem_pool_base_); | |||
| AscendMemoryPool::GetInstance().set_device_mem_pool_size(device_mem_pool_size_); | |||
| } | |||
| void AscendMemoryManager::FreeDeviceMemory() { | |||
| if (device_mem_base_ != nullptr) { | |||
| auto ret = rtFree(device_mem_base_); | |||
| if (ret != RT_ERROR_NONE) { | |||
| MS_LOG(ERROR) << "rtFree mem size[" << device_mem_size_ << "] fail, ret[" << ret << "]"; | |||
| } | |||
| device_mem_base_ = nullptr; | |||
| } | |||
| if (device_mem_pool_base_ != nullptr) { | |||
| auto ret = rtFree(device_mem_pool_base_); | |||
| if (ret != RT_ERROR_NONE) { | |||
| MS_LOG(ERROR) << "rtFree mem size[" << device_mem_pool_size_ << "] fail, ret[" << ret << "]"; | |||
| } | |||
| device_mem_pool_base_ = nullptr; | |||
| } | |||
| } | |||
| void *AscendMemoryManager::MallocMemFromMemPool(size_t size) { | |||
| return AscendMemoryPool::GetInstance().AllocTensorMem(size); | |||
| } | |||
| } // namespace ascend | |||
| } // namespace device | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,39 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_ASCEND_MEMORY_MANAGER_H_ | |||
| #define MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_ASCEND_MEMORY_MANAGER_H_ | |||
| #include "device/memory_manager.h" | |||
| namespace mindspore { | |||
| namespace device { | |||
| namespace ascend { | |||
| class AscendMemoryManager : public MemoryManager { | |||
| public: | |||
| AscendMemoryManager() = default; | |||
| virtual ~AscendMemoryManager() = default; | |||
| void MallocDeviceMemory() override; | |||
| void FreeDeviceMemory() override; | |||
| void *MallocMemFromMemPool(size_t size) override; | |||
| private: | |||
| uint8_t *device_mem_pool_base_{nullptr}; | |||
| uint64_t device_mem_pool_size_{0}; | |||
| }; | |||
| } // namespace ascend | |||
| } // namespace device | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_ASCEND_MEMORY_MANAGER_H_ | |||
| @@ -14,24 +14,15 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "device/ascend/ascend_memory_allocator.h" | |||
| #include "device/ascend/ascend_memory_pool.h" | |||
| #include "device/ascend/ascend_kernel_runtime.h" | |||
| #include "utils/log_adapter.h" | |||
| namespace mindspore { | |||
| namespace device { | |||
| namespace ascend { | |||
| const uint64_t MEM_SIZE = 20; | |||
| const uint64_t MEM_SIZE_BYTE = (MEM_SIZE << 30); | |||
| AscendMemoryAllocator::AscendMemoryAllocator() { | |||
| hasMalloc_ = false; | |||
| free_mem_size_ = FloatToSize(MEM_SIZE_BYTE * (1 - GRAPH_INIT_ASCEND_MEM_RATIO)); | |||
| total_mem_size_ = free_mem_size_; | |||
| } | |||
| size_t AscendMemoryAllocator::AllocDeviceMem(size_t size, DeviceMemPtr* addr) { | |||
| if (hasMalloc_) { | |||
| size_t AscendMemoryPool::AllocDeviceMem(size_t size, DeviceMemPtr* addr) { | |||
| if (has_malloc_) { | |||
| MS_LOG(EXCEPTION) << "Has alloc memory pool memory !"; | |||
| } | |||
| if (size == 0 || size > free_mem_size_) { | |||
| @@ -41,35 +32,35 @@ size_t AscendMemoryAllocator::AllocDeviceMem(size_t size, DeviceMemPtr* addr) { | |||
| if (*addr == nullptr) { | |||
| MS_LOG(EXCEPTION) << "Device memory pool base is nullptr, failed to alloc memory pool memory!"; | |||
| } | |||
| hasMalloc_ = true; | |||
| has_malloc_ = true; | |||
| free_mem_size_ -= size; | |||
| return size; | |||
| } | |||
| bool AscendMemoryAllocator::FreeDeviceMem(const DeviceMemPtr& addr) { | |||
| bool AscendMemoryPool::FreeDeviceMem(const DeviceMemPtr& addr) { | |||
| MS_EXCEPTION_IF_NULL(addr); | |||
| hasMalloc_ = false; | |||
| has_malloc_ = false; | |||
| free_mem_size_ = total_mem_size_; | |||
| return true; | |||
| } | |||
| size_t AscendMemoryAllocator::AlignMemorySize(size_t size) const { | |||
| size_t AscendMemoryPool::AlignMemorySize(size_t size) const { | |||
| if (size == 0) { | |||
| return DYNAMIC_MEM_ALIGN_SIZE; | |||
| } | |||
| return ((size + DYNAMIC_MEM_ALIGN_SIZE + 31) / DYNAMIC_MEM_ALIGN_SIZE) * DYNAMIC_MEM_ALIGN_SIZE; | |||
| } | |||
| size_t AscendMemoryAllocator::mem_alloc_unit_size() const { return free_mem_size_ - 512; } | |||
| size_t AscendMemoryPool::mem_alloc_unit_size() const { return free_mem_size_ - 512; } | |||
| void AscendMemoryAllocator::set_device_mem_pool_base(uint8_t* device_mem_pool_base) { | |||
| void AscendMemoryPool::set_device_mem_pool_base(uint8_t* device_mem_pool_base) { | |||
| MS_EXCEPTION_IF_NULL(device_mem_pool_base); | |||
| device_mem_pool_base_ = device_mem_pool_base; | |||
| } | |||
| size_t AscendMemoryAllocator::free_mem_size() { return free_mem_size_; } | |||
| size_t AscendMemoryPool::free_mem_size() { return free_mem_size_; } | |||
| size_t AscendMemoryAllocator::total_mem_size() { return total_mem_size_; } | |||
| size_t AscendMemoryPool::total_mem_size() { return total_mem_size_; } | |||
| } // namespace ascend | |||
| } // namespace device | |||
| } // namespace mindspore | |||
| @@ -14,8 +14,8 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_DEVICE_ASCEND_ASCEND_MEMORY_ALLOCATOR_H_ | |||
| #define MINDSPORE_CCSRC_DEVICE_ASCEND_ASCEND_MEMORY_ALLOCATOR_H_ | |||
| #ifndef MINDSPORE_CCSRC_DEVICE_ASCEND_ASCEND_MEMORY_POOL_H_ | |||
| #define MINDSPORE_CCSRC_DEVICE_ASCEND_ASCEND_MEMORY_POOL_H_ | |||
| #include <memory> | |||
| #include "pre_activate/mem_reuse/mem_dynamic_allocator.h" | |||
| @@ -23,22 +23,23 @@ | |||
| namespace mindspore { | |||
| namespace device { | |||
| namespace ascend { | |||
| // The fraction of total ascend memory used to compute the graph. | |||
| static const float GRAPH_INIT_ASCEND_MEM_RATIO = 0.8; | |||
| class AscendMemoryAllocator : public DynamicMemPoolBestFit { | |||
| class AscendMemoryPool : public DynamicMemPoolBestFit { | |||
| public: | |||
| ~AscendMemoryAllocator() override = default; | |||
| ~AscendMemoryPool() override = default; | |||
| size_t AllocDeviceMem(size_t size, DeviceMemPtr* addr) override; | |||
| bool FreeDeviceMem(const DeviceMemPtr& addr) override; | |||
| void set_device_mem_pool_base(uint8_t* device_mem_pool_base); | |||
| void set_device_mem_pool_size(uint64_t device_mem_pool_size) { device_mem_pool_size_ = device_mem_pool_size; } | |||
| void set_device_mem_pool_size(uint64_t device_mem_pool_size) { | |||
| device_mem_pool_size_ = device_mem_pool_size; | |||
| free_mem_size_ = device_mem_pool_size_; | |||
| total_mem_size_ = free_mem_size_; | |||
| } | |||
| size_t free_mem_size() override; | |||
| size_t total_mem_size() override; | |||
| static AscendMemoryAllocator& GetInstance() { | |||
| static AscendMemoryAllocator instance; | |||
| static AscendMemoryPool& GetInstance() { | |||
| static AscendMemoryPool instance; | |||
| return instance; | |||
| } | |||
| @@ -49,10 +50,10 @@ class AscendMemoryAllocator : public DynamicMemPoolBestFit { | |||
| size_t mem_alloc_unit_size() const override; | |||
| private: | |||
| AscendMemoryAllocator(); | |||
| AscendMemoryAllocator(const AscendMemoryAllocator&) = delete; | |||
| AscendMemoryAllocator& operator=(const AscendMemoryAllocator&) = delete; | |||
| bool hasMalloc_; | |||
| AscendMemoryPool() = default; | |||
| AscendMemoryPool(const AscendMemoryPool&) = delete; | |||
| AscendMemoryPool& operator=(const AscendMemoryPool&) = delete; | |||
| bool has_malloc_{false}; | |||
| uint8_t* device_mem_pool_base_{nullptr}; | |||
| uint64_t device_mem_pool_size_{0}; | |||
| size_t free_mem_size_; | |||
| @@ -62,4 +63,4 @@ class AscendMemoryAllocator : public DynamicMemPoolBestFit { | |||
| } // namespace device | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_DEVICE_ASCEND_ASCEND_MEMORY_ALLOCATOR_H_ | |||
| #endif // MINDSPORE_CCSRC_DEVICE_ASCEND_ASCEND_MEMORY_POOL_H_ | |||
| @@ -702,7 +702,7 @@ void AscendStreamAssign::PrintGraphExeOrders(const shared_ptr<mindspore::session | |||
| << AnfAlgo::GetStreamId(cur_cnode_ptr) << "], event_id[" | |||
| << GetValue<uint32_t>(primitive->GetAttr(kAttrEventId)) << "]"; | |||
| } else { | |||
| MS_LOG(INFO) << "node name[" << AnfAlgo::GetCNodeName(cur_cnode_ptr) << "], logic id[" | |||
| MS_LOG(INFO) << "node name[" << cur_cnode_ptr->fullname_with_scope() << "], logic id[" | |||
| << AnfAlgo::GetStreamDistinctionLabel(cur_cnode_ptr.get()) << "], stream id[" | |||
| << AnfAlgo::GetStreamId(cur_cnode_ptr) << "]"; | |||
| } | |||
| @@ -29,10 +29,6 @@ namespace ascend { | |||
| // PROFILING_CUSTOM_LOGID_START 3 | |||
| const uint64_t kProfilingFpStartLogId = 1; | |||
| const uint64_t kProfilingBpEndLogId = 2; | |||
| const uint64_t kProfilingAllReduce1Start = 3; | |||
| const uint64_t kProfilingAllReduce1End = 4; | |||
| const uint64_t kProfilingAllReduce2Start = 5; | |||
| const uint64_t kProfilingAllReduce2End = 6; | |||
| const uint64_t kProfilingIterEndLogId = 255; | |||
| class ProfilingEngineImpl; | |||
| @@ -14,10 +14,8 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "device/ascend/profiling/profiling_utils.h" | |||
| #include <map> | |||
| #include "device/ascend/profiling/profiling_utils.h" | |||
| #include "kernel/kernel.h" | |||
| #include "device/ascend/profiling/profiling_manager.h" | |||
| #include "session/anf_runtime_algorithm.h" | |||
| @@ -27,82 +25,61 @@ | |||
| namespace mindspore { | |||
| namespace device { | |||
| namespace ascend { | |||
| const char ProfilingUtils::kProfiling[] = "Profiling"; | |||
| const char ProfilingUtils::kNotify[] = "notify"; | |||
| const char ProfilingUtils::kProfilerTraceId[] = "profiler_trace_id"; | |||
| const char ProfilingUtils::kFlags[] = "flags"; | |||
| constexpr uint32_t kMaxProfilingNodeNum = 100; | |||
| constexpr char kCustomNode[] = "PROFILING_CUSTOM_"; | |||
| constexpr char kFpStartNode[] = "PROFILING_FP_START"; | |||
| constexpr char kBpEndNode[] = "PROFILING_BP_END"; | |||
| constexpr char kIterEndNode[] = "PROFILING_ITER_END"; | |||
| std::unordered_map<uint32_t, std::vector<std::string>> ProfilingUtils::graph_kernel_name_; | |||
| bool ProfilingUtils::GetProfilingTraceInfo(const std::shared_ptr<session::KernelGraph> &graph_ptr, | |||
| ProfilingTraceInfo *profiling_trace_info) { | |||
| MS_EXCEPTION_IF_NULL(profiling_trace_info); | |||
| MS_EXCEPTION_IF_NULL(graph_ptr); | |||
| bool find_begin = false; | |||
| bool first_allreduce = true; | |||
| for (const auto &anf_node : graph_ptr->execution_order()) { | |||
| if (anf_node->isa<CNode>()) { | |||
| const std::string kernel_name = AnfAlgo::GetCNodeName(anf_node); | |||
| if ((kernel_name == "Cast" || kernel_name == "Four2Five") && !find_begin) { | |||
| profiling_trace_info->profiling_trace_begin = anf_node->fullname_with_scope(); | |||
| find_begin = true; | |||
| } | |||
| if (kernel_name == "Conv2DBackpropFilter") { | |||
| profiling_trace_info->profiling_trace_bp_end = anf_node->fullname_with_scope(); | |||
| } | |||
| if (kernel_name == kFusedMulApplyMomentumOpName || kernel_name == kApplyMomentumOpName) { | |||
| profiling_trace_info->profiling_trace_netoutput = anf_node->fullname_with_scope(); | |||
| } | |||
| if (kernel_name == kAllReduceOpName) { | |||
| if (first_allreduce) { | |||
| profiling_trace_info->profiling_allreduce1_start = anf_node->fullname_with_scope(); | |||
| profiling_trace_info->profiling_allreduce1_end = anf_node->fullname_with_scope(); | |||
| first_allreduce = false; | |||
| } else { | |||
| profiling_trace_info->profiling_allreduce2_start = anf_node->fullname_with_scope(); | |||
| profiling_trace_info->profiling_allreduce2_end = anf_node->fullname_with_scope(); | |||
| } | |||
| } | |||
| uint32_t ProfilingUtils::custom_node_index_ = 1; | |||
| ProfilingTraceInfo ProfilingUtils::GetProfilingTraceFromEnv(NotNull<session::KernelGraph *> graph_ptr) { | |||
| MS_LOG(INFO) << "get env start"; | |||
| custom_node_index_ = 1; | |||
| auto &cnode_exec_order = graph_ptr->execution_order(); | |||
| ProfilingTraceInfo profiling_trace; | |||
| profiling_trace.trace_begin = GetTraceBegin(cnode_exec_order); | |||
| profiling_trace.trace_bp_end = GetTraceBpEnd(); | |||
| profiling_trace.trace_netoutput = GetTraceNetoutput(cnode_exec_order); | |||
| MS_LOG(INFO) << "[profiling] trace_begin:" << profiling_trace.trace_begin | |||
| << " trace_bp_end:" << profiling_trace.trace_bp_end | |||
| << " trace_netoutput:" << profiling_trace.trace_netoutput; | |||
| for (uint32_t i = 1; i <= kMaxProfilingNodeNum; ++i) { | |||
| std::string env_str = std::string(kCustomNode) + std::to_string(i); | |||
| const char *node_full_name = std::getenv(env_str.c_str()); | |||
| if (node_full_name == nullptr) { | |||
| break; | |||
| } | |||
| MS_LOG(INFO) << "Get profiling node:" << node_full_name; | |||
| profiling_trace.trace_custom_node.insert(node_full_name); | |||
| } | |||
| MS_LOG(INFO) << "[profiling]begin:" << profiling_trace_info->profiling_trace_begin | |||
| << ", net_output:" << profiling_trace_info->profiling_trace_netoutput | |||
| << ", end:" << profiling_trace_info->profiling_trace_bp_end | |||
| << ", allreduce1:" << profiling_trace_info->profiling_allreduce1_start | |||
| << ", allreduce2:" << profiling_trace_info->profiling_allreduce2_start; | |||
| return profiling_trace_info->IsValid(); | |||
| MS_LOG(INFO) << "get env end"; | |||
| return profiling_trace; | |||
| } | |||
| bool ProfilingUtils::GetNetOutput(AnfNodePtr anf_node, std::string *profiling_trace_net_output) { | |||
| MS_EXCEPTION_IF_NULL(anf_node); | |||
| MS_EXCEPTION_IF_NULL(profiling_trace_net_output); | |||
| MS_LOG(INFO) << "[profiling]Anf node's full name with scope:" << anf_node->fullname_with_scope(); | |||
| if (!profiling_trace_net_output->empty()) { | |||
| MS_LOG(INFO) << "[profiling]Has got the net_output:" << profiling_trace_net_output->c_str(); | |||
| return true; | |||
| } | |||
| if (AnfAlgo::IsRealKernel(anf_node)) { | |||
| *profiling_trace_net_output = anf_node->fullname_with_scope(); | |||
| return true; | |||
| } | |||
| std::string ProfilingUtils::GetTraceBegin(const std::vector<CNodePtr> &cnode_exec_order) { | |||
| const char *trace_begin = std::getenv(kFpStartNode); | |||
| auto &first_cnode = cnode_exec_order.front(); | |||
| MS_EXCEPTION_IF_NULL(first_cnode); | |||
| return trace_begin == nullptr ? first_cnode->fullname_with_scope() : std::string(trace_begin); | |||
| } | |||
| auto cnode = anf_node->cast<CNodePtr>(); | |||
| if (cnode == nullptr) { | |||
| MS_LOG(ERROR) << "[profiling]Anf node should be a CNode"; | |||
| return false; | |||
| } | |||
| std::string ProfilingUtils::GetTraceBpEnd() { | |||
| const char *trace_bp_end = std::getenv(kBpEndNode); | |||
| return trace_bp_end == nullptr ? "" : std::string(trace_bp_end); | |||
| } | |||
| auto inputs = cnode->inputs(); | |||
| auto input_size = inputs.size(); | |||
| if (input_size < 2) { | |||
| MS_LOG(ERROR) << "[profiling]Anf node' input size(" << input_size << ") < 2, don't support get apply kernel node."; | |||
| return false; | |||
| } | |||
| return GetNetOutput(inputs[1], profiling_trace_net_output); | |||
| std::string ProfilingUtils::GetTraceNetoutput(const std::vector<CNodePtr> &cnode_exec_order) { | |||
| const char *trace_netoutput = std::getenv(kIterEndNode); | |||
| auto &last_cnode = cnode_exec_order.back(); | |||
| MS_EXCEPTION_IF_NULL(last_cnode); | |||
| return trace_netoutput == nullptr ? last_cnode->fullname_with_scope() : std::string(trace_netoutput); | |||
| } | |||
| CNodePtr ProfilingUtils::CreateProfilingCNode(const std::shared_ptr<session::KernelGraph> &graph_ptr, bool notify, | |||
| uint64_t profiler_trace_id, uint32_t flags) { | |||
| MS_EXCEPTION_IF_NULL(graph_ptr); | |||
| NotNull<CNodePtr> ProfilingUtils::CreateProfilingCNode(const ProfilingContent &profiling_content, | |||
| NotNull<session::KernelGraph *> graph_ptr) { | |||
| kernel::KernelBuildInfo::KernelBuildInfoBuilder selected_kernel_builder; | |||
| selected_kernel_builder.SetInputsFormat({kOpFormat_DEFAULT, kOpFormat_DEFAULT}); | |||
| selected_kernel_builder.SetInputsDeviceType({TypeId::kNumberTypeInt32, TypeId::kNumberTypeInt32}); | |||
| @@ -118,75 +95,79 @@ CNodePtr ProfilingUtils::CreateProfilingCNode(const std::shared_ptr<session::Ker | |||
| AnfAlgo::SetSelectKernelBuildInfo(selected_kernel_builder.Build(), cnode_ptr.get()); | |||
| cnode_ptr->set_abstract(type_none_abstract); | |||
| // set attr | |||
| ValuePtr notify_value = MakeValue(notify); | |||
| ValuePtr trace_id_value = MakeValue(profiler_trace_id); | |||
| ValuePtr flags_value = MakeValue(flags); | |||
| ValuePtr notify_value = MakeValue(profiling_content.notify); | |||
| ValuePtr trace_id_value = MakeValue(profiling_content.profiler_trace_id); | |||
| ValuePtr flags_value = MakeValue(profiling_content.flags); | |||
| AnfAlgo::SetNodeAttr(ProfilingUtils::kNotify, notify_value, cnode_ptr); | |||
| AnfAlgo::SetNodeAttr(ProfilingUtils::kProfilerTraceId, trace_id_value, cnode_ptr); | |||
| AnfAlgo::SetNodeAttr(ProfilingUtils::kFlags, flags_value, cnode_ptr); | |||
| return cnode_ptr; | |||
| return NOT_NULL(cnode_ptr); | |||
| } | |||
| void ProfilingUtils::ProfilingTraceFpStart(const std::shared_ptr<mindspore::session::KernelGraph> &graph_ptr, | |||
| const mindspore::AnfNodePtr &anf_node, | |||
| const mindspore::device::ascend::ProfilingTraceInfo &profiling_trace_info, | |||
| std::vector<mindspore::CNodePtr> *kernel_list) { | |||
| if (profiling_trace_info.IsValid() && profiling_trace_info.profiling_trace_begin == anf_node->fullname_with_scope()) { | |||
| if (graph_ptr == nullptr || kernel_list == nullptr || anf_node == nullptr) { | |||
| MS_LOG(ERROR) << "[profiling]input param invalid"; | |||
| return; | |||
| } | |||
| void ProfilingUtils::ProfilingTraceFpStart(const mindspore::AnfNodePtr &anf_node, | |||
| const ProfilingTraceInfo &profiling_trace_info, | |||
| NotNull<session::KernelGraph *> graph_ptr, | |||
| NotNull<std::vector<mindspore::CNodePtr> *> kernel_list) { | |||
| if (profiling_trace_info.trace_begin == anf_node->fullname_with_scope()) { | |||
| auto job_id = ProfilingManager::GetInstance().GetJobId(); | |||
| // job task info | |||
| CNodePtr job_kernel_ptr = CreateProfilingCNode(graph_ptr, false, job_id, 0); | |||
| AnfAlgo::SetStreamDistinctionLabel(AnfAlgo::GetStreamDistinctionLabel(anf_node.get()), job_kernel_ptr.get()); | |||
| AnfAlgo::SetStreamId(AnfAlgo::GetStreamId(anf_node), job_kernel_ptr.get()); | |||
| // fp task info | |||
| CNodePtr start_kernel_ptr = CreateProfilingCNode(graph_ptr, false, kProfilingFpStartLogId, 0); | |||
| AnfAlgo::SetStreamDistinctionLabel(AnfAlgo::GetStreamDistinctionLabel(anf_node.get()), start_kernel_ptr.get()); | |||
| AnfAlgo::SetStreamId(AnfAlgo::GetStreamId(anf_node), start_kernel_ptr.get()); | |||
| kernel_list->emplace_back(job_kernel_ptr); | |||
| kernel_list->emplace_back(start_kernel_ptr); | |||
| ProfilingContent job_profiling_context = {false, job_id, 0}; | |||
| auto job_profiling_node = CreateProfilingCNodeWithStream(anf_node, job_profiling_context, graph_ptr); | |||
| kernel_list->emplace_back(job_profiling_node); | |||
| ProfilingContent fp_profiling_content = {false, kProfilingFpStartLogId, 0}; | |||
| auto fp_profiling_node = CreateProfilingCNodeWithStream(anf_node, fp_profiling_content, graph_ptr); | |||
| kernel_list->emplace_back(fp_profiling_node); | |||
| } | |||
| } | |||
| void ProfilingUtils::ProfilingAllReduce(const std::shared_ptr<session::KernelGraph> &graph_ptr, | |||
| const AnfNodePtr &anf_node, int job_id, const std::string &profiling_node_name, | |||
| std::vector<CNodePtr> *kernel_list) { | |||
| MS_EXCEPTION_IF_NULL(graph_ptr); | |||
| CNodePtr ProfilingUtils::CreateProfilingCNodeWithStream(const mindspore::AnfNodePtr &anf_node, | |||
| const ProfilingContent &profiling_content, | |||
| NotNull<session::KernelGraph *> graph_ptr) { | |||
| CNodePtr profiling_node = CreateProfilingCNode(profiling_content, graph_ptr); | |||
| AnfAlgo::SetStreamDistinctionLabel(AnfAlgo::GetStreamDistinctionLabel(anf_node.get()), profiling_node.get()); | |||
| AnfAlgo::SetStreamId(AnfAlgo::GetStreamId(anf_node), profiling_node.get()); | |||
| return profiling_node; | |||
| } | |||
| void ProfilingUtils::ProfilingCustomOp(const AnfNodePtr &anf_node, const ProfilingTraceInfo &profiling_trace_info, | |||
| NotNull<session::KernelGraph *> graph_ptr, | |||
| NotNull<std::vector<CNodePtr> *> kernel_list) { | |||
| MS_EXCEPTION_IF_NULL(anf_node); | |||
| MS_EXCEPTION_IF_NULL(kernel_list); | |||
| auto full_scope_name = anf_node->fullname_with_scope(); | |||
| if (profiling_node_name == full_scope_name) { | |||
| CNodePtr allreduce_kernel_ptr = CreateProfilingCNode(graph_ptr, false, job_id, 0); | |||
| AnfAlgo::SetStreamDistinctionLabel(AnfAlgo::GetStreamDistinctionLabel(anf_node.get()), allreduce_kernel_ptr.get()); | |||
| AnfAlgo::SetStreamId(AnfAlgo::GetStreamId(anf_node), allreduce_kernel_ptr.get()); | |||
| kernel_list->emplace_back(allreduce_kernel_ptr); | |||
| auto iter = profiling_trace_info.trace_custom_node.find(anf_node->fullname_with_scope()); | |||
| if (iter == profiling_trace_info.trace_custom_node.end()) { | |||
| return; | |||
| } | |||
| // custom op profiling job start from 3. | |||
| ProfilingContent front_profiling_content = {false, 2 * custom_node_index_ + 1, 0}; | |||
| CNodePtr front_node = CreateProfilingCNodeWithStream(anf_node, front_profiling_content, graph_ptr); | |||
| kernel_list->insert(kernel_list->end() - 1, front_node); | |||
| ProfilingContent back_profiling_content = {false, 2 * custom_node_index_ + 2, 0}; | |||
| CNodePtr back_node = CreateProfilingCNodeWithStream(anf_node, back_profiling_content, graph_ptr); | |||
| kernel_list->insert(kernel_list->end(), back_node); | |||
| ++custom_node_index_; | |||
| } | |||
| void ProfilingUtils::ProfilingTraceEnd(const std::shared_ptr<mindspore::session::KernelGraph> &graph_ptr, | |||
| const mindspore::AnfNodePtr &anf_node, | |||
| const mindspore::device::ascend::ProfilingTraceInfo &profiling_trace_info, | |||
| std::vector<mindspore::CNodePtr> *kernel_list) { | |||
| MS_EXCEPTION_IF_NULL(graph_ptr); | |||
| void ProfilingUtils::ProfilingTraceBpEnd(const AnfNodePtr &anf_node, const ProfilingTraceInfo &profiling_trace_info, | |||
| NotNull<session::KernelGraph *> graph_ptr, | |||
| NotNull<std::vector<CNodePtr> *> kernel_list) { | |||
| MS_EXCEPTION_IF_NULL(anf_node); | |||
| MS_EXCEPTION_IF_NULL(kernel_list); | |||
| if (profiling_trace_info.IsValid()) { | |||
| auto full_scope_name = anf_node->fullname_with_scope(); | |||
| if (profiling_trace_info.profiling_trace_netoutput == full_scope_name) { | |||
| CNodePtr bp_kernel_ptr = CreateProfilingCNode(graph_ptr, true, kProfilingIterEndLogId, 0); | |||
| AnfAlgo::SetStreamDistinctionLabel(AnfAlgo::GetStreamDistinctionLabel(anf_node.get()), bp_kernel_ptr.get()); | |||
| AnfAlgo::SetStreamId(AnfAlgo::GetStreamId(anf_node), bp_kernel_ptr.get()); | |||
| kernel_list->emplace_back(bp_kernel_ptr); | |||
| } | |||
| if (profiling_trace_info.trace_bp_end == anf_node->fullname_with_scope()) { | |||
| ProfilingContent bp_end_profiling_content = {false, kProfilingBpEndLogId, 0}; | |||
| CNodePtr bp_end_node = CreateProfilingCNodeWithStream(anf_node, bp_end_profiling_content, graph_ptr); | |||
| kernel_list->emplace_back(bp_end_node); | |||
| } | |||
| } | |||
| if (profiling_trace_info.profiling_trace_bp_end == full_scope_name) { | |||
| CNodePtr end_task_info = CreateProfilingCNode(graph_ptr, false, kProfilingBpEndLogId, 0); | |||
| AnfAlgo::SetStreamDistinctionLabel(AnfAlgo::GetStreamDistinctionLabel(anf_node.get()), end_task_info.get()); | |||
| AnfAlgo::SetStreamId(AnfAlgo::GetStreamId(anf_node), end_task_info.get()); | |||
| kernel_list->emplace_back(end_task_info); | |||
| } | |||
| void ProfilingUtils::ProfilingTraceEnd(const AnfNodePtr &anf_node, const ProfilingTraceInfo &profiling_trace_info, | |||
| NotNull<session::KernelGraph *> graph_ptr, | |||
| NotNull<std::vector<mindspore::CNodePtr> *> kernel_list) { | |||
| MS_EXCEPTION_IF_NULL(anf_node); | |||
| auto full_scope_name = anf_node->fullname_with_scope(); | |||
| if (profiling_trace_info.trace_netoutput == full_scope_name) { | |||
| ProfilingContent bp_end_profiling_content = {true, kProfilingIterEndLogId, 0}; | |||
| CNodePtr bp_kernel_ptr = CreateProfilingCNodeWithStream(anf_node, bp_end_profiling_content, graph_ptr); | |||
| kernel_list->emplace_back(bp_kernel_ptr); | |||
| } | |||
| } | |||
| @@ -19,63 +19,102 @@ | |||
| #include <memory> | |||
| #include <string> | |||
| #include <vector> | |||
| #include <set> | |||
| #include <unordered_map> | |||
| #include "session/kernel_graph.h" | |||
| #include "utils/contract.h" | |||
| namespace mindspore { | |||
| namespace device { | |||
| namespace ascend { | |||
| struct ProfilingTraceInfo { | |||
| // execute order's first execute op(like: Cast or Four2Five ...), except tdt op(GetNext ...) | |||
| std::string profiling_trace_begin; | |||
| std::string trace_begin; | |||
| // get first net_output(apply kernel) from graph outputs: fp ->net_output<- bp | |||
| std::string profiling_trace_bp_end; | |||
| std::string trace_bp_end; | |||
| // execute order's end execute (like: Conv2DBackpropFilter) | |||
| std::string profiling_trace_netoutput; | |||
| std::string trace_netoutput; | |||
| std::string profiling_allreduce1_start; | |||
| std::string profiling_allreduce1_end; | |||
| std::string profiling_allreduce2_start; | |||
| std::string profiling_allreduce2_end; | |||
| // profiling specific op, such as AllReduce; | |||
| std::set<std::string> trace_custom_node; | |||
| // 1. insert profiling_trace_begin if profiling_trace_bp_end is not empty. | |||
| // 2. op lanuch get task info with callback func. | |||
| // 3. insert profiling_trace_bp_end. | |||
| // 4. insert profiling_trace_net_output if profiling_trace_bp_end is not empty. | |||
| bool IsValid() const { return !(profiling_trace_begin.empty() || profiling_trace_bp_end.empty()); } | |||
| bool IsValid() const { return !(trace_begin.empty() || trace_bp_end.empty() || trace_netoutput.empty()); } | |||
| }; | |||
| struct ProfilingContent { | |||
| // true -send data from device to host and finish profiling | |||
| bool notify; | |||
| uint64_t profiler_trace_id; | |||
| uint32_t flags; | |||
| }; | |||
| class ProfilingUtils { | |||
| public: | |||
| ProfilingUtils() = default; | |||
| ~ProfilingUtils() = default; | |||
| static bool GetProfilingTraceInfo(const std::shared_ptr<session::KernelGraph> &graph_ptr, | |||
| ProfilingTraceInfo *profiling_trace_info); | |||
| static void ProfilingTraceFpStart(const std::shared_ptr<session::KernelGraph> &graph_ptr, const AnfNodePtr &anf_node, | |||
| const ProfilingTraceInfo &profiling_trace_info, std::vector<CNodePtr> *kernel_list); | |||
| static void ProfilingAllReduce(const std::shared_ptr<session::KernelGraph> &graph_ptr, const AnfNodePtr &anf_node, | |||
| int job_id, const std::string &profiling_node_name, | |||
| std::vector<CNodePtr> *kernel_list); | |||
| static void ProfilingTraceEnd(const std::shared_ptr<session::KernelGraph> &graph_ptr, const AnfNodePtr &anf_node, | |||
| const ProfilingTraceInfo &profiling_trace_info, std::vector<CNodePtr> *kernel_list); | |||
| // Insert job_id profiling node and fp_start profiling node. | |||
| // Job_id is got from envs, which shound be a number greater than 255 | |||
| // Fp_start node should been inserted in the start of a network, and the log_id is hard code to 1. | |||
| static void ProfilingTraceFpStart(const AnfNodePtr &anf_node, const ProfilingTraceInfo &profiling_trace_info, | |||
| NotNull<session::KernelGraph *> graph_ptr, | |||
| NotNull<std::vector<CNodePtr> *> kernel_list); | |||
| // Insert net output profiling node, which tells the device to stop profiling. | |||
| // The notify in struct ProfilingContent should be 'true', which tells the device to send data to host. | |||
| static void ProfilingTraceEnd(const AnfNodePtr &anf_node, const ProfilingTraceInfo &profiling_trace_info, | |||
| NotNull<session::KernelGraph *> graph_ptr, | |||
| NotNull<std::vector<CNodePtr> *> kernel_list); | |||
| // Insert bp_end profiling node, which should been inserted after the last backpropagation CNode in the network. | |||
| static void ProfilingTraceBpEnd(const mindspore::AnfNodePtr &anf_node, const ProfilingTraceInfo &profiling_trace_info, | |||
| NotNull<session::KernelGraph *> graph_ptr, | |||
| NotNull<std::vector<mindspore::CNodePtr> *> kernel_list); | |||
| // Mapping graph id and the kernels' name in the graph | |||
| static void SetGraphKernelName(uint32_t graph_id, const std::vector<std::string> &kernel_names); | |||
| // Mapping task_id and kernel name for device to generate the time cost of specific kernel. | |||
| // Device calculate the time cost of the task which is marked by task id. | |||
| // But we need data of (kernel name , time cost) | |||
| static void ReportProfilingData(uint32_t graph_id, const std::vector<uint32_t> &task_ids); | |||
| static const char kProfiling[]; | |||
| static const char kNotify[]; | |||
| static const char kProfilerTraceId[]; | |||
| static const char kFlags[]; | |||
| // Get profiling trace point from envs. | |||
| // export PROFILING_FP_START='full name of the first cnode to execute' | |||
| // export PROFILING_BP_END='full name of the last backpropagation cnode to execute' | |||
| // export PROFILING_ITER_END='full name of last cnode in graph to execute' | |||
| // And other cnode, like AllReduce, export PROFILING_CUSTOM_1='full name of AllReduce cnode' | |||
| // GetNext, export PROFIFLING_CUSTOM_2='full name fo GetNext cnode' | |||
| // The variable i in PROFILING_CUSTOM_i should start from 1 without interruption. | |||
| static ProfilingTraceInfo GetProfilingTraceFromEnv(NotNull<session::KernelGraph *> graph_ptr); | |||
| // Insert two profiling trace points, one in front and one behind | |||
| static void ProfilingCustomOp(const mindspore::AnfNodePtr &anf_node, const ProfilingTraceInfo &profiling_trace_info, | |||
| NotNull<session::KernelGraph *> graph_ptr, | |||
| NotNull<std::vector<mindspore::CNodePtr> *> kernel_list); | |||
| inline static constexpr char kProfiling[] = "Profiling"; | |||
| inline static constexpr char kNotify[] = "notify"; | |||
| inline static constexpr char kProfilerTraceId[] = "profiler_trace_id"; | |||
| inline static constexpr char kFlags[] = "flags"; | |||
| private: | |||
| static bool GetNetOutput(AnfNodePtr anf_node, std::string *profiling_trace_net_output); | |||
| static CNodePtr CreateProfilingCNode(const std::shared_ptr<session::KernelGraph> &graph_ptr, bool notify, | |||
| uint64_t profiler_trace_id, uint32_t flags); | |||
| static NotNull<CNodePtr> CreateProfilingCNode(const ProfilingContent &profiling_content, | |||
| NotNull<session::KernelGraph *> graph_ptr); | |||
| static CNodePtr CreateProfilingCNodeWithStream(const AnfNodePtr &anf_node, const ProfilingContent &profiling_content, | |||
| NotNull<session::KernelGraph *> graph_ptr); | |||
| static std::string GetTraceBegin(const std::vector<CNodePtr> &cnode_exec_order); | |||
| static std::string GetTraceBpEnd(); | |||
| static std::string GetTraceNetoutput(const std::vector<CNodePtr> &cnode_exec_order); | |||
| // graph id --> (kernel name list) | |||
| static std::unordered_map<uint32_t, std::vector<std::string>> graph_kernel_name_; | |||
| static uint32_t custom_node_index_; | |||
| }; | |||
| } // namespace ascend | |||
| } // namespace device | |||
| @@ -33,12 +33,14 @@ class CPUKernelRuntime; | |||
| } // namespace cpu | |||
| namespace ascend { | |||
| class AscendKernelRuntime; | |||
| class AscendMemoryManager; | |||
| namespace tasksink { | |||
| class TaskGenerator; | |||
| } // namespace tasksink | |||
| } // namespace ascend | |||
| namespace gpu { | |||
| class GPUKernelRuntime; | |||
| class GPUMemoryManager; | |||
| } // namespace gpu | |||
| } // namespace device | |||
| } // namespace mindspore | |||
| @@ -68,14 +70,17 @@ class DeviceAddress { | |||
| size_t ref_count_{0}; | |||
| string format_{"DefaultFormat"}; | |||
| TypeId type_id_{kNumberTypeFloat16}; | |||
| bool mem_dynamic_alloc_{false}; | |||
| bool from_mem_pool_{false}; | |||
| friend class KernelRuntime; | |||
| friend class MemoryManager; | |||
| friend class mindspore::device::ascend::tasksink::TaskGenerator; | |||
| friend class mindspore::device::cpu::CPUSimpleMemPlan; | |||
| friend class mindspore::device::cpu::CPUResourceManager; | |||
| friend class mindspore::device::cpu::CPUKernelRuntime; | |||
| friend class mindspore::device::gpu::GPUKernelRuntime; | |||
| friend class mindspore::device::gpu::GPUMemoryManager; | |||
| friend class mindspore::device::ascend::AscendKernelRuntime; | |||
| friend class mindspore::device::ascend::AscendMemoryManager; | |||
| }; | |||
| using DeviceAddressPtr = std::shared_ptr<DeviceAddress>; | |||
| @@ -17,7 +17,6 @@ | |||
| #include "device/gpu/blocking_queue.h" | |||
| #include <chrono> | |||
| #include "device/gpu/gpu_common.h" | |||
| #include "dataset/util/make_unique.h" | |||
| #include "common/utils.h" | |||
| namespace mindspore { | |||
| @@ -32,7 +31,7 @@ GpuQueue::GpuQueue(void *addr, size_t feature_size, size_t label_size, size_t ca | |||
| stream_(0), | |||
| node_info_(nullptr) { | |||
| CHECK_CUDA_RET_WITH_ERROR(cudaStreamCreate(&stream_), "Cuda Create Stream Failed"); | |||
| node_info_ = mindspore::make_unique<NodeInfo[]>(capacity); | |||
| node_info_ = std::make_unique<NodeInfo[]>(capacity); | |||
| } | |||
| GpuQueue::~GpuQueue() { buffer_ = nullptr; } | |||
| @@ -46,7 +46,7 @@ GPUDeviceAddress::~GPUDeviceAddress() { | |||
| } | |||
| auto ms_context = MsContext::GetInstance(); | |||
| MS_EXCEPTION_IF_NULL(ms_context); | |||
| if (mem_dynamic_alloc_) { | |||
| if (from_mem_pool_) { | |||
| GPUMemoryAllocator::GetInstance().FreeTensorMem(ptr_); | |||
| ptr_ = nullptr; | |||
| } | |||
| @@ -26,6 +26,7 @@ | |||
| #include "device/kernel_runtime_manager.h" | |||
| #include "device/gpu/gpu_common.h" | |||
| #include "common/utils.h" | |||
| #include "device/gpu/gpu_memory_manager.h" | |||
| namespace mindspore { | |||
| namespace device { | |||
| @@ -36,26 +37,14 @@ bool GPUKernelRuntime::Init() { | |||
| if (device_init_ == true) { | |||
| return true; | |||
| } | |||
| auto ret = InitDevice(); | |||
| if (!ret) { | |||
| MS_LOG(ERROR) << "InitDevice error."; | |||
| return ret; | |||
| } | |||
| auto context_ptr = MsContext::GetInstance(); | |||
| MS_EXCEPTION_IF_NULL(context_ptr); | |||
| // If use the dynamic memory pool, then alloc the first memory block to init. | |||
| if (context_ptr->enable_dynamic_mem_pool()) { | |||
| auto device_addr = AllocTensorMemDynamic(1); | |||
| if (!device_addr) { | |||
| MS_LOG(ERROR) << "Dynamic memory pool init error."; | |||
| return false; | |||
| } | |||
| } else { | |||
| MallocDeviceMemory(); | |||
| } | |||
| mem_manager_ = std::make_shared<GPUMemoryManager>(); | |||
| MS_EXCEPTION_IF_NULL(mem_manager_); | |||
| mem_manager_->MallocDeviceMemory(); | |||
| const void *collective_handle_ = CollectiveInitializer::instance().collective_handle(); | |||
| bool collective_inited = CollectiveInitializer::instance().collective_inited(); | |||
| if (collective_inited && collective_handle_ != nullptr) { | |||
| @@ -101,16 +90,6 @@ bool GPUKernelRuntime::InitDevice() { | |||
| return true; | |||
| } | |||
| void GPUKernelRuntime::MallocDeviceMemory() { | |||
| // Need to reserve 20% space for dynamic memory | |||
| const float init_gpu_mem_ratio = 0.8; | |||
| size_t mem_size = FloatToSize(GPUMemoryAllocator::GetInstance().free_mem_size() * init_gpu_mem_ratio); | |||
| auto alloc_size = | |||
| GPUMemoryAllocator::GetInstance().AllocDeviceMem(mem_size, reinterpret_cast<void **>(&device_mem_base_)); | |||
| device_mem_size_ = alloc_size; | |||
| static_mem_offset_ = device_mem_size_; | |||
| } | |||
| void GPUKernelRuntime::ReleaseDeviceRes() { | |||
| // For dataset mode. | |||
| if (GpuBufferMgr::GetInstance().IsInit()) { | |||
| @@ -122,39 +101,22 @@ void GPUKernelRuntime::ReleaseDeviceRes() { | |||
| CHECK_OP_RET_WITH_EXCEPT(GpuBufferMgr::GetInstance().Destroy(), "Could not destroy gpu data queue."); | |||
| } | |||
| GPUDeviceManager::GetInstance().ReleaseDevice(); | |||
| if (device_mem_base_ != nullptr) { | |||
| if (!GPUMemoryAllocator::GetInstance().FreeDeviceMem(device_mem_base_)) { | |||
| MS_LOG(EXCEPTION) << "Could not free gpu device memory."; | |||
| } | |||
| } | |||
| GPUMemoryAllocator::GetInstance().ReleaseDeviceRes(); | |||
| } | |||
| void GPUKernelRuntime::FreeHostMemory() { dynamic_mem_offset_ = 0; } | |||
| void *GPUKernelRuntime::AllocTensorMemDynamic(size_t size) { | |||
| return GPUMemoryAllocator::GetInstance().AllocTensorMem(size); | |||
| } | |||
| void GPUKernelRuntime::FreeTensorMemDynamic(void *device_ptr) { | |||
| GPUMemoryAllocator::GetInstance().FreeTensorMem(device_ptr); | |||
| MS_EXCEPTION_IF_NULL(mem_manager_); | |||
| mem_manager_->FreeDeviceMemory(); | |||
| } | |||
| void GPUKernelRuntime::AssignMemory(session::KernelGraph *graph) { | |||
| auto context_ptr = MsContext::GetInstance(); | |||
| MS_EXCEPTION_IF_NULL(context_ptr); | |||
| MS_EXCEPTION_IF_NULL(mem_manager_); | |||
| mem_manager_->ResetDynamicMemory(); | |||
| AssignStaticMemory(graph); | |||
| bool is_enable_mem_reuse = context_ptr->enable_mem_reuse(); | |||
| bool is_enable_dynamic_mem = context_ptr->enable_dynamic_mem_pool(); | |||
| if (is_enable_dynamic_mem) { | |||
| // Use the dynamic memory pool. | |||
| InitKernelRefCount(graph); | |||
| InitKernelOutputAddress(graph); | |||
| } else if (is_enable_mem_reuse) { | |||
| // Use the memory reuse. | |||
| ReuseAssignDynamicMemory(graph); | |||
| } else { | |||
| // Normal way. | |||
| AssignDynamicMemory(graph); | |||
| } | |||
| } | |||
| @@ -179,32 +141,6 @@ bool GPUKernelRuntime::Run(session::KernelGraph *graph) { | |||
| return ret; | |||
| } | |||
| uint8_t *GPUKernelRuntime::MallocStaticMem(size_t size, bool) { | |||
| auto context_ptr = MsContext::GetInstance(); | |||
| MS_EXCEPTION_IF_NULL(context_ptr); | |||
| if (context_ptr->enable_dynamic_mem_pool()) { | |||
| auto device_ptr = AllocTensorMemDynamic(size); | |||
| MS_EXCEPTION_IF_NULL(device_ptr); | |||
| return AddressOffset(device_ptr, 0); | |||
| } | |||
| auto align_size = GetCommonAlignSize(size); | |||
| if (static_mem_offset_ < align_size) { | |||
| MS_LOG(EXCEPTION) << "Out of memory!!! total[" << device_mem_size_ << "](dynamic[" << total_dynamic_size_ | |||
| << "] static[" << total_static_size_ << "])" | |||
| << " malloc [" << align_size << "] failed!"; | |||
| } | |||
| auto offset = static_mem_offset_ - align_size; | |||
| if (dynamic_mem_offset_ > offset) { | |||
| MS_LOG(EXCEPTION) << "Out of memory!!! total[" << device_mem_size_ << "](dynamic[" << total_dynamic_size_ | |||
| << "] static[" << total_static_size_ << "])" | |||
| << " malloc [" << align_size << "] failed!"; | |||
| } | |||
| total_static_size_ += align_size; | |||
| static_mem_offset_ = offset; | |||
| return device_mem_base_ + offset; | |||
| } | |||
| void GPUKernelRuntime::InitKernelRefCount(const session::KernelGraph *graph) { | |||
| MS_EXCEPTION_IF_NULL(graph); | |||
| MemReuseUtilPtr mem_reuse_util_ptr = std::make_shared<memreuse::MemReuseUtil>(); | |||
| @@ -273,6 +209,7 @@ void GPUKernelRuntime::AllocKernelDynamicRes(const mindspore::kernel::KernelMod | |||
| MS_EXCEPTION_IF_NULL(kernel_inputs); | |||
| MS_EXCEPTION_IF_NULL(kernel_workspaces); | |||
| MS_EXCEPTION_IF_NULL(kernel_outputs); | |||
| MS_EXCEPTION_IF_NULL(mem_manager_); | |||
| for (size_t i = 0; i < AnfAlgo::GetInputTensorNum(kernel); ++i) { | |||
| auto device_address = AnfAlgo::GetPrevNodeOutputAddr(kernel, i); | |||
| MS_EXCEPTION_IF_NULL(device_address); | |||
| @@ -290,7 +227,7 @@ void GPUKernelRuntime::AllocKernelDynamicRes(const mindspore::kernel::KernelMod | |||
| MS_EXCEPTION_IF_NULL(device_address); | |||
| auto device_ptr = device_address->ptr_; | |||
| if (device_ptr == nullptr) { | |||
| device_ptr = AllocTensorMemDynamic(output_sizes[i]); | |||
| device_ptr = mem_manager_->MallocMemFromMemPool(output_sizes[i]); | |||
| MS_EXCEPTION_IF_NULL(device_ptr); | |||
| device_address->ptr_ = device_ptr; | |||
| } | |||
| @@ -307,7 +244,7 @@ void GPUKernelRuntime::AllocKernelDynamicRes(const mindspore::kernel::KernelMod | |||
| kernel_workspaces->emplace_back(nullptr); | |||
| continue; | |||
| } | |||
| auto device_ptr = AllocTensorMemDynamic(workspace_sizes[i]); | |||
| auto device_ptr = mem_manager_->MallocMemFromMemPool(workspace_sizes[i]); | |||
| MS_EXCEPTION_IF_NULL(device_ptr); | |||
| kernel::AddressPtr workspace = std::make_shared<kernel::Address>(); | |||
| MS_EXCEPTION_IF_NULL(workspace); | |||
| @@ -333,6 +270,7 @@ void GPUKernelRuntime::AllocCommunicationOpDynamicRes(const session::KernelGraph | |||
| void GPUKernelRuntime::AllocCommunicationOpInputDynamicRes(const mindspore::AnfNodePtr &kernel) { | |||
| MS_EXCEPTION_IF_NULL(kernel); | |||
| MS_EXCEPTION_IF_NULL(mem_manager_); | |||
| // The reference count of communication kernel input is not 0. | |||
| if (communication_op_input_ref_count_ != 0) { | |||
| MS_LOG(ERROR) << "The reference count of communication kernel input is not 0."; | |||
| @@ -354,7 +292,7 @@ void GPUKernelRuntime::AllocCommunicationOpInputDynamicRes(const mindspore::AnfN | |||
| addr_size.emplace_back(device_address.get(), output_size); | |||
| } | |||
| auto device_mem_ptr = AllocTensorMemDynamic(total); | |||
| auto device_mem_ptr = mem_manager_->MallocMemFromMemPool(total); | |||
| MS_EXCEPTION_IF_NULL(device_mem_ptr); | |||
| for (const auto &iter : addr_size) { | |||
| MS_EXCEPTION_IF_NULL(iter.first); | |||
| @@ -366,6 +304,7 @@ void GPUKernelRuntime::AllocCommunicationOpInputDynamicRes(const mindspore::AnfN | |||
| void GPUKernelRuntime::AllocCommunicationOpOutputDynamicRes(const mindspore::AnfNodePtr &kernel) { | |||
| MS_EXCEPTION_IF_NULL(kernel); | |||
| MS_EXCEPTION_IF_NULL(mem_manager_); | |||
| // The reference count of communication kernel output is not 0. | |||
| if (communication_op_output_ref_count_ != 0) { | |||
| MS_LOG(ERROR) << "The reference count of communication kernel output is not 0."; | |||
| @@ -389,7 +328,7 @@ void GPUKernelRuntime::AllocCommunicationOpOutputDynamicRes(const mindspore::Anf | |||
| addr_size.emplace_back(device_address.get(), output_sizes[i]); | |||
| } | |||
| auto device_mem_ptr = AllocTensorMemDynamic(total); | |||
| auto device_mem_ptr = mem_manager_->MallocMemFromMemPool(total); | |||
| MS_EXCEPTION_IF_NULL(device_mem_ptr); | |||
| for (const auto &iter : addr_size) { | |||
| MS_EXCEPTION_IF_NULL(iter.first); | |||
| @@ -402,6 +341,7 @@ void GPUKernelRuntime::AllocCommunicationOpOutputDynamicRes(const mindspore::Anf | |||
| void GPUKernelRuntime::FreeKernelDynamicRes(const mindspore::AnfNodePtr &kernel, | |||
| const AddressPtrList &kernel_workspaces) { | |||
| MS_EXCEPTION_IF_NULL(kernel); | |||
| MS_EXCEPTION_IF_NULL(mem_manager_); | |||
| auto cnode = kernel->cast<CNodePtr>(); | |||
| MS_EXCEPTION_IF_NULL(cnode); | |||
| // Free the input of kernel by reference count. | |||
| @@ -421,7 +361,7 @@ void GPUKernelRuntime::FreeKernelDynamicRes(const mindspore::AnfNodePtr &kernel, | |||
| auto device_address = AnfAlgo::GetPrevNodeMutableOutputAddr(kernel, i); | |||
| MS_EXCEPTION_IF_NULL(device_address); | |||
| MS_EXCEPTION_IF_NULL(device_address->ptr_); | |||
| FreeTensorMemDynamic(device_address->ptr_); | |||
| mem_manager_->FreeMemFromMemPool(device_address->ptr_); | |||
| device_address->ptr_ = nullptr; | |||
| } | |||
| } | |||
| @@ -432,7 +372,7 @@ void GPUKernelRuntime::FreeKernelDynamicRes(const mindspore::AnfNodePtr &kernel, | |||
| auto workspace = kernel_workspaces[i]; | |||
| if (workspace != nullptr) { | |||
| MS_EXCEPTION_IF_NULL(workspace->addr); | |||
| FreeTensorMemDynamic(workspace->addr); | |||
| mem_manager_->FreeMemFromMemPool(workspace->addr); | |||
| workspace->addr = nullptr; | |||
| } | |||
| } | |||
| @@ -441,6 +381,7 @@ void GPUKernelRuntime::FreeKernelDynamicRes(const mindspore::AnfNodePtr &kernel, | |||
| void GPUKernelRuntime::FreeCommunicationOpDynamicRes(const mindspore::AnfNodePtr &kernel, size_t input_idx, | |||
| bool *is_communication_op) { | |||
| MS_EXCEPTION_IF_NULL(kernel); | |||
| MS_EXCEPTION_IF_NULL(mem_manager_); | |||
| // The inputs memory of communication kernel is one piece memory, need release together. | |||
| if (AnfAlgo::GetCNodeName(kernel) == kAllReduceOpName) { | |||
| communication_op_input_ref_count_--; | |||
| @@ -448,7 +389,7 @@ void GPUKernelRuntime::FreeCommunicationOpDynamicRes(const mindspore::AnfNodePtr | |||
| auto device_address = AnfAlgo::GetPrevNodeMutableOutputAddr(kernel, 0); | |||
| MS_EXCEPTION_IF_NULL(device_address); | |||
| MS_EXCEPTION_IF_NULL(device_address->ptr_); | |||
| FreeTensorMemDynamic(device_address->ptr_); | |||
| mem_manager_->FreeMemFromMemPool(device_address->ptr_); | |||
| device_address->ptr_ = nullptr; | |||
| } | |||
| *is_communication_op = true; | |||
| @@ -470,19 +411,12 @@ void GPUKernelRuntime::FreeCommunicationOpDynamicRes(const mindspore::AnfNodePtr | |||
| auto device_address = AnfAlgo::GetMutableOutputAddr(kernel_input.first, 0); | |||
| MS_EXCEPTION_IF_NULL(device_address); | |||
| MS_EXCEPTION_IF_NULL(device_address->ptr_); | |||
| FreeTensorMemDynamic(device_address->ptr_); | |||
| mem_manager_->FreeMemFromMemPool(device_address->ptr_); | |||
| device_address->ptr_ = nullptr; | |||
| } | |||
| *is_communication_op = true; | |||
| } | |||
| } | |||
| void GPUKernelRuntime::MallocOpMemory(const DeviceAddressPtr address, size_t size, int) { | |||
| auto device_ptr = AllocTensorMemDynamic(size); | |||
| MS_EXCEPTION_IF_NULL(device_ptr); | |||
| address->ptr_ = device_ptr; | |||
| address->mem_dynamic_alloc_ = true; | |||
| } | |||
| } // namespace gpu | |||
| } // namespace device | |||
| } // namespace mindspore | |||
| @@ -33,7 +33,6 @@ class GPUKernelRuntime : public KernelRuntime { | |||
| ~GPUKernelRuntime() override = default; | |||
| bool Init() override; | |||
| void ReleaseDeviceRes() override; | |||
| void FreeHostMemory() override; | |||
| void AssignMemory(session::KernelGraph *graph) override; | |||
| bool Run(session::KernelGraph *graph) override; | |||
| @@ -41,18 +40,11 @@ class GPUKernelRuntime : public KernelRuntime { | |||
| DeviceAddressPtr CreateDeviceAddress(void *device_ptr, size_t device_size, const string &format, | |||
| TypeId type_id) override; | |||
| bool SyncStream() override; | |||
| // Alloc memory use the dynamic memory pool. | |||
| void *AllocTensorMemDynamic(size_t size) override; | |||
| // Free memory use the dynamic memory pool. | |||
| void FreeTensorMemDynamic(void *device_ptr) override; | |||
| void MallocOpMemory(const DeviceAddressPtr address, size_t size, int flag) override; | |||
| uint8_t *MallocStaticMem(size_t size, bool communication_mem) override; | |||
| private: | |||
| GPUKernelRuntime(const GPUKernelRuntime &); | |||
| GPUKernelRuntime &operator=(const GPUKernelRuntime &); | |||
| bool InitDevice(); | |||
| void MallocDeviceMemory(); | |||
| bool device_init_{false}; | |||
| // The related functions and members for using dynamic memory pool. | |||
| @@ -69,6 +61,7 @@ class GPUKernelRuntime : public KernelRuntime { | |||
| void FreeCommunicationOpDynamicRes(const mindspore::AnfNodePtr &kernel, size_t input_idx, bool *is_communication_op); | |||
| size_t communication_op_input_ref_count_{0}; | |||
| size_t communication_op_output_ref_count_{0}; | |||
| MemReuseUtilPtr mem_reuse_util_ptr_{nullptr}; | |||
| }; | |||
| MS_REG_KERNEL_RUNTIME(kGPUDevice, GPUKernelRuntime); | |||
| } // namespace gpu | |||
| @@ -0,0 +1,88 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "device/gpu/gpu_memory_manager.h" | |||
| #include "device/gpu/gpu_memory_allocator.h" | |||
| #include "utils/context/ms_context.h" | |||
| #include "utils/convert_utils.h" | |||
| namespace mindspore { | |||
| namespace device { | |||
| namespace gpu { | |||
| void *GPUMemoryManager::MallocMemFromMemPool(size_t size) { | |||
| return GPUMemoryAllocator::GetInstance().AllocTensorMem(size); | |||
| } | |||
| void GPUMemoryManager::FreeMemFromMemPool(void *device_ptr) { | |||
| GPUMemoryAllocator::GetInstance().FreeTensorMem(device_ptr); | |||
| } | |||
| void GPUMemoryManager::MallocDeviceMemory() { | |||
| auto context_ptr = MsContext::GetInstance(); | |||
| MS_EXCEPTION_IF_NULL(context_ptr); | |||
| // If use the dynamic memory pool, then alloc the first memory block to init. | |||
| if (context_ptr->enable_dynamic_mem_pool()) { | |||
| auto device_addr = MallocMemFromMemPool(1); | |||
| if (!device_addr) { | |||
| MS_LOG(ERROR) << "Dynamic memory pool init error."; | |||
| } | |||
| } else { | |||
| // Need to reserve 20% space for dynamic memory | |||
| const float init_gpu_mem_ratio = 0.8; | |||
| size_t mem_size = FloatToSize(GPUMemoryAllocator::GetInstance().free_mem_size() * init_gpu_mem_ratio); | |||
| auto alloc_size = | |||
| GPUMemoryAllocator::GetInstance().AllocDeviceMem(mem_size, reinterpret_cast<void **>(&device_mem_base_)); | |||
| device_mem_size_ = alloc_size; | |||
| static_mem_offset_ = device_mem_size_; | |||
| } | |||
| } | |||
| void GPUMemoryManager::FreeDeviceMemory() { | |||
| if (device_mem_base_ != nullptr) { | |||
| if (!GPUMemoryAllocator::GetInstance().FreeDeviceMem(device_mem_base_)) { | |||
| MS_LOG(EXCEPTION) << "Could not free gpu device memory."; | |||
| } | |||
| } | |||
| GPUMemoryAllocator::GetInstance().ReleaseDeviceRes(); | |||
| } | |||
| uint8_t *GPUMemoryManager::MallocStaticMem(size_t size, bool) { | |||
| auto context_ptr = MsContext::GetInstance(); | |||
| MS_EXCEPTION_IF_NULL(context_ptr); | |||
| if (context_ptr->enable_dynamic_mem_pool()) { | |||
| auto device_ptr = MallocMemFromMemPool(size); | |||
| MS_EXCEPTION_IF_NULL(device_ptr); | |||
| return AddressOffset(device_ptr, 0); | |||
| } | |||
| auto align_size = GetCommonAlignSize(size); | |||
| if (static_mem_offset_ < align_size) { | |||
| MS_LOG(EXCEPTION) << "Out of memory!!! total[" << device_mem_size_ << "](dynamic[" << total_dynamic_size_ | |||
| << "] static[" << total_static_size_ << "])" | |||
| << " malloc [" << align_size << "] failed!"; | |||
| } | |||
| auto offset = static_mem_offset_ - align_size; | |||
| if (dynamic_mem_offset_ > offset) { | |||
| MS_LOG(EXCEPTION) << "Out of memory!!! total[" << device_mem_size_ << "](dynamic[" << total_dynamic_size_ | |||
| << "] static[" << total_static_size_ << "])" | |||
| << " malloc [" << align_size << "] failed!"; | |||
| } | |||
| total_static_size_ += align_size; | |||
| static_mem_offset_ = offset; | |||
| return device_mem_base_ + offset; | |||
| } | |||
| } // namespace gpu | |||
| } // namespace device | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,40 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_MINDSPORE_CCSRC_DEVICE_GPU_GPU_MEMORY_MANAGER_H_ | |||
| #define MINDSPORE_MINDSPORE_CCSRC_DEVICE_GPU_GPU_MEMORY_MANAGER_H_ | |||
| #include "device/memory_manager.h" | |||
| namespace mindspore { | |||
| namespace device { | |||
| namespace gpu { | |||
| class GPUMemoryManager : public MemoryManager { | |||
| public: | |||
| GPUMemoryManager() = default; | |||
| virtual ~GPUMemoryManager() = default; | |||
| void MallocDeviceMemory() override; | |||
| void FreeDeviceMemory() override; | |||
| void *MallocMemFromMemPool(size_t size) override; | |||
| void FreeMemFromMemPool(void *device_ptr) override; | |||
| protected: | |||
| uint8_t *MallocStaticMem(size_t size, bool communication_mem) override; | |||
| }; | |||
| } // namespace gpu | |||
| } // namespace device | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_MINDSPORE_CCSRC_DEVICE_GPU_GPU_MEMORY_MANAGER_H_ | |||
| @@ -438,23 +438,22 @@ void KernelAdjust::LoadSwitchInputs(std::vector<tensor::TensorPtr> *inputs) { | |||
| MS_LOG(INFO) << "---------------- LoadSwitchInputs End--"; | |||
| } | |||
| void KernelAdjust::Profiling(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr) { | |||
| void KernelAdjust::Profiling(NotNull<session::KernelGraph *> kernel_graph_ptr) { | |||
| if (!ascend::ProfilingManager::GetInstance().IsProfiling()) { | |||
| MS_LOG(INFO) << "No need to profiling"; | |||
| return; | |||
| } | |||
| ProfilingTraceInfo profiling_trace_info; | |||
| if (ProfilingUtils::GetProfilingTraceInfo(kernel_graph_ptr, &profiling_trace_info)) { | |||
| InsertProfilingKernel(kernel_graph_ptr, profiling_trace_info); | |||
| } else { | |||
| MS_LOG(WARNING) << "[profiling] GetProfilingTraceInfo failed"; | |||
| ProfilingTraceInfo profiling_trace_info = ProfilingUtils::GetProfilingTraceFromEnv(kernel_graph_ptr); | |||
| if (!profiling_trace_info.IsValid()) { | |||
| MS_LOG(WARNING) << "[profiling] no profiling node found!"; | |||
| return; | |||
| } | |||
| InsertProfilingKernel(profiling_trace_info, kernel_graph_ptr); | |||
| } | |||
| void KernelAdjust::InsertProfilingKernel(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr, | |||
| const ProfilingTraceInfo &profiling_trace_info) { | |||
| void KernelAdjust::InsertProfilingKernel(const ProfilingTraceInfo &profiling_trace_info, | |||
| NotNull<session::KernelGraph *> kernel_graph_ptr) { | |||
| MS_LOG(INFO) << "[profiling] Insert profiling kernel start"; | |||
| MS_EXCEPTION_IF_NULL(kernel_graph_ptr); | |||
| if (!profiling_trace_info.IsValid()) { | |||
| MS_LOG(WARNING) << "Profiling trace point not found"; | |||
| return; | |||
| @@ -462,18 +461,12 @@ void KernelAdjust::InsertProfilingKernel(const std::shared_ptr<session::KernelGr | |||
| std::vector<CNodePtr> new_cnode_list; | |||
| std::vector<CNodePtr> cnode_ptr_list = kernel_graph_ptr->execution_order(); | |||
| for (const auto &cnode_ptr : cnode_ptr_list) { | |||
| ProfilingUtils::ProfilingTraceFpStart(kernel_graph_ptr, cnode_ptr, profiling_trace_info, &new_cnode_list); | |||
| ProfilingUtils::ProfilingAllReduce(kernel_graph_ptr, cnode_ptr, ascend::kProfilingAllReduce1Start, | |||
| profiling_trace_info.profiling_allreduce1_start, &new_cnode_list); | |||
| ProfilingUtils::ProfilingAllReduce(kernel_graph_ptr, cnode_ptr, ascend::kProfilingAllReduce2Start, | |||
| profiling_trace_info.profiling_allreduce2_start, &new_cnode_list); | |||
| ProfilingUtils::ProfilingTraceFpStart(cnode_ptr, profiling_trace_info, kernel_graph_ptr, NOT_NULL(&new_cnode_list)); | |||
| new_cnode_list.emplace_back(cnode_ptr); | |||
| ProfilingUtils::ProfilingAllReduce(kernel_graph_ptr, cnode_ptr, ascend::kProfilingAllReduce1End, | |||
| profiling_trace_info.profiling_allreduce1_end, &new_cnode_list); | |||
| ProfilingUtils::ProfilingAllReduce(kernel_graph_ptr, cnode_ptr, ascend::kProfilingAllReduce2End, | |||
| profiling_trace_info.profiling_allreduce2_end, &new_cnode_list); | |||
| ProfilingUtils::ProfilingTraceEnd(kernel_graph_ptr, cnode_ptr, profiling_trace_info, &new_cnode_list); | |||
| ProfilingUtils::ProfilingCustomOp(cnode_ptr, profiling_trace_info, kernel_graph_ptr, NOT_NULL(&new_cnode_list)); | |||
| ProfilingUtils::ProfilingTraceBpEnd(cnode_ptr, profiling_trace_info, kernel_graph_ptr, NOT_NULL(&new_cnode_list)); | |||
| ProfilingUtils::ProfilingTraceEnd(cnode_ptr, profiling_trace_info, kernel_graph_ptr, NOT_NULL(&new_cnode_list)); | |||
| } | |||
| kernel_graph_ptr->set_execution_order(new_cnode_list); | |||
| } | |||
| @@ -48,7 +48,7 @@ class KernelAdjust { | |||
| void SetStreamSwitchOps(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr); | |||
| bool StepLoadCtrlInputs(const std::shared_ptr<session::Context> &context, | |||
| const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr); | |||
| void Profiling(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr); | |||
| void Profiling(NotNull<session::KernelGraph *> kernel_graph_ptr); | |||
| static bool NeedInsertSwitch(); | |||
| CNodePtr CreateSteamActiveOp(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr); | |||
| @@ -66,8 +66,8 @@ class KernelAdjust { | |||
| kernel::KernelBuildInfo::KernelBuildInfoBuilder CreateMngKernelBuilder(const std::vector<std::string> &formats, | |||
| const std::vector<TypeId> &type_ids); | |||
| void LoadSwitchInputs(std::vector<tensor::TensorPtr> *inputs); | |||
| void InsertProfilingKernel(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr, | |||
| const ProfilingTraceInfo &profiling_trace_info); | |||
| void InsertProfilingKernel(const ProfilingTraceInfo &profiling_trace_info, | |||
| NotNull<session::KernelGraph *> kernel_graph_ptr); | |||
| }; | |||
| } // namespace device | |||
| } // namespace mindspore | |||
| @@ -31,18 +31,13 @@ | |||
| #include "ir/value.h" | |||
| using mindspore::kernel::Address; | |||
| using mindspore::kernel::AddressPtr; | |||
| using mindspore::memreuse::BestFitMemReuse; | |||
| using mindspore::memreuse::MemReuseUtilPtr; | |||
| namespace mindspore { | |||
| namespace device { | |||
| KernelRuntime::~KernelRuntime() { | |||
| device_mem_base_ = nullptr; | |||
| device_mem_pool_base_ = nullptr; | |||
| #ifdef ENABLE_DUMP_E2E | |||
| dump_conf_ptr_ = nullptr; | |||
| #endif | |||
| mem_reuse_util_ptr_ = nullptr; | |||
| } | |||
| bool KernelRuntime::Run(session::KernelGraph *graph) { | |||
| @@ -88,11 +83,6 @@ bool KernelRuntime::LoadTask(const session::KernelGraph *graph) { | |||
| return false; | |||
| } | |||
| void KernelRuntime::FreeHostMemory() { | |||
| dynamic_mem_offset_ = 0; | |||
| static_mem_offset_ = 0; | |||
| } | |||
| // for D to impl | |||
| bool KernelRuntime::RunTask(const session::KernelGraph *graph) { | |||
| if (graph != nullptr) { | |||
| @@ -126,13 +116,11 @@ size_t KernelRuntime::CountNodeDeviceMemorySize(const mindspore::AnfNodePtr &nod | |||
| void KernelRuntime::AssignMemory(session::KernelGraph *graph) { | |||
| auto context_ptr = MsContext::GetInstance(); | |||
| MS_EXCEPTION_IF_NULL(context_ptr); | |||
| MS_EXCEPTION_IF_NULL(mem_manager_); | |||
| mem_manager_->ResetDynamicMemory(); | |||
| AssignStaticMemory(graph); | |||
| bool is_enable_mem_reuse = context_ptr->enable_mem_reuse(); | |||
| if (is_enable_mem_reuse) { | |||
| ReuseAssignDynamicMemory(graph); | |||
| } else { | |||
| AssignDynamicMemory(graph); | |||
| } | |||
| AssignDynamicMemory(graph); | |||
| UpdateRefNodeOutputMem(graph); | |||
| } | |||
| @@ -159,6 +147,7 @@ void KernelRuntime::AssignStaticMemory(session::KernelGraph *graph) { | |||
| void KernelRuntime::RunOpAssignInputMemory(const std::vector<tensor::TensorPtr> &input_tensors, | |||
| const session::KernelGraph *graph) { | |||
| MS_EXCEPTION_IF_NULL(graph); | |||
| MS_EXCEPTION_IF_NULL(mem_manager_); | |||
| for (size_t input_index = 0; input_index < graph->inputs().size(); ++input_index) { | |||
| auto item = graph->inputs()[input_index]; | |||
| MS_EXCEPTION_IF_NULL(item); | |||
| @@ -180,7 +169,7 @@ void KernelRuntime::RunOpAssignInputMemory(const std::vector<tensor::TensorPtr> | |||
| auto device_address = | |||
| CreateDeviceAddress(nullptr, tensor_size, AnfAlgo::GetOutputFormat(item, index), output_type_id); | |||
| MS_EXCEPTION_IF_NULL(device_address); | |||
| MallocOpMemory(device_address, tensor_size, kStaticMem); | |||
| mem_manager_->MallocMemFromMemPool(device_address, tensor_size); | |||
| AnfAlgo::SetOutputAddr(device_address, index, item.get()); | |||
| } | |||
| } | |||
| @@ -188,6 +177,7 @@ void KernelRuntime::RunOpAssignInputMemory(const std::vector<tensor::TensorPtr> | |||
| void KernelRuntime::RunOpAssignOutputMemory(const AnfNodePtr &kernel) { | |||
| MS_EXCEPTION_IF_NULL(kernel); | |||
| MS_EXCEPTION_IF_NULL(mem_manager_); | |||
| auto kernel_mod = AnfAlgo::GetKernelMod(kernel); | |||
| MS_EXCEPTION_IF_NULL(kernel_mod); | |||
| auto output_sizes = kernel_mod->GetOutputSizeList(); | |||
| @@ -208,13 +198,14 @@ void KernelRuntime::RunOpAssignOutputMemory(const AnfNodePtr &kernel) { | |||
| auto output_type = AnfAlgo::GetOutputDeviceDataType(kernel, i); | |||
| auto device_address = CreateDeviceAddress(nullptr, output_sizes[i], output_format, output_type); | |||
| MS_EXCEPTION_IF_NULL(device_address); | |||
| MallocOpMemory(device_address, output_sizes[i], kDynamicMem); | |||
| mem_manager_->MallocMemFromMemPool(device_address, output_sizes[i]); | |||
| AnfAlgo::SetOutputAddr(device_address, i, kernel.get()); | |||
| } | |||
| } | |||
| void KernelRuntime::RunOpAssignWorkSpaceMemory(const AnfNodePtr &kernel) { | |||
| MS_EXCEPTION_IF_NULL(kernel); | |||
| MS_EXCEPTION_IF_NULL(mem_manager_); | |||
| if (kernel->isa<CNode>()) { | |||
| auto kernel_mod = AnfAlgo::GetKernelMod(kernel); | |||
| MS_EXCEPTION_IF_NULL(kernel_mod); | |||
| @@ -222,7 +213,7 @@ void KernelRuntime::RunOpAssignWorkSpaceMemory(const AnfNodePtr &kernel) { | |||
| for (size_t i = 0; i < workspace_lists.size(); ++i) { | |||
| auto device_address = CreateDeviceAddress(nullptr, workspace_lists[i], "", kTypeUnknown); | |||
| MS_EXCEPTION_IF_NULL(device_address); | |||
| MallocOpMemory(device_address, workspace_lists[i], kDynamicMem); | |||
| mem_manager_->MallocMemFromMemPool(device_address, workspace_lists[i]); | |||
| AnfAlgo::SetWorkspaceAddr(device_address, i, kernel.get()); | |||
| } | |||
| } | |||
| @@ -230,6 +221,7 @@ void KernelRuntime::RunOpAssignWorkSpaceMemory(const AnfNodePtr &kernel) { | |||
| void KernelRuntime::AssignStaticMemoryInput(const session::KernelGraph *graph) { | |||
| MS_EXCEPTION_IF_NULL(graph); | |||
| MS_EXCEPTION_IF_NULL(mem_manager_); | |||
| for (auto &item : graph->inputs()) { | |||
| MS_EXCEPTION_IF_NULL(item); | |||
| if (!item->isa<Parameter>()) { | |||
| @@ -247,7 +239,7 @@ void KernelRuntime::AssignStaticMemoryInput(const session::KernelGraph *graph) { | |||
| output_type_id = AnfAlgo::GetOutputInferDataType(item, index); | |||
| } | |||
| auto tensor_size = CountNodeDeviceMemorySize(item, index); | |||
| auto ptr = MallocStaticMem(tensor_size, false); | |||
| auto ptr = mem_manager_->MallocMem(kStaticMem, tensor_size); | |||
| auto address = CreateDeviceAddress(ptr, tensor_size, AnfAlgo::GetOutputFormat(item, index), output_type_id); | |||
| AnfAlgo::SetOutputAddr(address, index, item.get()); | |||
| } | |||
| @@ -301,6 +293,7 @@ void KernelRuntime::UpdateRefNodeOutputMem(const session::KernelGraph *graph) { | |||
| void KernelRuntime::AssignCommunicationNodeOutputMem(int flag, const AnfNodePtr &node) { | |||
| MS_EXCEPTION_IF_NULL(node); | |||
| MS_EXCEPTION_IF_NULL(mem_manager_); | |||
| auto kernel_mod = AnfAlgo::GetKernelMod(node); | |||
| MS_EXCEPTION_IF_NULL(kernel_mod); | |||
| auto output_sizes = kernel_mod->GetOutputSizeList(); | |||
| @@ -314,12 +307,12 @@ void KernelRuntime::AssignCommunicationNodeOutputMem(int flag, const AnfNodePtr | |||
| std::vector<size_t> align_size_list; | |||
| for (uint64_t mem_size : output_sizes) { | |||
| if (context_ptr->enable_hccl()) { | |||
| mem_size = GetCommonAlignSize(mem_size); | |||
| mem_size = mem_manager_->GetCommonAlignSize(mem_size); | |||
| } | |||
| total_size += mem_size; | |||
| align_size_list.emplace_back(mem_size); | |||
| } | |||
| uint8_t *output_ptr = CalDeviceMem(node, total_size, flag, 0); | |||
| uint8_t *output_ptr = mem_manager_->MallocOutputMem(node, 0, flag, total_size); | |||
| for (size_t j = 0; j < align_size_list.size(); ++j) { | |||
| std::string output_format = AnfAlgo::GetOutputFormat(node, j); | |||
| auto output_type = AnfAlgo::GetOutputDeviceDataType(node, j); | |||
| @@ -333,6 +326,7 @@ void KernelRuntime::UpdateCommunicationOpInputMem(const AnfNodePtr &node) { | |||
| auto context_ptr = MsContext::GetInstance(); | |||
| MS_EXCEPTION_IF_NULL(context_ptr); | |||
| MS_EXCEPTION_IF_NULL(node); | |||
| MS_EXCEPTION_IF_NULL(mem_manager_); | |||
| size_t total_size = 0; | |||
| std::vector<std::pair<mindspore::device::DeviceAddress *, size_t>> addr_size; | |||
| for (size_t i = 0; i < AnfAlgo::GetInputTensorNum(node); ++i) { | |||
| @@ -340,12 +334,12 @@ void KernelRuntime::UpdateCommunicationOpInputMem(const AnfNodePtr &node) { | |||
| MS_EXCEPTION_IF_NULL(address); | |||
| auto mem_size = address->size(); | |||
| if (context_ptr->enable_hccl()) { | |||
| mem_size = GetCommonAlignSize(mem_size); | |||
| mem_size = mem_manager_->GetCommonAlignSize(mem_size); | |||
| } | |||
| total_size += mem_size; | |||
| addr_size.emplace_back(address.get(), mem_size); | |||
| } | |||
| uint8_t *input_ptr = CalDeviceMem(node, total_size, kDynamicMem, 0); | |||
| uint8_t *input_ptr = mem_manager_->MallocOutputMem(node, 0, kDynamicMem, total_size); | |||
| for (const auto &iter : addr_size) { | |||
| MS_EXCEPTION_IF_NULL(iter.first); | |||
| iter.first->set_ptr(input_ptr); | |||
| @@ -355,7 +349,8 @@ void KernelRuntime::UpdateCommunicationOpInputMem(const AnfNodePtr &node) { | |||
| void KernelRuntime::AssignNodeOutputMem(int flag, const AnfNodePtr &node, int index) { | |||
| MS_EXCEPTION_IF_NULL(node); | |||
| if (IsCommunicationOp(node)) { | |||
| MS_EXCEPTION_IF_NULL(mem_manager_); | |||
| if (AnfAlgo::IsCommunicationOp(node)) { | |||
| UpdateCommunicationOpInputMem(node); | |||
| AssignCommunicationNodeOutputMem(flag, node); | |||
| return; | |||
| @@ -375,7 +370,7 @@ void KernelRuntime::AssignNodeOutputMem(int flag, const AnfNodePtr &node, int in | |||
| MS_LOG(INFO) << "Already malloc index:" << i; | |||
| continue; | |||
| } | |||
| auto ptr = CalDeviceMem(node, output_sizes[i], flag, i); | |||
| auto ptr = mem_manager_->MallocOutputMem(node, i, flag, output_sizes[i]); | |||
| if (ptr == nullptr) { | |||
| // reused ptr, no need alloc, continue; | |||
| continue; | |||
| @@ -390,6 +385,7 @@ void KernelRuntime::AssignValueNodeTensor(const ValueNodePtr &value_node, const | |||
| size_t output_idx) { | |||
| MS_EXCEPTION_IF_NULL(value_node); | |||
| MS_EXCEPTION_IF_NULL(node_value); | |||
| MS_EXCEPTION_IF_NULL(mem_manager_); | |||
| auto tensor = node_value->cast<TensorPtr>(); | |||
| if (tensor == nullptr) { | |||
| MS_LOG(WARNING) << "Tensor is null"; | |||
| @@ -397,7 +393,7 @@ void KernelRuntime::AssignValueNodeTensor(const ValueNodePtr &value_node, const | |||
| } | |||
| size_t tensor_size = tensor->data().nbytes(); | |||
| auto node_size = CountNodeDeviceMemorySize(value_node, output_idx); | |||
| auto ptr = MallocStaticMem(node_size, false); | |||
| auto ptr = mem_manager_->MallocMem(kStaticMem, node_size); | |||
| TypeId output_type_id = AnfAlgo::GetOutputDeviceDataType(value_node, output_idx); | |||
| if (output_type_id == kTypeUnknown) { | |||
| output_type_id = AnfAlgo::GetOutputInferDataType(value_node, output_idx); | |||
| @@ -414,6 +410,7 @@ void KernelRuntime::AssignValueNodeTensor(const ValueNodePtr &value_node, const | |||
| void KernelRuntime::AssignStaticMemoryValueNode(session::KernelGraph *graph) { | |||
| MS_EXCEPTION_IF_NULL(graph); | |||
| MS_EXCEPTION_IF_NULL(mem_manager_); | |||
| for (auto &value_node : graph->graph_value_nodes()) { | |||
| MS_EXCEPTION_IF_NULL(value_node); | |||
| if (AnfAlgo::OutputAddrExist(value_node, 0)) { | |||
| @@ -440,7 +437,7 @@ void KernelRuntime::AssignStaticMemoryValueNode(session::KernelGraph *graph) { | |||
| } else if (node_value->isa<StringImm>()) { | |||
| auto value = GetValue<std::string>(node_value); | |||
| size_t tensor_size = value.size(); | |||
| auto ptr = MallocStaticMem(tensor_size, false); | |||
| auto ptr = mem_manager_->MallocMem(kStaticMem, tensor_size); | |||
| auto address = CreateDeviceAddress(ptr, tensor_size, kOpFormat_DEFAULT, kNumberTypeUInt8); | |||
| MS_EXCEPTION_IF_NULL(address); | |||
| AnfAlgo::SetOutputAddr(address, 0, value_node.get()); | |||
| @@ -452,103 +449,37 @@ void KernelRuntime::AssignStaticMemoryValueNode(session::KernelGraph *graph) { | |||
| } | |||
| } | |||
| void KernelRuntime::AssignDynamicMemory(const session::KernelGraph *graph) { | |||
| void KernelRuntime::AssignDynamicMemory(session::KernelGraph *graph) { | |||
| MS_EXCEPTION_IF_NULL(graph); | |||
| // reset dynamic mem offset | |||
| dynamic_mem_offset_ = 0; | |||
| auto &kernels = graph->execution_order(); | |||
| for (auto &kernel : kernels) { | |||
| AssignNodeOutputMem(kDynamicMem, kernel, kGetAllOuts); | |||
| AssignWorkSpaceMem(kernel); | |||
| MS_EXCEPTION_IF_NULL(mem_manager_); | |||
| auto context_ptr = MsContext::GetInstance(); | |||
| MS_EXCEPTION_IF_NULL(context_ptr); | |||
| bool is_enable_mem_reuse = context_ptr->enable_mem_reuse(); | |||
| auto mem_flag = kDynamicMem; | |||
| if (is_enable_mem_reuse) { | |||
| mem_manager_->MallocReusedDynamicMem(graph); | |||
| mem_flag = kReuseDynamicMem; | |||
| } | |||
| } | |||
| void KernelRuntime::ReuseAssignDynamicMemory(session::KernelGraph *graph) { | |||
| MS_EXCEPTION_IF_NULL(graph); | |||
| dynamic_mem_offset_ = 0; | |||
| MemReuseUtilPtr mem_reuse_util_ptr = std::make_shared<memreuse::MemReuseUtil>(); | |||
| MS_EXCEPTION_IF_NULL(mem_reuse_util_ptr); | |||
| // set all infos | |||
| mem_reuse_util_ptr->SetAllInfo(graph); | |||
| auto bestfit_mem_reuse = std::make_shared<BestFitMemReuse>(); | |||
| MS_EXCEPTION_IF_NULL(bestfit_mem_reuse); | |||
| bestfit_mem_reuse->Reuse(mem_reuse_util_ptr.get()); | |||
| size_t total_allocated_size = bestfit_mem_reuse->GetAllocatedSize(); | |||
| MS_LOG(INFO) << "TotalReuseDynamicSize [" << total_allocated_size << "]"; | |||
| mem_reuse_util_ptr_ = mem_reuse_util_ptr; | |||
| auto base_ptr = MallocDynamicMem(total_allocated_size, false); | |||
| mem_reuse_util_ptr_->set_mem_base(base_ptr); | |||
| auto &kernels = graph->execution_order(); | |||
| for (auto &kernel : kernels) { | |||
| AssignNodeOutputMem(kReuseDynamicMem, kernel, kGetAllOuts); | |||
| AssignReuseWorkSpaceMem(kernel); | |||
| AssignNodeOutputMem(mem_flag, kernel, kGetAllOuts); | |||
| AssignWorkSpaceMem(mem_flag, kernel); | |||
| } | |||
| } | |||
| void KernelRuntime::AssignReuseWorkSpaceMem(const AnfNodePtr &node) { | |||
| void KernelRuntime::AssignWorkSpaceMem(int flag, const AnfNodePtr &node) { | |||
| MS_EXCEPTION_IF_NULL(node); | |||
| MS_EXCEPTION_IF_NULL(mem_manager_); | |||
| auto kernel_mod = AnfAlgo::GetKernelMod(node); | |||
| MS_EXCEPTION_IF_NULL(kernel_mod); | |||
| size_t index = 0; | |||
| for (auto &size : kernel_mod->GetWorkspaceSizeList()) { | |||
| auto wk_ptr = mem_reuse_util_ptr_->GetNodeWorkSpacePtr(node, index); | |||
| AnfAlgo::SetWorkspaceAddr(CreateDeviceAddress(wk_ptr, size, "", kTypeUnknown), index, node.get()); | |||
| auto ptr = mem_manager_->MallocWorkSpaceMem(node, flag, index, size); | |||
| AnfAlgo::SetWorkspaceAddr(CreateDeviceAddress(ptr, size, "", kTypeUnknown), index, node.get()); | |||
| index++; | |||
| } | |||
| } | |||
| void KernelRuntime::AssignWorkSpaceMem(const AnfNodePtr &node) { | |||
| MS_EXCEPTION_IF_NULL(node); | |||
| if (node->isa<CNode>()) { | |||
| auto kernel_mod = AnfAlgo::GetKernelMod(node); | |||
| MS_EXCEPTION_IF_NULL(kernel_mod); | |||
| size_t index = 0; | |||
| for (auto &size : kernel_mod->GetWorkspaceSizeList()) { | |||
| auto ptr = MallocDynamicMem(size, false); | |||
| AnfAlgo::SetWorkspaceAddr(CreateDeviceAddress(ptr, size, "", kTypeUnknown), index, node.get()); | |||
| index++; | |||
| } | |||
| } | |||
| } | |||
| bool KernelRuntime::IsCommunicationOp(const AnfNodePtr &node) { | |||
| MS_EXCEPTION_IF_NULL(node); | |||
| auto kernel_name = AnfAlgo::GetCNodeName(node); | |||
| auto kernel_type = AnfAlgo::GetKernelType(node); | |||
| if (kernel_name == kAllReduceOpName || kernel_type == HCCL_KERNEL) { | |||
| return true; | |||
| } | |||
| return false; | |||
| } | |||
| uint8_t *KernelRuntime::CalDeviceMem(const AnfNodePtr &node, size_t size, int flag, size_t index) { | |||
| MS_EXCEPTION_IF_NULL(node); | |||
| auto context_ptr = MsContext::GetInstance(); | |||
| MS_EXCEPTION_IF_NULL(context_ptr); | |||
| uint8_t *ptr = nullptr; | |||
| if (IsCommunicationOp(node)) { | |||
| bool communication_mem = false; | |||
| if (context_ptr->enable_hccl()) { | |||
| communication_mem = true; | |||
| } | |||
| if (flag == kStaticMem) { | |||
| ptr = MallocStaticMem(size, communication_mem); | |||
| } else { | |||
| ptr = MallocDynamicMem(size, communication_mem); | |||
| } | |||
| return ptr; | |||
| } | |||
| if (flag == kStaticMem) { | |||
| ptr = MallocStaticMem(size, false); | |||
| } else if (flag == kDynamicMem) { | |||
| ptr = MallocDynamicMem(size, false); | |||
| } else if (flag == kReuseDynamicMem) { | |||
| ptr = mem_reuse_util_ptr_->GetNodeOutputPtr(node, index); | |||
| } | |||
| return ptr; | |||
| } | |||
| void KernelRuntime::GenLaunchArgs(const mindspore::kernel::KernelMod &kernel_mod, const mindspore::AnfNodePtr &kernel, | |||
| AddressPtrList *kernel_inputs, AddressPtrList *const kernel_workspaces, | |||
| AddressPtrList *kernel_outputs) { | |||
| @@ -659,65 +590,6 @@ bool KernelRuntime::LaunchKernelMod(const session::KernelGraph &graph) { | |||
| return true; | |||
| } | |||
| size_t KernelRuntime::GetCommonAlignSize(size_t input_size) const { | |||
| return (input_size + mem_align_size_ + 31) / mem_align_size_ * mem_align_size_; | |||
| } | |||
| size_t KernelRuntime::GetCommunicationAlignSize(size_t input_size) const { | |||
| return (input_size + mem_align_size_ - 1) / mem_align_size_ * mem_align_size_ + 2 * mem_align_size_; | |||
| } | |||
| uint8_t *KernelRuntime::MallocStaticMem(size_t size, bool communication_mem) { | |||
| size_t align_size = 0; | |||
| if (communication_mem) { | |||
| align_size = GetCommunicationAlignSize(size); | |||
| } else { | |||
| align_size = GetCommonAlignSize(size); | |||
| } | |||
| if (static_mem_offset_ < align_size) { | |||
| MS_LOG(EXCEPTION) << "Out of memory!!! total[" << device_mem_size_ << "](dynamic[" << total_dynamic_size_ | |||
| << "] static[" << total_static_size_ << "])" | |||
| << " malloc [" << align_size << "] failed!"; | |||
| } | |||
| total_static_size_ += align_size; | |||
| auto offset = static_mem_offset_ - align_size; | |||
| if (dynamic_mem_offset_ > offset) { | |||
| MS_LOG(EXCEPTION) << "Out of memory!!! total[" << device_mem_size_ << "](dynamic[" << total_dynamic_size_ | |||
| << "] static[" << total_static_size_ << "])" | |||
| << " malloc [" << align_size << "] failed!"; | |||
| } | |||
| static_mem_offset_ = offset; | |||
| if (communication_mem) { | |||
| return device_mem_base_ + offset + mem_align_size_; | |||
| } else { | |||
| return device_mem_base_ + offset; | |||
| } | |||
| } | |||
| uint8_t *KernelRuntime::MallocDynamicMem(size_t size, bool communication_mem) { | |||
| size_t align_size = 0; | |||
| if (communication_mem) { | |||
| align_size = GetCommunicationAlignSize(size); | |||
| } else { | |||
| align_size = GetCommonAlignSize(size); | |||
| } | |||
| uint64_t offset = dynamic_mem_offset_; | |||
| auto new_offset = dynamic_mem_offset_ + align_size; | |||
| if (new_offset > static_mem_offset_) { | |||
| MS_LOG(EXCEPTION) << "Out of memory!!! total[" << device_mem_size_ << "](dynamic[" << total_dynamic_size_ | |||
| << "] static[" << total_static_size_ << "])" | |||
| << " malloc [" << align_size << "] failed!"; | |||
| } | |||
| total_dynamic_size_ += align_size; | |||
| dynamic_mem_offset_ = new_offset; | |||
| if (communication_mem) { | |||
| return device_mem_base_ + offset + mem_align_size_; | |||
| } else { | |||
| return device_mem_base_ + offset; | |||
| } | |||
| } | |||
| bool KernelRuntime::LaunchKernel(const session::KernelGraph *graph) { | |||
| MS_EXCEPTION_IF_NULL(graph); | |||
| if (!LaunchKernelMod(*graph)) { | |||
| @@ -731,29 +603,6 @@ bool KernelRuntime::LaunchKernel(const session::KernelGraph *graph) { | |||
| return true; | |||
| } | |||
| void KernelRuntime::MallocOpMemory(const DeviceAddressPtr address, size_t size, int flag) { | |||
| if (flag == kStaticMem) { | |||
| address->ptr_ = MallocStaticMem(size, false); | |||
| } else if (flag == kDynamicMem) { | |||
| address->ptr_ = MallocDynamicMem(size, false); | |||
| } else { | |||
| MS_LOG(EXCEPTION) << "Unknown memory type!"; | |||
| } | |||
| } | |||
| void *KernelRuntime::AllocTensorMemDynamic(size_t size) { | |||
| if (size == 0) { | |||
| MS_LOG(ERROR) << "AllocTensorMemDynamic size is 0."; | |||
| } | |||
| return nullptr; | |||
| } | |||
| void KernelRuntime::FreeTensorMemDynamic(void *device_ptr) { | |||
| if (device_ptr == nullptr) { | |||
| MS_LOG(ERROR) << "FreeTensorMemDynamic device_ptr is null."; | |||
| } | |||
| } | |||
| #ifdef ENABLE_DUMP_E2E | |||
| bool KernelRuntime::SetDumpConf() { | |||
| dump_conf_ptr_ = std::make_shared<Dump>(); | |||
| @@ -20,8 +20,7 @@ | |||
| #include <memory> | |||
| #include <string> | |||
| #include <map> | |||
| #include "pre_activate/mem_reuse/mem_reuse.h" | |||
| #include "pre_activate/mem_reuse/mem_reuse_allocator.h" | |||
| #include "device/device_address.h" | |||
| #include "ir/meta_tensor.h" | |||
| #include "predict/generator/utils/ir_model_util.h" | |||
| @@ -32,21 +31,15 @@ | |||
| #include "session/anf_runtime_algorithm.h" | |||
| #include "kernel/kernel.h" | |||
| #include "utils/context/ms_context.h" | |||
| #include "device/memory_manager.h" | |||
| // using mindspore::session::KernelGraph; | |||
| using mindspore::tensor::Tensor; | |||
| using TensorPtr = std::shared_ptr<Tensor>; | |||
| using MemReuseUtilPtr = mindspore::memreuse::MemReuseUtilPtr; | |||
| using mindspore::kernel::AddressPtr; | |||
| using AddressPtrList = std::vector<mindspore::kernel::AddressPtr>; | |||
| namespace mindspore { | |||
| namespace device { | |||
| const int kStaticMem = 0; | |||
| const int kDynamicMem = 1; | |||
| const int kReuseDynamicMem = 2; | |||
| const int kGetAllOuts = -1; | |||
| class KernelRuntime { | |||
| public: | |||
| KernelRuntime() = default; | |||
| @@ -65,7 +58,6 @@ class KernelRuntime { | |||
| DumpConfPtr GetDumpConf(); | |||
| #endif | |||
| virtual bool LoadTask(const session::KernelGraph *graph); | |||
| virtual void FreeHostMemory(); | |||
| // for GPU and D to impl | |||
| virtual void ReleaseDeviceRes() {} | |||
| void set_device_id(uint32_t device_id) { device_id_ = device_id; } | |||
| @@ -75,29 +67,17 @@ class KernelRuntime { | |||
| TypeId type_id) = 0; | |||
| virtual bool SyncStream() = 0; | |||
| void AssignStaticMemory(session::KernelGraph *graph); | |||
| void AssignDynamicMemory(const session::KernelGraph *graph); | |||
| void AssignDynamicMemory(session::KernelGraph *graph); | |||
| void ReuseAssignDynamicMemory(session::KernelGraph *graph); | |||
| void AssignNodeOutputMem(int flag, const AnfNodePtr &node, int index); | |||
| void AssignWorkSpaceMem(const AnfNodePtr &node); | |||
| void AssignWorkSpaceMem(int flag, const AnfNodePtr &node); | |||
| void AssignReuseWorkSpaceMem(const AnfNodePtr &node); | |||
| void AssignCommunicationNodeOutputMem(int flag, const AnfNodePtr &node); | |||
| void UpdateRefNodeOutputMem(const session::KernelGraph *graph); | |||
| void UpdateCommunicationOpInputMem(const AnfNodePtr &node); | |||
| bool IsCommunicationOp(const AnfNodePtr &node); | |||
| size_t GetCommonAlignSize(size_t input_size) const; | |||
| size_t GetCommunicationAlignSize(size_t input_size) const; | |||
| uint8_t *CalDeviceMem(const AnfNodePtr &node, size_t size, int flag, size_t index); | |||
| virtual uint8_t *MallocStaticMem(size_t size, bool communication_mem); | |||
| uint8_t *MallocDynamicMem(size_t size, bool communication_mem); | |||
| #ifdef ENABLE_DUMP_E2E | |||
| bool SetDumpConf(); | |||
| #endif | |||
| // Alloc memory use the dynamic memory pool. | |||
| virtual void *AllocTensorMemDynamic(size_t size); | |||
| // Free memory use the dynamic memory pool. | |||
| virtual void FreeTensorMemDynamic(void *device_ptr); | |||
| virtual void MallocOpMemory(const DeviceAddressPtr address, size_t size, int flag); | |||
| private: | |||
| void AssignStaticMemoryOutput(const session::KernelGraph *graph); | |||
| @@ -114,20 +94,11 @@ class KernelRuntime { | |||
| protected: | |||
| uint32_t device_id_{0}; | |||
| uint8_t *device_mem_base_{nullptr}; | |||
| uint8_t *device_mem_pool_base_{nullptr}; | |||
| uint64_t device_mem_size_{0}; | |||
| uint64_t device_mem_pool_size_{0}; | |||
| uint64_t dynamic_mem_offset_{0}; | |||
| uint64_t static_mem_offset_{0}; | |||
| const uint64_t mem_align_size_ = 512; | |||
| #ifdef ENABLE_DUMP_E2E | |||
| DumpConfPtr dump_conf_ptr_; | |||
| #endif | |||
| void *stream_ = nullptr; | |||
| size_t total_static_size_ = 0; | |||
| size_t total_dynamic_size_ = 0; | |||
| MemReuseUtilPtr mem_reuse_util_ptr_{nullptr}; | |||
| std::shared_ptr<MemoryManager> mem_manager_{nullptr}; | |||
| }; | |||
| using KernelRuntimePtr = std::shared_ptr<KernelRuntime>; | |||
| } // namespace device | |||
| @@ -0,0 +1,164 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "device/memory_manager.h" | |||
| #include "session/anf_runtime_algorithm.h" | |||
| #include "utils/context/ms_context.h" | |||
| using mindspore::memreuse::BestFitMemReuse; | |||
| using mindspore::memreuse::MemReuseUtilPtr; | |||
| namespace mindspore { | |||
| namespace device { | |||
| size_t MemoryManager::GetCommonAlignSize(size_t input_size) const { | |||
| return (input_size + kMemAlignSize + 31) / kMemAlignSize * kMemAlignSize; | |||
| } | |||
| size_t MemoryManager::GetCommunicationAlignSize(size_t input_size) const { | |||
| return (input_size + kMemAlignSize - 1) / kMemAlignSize * kMemAlignSize + 2 * kMemAlignSize; | |||
| } | |||
| void MemoryManager::MallocReusedDynamicMem(session::KernelGraph *graph) { | |||
| MS_EXCEPTION_IF_NULL(graph); | |||
| MemReuseUtilPtr mem_reuse_util_ptr = std::make_shared<memreuse::MemReuseUtil>(); | |||
| MS_EXCEPTION_IF_NULL(mem_reuse_util_ptr); | |||
| // set all infos | |||
| mem_reuse_util_ptr->SetAllInfo(graph); | |||
| auto bestfit_mem_reuse = std::make_shared<BestFitMemReuse>(); | |||
| MS_EXCEPTION_IF_NULL(bestfit_mem_reuse); | |||
| bestfit_mem_reuse->Reuse(mem_reuse_util_ptr.get()); | |||
| size_t total_allocated_size = bestfit_mem_reuse->GetAllocatedSize(); | |||
| MS_LOG(INFO) << "TotalReuseDynamicSize [" << total_allocated_size << "]"; | |||
| mem_reuse_util_ptr_ = mem_reuse_util_ptr; | |||
| auto base_ptr = MallocDynamicMem(total_allocated_size, false); | |||
| mem_reuse_util_ptr_->set_mem_base(base_ptr); | |||
| } | |||
| uint8_t *MemoryManager::MallocOutputMem(const AnfNodePtr &node, size_t index, int flag, size_t size) { | |||
| MS_EXCEPTION_IF_NULL(node); | |||
| auto context_ptr = MsContext::GetInstance(); | |||
| MS_EXCEPTION_IF_NULL(context_ptr); | |||
| uint8_t *ptr = nullptr; | |||
| if (AnfAlgo::IsCommunicationOp(node)) { | |||
| bool communication_mem = false; | |||
| if (context_ptr->enable_hccl()) { | |||
| communication_mem = true; | |||
| } | |||
| if (flag == kStaticMem) { | |||
| ptr = MallocStaticMem(size, communication_mem); | |||
| } else { | |||
| ptr = MallocDynamicMem(size, communication_mem); | |||
| } | |||
| return ptr; | |||
| } | |||
| if (flag == kStaticMem) { | |||
| ptr = MallocStaticMem(size, false); | |||
| } else if (flag == kDynamicMem) { | |||
| ptr = MallocDynamicMem(size, false); | |||
| } else if (flag == kReuseDynamicMem) { | |||
| ptr = mem_reuse_util_ptr_->GetNodeOutputPtr(node, index); | |||
| } | |||
| return ptr; | |||
| } | |||
| uint8_t *MemoryManager::MallocWorkSpaceMem(const AnfNodePtr &node, size_t index, int flag, size_t size) { | |||
| if (flag == kReuseDynamicMem) { | |||
| return mem_reuse_util_ptr_->GetNodeWorkSpacePtr(node, index); | |||
| } | |||
| return MallocDynamicMem(size, false); | |||
| } | |||
| uint8_t *MemoryManager::MallocMem(int flag, size_t size) { | |||
| uint8_t *ptr = nullptr; | |||
| if (flag == kStaticMem) { | |||
| ptr = MallocStaticMem(size, false); | |||
| } else if (flag == kDynamicMem) { | |||
| ptr = MallocDynamicMem(size, false); | |||
| } | |||
| return ptr; | |||
| } | |||
| uint8_t *MemoryManager::MallocStaticMem(size_t size, bool communication_mem) { | |||
| size_t align_size = 0; | |||
| if (communication_mem) { | |||
| align_size = GetCommunicationAlignSize(size); | |||
| } else { | |||
| align_size = GetCommonAlignSize(size); | |||
| } | |||
| if (static_mem_offset_ < align_size) { | |||
| MS_LOG(EXCEPTION) << "Out of memory!!! total[" << device_mem_size_ << "](dynamic[" << total_dynamic_size_ | |||
| << "] static[" << total_static_size_ << "])" | |||
| << " malloc [" << align_size << "] failed!"; | |||
| } | |||
| total_static_size_ += align_size; | |||
| auto offset = static_mem_offset_ - align_size; | |||
| if (dynamic_mem_offset_ > offset) { | |||
| MS_LOG(EXCEPTION) << "Out of memory!!! total[" << device_mem_size_ << "](dynamic[" << total_dynamic_size_ | |||
| << "] static[" << total_static_size_ << "])" | |||
| << " malloc [" << align_size << "] failed!"; | |||
| } | |||
| static_mem_offset_ = offset; | |||
| if (communication_mem) { | |||
| return device_mem_base_ + offset + kMemAlignSize; | |||
| } else { | |||
| return device_mem_base_ + offset; | |||
| } | |||
| } | |||
| uint8_t *MemoryManager::MallocDynamicMem(size_t size, bool communication_mem) { | |||
| size_t align_size = 0; | |||
| if (communication_mem) { | |||
| align_size = GetCommunicationAlignSize(size); | |||
| } else { | |||
| align_size = GetCommonAlignSize(size); | |||
| } | |||
| uint64_t offset = dynamic_mem_offset_; | |||
| auto new_offset = dynamic_mem_offset_ + align_size; | |||
| if (new_offset > static_mem_offset_) { | |||
| MS_LOG(EXCEPTION) << "Out of memory!!! total[" << device_mem_size_ << "](dynamic[" << total_dynamic_size_ | |||
| << "] static[" << total_static_size_ << "])" | |||
| << " malloc [" << align_size << "] failed!"; | |||
| } | |||
| total_dynamic_size_ += align_size; | |||
| dynamic_mem_offset_ = new_offset; | |||
| if (communication_mem) { | |||
| return device_mem_base_ + offset + kMemAlignSize; | |||
| } else { | |||
| return device_mem_base_ + offset; | |||
| } | |||
| } | |||
| void MemoryManager::MallocMemFromMemPool(const DeviceAddressPtr address, size_t size) { | |||
| auto device_ptr = MallocMemFromMemPool(size); | |||
| MS_EXCEPTION_IF_NULL(device_ptr); | |||
| address->ptr_ = device_ptr; | |||
| address->from_mem_pool_ = true; | |||
| } | |||
| void *MemoryManager::MallocMemFromMemPool(size_t size) { | |||
| if (size == 0) { | |||
| MS_LOG(ERROR) << "MallocMemFromMemPool size is 0."; | |||
| } | |||
| return nullptr; | |||
| } | |||
| void MemoryManager::FreeMemFromMemPool(void *device_ptr) { | |||
| if (device_ptr == nullptr) { | |||
| MS_LOG(ERROR) << "FreeMemFromMemPool device_ptr is null."; | |||
| } | |||
| } | |||
| } // namespace device | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,68 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_MINDSPORE_CCSRC_DEVICE_MEMORY_MANAGER_H_ | |||
| #define MINDSPORE_MINDSPORE_CCSRC_DEVICE_MEMORY_MANAGER_H_ | |||
| #include <memory> | |||
| #include "pre_activate/mem_reuse/mem_reuse.h" | |||
| #include "pre_activate/mem_reuse/mem_reuse_allocator.h" | |||
| namespace mindspore { | |||
| namespace device { | |||
| const int kStaticMem = 0; | |||
| const int kDynamicMem = 1; | |||
| const int kReuseDynamicMem = 2; | |||
| const int kGetAllOuts = -1; | |||
| const uint64_t kMemAlignSize = 512; | |||
| using MemReuseUtilPtr = mindspore::memreuse::MemReuseUtilPtr; | |||
| class MemoryManager { | |||
| public: | |||
| MemoryManager() = default; | |||
| virtual ~MemoryManager() = default; | |||
| virtual void MallocDeviceMemory() = 0; | |||
| virtual void FreeDeviceMemory() = 0; | |||
| void ResetDynamicMemory() { | |||
| total_dynamic_size_ = 0; | |||
| dynamic_mem_offset_ = 0; | |||
| } | |||
| void MallocReusedDynamicMem(session::KernelGraph *graph); | |||
| uint8_t *MallocOutputMem(const AnfNodePtr &node, size_t index, int flag, size_t size); | |||
| uint8_t *MallocWorkSpaceMem(const AnfNodePtr &node, size_t index, int flag, size_t size); | |||
| virtual uint8_t *MallocMem(int flag, size_t size); | |||
| virtual void MallocMemFromMemPool(const DeviceAddressPtr address, size_t size); | |||
| virtual void *MallocMemFromMemPool(size_t size); | |||
| virtual void FreeMemFromMemPool(void *device_ptr); | |||
| size_t GetCommonAlignSize(size_t input_size) const; | |||
| size_t GetCommunicationAlignSize(size_t input_size) const; | |||
| protected: | |||
| virtual uint8_t *MallocStaticMem(size_t size, bool communication_mem); | |||
| virtual uint8_t *MallocDynamicMem(size_t size, bool communication_mem); | |||
| uint8_t *device_mem_base_{nullptr}; | |||
| uint64_t device_mem_size_{0}; | |||
| uint64_t dynamic_mem_offset_{0}; | |||
| uint64_t static_mem_offset_{0}; | |||
| size_t total_static_size_ = 0; | |||
| size_t total_dynamic_size_ = 0; | |||
| MemReuseUtilPtr mem_reuse_util_ptr_{nullptr}; | |||
| }; | |||
| } // namespace device | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_MINDSPORE_CCSRC_DEVICE_MEMORY_MANAGER_H_ | |||
| @@ -39,45 +39,7 @@ namespace mindspore { | |||
| namespace kernel { | |||
| using FNodeAttrHandle = std::function<void(const std::shared_ptr<AnfNode> &anf_node, mindspore::NodeDef *proto)>; | |||
| const std::vector<std::string> local_framework_op_vec = {kInitDataSetQueue, kGetNext, kDropoutGenMask, kPrint}; | |||
| void InitDataSetQueueAttr(const std::shared_ptr<AnfNode> &anf_node, mindspore::NodeDef *proto) { | |||
| MS_EXCEPTION_IF_NULL(anf_node); | |||
| MS_EXCEPTION_IF_NULL(proto); | |||
| ::google::protobuf::Map<::std::string, ::mindspore::AttrValue> *node_attr = proto->mutable_attrs(); | |||
| MS_EXCEPTION_IF_NULL(node_attr); | |||
| std::string channel_name = AnfAlgo::GetNodeAttr<std::string>(anf_node, kQueueName); | |||
| (*node_attr)[kChannelName].set_s(channel_name); | |||
| } | |||
| void GetNextAttr(const std::shared_ptr<AnfNode> &anf_node, mindspore::NodeDef *proto) { | |||
| MS_EXCEPTION_IF_NULL(anf_node); | |||
| MS_EXCEPTION_IF_NULL(proto); | |||
| ::google::protobuf::Map<::std::string, ::mindspore::AttrValue> *node_attr = proto->mutable_attrs(); | |||
| MS_EXCEPTION_IF_NULL(node_attr); | |||
| std::string shared_name = AnfAlgo::GetNodeAttr<std::string>(anf_node, kSharedName); | |||
| (*node_attr)[kChannelName].set_s(shared_name); | |||
| } | |||
| void DropoutGenMaskAttr(const std::shared_ptr<AnfNode> &anf_node, mindspore::NodeDef *proto) { | |||
| MS_EXCEPTION_IF_NULL(anf_node); | |||
| MS_EXCEPTION_IF_NULL(proto); | |||
| ::google::protobuf::Map<::std::string, ::mindspore::AttrValue> *node_attr = proto->mutable_attrs(); | |||
| MS_EXCEPTION_IF_NULL(node_attr); | |||
| int seed = AnfAlgo::GetNodeAttr<int>(anf_node, kSeed); | |||
| int seed2 = AnfAlgo::GetNodeAttr<int>(anf_node, kSeed2); | |||
| (*node_attr)["seed"].set_i(seed); | |||
| (*node_attr)["seed2"].set_i(seed2); | |||
| } | |||
| void CreateAttrFuncMap(std::map<std::string, FNodeAttrHandle> *mOpAttrFuncMap) { | |||
| (void)mOpAttrFuncMap->emplace(std::pair<std::string, FNodeAttrHandle>(kInitDataSetQueue, InitDataSetQueueAttr)); | |||
| (void)mOpAttrFuncMap->emplace(std::pair<std::string, FNodeAttrHandle>(kGetNext, GetNextAttr)); | |||
| (void)mOpAttrFuncMap->emplace(std::pair<std::string, FNodeAttrHandle>(kDropoutGenMask, DropoutGenMaskAttr)); | |||
| } | |||
| const std::vector<std::string> local_framework_op_vec = {kInitData, kGetNext, kDropoutGenMask, kPrint}; | |||
| bool SetIOIputSize(const std::shared_ptr<AnfNode> &anf_node, const size_t &input_num, | |||
| std::vector<size_t> *input_size_list) { | |||
| @@ -147,24 +109,74 @@ bool SetIOSize(const std::shared_ptr<AnfNode> &anf_node, const std::shared_ptr<A | |||
| return true; | |||
| } | |||
| void ParseAttrValue(const std::string &type, const std::string &attr_name, const mindspore::ValuePtr &value, | |||
| ::google::protobuf::Map<::std::string, ::mindspore::AttrValue> *node_attr) { | |||
| MS_EXCEPTION_IF_NULL(node_attr); | |||
| if (type == "int") { | |||
| auto attr_value = GetValue<int>(value); | |||
| (*node_attr)[attr_name].set_i(attr_value); | |||
| } else if (type == "str") { | |||
| auto attr_value = GetValue<std::string>(value); | |||
| (*node_attr)[attr_name].set_s(attr_value); | |||
| } else if (type == "bool") { | |||
| auto attr_value = GetValue<bool>(value); | |||
| (*node_attr)[attr_name].set_b(attr_value); | |||
| } else if (type == "float") { | |||
| auto attr_value = GetValue<float>(value); | |||
| (*node_attr)[attr_name].set_f(attr_value); | |||
| } else if (type == "listInt") { | |||
| std::vector<int> attr_value; | |||
| auto value_type = value->type(); | |||
| MS_EXCEPTION_IF_NULL(value_type); | |||
| auto value_type_str = value_type->ToString(); | |||
| if (value_type_str == "Int32") { | |||
| int data = GetValue<int>(value); | |||
| attr_value.push_back(data); | |||
| } else { | |||
| attr_value = GetValue<std::vector<int>>(value); | |||
| } | |||
| mindspore::AttrValue input_shape_attr; | |||
| mindspore::AttrValue_ArrayValue *input_shape_attr_list = input_shape_attr.mutable_array(); | |||
| MS_EXCEPTION_IF_NULL(input_shape_attr_list); | |||
| for (const auto shape : attr_value) { | |||
| input_shape_attr_list->add_i(shape); | |||
| } | |||
| (*node_attr)[attr_name] = input_shape_attr; | |||
| } else { | |||
| MS_LOG(EXCEPTION) << "type: " << type << "not support"; | |||
| } | |||
| } | |||
| void SetNodeAttr(const std::shared_ptr<AnfNode> &anf_node, mindspore::NodeDef *proto) { | |||
| std::string op_name = AnfAlgo::GetCNodeName(anf_node); | |||
| if (op_name == "InitDataSetQueue") { | |||
| op_name = "InitData"; | |||
| if (op_name == kInitDataSetQueue) { | |||
| op_name = kInitData; | |||
| } | |||
| if (op_name == "Print") { | |||
| if (op_name == kPrint) { | |||
| return; | |||
| } | |||
| std::map<std::string, FNodeAttrHandle> mOpAttrFuncMap; | |||
| CreateAttrFuncMap(&mOpAttrFuncMap); | |||
| FNodeAttrHandle func_ptr = nullptr; | |||
| auto iter = mOpAttrFuncMap.find(op_name); | |||
| if (iter != mOpAttrFuncMap.end()) { | |||
| func_ptr = iter->second; | |||
| MS_EXCEPTION_IF_NULL(func_ptr); | |||
| func_ptr(anf_node, proto); | |||
| } else { | |||
| MS_LOG(ERROR) << "Don't support node [" << op_name << "] to set nodedef of attr"; | |||
| auto op_info_ptr = mindspore::kernel::OpLib::FindOp(op_name, OpImplyType::kAICPU); | |||
| MS_EXCEPTION_IF_NULL(op_info_ptr); | |||
| auto attrs_ptr = op_info_ptr->attrs_ptr(); | |||
| auto primitive = AnfAlgo::GetCNodePrimitive(anf_node); | |||
| MS_EXCEPTION_IF_NULL(primitive); | |||
| ::google::protobuf::Map<::std::string, ::mindspore::AttrValue> *node_attr = proto->mutable_attrs(); | |||
| for (const auto &attr_ptr : attrs_ptr) { | |||
| std::string attr_name = attr_ptr->name(); | |||
| std::string real_name; | |||
| auto value = primitive->GetAttr(attr_name); | |||
| if (value != nullptr) { | |||
| if (attr_name == kQueueName || attr_name == kSharedName) { | |||
| real_name = kChannelName; | |||
| } else if (attr_name == kSeed) { | |||
| real_name = "seed"; | |||
| } else if (attr_name == kSeed2) { | |||
| real_name = "seed2"; | |||
| } | |||
| std::string type = attr_ptr->type(); | |||
| ParseAttrValue(type, real_name, value, node_attr); | |||
| } | |||
| } | |||
| MS_LOG(INFO) << "Set node attr end!"; | |||
| } | |||
| @@ -17,68 +17,27 @@ | |||
| #include "kernel/aicpu/aicpu_kernel_metadata.h" | |||
| #include <memory> | |||
| #include <string> | |||
| #include "kernel/oplib/oplib.h" | |||
| #include "kernel/common_utils.h" | |||
| #include "kernel/aicpu/aicpu_util.h" | |||
| #include "session/anf_runtime_algorithm.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| constexpr auto kInitDataSetQueueOpName = "InitDataSetQueue"; | |||
| constexpr auto kGetNext = "GetNext"; | |||
| constexpr auto kDropoutGenMask = "DropoutGenMask"; | |||
| constexpr auto kPrint = "Print"; | |||
| const std::vector<std::string> AICPU_OPS = {kInitDataSetQueueOpName, kGetNext, kDropoutGenMask, kPrint}; | |||
| std::shared_ptr<KernelBuildInfo> CreateKernelInfo(const std::vector<std::string> &inputs_format, | |||
| const std::vector<TypeId> &inputs_device_type, | |||
| const std::vector<std::string> &outputs_format, | |||
| const std::vector<TypeId> &outputs_device_type) { | |||
| auto builder = KernelBuildInfo::KernelBuildInfoBuilder(); | |||
| builder.SetInputsFormat(inputs_format); | |||
| builder.SetInputsDeviceType(inputs_device_type); | |||
| builder.SetOutputsFormat(outputs_format); | |||
| builder.SetOutputsDeviceType(outputs_device_type); | |||
| builder.SetProcessor(AICPU); | |||
| builder.SetKernelType(AICPU_KERNEL); | |||
| builder.SetFusionType(OPAQUE); | |||
| return builder.Build(); | |||
| } | |||
| bool CheckIfExistAicpuMeta(const std::string &op_name) { | |||
| if (std::find(AICPU_OPS.begin(), AICPU_OPS.end(), op_name) != AICPU_OPS.end()) { | |||
| return false; | |||
| } | |||
| return true; | |||
| } | |||
| void AicpuMetadataInfo(const CNodePtr &kernel_node, std::vector<std::shared_ptr<KernelBuildInfo>> *kernel_info_list) { | |||
| MS_LOG(INFO) << "AicpuMetadataInfo."; | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| MS_EXCEPTION_IF_NULL(kernel_info_list); | |||
| std::string op_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| if (CheckIfExistAicpuMeta(op_name)) { | |||
| MS_LOG(DEBUG) << "Aicpu doesn't have metadata of op [" << op_name << "]."; | |||
| return; | |||
| } | |||
| if (op_name == kInitDataSetQueueOpName) { | |||
| kernel_info_list->push_back(CreateKernelInfo({}, {}, {}, {})); | |||
| if (op_name == kInitDataSetQueue) { | |||
| op_name = kInitData; | |||
| } | |||
| if (op_name == kGetNext) { | |||
| std::vector<std::string> outputs_format; | |||
| std::vector<TypeId> outputs_type; | |||
| for (size_t output_index = 0; output_index < AnfAlgo::GetOutputTensorNum(kernel_node); ++output_index) { | |||
| outputs_format.emplace_back(kOpFormat_DEFAULT); | |||
| outputs_type.push_back(AnfAlgo::GetOutputInferDataType(kernel_node, output_index)); | |||
| } | |||
| kernel_info_list->push_back(CreateKernelInfo({}, {}, outputs_format, outputs_type)); | |||
| } | |||
| if (op_name == kDropoutGenMask) { | |||
| kernel_info_list->push_back(CreateKernelInfo({kOpFormat_NCHW, kOpFormat_NCHW}, | |||
| {kInt32->type_id(), kFloat16->type_id()}, {kOpFormat_NCHW}, | |||
| {kUInt8->type_id()})); | |||
| auto op_info_ptr = mindspore::kernel::OpLib::FindOp(op_name, OpImplyType::kAICPU); | |||
| if (op_info_ptr == nullptr) { | |||
| MS_LOG(WARNING) << "Aicpu doestn't have metadata of op [" << op_name << "]"; | |||
| return; | |||
| } | |||
| // For compatibility with the current framework | |||
| if (op_name == kPrint) { | |||
| std::vector<std::string> inputs_format; | |||
| std::vector<TypeId> inputs_type; | |||
| @@ -92,11 +51,20 @@ void AicpuMetadataInfo(const CNodePtr &kernel_node, std::vector<std::shared_ptr< | |||
| outputs_format.emplace_back(kOpFormat_DEFAULT); | |||
| outputs_type.push_back(AnfAlgo::GetOutputInferDataType(kernel_node, output_index)); | |||
| } | |||
| kernel_info_list->push_back(CreateKernelInfo(inputs_format, inputs_type, outputs_format, outputs_type)); | |||
| auto builder = KernelBuildInfo::KernelBuildInfoBuilder(); | |||
| builder.SetInputsFormat(inputs_format); | |||
| builder.SetInputsDeviceType(inputs_type); | |||
| builder.SetOutputsFormat(outputs_format); | |||
| builder.SetOutputsDeviceType(outputs_type); | |||
| builder.SetProcessor(AICPU); | |||
| builder.SetKernelType(AICPU_KERNEL); | |||
| builder.SetFusionType(OPAQUE); | |||
| kernel_info_list->push_back(builder.Build()); | |||
| return; | |||
| } | |||
| if (kernel_info_list->empty()) { | |||
| MS_LOG(INFO) << "Aicpu dose not has metadata of op[ " << op_name << "]."; | |||
| if (!ParseMetadata(kernel_node, op_info_ptr, AICPU, kernel_info_list)) { | |||
| MS_LOG(WARNING) << "Aicpu parsed metadata op [" << op_name << "] failed"; | |||
| return; | |||
| } | |||
| } | |||
| } // namespace kernel | |||
| @@ -24,7 +24,8 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| constexpr auto kInitDataSetQueue = "InitData"; | |||
| constexpr auto kInitDataSetQueue = "InitDataSetQueue"; | |||
| constexpr auto kInitData = "InitData"; | |||
| constexpr auto kGetNext = "GetNext"; | |||
| constexpr auto kDropoutGenMask = "DropoutGenMask"; | |||
| constexpr auto kPrint = "Print"; | |||
| @@ -417,6 +417,8 @@ void SetKernelBuildInfo(const std::shared_ptr<KernelBuildInfo::KernelBuildInfoBu | |||
| if (imply_type == kAKG) { | |||
| builder->SetKernelType(AUTO_DIFF_KERNEL); | |||
| } else if (imply_type == kAICPU) { | |||
| builder->SetKernelType(AICPU_KERNEL); | |||
| } else { | |||
| builder->SetKernelType(TBE_KERNEL); | |||
| } | |||
| @@ -471,6 +473,13 @@ bool ParseMetadata(const CNodePtr &kernel_node, const std::shared_ptr<const OpIn | |||
| return false; | |||
| } | |||
| kernel_info_list->push_back(builder->Build()); | |||
| } | |||
| } else { | |||
| if (processor == AICPU) { | |||
| auto builder = std::make_shared<KernelBuildInfo::KernelBuildInfoBuilder>(); | |||
| MS_EXCEPTION_IF_NULL(builder); | |||
| SetKernelBuildInfo(builder, processor, op_info_ptr); | |||
| kernel_info_list->push_back(builder->Build()); | |||
| } | |||
| } | |||
| @@ -23,7 +23,6 @@ | |||
| #include <vector> | |||
| #include "kernel/gpu/gpu_kernel.h" | |||
| #include "kernel/gpu/gpu_kernel_factory.h" | |||
| #include "dataset/util/make_unique.h" | |||
| #include "kernel/gpu/kernel_constants.h" | |||
| namespace mindspore { | |||
| @@ -74,8 +73,8 @@ class BiasAddGpuKernel : public GpuKernel { | |||
| // Expand to 4 dims for cudnnSetTensorNdDescriptorEx. | |||
| auto cudnn_dims = std::max(num_dims, 4UL); | |||
| std::unique_ptr<int[]> x_dims = mindspore::make_unique<int[]>(cudnn_dims); | |||
| std::unique_ptr<int[]> b_dims = mindspore::make_unique<int[]>(cudnn_dims); | |||
| std::unique_ptr<int[]> x_dims = std::make_unique<int[]>(cudnn_dims); | |||
| std::unique_ptr<int[]> b_dims = std::make_unique<int[]>(cudnn_dims); | |||
| for (size_t i = 0; i < cudnn_dims; i++) { | |||
| x_dims[i] = (i < num_dims) ? SizeToInt(x_shape[i]) : 1; | |||
| b_dims[i] = (i == pos) ? SizeToInt(x_shape[i]) : 1; | |||
| @@ -26,7 +26,6 @@ | |||
| #include "kernel/gpu/gpu_kernel.h" | |||
| #include "kernel/gpu/gpu_kernel_factory.h" | |||
| #include "kernel/gpu/kernel_constants.h" | |||
| #include "dataset/util/make_unique.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| @@ -84,8 +83,8 @@ class BiasAddGradGpuKernel : public GpuKernel { | |||
| // Expand to 4 dims for cudnnSetTensorNdDescriptorEx. | |||
| auto cudnn_dims = std::max(num_dims, 4UL); | |||
| std::unique_ptr<int[]> dy_dims = mindspore::make_unique<int[]>(cudnn_dims); | |||
| std::unique_ptr<int[]> db_dims = mindspore::make_unique<int[]>(cudnn_dims); | |||
| std::unique_ptr<int[]> dy_dims = std::make_unique<int[]>(cudnn_dims); | |||
| std::unique_ptr<int[]> db_dims = std::make_unique<int[]>(cudnn_dims); | |||
| for (size_t i = 0; i < cudnn_dims; i++) { | |||
| dy_dims[i] = (i < num_dims) ? SizeToInt(dy_shape[i]) : 1; | |||
| db_dims[i] = (i == pos) ? SizeToInt(dy_shape[i]) : 1; | |||
| @@ -22,7 +22,6 @@ | |||
| #include <memory> | |||
| #include "kernel/gpu/gpu_kernel.h" | |||
| #include "kernel/gpu/gpu_kernel_factory.h" | |||
| #include "dataset/util/make_unique.h" | |||
| #include "kernel/gpu/kernel_constants.h" | |||
| namespace mindspore { | |||
| @@ -144,8 +143,8 @@ class LstmGpuKernel : public GpuKernel { | |||
| int x_dims[3]{batch_size_, input_size_, 1}; | |||
| int y_dims[3]{batch_size_, hidden_size_ * (bidirectional_ ? 2 : 1), 1}; | |||
| x_desc_ = mindspore::make_unique<cudnnTensorDescriptor_t[]>(seq_len_); | |||
| y_desc_ = mindspore::make_unique<cudnnTensorDescriptor_t[]>(seq_len_); | |||
| x_desc_ = std::make_unique<cudnnTensorDescriptor_t[]>(seq_len_); | |||
| y_desc_ = std::make_unique<cudnnTensorDescriptor_t[]>(seq_len_); | |||
| for (size_t i = 0; i < IntToSize(seq_len_); ++i) { | |||
| CHECK_CUDNN_RET_WITH_EXCEPT(cudnnCreateTensorDescriptor(&x_desc_[i]), "create x_desc failed"); | |||
| @@ -23,7 +23,6 @@ | |||
| #include "kernel/gpu/gpu_kernel.h" | |||
| #include "kernel/gpu/gpu_kernel_factory.h" | |||
| #include "kernel/gpu/kernel_constants.h" | |||
| #include "dataset/util/make_unique.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| @@ -212,9 +211,9 @@ class LstmGradDataGpuKernel : public GpuKernel { | |||
| int x_dims[3]{batch_size_, input_size_, 1}; | |||
| int y_dims[3]{batch_size_, hidden_size_ * (bidirectional_ ? 2 : 1), 1}; | |||
| dx_desc_ = mindspore::make_unique<cudnnTensorDescriptor_t[]>(seq_len_); | |||
| y_desc_ = mindspore::make_unique<cudnnTensorDescriptor_t[]>(seq_len_); | |||
| dy_desc_ = mindspore::make_unique<cudnnTensorDescriptor_t[]>(seq_len_); | |||
| dx_desc_ = std::make_unique<cudnnTensorDescriptor_t[]>(seq_len_); | |||
| y_desc_ = std::make_unique<cudnnTensorDescriptor_t[]>(seq_len_); | |||
| dy_desc_ = std::make_unique<cudnnTensorDescriptor_t[]>(seq_len_); | |||
| for (size_t i = 0; i < IntToSize(seq_len_); ++i) { | |||
| CHECK_CUDNN_RET_WITH_EXCEPT(cudnnCreateTensorDescriptor(&dx_desc_[i]), "create x_desc failed"); | |||
| @@ -22,7 +22,6 @@ | |||
| #include <memory> | |||
| #include "kernel/gpu/gpu_kernel.h" | |||
| #include "kernel/gpu/gpu_kernel_factory.h" | |||
| #include "dataset/util/make_unique.h" | |||
| #include "kernel/gpu/kernel_constants.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| @@ -169,8 +168,8 @@ class LstmGradWeightGpuKernel : public GpuKernel { | |||
| int x_dims[3]{batch_size_, input_size_, 1}; | |||
| int y_dims[3]{batch_size_, hidden_size_ * (bidirectional_ ? 2 : 1), 1}; | |||
| x_desc_ = mindspore::make_unique<cudnnTensorDescriptor_t[]>(seq_len_); | |||
| y_desc_ = mindspore::make_unique<cudnnTensorDescriptor_t[]>(seq_len_); | |||
| x_desc_ = std::make_unique<cudnnTensorDescriptor_t[]>(seq_len_); | |||
| y_desc_ = std::make_unique<cudnnTensorDescriptor_t[]>(seq_len_); | |||
| for (size_t i = 0; i < IntToSize(seq_len_); ++i) { | |||
| CHECK_CUDNN_RET_WITH_EXCEPT(cudnnCreateTensorDescriptor(&x_desc_[i]), "create x_desc failed"); | |||
| @@ -24,7 +24,7 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| enum OpImplyType { kAKG = 0, kTBE }; | |||
| enum OpImplyType { kAKG = 0, kTBE = 1, kAICPU }; | |||
| enum OpIOType { kInput = 0, kOutput }; | |||
| class OpAttr { | |||
| @@ -39,6 +39,7 @@ constexpr auto kDtypeFormat = "dtype_format"; | |||
| constexpr auto kAttr = "attr"; | |||
| constexpr auto kIputs = "inputs"; | |||
| constexpr auto kOutputs = "outputs"; | |||
| constexpr auto kAiCPU = "AiCPU"; | |||
| constexpr auto kTbe = "TBE"; | |||
| constexpr auto kAkg = "akg"; | |||
| constexpr auto kAutodiff = "AutoDiff"; | |||
| @@ -60,6 +61,8 @@ std::string ImplTypeToStr(OpImplyType impl_type) { | |||
| return kTbe; | |||
| case kAKG: | |||
| return kAkg; | |||
| case kAICPU: | |||
| return kAiCPU; | |||
| default: | |||
| return "unknow"; | |||
| } | |||
| @@ -76,6 +79,9 @@ bool OpLib::RegOp(const std::string& json_string, const std::string& impl_path) | |||
| } else if (imply_type_string == kAutodiff) { | |||
| OpImplyType imply_type = kAKG; | |||
| ret = DecodeOpInfo(op_json, imply_type, impl_path); | |||
| } else if (imply_type_string == kAiCPU) { | |||
| OpImplyType imply_type = kAICPU; | |||
| ret = DecodeOpInfo(op_json, imply_type, impl_path); | |||
| } else { | |||
| MS_LOG(DEBUG) << "Not support imply_type"; | |||
| } | |||
| @@ -154,7 +160,9 @@ bool OpLib::DecodeAttr(const nlohmann::json& obj, const OpImplyType imply_type, | |||
| std::shared_ptr<OpAttr> op_attr = std::make_shared<OpAttr>(); | |||
| MS_EXCEPTION_IF_NULL(op_attr); | |||
| op_attr->set_name(obj.at(kName)); | |||
| op_attr->set_param_type(obj.at(kParamType)); | |||
| if (imply_type != kAICPU) { | |||
| op_attr->set_param_type(obj.at(kParamType)); | |||
| } | |||
| op_attr->set_type(obj.at(kType)); | |||
| if (imply_type == kTBE) { | |||
| op_attr->set_value(obj.at(kValue)); | |||
| @@ -242,9 +250,10 @@ std::shared_ptr<OpInfo> OpLib::FindOp(const std::string& op_name, OpImplyType im | |||
| auto context = MsContext::GetInstance(); | |||
| MS_EXCEPTION_IF_NULL(context); | |||
| bool is_gpu = (context->device_target() == kGPUDevice); | |||
| if ((is_gpu && imply_type == kTBE) || (!is_gpu && imply_type != kTBE)) { | |||
| MS_LOG(DEBUG) << "FindOp failed: opname:" << op_name << "imply_type:" << ImplTypeToStr(imply_type) | |||
| << "current op num:" << op_info_.size(); | |||
| if ((is_gpu && (imply_type == kTBE || imply_type == kAICPU)) || | |||
| (!is_gpu && (imply_type != kTBE && imply_type != kAICPU))) { | |||
| MS_LOG(ERROR) << "FindOp failed: opname:" << op_name << ", imply_type:" << ImplTypeToStr(imply_type) | |||
| << ", current op num:" << op_info_.size(); | |||
| return nullptr; | |||
| } | |||
| for (const auto& op_info : op_info_) { | |||
| @@ -253,8 +262,8 @@ std::shared_ptr<OpInfo> OpLib::FindOp(const std::string& op_name, OpImplyType im | |||
| return op_info; | |||
| } | |||
| } | |||
| MS_LOG(DEBUG) << "FindOp failed: opname:" << op_name << "imply_type:" << ImplTypeToStr(imply_type) | |||
| << "current op num:" << op_info_.size(); | |||
| MS_LOG(DEBUG) << "FindOp failed: opname:" << op_name << ", imply_type:" << ImplTypeToStr(imply_type) | |||
| << ", current op num:" << op_info_.size(); | |||
| return nullptr; | |||
| } | |||
| @@ -0,0 +1,178 @@ | |||
| /** | |||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "mindrecord/include/shard_error.h" | |||
| namespace mindspore { | |||
| namespace mindrecord { | |||
| std::string ErrnoToMessage(MSRStatus status) { | |||
| switch (status) { | |||
| case FAILED: | |||
| return "operator failed"; | |||
| break; | |||
| case SUCCESS: | |||
| return "operator success"; | |||
| break; | |||
| case OPEN_FILE_FAILED: | |||
| return "open file failed"; | |||
| break; | |||
| case CLOSE_FILE_FAILED: | |||
| return "close file failed"; | |||
| break; | |||
| case WRITE_METADATA_FAILED: | |||
| return "write metadata failed"; | |||
| break; | |||
| case WRITE_RAWDATA_FAILED: | |||
| return "write rawdata failed"; | |||
| break; | |||
| case GET_SCHEMA_FAILED: | |||
| return "get schema failed"; | |||
| break; | |||
| case ILLEGAL_RAWDATA: | |||
| return "illegal raw data"; | |||
| break; | |||
| case PYTHON_TO_JSON_FAILED: | |||
| return "pybind: python object to json failed"; | |||
| break; | |||
| case DIR_CREATE_FAILED: | |||
| return "directory create failed"; | |||
| break; | |||
| case OPEN_DIR_FAILED: | |||
| return "open directory failed"; | |||
| break; | |||
| case INVALID_STATISTICS: | |||
| return "invalid statistics object"; | |||
| break; | |||
| case OPEN_DATABASE_FAILED: | |||
| return "open database failed"; | |||
| break; | |||
| case CLOSE_DATABASE_FAILED: | |||
| return "close database failed"; | |||
| break; | |||
| case DATABASE_OPERATE_FAILED: | |||
| return "database operate failed"; | |||
| break; | |||
| case BUILD_SCHEMA_FAILED: | |||
| return "build schema failed"; | |||
| break; | |||
| case DIVISOR_IS_ILLEGAL: | |||
| return "divisor is illegal"; | |||
| break; | |||
| case INVALID_FILE_PATH: | |||
| return "file path is invalid"; | |||
| break; | |||
| case SECURE_FUNC_FAILED: | |||
| return "secure function failed"; | |||
| break; | |||
| case ALLOCATE_MEM_FAILED: | |||
| return "allocate memory failed"; | |||
| break; | |||
| case ILLEGAL_FIELD_NAME: | |||
| return "illegal field name"; | |||
| break; | |||
| case ILLEGAL_FIELD_TYPE: | |||
| return "illegal field type"; | |||
| break; | |||
| case SET_METADATA_FAILED: | |||
| return "set metadata failed"; | |||
| break; | |||
| case ILLEGAL_SCHEMA_DEFINITION: | |||
| return "illegal schema definition"; | |||
| break; | |||
| case ILLEGAL_COLUMN_LIST: | |||
| return "illegal column list"; | |||
| break; | |||
| case SQL_ERROR: | |||
| return "sql error"; | |||
| break; | |||
| case ILLEGAL_SHARD_COUNT: | |||
| return "illegal shard count"; | |||
| break; | |||
| case ILLEGAL_SCHEMA_COUNT: | |||
| return "illegal schema count"; | |||
| break; | |||
| case VERSION_ERROR: | |||
| return "data version is not matched"; | |||
| break; | |||
| case ADD_SCHEMA_FAILED: | |||
| return "add schema failed"; | |||
| break; | |||
| case ILLEGAL_Header_SIZE: | |||
| return "illegal header size"; | |||
| break; | |||
| case ILLEGAL_Page_SIZE: | |||
| return "illegal page size"; | |||
| break; | |||
| case ILLEGAL_SIZE_VALUE: | |||
| return "illegal size value"; | |||
| break; | |||
| case INDEX_FIELD_ERROR: | |||
| return "add index fields failed"; | |||
| break; | |||
| case GET_CANDIDATE_CATEGORYFIELDS_FAILED: | |||
| return "get candidate category fields failed"; | |||
| break; | |||
| case GET_CATEGORY_INFO_FAILED: | |||
| return "get category information failed"; | |||
| break; | |||
| case ILLEGAL_CATEGORY_ID: | |||
| return "illegal category id"; | |||
| break; | |||
| case ILLEGAL_ROWNUMBER_OF_PAGE: | |||
| return "illegal row number of page"; | |||
| break; | |||
| case ILLEGAL_SCHEMA_ID: | |||
| return "illegal schema id"; | |||
| break; | |||
| case DESERIALIZE_SCHEMA_FAILED: | |||
| return "deserialize schema failed"; | |||
| break; | |||
| case DESERIALIZE_STATISTICS_FAILED: | |||
| return "deserialize statistics failed"; | |||
| break; | |||
| case ILLEGAL_DB_FILE: | |||
| return "illegal db file"; | |||
| break; | |||
| case OVERWRITE_DB_FILE: | |||
| return "overwrite db file"; | |||
| break; | |||
| case OVERWRITE_MINDRECORD_FILE: | |||
| return "overwrite mindrecord file"; | |||
| break; | |||
| case ILLEGAL_MINDRECORD_FILE: | |||
| return "illegal mindrecord file"; | |||
| break; | |||
| case PARSE_JSON_FAILED: | |||
| return "parse json failed"; | |||
| break; | |||
| case ILLEGAL_PARAMETERS: | |||
| return "illegal parameters"; | |||
| break; | |||
| case GET_PAGE_BY_GROUP_ID_FAILED: | |||
| return "get page by group id failed"; | |||
| break; | |||
| case GET_SYSTEM_STATE_FAILED: | |||
| return "get system state failed"; | |||
| break; | |||
| case IO_FAILED: | |||
| return "io operate failed"; | |||
| break; | |||
| default: | |||
| return "invalid error no"; | |||
| } | |||
| } | |||
| } // namespace mindrecord | |||
| } // namespace mindspore | |||