!3808 remove predict

Merge pull request !3808 from 张学同/to_merge
tags/v0.7.0-beta
mindspore-ci-bot (Gitee) committed 5 years ago
parent commit 11f786c9bd
100 changed files with 6 additions and 9642 deletions
  1. +3 -12 mindspore/ccsrc/CMakeLists.txt
  2. +0 -7 mindspore/ccsrc/backend/session/ascend_session.cc
  3. +0 -3 mindspore/ccsrc/backend/session/cpu_session.cc
  4. +0 -5 mindspore/ccsrc/backend/session/gpu_session.cc
  5. +0 -4 mindspore/ccsrc/pipeline/jit/init.cc
  6. +0 -14 mindspore/ccsrc/predict/CMakeLists.txt
  7. +0 -229 mindspore/ccsrc/predict/converter/attr_utils/convert_util.cc
  8. +0 -60 mindspore/ccsrc/predict/converter/attr_utils/convert_util.h
  9. +0 -65 mindspore/ccsrc/predict/converter/attr_utils/op_attr_type.h
  10. +0 -49 mindspore/ccsrc/predict/converter/executor_tensor.cc
  11. +0 -70 mindspore/ccsrc/predict/converter/executor_tensor.h
  12. +0 -561 mindspore/ccsrc/predict/converter/kernel2ms.cc
  13. +0 -118 mindspore/ccsrc/predict/converter/kernel2ms.h
  14. +0 -110 mindspore/ccsrc/predict/converter/lite_model/op_attr_packer.cc
  15. +0 -58 mindspore/ccsrc/predict/converter/lite_model/op_attr_packer.h
  16. +0 -59 mindspore/ccsrc/predict/converter/lite_model/operations/activation_packer.cc
  17. +0 -35 mindspore/ccsrc/predict/converter/lite_model/operations/add_packer.cc
  18. +0 -34 mindspore/ccsrc/predict/converter/lite_model/operations/addfold_packer.cc
  19. +0 -34 mindspore/ccsrc/predict/converter/lite_model/operations/argmax_packer.cc
  20. +0 -34 mindspore/ccsrc/predict/converter/lite_model/operations/batchnormfold_packer.cc
  21. +0 -37 mindspore/ccsrc/predict/converter/lite_model/operations/biasadd_packer.cc
  22. +0 -37 mindspore/ccsrc/predict/converter/lite_model/operations/cast_packer.cc
  23. +0 -63 mindspore/ccsrc/predict/converter/lite_model/operations/conv2d_packer.cc
  24. +0 -34 mindspore/ccsrc/predict/converter/lite_model/operations/fakequantwithminmax_packer.cc
  25. +0 -34 mindspore/ccsrc/predict/converter/lite_model/operations/fakequantwithminmaxperchannel_packer.cc
  26. +0 -37 mindspore/ccsrc/predict/converter/lite_model/operations/fusedbatchnorm_packer.cc
  27. +0 -39 mindspore/ccsrc/predict/converter/lite_model/operations/matmul_packer.cc
  28. +0 -37 mindspore/ccsrc/predict/converter/lite_model/operations/mean_packer.cc
  29. +0 -34 mindspore/ccsrc/predict/converter/lite_model/operations/mul_packer.cc
  30. +0 -35 mindspore/ccsrc/predict/converter/lite_model/operations/mulflod_packer.cc
  31. +0 -61 mindspore/ccsrc/predict/converter/lite_model/operations/pooling_packer.cc
  32. +0 -36 mindspore/ccsrc/predict/converter/lite_model/operations/reshape_packer.cc
  33. +0 -36 mindspore/ccsrc/predict/converter/lite_model/operations/scale_packer.cc
  34. +0 -36 mindspore/ccsrc/predict/converter/lite_model/operations/softmax_packer.cc
  35. +0 -38 mindspore/ccsrc/predict/converter/lite_model/operations/squeeze_packer.cc
  36. +0 -31 mindspore/ccsrc/predict/generator/ir/ir_model.cc
  37. +0 -37 mindspore/ccsrc/predict/generator/ir/ir_model.h
  38. +0 -244 mindspore/ccsrc/predict/generator/ir/ir_task_info.cc
  39. +0 -295 mindspore/ccsrc/predict/generator/ir/ir_task_info.h
  40. +0 -43 mindspore/ccsrc/predict/generator/utils/ir_model_util.cc
  41. +0 -92 mindspore/ccsrc/predict/generator/utils/ir_model_util.h
  42. +0 -69 mindspore/ccsrc/predict/predict.cc
  43. +0 -32 mindspore/ccsrc/predict/predict.h
  44. +0 -42 mindspore/ccsrc/predict/proto/DModel_ir.proto
  45. +0 -125 mindspore/ccsrc/predict/proto/Graph_ir.proto
  46. +0 -155 mindspore/ccsrc/predict/proto/ge_runtime_taskinfo.proto
  47. +0 -17 mindspore/ccsrc/predict/readme.txt
  48. +0 -1 mindspore/ccsrc/predict/schema/inner/readme.txt
  49. +0 -212 mindspore/ccsrc/predict/schema/ms.fbs
  50. +0 -699 mindspore/ccsrc/predict/schema/op.fbs
  51. +0 -8 mindspore/ccsrc/runtime/device/ascend/ascend_stream_assign.cc
  52. +0 -1 mindspore/ccsrc/runtime/device/kernel_runtime.h
  53. +0 -2 mindspore/ccsrc/utils/context/ms_context.cc
  54. +0 -8 mindspore/ccsrc/utils/context/ms_context.h
  55. +1 -20 mindspore/context.py
  56. +2 -7 mindspore/train/serialization.py
  57. +0 -14 predict/.gitignore
  58. +0 -79 predict/CMakeLists.txt
  59. +0 -38 predict/benchmark/CMakeLists.txt
  60. +0 -0 predict/benchmark/README.md
  61. +0 -451 predict/benchmark/benchmark.cc
  62. +0 -142 predict/benchmark/benchmark.h
  63. +0 -24 predict/benchmark/main.cc
  64. +0 -17 predict/common/CMakeLists.txt
  65. +0 -57 predict/common/common.h
  66. +0 -79 predict/common/file_utils.cc
  67. +0 -39 predict/common/file_utils.h
  68. +0 -179 predict/common/flag_parser.cc
  69. +0 -291 predict/common/flag_parser.h
  70. +0 -77 predict/common/func_utils.cc
  71. +0 -35 predict/common/func_utils.h
  72. +0 -167 predict/common/graph_util.cc
  73. +0 -71 predict/common/graph_util.h
  74. +0 -26 predict/common/module_registry.cc
  75. +0 -97 predict/common/module_registry.h
  76. +0 -47 predict/common/mslog.cc
  77. +0 -230 predict/common/mslog.h
  78. +0 -44 predict/common/op_utils.h
  79. +0 -119 predict/common/option.h
  80. +0 -50 predict/common/storage.cc
  81. +0 -36 predict/common/storage.h
  82. +0 -228 predict/common/utils.cc
  83. +0 -154 predict/common/utils.h
  84. +0 -56 predict/include/context.h
  85. +0 -52 predict/include/errorcode.h
  86. +0 -139 predict/include/session.h
  87. +0 -259 predict/include/tensor.h
  88. +0 -1 predict/module/CMakeLists.txt
  89. +0 -27 predict/module/tvm_kernel/.gitignore
  90. +0 -4 predict/module/tvm_kernel/.gitmodules
  91. +0 -25 predict/module/tvm_kernel/CMakeLists.txt
  92. +0 -140 predict/module/tvm_kernel/lite/CMakeLists.txt
  93. +0 -94 predict/module/tvm_kernel/lite/include/lite/api/km_api.h
  94. +0 -17 predict/module/tvm_kernel/lite/python/__init__.py
  95. +0 -17 predict/module/tvm_kernel/lite/python/arm_cpu/__init__.py
  96. +0 -470 predict/module/tvm_kernel/lite/python/arm_cpu/conv2d.py
  97. +0 -477 predict/module/tvm_kernel/lite/python/arm_cpu/deconv.py
  98. +0 -289 predict/module/tvm_kernel/lite/python/arm_cpu/depthwise_conv2d.py
  99. +0 -472 predict/module/tvm_kernel/lite/python/arm_cpu/matmul.py
  100. +0 -17 predict/module/tvm_kernel/lite/python/at_ops/__init__.py

+3 -12 mindspore/ccsrc/CMakeLists.txt

@@ -60,11 +60,6 @@ if(ENABLE_GPU)
     add_compile_definitions(ENABLE_GPU)
 endif ()
 
-## make flatuffer files
-include_directories("${CMAKE_BINARY_DIR}/predict/schema/inner")
-file(GLOB_RECURSE FLATBUFFER_IN RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "predict/schema/*.fbs")
-set(FLATBUFFER_OU "${CMAKE_BINARY_DIR}/predict/schema/inner")
-ms_build_flatbuffers("${FLATBUFFER_IN}" "${FLATBUFFER_IN}" flat_input "${FLATBUFFER_OU}")
 
 ## make protobuf files
 file(COPY "${ms_onnx_INC}/onnx/onnx.proto" DESTINATION ${CMAKE_BINARY_DIR}/proto)

@@ -104,13 +99,9 @@ endif ()
 
 if (ENABLE_D)
     include_directories("${CMAKE_BINARY_DIR}/backend/kernel_compiler/aicpu")
-    include_directories("${CMAKE_BINARY_DIR}/predict/generator/ir")
     file(GLOB_RECURSE PROTO_IN RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "backend/kernel_compiler/aicpu/proto/*.proto")
     ms_protobuf_generate(PROTOSRCS PROTOHDRS ${PROTO_IN})
-    file(GLOB_RECURSE PROTO_INNER RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "predict/proto/*.proto")
-    ms_protobuf_generate(PREDICT_PROTOSRCS PREDICT_PROTOHDRS ${PROTO_INNER})
-
     file(GLOB_RECURSE PROTO_DUMP RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "runtime/device/ascend/dump/proto/*.proto")
     ms_protobuf_generate(DUMP_PROTOSRCS PROTOHDRS ${PROTO_DUMP})

@@ -139,7 +130,7 @@ set(SUB_COMP
     frontend/operator
     pipeline/jit
     pipeline/pynative
-    common debug gvar predict pybind_api utils vm
+    common debug gvar pybind_api utils vm
 )
 
 foreach (_comp ${SUB_COMP})

@@ -147,7 +138,7 @@ foreach (_comp ${SUB_COMP})
     string(REPLACE "/" "_" sub ${_comp})
     if (TARGET _mindspore_${sub}_obj)
         list(APPEND SUB_OBJECTS_SRC $<TARGET_OBJECTS:_mindspore_${sub}_obj>)
-        add_dependencies(_mindspore_${sub}_obj proto_input flat_input)
+        add_dependencies(_mindspore_${sub}_obj proto_input )
     endif ()
 endforeach ()
 add_subdirectory(${CMAKE_SOURCE_DIR}/mindspore/core/base base)

@@ -158,7 +149,7 @@ add_subdirectory(${CMAKE_SOURCE_DIR}/mindspore/core/utils util)
 list(APPEND SUB_OBJECTS_SRC $<TARGET_OBJECTS:_mindspore_core_utils_obj>)
 add_subdirectory(${CMAKE_SOURCE_DIR}/mindspore/core/ir ir)
 list(APPEND SUB_OBJECTS_SRC $<TARGET_OBJECTS:_mindspore_ir_obj>)
-add_dependencies(_mindspore_core_utils_obj _mindspore_base_obj _mindspore_ir_obj _mindspore_abstract_obj proto_input flat_input)
+add_dependencies(_mindspore_core_utils_obj _mindspore_base_obj _mindspore_ir_obj _mindspore_abstract_obj proto_input )
 
 set_property(SOURCE ${SUB_OBJECTS_SRC} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_ME)
 add_library(mindspore STATIC ${SUB_OBJECTS_SRC})


+0 -7 mindspore/ccsrc/backend/session/ascend_session.cc

@@ -34,7 +34,6 @@
 #include "runtime/device/kernel_adjust.h"
 #include "runtime/device/ascend/ascend_stream_assign.h"
 #include "runtime/device/ascend/ascend_label_assign.h"
-#include "predict/predict.h"
 #include "backend/session/anf_runtime_algorithm.h"
 #include "ir/scalar.h"
 #include "debug/anf_ir_dump.h"

@@ -303,8 +302,6 @@ void AscendSession::CompileChildGraph(const KernelGraphPtr &child_graph) {
       save_graphs_path + "/" + "select_kernel_after" + "_graph_" + std::to_string(child_graph->graph_id()) + ".ir";
     DumpIR(file_path, child_graph);
   }
-  // convert kernel Graph to model
-  predictmodel::StepConvertGraph(child_graph);
   // optimize graph
   HardwareOptimize(child_graph);
   // assign static memory of parameters

@@ -333,8 +330,6 @@ void AscendSession::RunGraph(const GraphId &graph_id, const std::vector<tensor::
     InitPSParamAndOptim(kernel_graph, inputs);
   }
 #endif
-  // convert inputs to model
-  predictmodel::StepConvertWeight(inputs);
   {
     py::gil_scoped_release release;
     // run task on device

@@ -1036,8 +1031,6 @@ void AscendSession::HardwareOptimize(NotNull<KernelGraphPtr> graph,
   memo->insert(graph.get());
 
   MS_LOG(INFO) << "Start to do HardwareOptimize in graph: " << graph->graph_id();
-  // convert kernel Graph to model
-  predictmodel::StepConvertGraph(graph.get());
 
   HardwareOptimize(graph.get());
   for (auto &child_graph : graph->child_graph_order()) {


+0 -3 mindspore/ccsrc/backend/session/cpu_session.cc

@@ -23,7 +23,6 @@
 #include "common/utils.h"
 #include "backend/session/anf_runtime_algorithm.h"
 #include "runtime/device/kernel_runtime.h"
-#include "predict/predict.h"
 #include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
 #include "runtime/device/cpu/kernel_select_cpu.h"
 #include "backend/optimizer/common/optimizer.h"

@@ -79,7 +78,6 @@ GraphId CPUSession::CompileGraph(const AnfNodePtrList &lst, const AnfNodePtrList
     Optimize(graph);
   }
 #endif
-  predictmodel::StepConvertGraph(graph);
   MS_LOG(INFO) << "Build kernel";
   BuildKernel(graph.get());
   MS_LOG(INFO) << "Assign kernel address";

@@ -100,7 +98,6 @@ void CPUSession::RunGraph(const GraphId &graph_id, const std::vector<tensor::Ten
   std::vector<tensor::TensorPtr> need_sync_outputs;
   runtime_.BindInputOutput(kernel_graph.get(), inputs, outputs, &need_sync_outputs);
   MS_LOG(INFO) << "Run graph start";
-  predictmodel::StepConvertWeight(inputs);
   auto execution_order = kernel_graph->execution_order();
   Reorder(&execution_order);




+0 -5 mindspore/ccsrc/backend/session/gpu_session.cc

@@ -31,7 +31,6 @@
 #include "backend/optimizer/gpu/replace_momentum_cast_fusion.h"
 #include "backend/optimizer/gpu/replace_addn_fusion.h"
 #include "runtime/device/kernel_runtime_manager.h"
-#include "predict/predict.h"
 #include "common/utils.h"
 #include "common/trans.h"
 #include "utils/context/ms_context.h"

@@ -190,8 +189,6 @@ GraphId GPUSession::CompileGraph(const AnfNodePtrList &lst, const AnfNodePtrList
   // Assign parameter keys.
   AssignParamKey(graph);
 #endif
-  // Convert kernel Graph to model
-  predictmodel::StepConvertGraph(graph);
   // Start gpu kernel runtime
   StartKernelRT();
   // Dump .pb graph before hardware optimization

@@ -245,8 +242,6 @@ void GPUSession::RunGraph(const GraphId &graph_id, const std::vector<tensor::Ten
   }
 #endif
   MS_EXCEPTION_IF_NULL(kernel_graph);
-  // Convert inputs to model
-  predictmodel::StepConvertWeight(inputs);
   {
     py::gil_scoped_release gil_release;
     // Run graph on GPU


+0 -4 mindspore/ccsrc/pipeline/jit/init.cc

@@ -123,10 +123,6 @@ PYBIND11_MODULE(_c_expression, m) {
          "Set whether to enable reduce precision.")
     .def("get_save_graphs_path", &mindspore::MsContext::save_graphs_path, "Get save graphs path.")
     .def("set_save_graphs_path", &mindspore::MsContext::set_save_graphs_path, "Set save graphs path.")
-    .def("get_save_ms_model_flag", &mindspore::MsContext::save_ms_model_flag, "Get whether to save ms model.")
-    .def("set_save_ms_model_flag", &mindspore::MsContext::set_save_ms_model_flag, "Set whether to save ms model.")
-    .def("get_save_ms_model_path", &mindspore::MsContext::save_ms_model_path, "Get path to save ms model.")
-    .def("set_save_ms_model_path", &mindspore::MsContext::set_save_ms_model_path, "Set path to save ms model")
     .def("get_enable_dump", &mindspore::MsContext::enable_dump, "Get whether to enable dump.")
     .def("set_enable_dump", &mindspore::MsContext::set_enable_dump, "Set whether to enable dump.")
     .def("get_save_dump_path", &mindspore::MsContext::save_dump_path, "Get path to dump.")


+0 -14 mindspore/ccsrc/predict/CMakeLists.txt

@@ -1,14 +0,0 @@
file(GLOB_RECURSE _PREDICT_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
"predict.cc"
"generator/utils/ir_model_util.cc"
"converter/*.cc"
"converter/attr_utils/*.cc"
"converter/lite_model/*.cc"
"converter/lite_model/operations/*.cc"
)
if (ENABLE_D)
file(GLOB_RECURSE _D_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "generator/ir/*.cc")
list(APPEND _PREDICT_SRC_LIST ${_D_SRC_LIST})
endif ()
add_library(_mindspore_predict_obj OBJECT ${_PREDICT_SRC_LIST})

+0 -229 mindspore/ccsrc/predict/converter/attr_utils/convert_util.cc

@@ -1,229 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "predict/converter/attr_utils/convert_util.h"

namespace mindspore {
namespace predict {
namespace utils {
TypePtr GetTypePtr(const AnfNodePtr &anf_node) {
MS_EXCEPTION_IF_NULL(anf_node);
TypePtr type_ptr = anf_node->Type();
MS_EXCEPTION_IF_NULL(type_ptr);
if (type_ptr->isa<TensorType>()) {
auto tensor_ptr = type_ptr->cast<TensorTypePtr>();
MS_EXCEPTION_IF_NULL(tensor_ptr);
TypePtr elem = tensor_ptr->element();
return elem;
} else if (type_ptr->isa<Tuple>()) {
auto tuple_ptr = type_ptr->cast<TuplePtr>();
MS_EXCEPTION_IF_NULL(tuple_ptr);
auto tuple_i = (*tuple_ptr)[0];
MS_EXCEPTION_IF_NULL(tuple_i);
if (tuple_i->isa<TensorType>()) {
auto tensor_ptr = tuple_i->cast<TensorTypePtr>();
MS_EXCEPTION_IF_NULL(tensor_ptr);
TypePtr elem = tensor_ptr->element();
MS_EXCEPTION_IF_NULL(elem);
return elem;
} else if (tuple_i->isa<Number>()) {
return type_ptr;
} else {
MS_LOG(EXCEPTION) << "unsupported type: " << type_ptr->ToString();
}
} else if (type_ptr->isa<Number>()) {
return type_ptr;
}
std::string type_name = type_ptr->ToString();
MS_LOG(EXCEPTION)
<< "The output type of node should be a tensor type a number or a tuple of tensor type, but this is: "
<< type_name;
}

MsDataType GetMSDataType(TypeId ori_data_type) {
MsDataType dst_data_type;
switch (ori_data_type) {
case kNumberTypeFloat16:
dst_data_type = mindspore::predict::DataType_DT_FLOAT16;
return dst_data_type;
case kNumberTypeFloat32:
dst_data_type = mindspore::predict::DataType_DT_FLOAT;
return dst_data_type;
case kNumberTypeInt8:
dst_data_type = mindspore::predict::DataType_DT_INT8;
return dst_data_type;
case kNumberTypeInt32:
dst_data_type = mindspore::predict::DataType_DT_INT32;
return dst_data_type;
case kNumberTypeUInt8:
dst_data_type = mindspore::predict::DataType_DT_UINT8;
return dst_data_type;
case kNumberTypeUInt32:
dst_data_type = mindspore::predict::DataType_DT_UINT32;
return dst_data_type;
case kTypeUnknown:
dst_data_type = mindspore::predict::DataType_DT_UNDEFINED;
return dst_data_type;
default:
MS_LOG(EXCEPTION) << "Ms don't support this DataType";
}
}

MsFormat GetMsFormat(const std::string &format_str) {
if (format_str == kOpFormat_DEFAULT) {
MsFormat ms_format = predict::Format_NCHW;
return ms_format;
} else {
// all middle format default to NCHW
return predict::Format_NCHW;
}
}

TensorPtr GetParaAscendTensor(const AnfNodePtr &anf_node) {
MS_EXCEPTION_IF_NULL(anf_node);
if (!anf_node->isa<Parameter>()) {
return nullptr;
}
auto device_type_id = AnfAlgo::GetOutputDeviceDataType(anf_node, 0);
// device type_ptr
auto device_type_ptr = GetTypePtr(anf_node);
// device shape
auto shape = AnfAlgo::GetOutputDeviceShape(anf_node, 0);
std::vector<int> tensor_shape;
(void)std::transform(shape.begin(), shape.end(), std::back_inserter(tensor_shape), SizeToInt);
// device format
auto format = AnfAlgo::GetOutputFormat(anf_node, 0);
// device tensor
TensorPtr device_tensor = std::make_shared<tensor::Tensor>(device_type_id, tensor_shape);
// device info
device_tensor->SetDeviceInfo(format, device_type_ptr);
return device_tensor;
}

TensorPtr GetParaCpuTensor(const AnfNodePtr &anf_node) {
MS_EXCEPTION_IF_NULL(anf_node);
if (!(anf_node->isa<Parameter>())) {
return nullptr;
} else {
auto ori_type_id = AnfAlgo::GetOutputInferDataType(anf_node, 0);
auto ori_type_ptr = GetTypePtr(anf_node);
auto ori_shape = AnfAlgo::GetOutputInferShape(anf_node, 0);
std::vector<int> tensor_shape;
(void)std::transform(ori_shape.begin(), ori_shape.end(), std::back_inserter(tensor_shape), SizeToInt);
auto ori_format = AnfAlgo::GetOutputFormat(anf_node, 0);
TensorPtr cpu_tensor = std::make_shared<tensor::Tensor>(ori_type_id, tensor_shape);
cpu_tensor->SetDeviceInfo(ori_format, ori_type_ptr);
return cpu_tensor;
}
}

TensorPtr GetValueTensor(const ValueNodePtr &const_node) {
MS_EXCEPTION_IF_NULL(const_node);
auto value_ptr = const_node->value();
MS_EXCEPTION_IF_NULL(value_ptr);
if (!value_ptr->isa<tensor::Tensor>()) {
return nullptr;
}
TensorPtr tensor = value_ptr->cast<TensorPtr>();
MS_EXCEPTION_IF_NULL(tensor);
auto data_type = tensor->Dtype();
MS_EXCEPTION_IF_NULL(data_type);
auto type_id = data_type->type_id();
auto shape = tensor->shape();
TensorPtr tensor_constant = std::make_shared<tensor::Tensor>(type_id, shape);
tensor_constant->SetDeviceInfo(tensor->device_info().format_, tensor->device_info().data_type_);
return tensor_constant;
}

TensorPtr GetKernelCpuTensor(const CNodePtr &c_node_ptr, size_t inx) {
if (c_node_ptr == nullptr || inx >= AnfAlgo::GetOutputTensorNum(c_node_ptr)) {
MS_LOG(ERROR) << "GetKernelCpuTensor failed";
return nullptr;
}
auto ori_shape = AnfAlgo::GetOutputInferShape(c_node_ptr, inx);
auto ori_type_id = AnfAlgo::GetOutputInferDataType(c_node_ptr, inx);
std::vector<int> tensor_shape;
(void)std::transform(ori_shape.begin(), ori_shape.end(), std::back_inserter(tensor_shape), SizeToInt);
auto ori_output_type = GetTypePtr(c_node_ptr);
TensorPtr device_tensor = std::make_shared<tensor::Tensor>(ori_type_id, tensor_shape);
auto format = AnfAlgo::GetOutputFormat(c_node_ptr, inx);
device_tensor->SetDeviceInfo(format, ori_output_type);
return device_tensor;
}

TensorPtr GetKernelAscendTensor(const CNodePtr &c_node_ptr, size_t inx) {
if (c_node_ptr == nullptr || inx >= AnfAlgo::GetOutputTensorNum(c_node_ptr)) {
MS_LOG(ERROR) << "GetKernelAscendTensor failed";
return nullptr;
}
auto shape = AnfAlgo::GetOutputDeviceShape(c_node_ptr, inx);
std::vector<int> tensor_shape;
(void)std::transform(shape.begin(), shape.end(), std::back_inserter(tensor_shape), SizeToInt);
auto format = AnfAlgo::GetOutputFormat(c_node_ptr, inx);
auto type_id = AnfAlgo::GetOutputDeviceDataType(c_node_ptr, inx);
auto output_type_ptr = GetTypePtr(c_node_ptr);
TensorPtr device_tensor = std::make_shared<tensor::Tensor>(type_id, tensor_shape);
device_tensor->SetDeviceInfo(format, output_type_ptr);
return device_tensor;
}

TensorPtr GetOutputTensor(const AnfNodePtr &out_node, size_t inx) {
MS_EXCEPTION_IF_NULL(out_node);
auto shape = AnfAlgo::GetOutputInferShape(out_node, inx);
std::vector<int> tensor_shape;
(void)std::transform(shape.begin(), shape.end(), std::back_inserter(tensor_shape), SizeToInt);
auto type_id = AnfAlgo::GetOutputInferDataType(out_node, inx);
auto output_type_ptr = GetTypePtr(out_node);
auto format = AnfAlgo::GetOutputFormat(out_node, inx);
TensorPtr output_tensor = std::make_shared<tensor::Tensor>(type_id, tensor_shape);
output_tensor->SetDeviceInfo(format, output_type_ptr);
return output_tensor;
}

bool FindNodeInMap(const std::unordered_map<MsKernelKey, int> &node_map, const AnfNodePtr &node) {
return std::any_of(node_map.begin(), node_map.end(),
[node](const std::pair<MsKernelKey, int> &kernel_key) { return kernel_key.first == node.get(); });
}

bool SaveDeviceModelUtil(const std::shared_ptr<GraphDefT> &new_ms_graph_ptr, const std::string &save_path_name,
SubGraphDefT *sub_graph) {
MS_EXCEPTION_IF_NULL(new_ms_graph_ptr);
MS_EXCEPTION_IF_NULL(sub_graph);
// save mindspore schema to file
new_ms_graph_ptr->name = "default_graph";
std::unique_ptr<mindspore::predict::SubGraphDefT> sub_graph_ptr(sub_graph);
new_ms_graph_ptr->subgraphs.emplace_back(std::move(sub_graph_ptr));
// get flatbuffer builder
flatbuffers::FlatBufferBuilder builder(1024);
auto offset = mindspore::predict::GraphDef::Pack(builder, new_ms_graph_ptr.get());
builder.Finish(offset);
auto size = builder.GetSize();
if (size == 0) {
MS_LOG(ERROR) << "builder has no size";
return false;
}
auto content = builder.GetBufferPointer();
std::ofstream output(save_path_name);
if (!output.is_open()) {
MS_LOG(EXCEPTION) << "mindspore.mindspoire output failed";
}
(void)output.write((const char *)content, size);
output.close();
return true;
}
} // namespace utils
} // namespace predict
} // namespace mindspore

+0 -60 mindspore/ccsrc/predict/converter/attr_utils/convert_util.h

@@ -1,60 +0,0 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_PREDICT_CONVERTER_ATTR_UTILS_CONVERT_UTIL_H_
#define MINDSPORE_CCSRC_PREDICT_CONVERTER_ATTR_UTILS_CONVERT_UTIL_H_

#include <vector>
#include <utility>
#include <algorithm>
#include <memory>
#include <unordered_map>
#include <string>
#include <fstream>
#include "ir/tensor.h"
#include "backend/session/anf_runtime_algorithm.h"
#include "predict/schema/inner/ms_generated.h"

using TensorPtr = mindspore::tensor::TensorPtr;
using TensorPtrList = std::vector<mindspore::tensor::TensorPtr>;
using AllOutputTensors = std::unordered_map<int, TensorPtrList>;
using OpDefT = mindspore::predict::OpDefT;
using GraphDefT = mindspore::predict::GraphDefT;
using TensorDefT = mindspore::predict::TensorDefT;
using SubGraphDefT = mindspore::predict::SubGraphDefT;
using SubGraphPtr = std::unique_ptr<mindspore::predict::SubGraphDefT>;
using MsDataType = mindspore::predict::DataType;
using MsFormat = mindspore::predict::Format;
using MsKernelKey = void *;
namespace mindspore {
namespace predict {
namespace utils {
TypePtr GetTypePtr(const AnfNodePtr &anf_node);
MsDataType GetMSDataType(TypeId ori_data_type);
MsFormat GetMsFormat(const std::string &format_str);
TensorPtr GetParaAscendTensor(const AnfNodePtr &anf_node);
TensorPtr GetParaCpuTensor(const AnfNodePtr &anf_node);
TensorPtr GetValueTensor(const ValueNodePtr &const_node);
TensorPtr GetKernelCpuTensor(const CNodePtr &c_node_ptr, size_t inx);
TensorPtr GetKernelAscendTensor(const CNodePtr &c_node_ptr, size_t inx);
TensorPtr GetOutputTensor(const AnfNodePtr &out_node, size_t inx);
bool FindNodeInMap(const std::unordered_map<MsKernelKey, int> &Nodemap, const AnfNodePtr &node);
bool SaveDeviceModelUtil(const std::shared_ptr<GraphDefT> &new_ms_graph_ptr, const std::string &save_path_name,
SubGraphDefT *sub_graph_def_t);
} // namespace utils
} // namespace predict
} // namespace mindspore
#endif // MINDSPORE_CCSRC_PREDICT_CONVERTER_ATTR_UTILS_CONVERT_UTIL_H_

+0 -65 mindspore/ccsrc/predict/converter/attr_utils/op_attr_type.h

@@ -1,65 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_PREDICT_CONVERTER_CPU_ATTR_UTILS_OP_ATTR_TYPE_H_
#define MINDSPORE_CCSRC_PREDICT_CONVERTER_CPU_ATTR_UTILS_OP_ATTR_TYPE_H_
namespace mindspore {
namespace predict {
namespace convert {
typedef enum CpuOpType {
CPU_OP_PAD = 0,
CPU_OP_MAXIMUM,
CPU_OP_CONCAT,
CPU_OP_SOFTMAX,
CPU_OP_ACTIVATION,
CPU_OP_CONV2D,
CPU_OP_FUSEDBATCHNORM,
CPU_OP_CAFFEBATCHNORM,
CPU_OP_SQUEEZE,
CPU_OP_BIASADD,
CPU_OP_POOLING,
CPU_OP_DEPTHWISECONV2D,
CPU_OP_DEDEPTHWISECONV2D,
CPU_OP_RESIZE,
CPU_OP_DETECTIONPOSTPROCESS,
CPU_OP_FULLCONNECTION,
CPU_OP_MEAN,
CPU_OP_DECONV2D,
CPU_OP_SCALE,
CPU_OP_ELTWISE,
CPU_OP_ADD,
CPU_OP_SLICE,
CPU_OP_MUL,
CPU_OP_EXP,
CPU_OP_RESHAPE,
CPU_OP_POWER,
CPU_OP_ARGMAX,
CPU_OP_ARGMAX_NETOUTPUT,
CPU_OP_MATMUL,
CPU_OP_CAFFEPRELU,
CPU_OP_STRIDEDSLICE,
CPU_OP_STACK,
CPU_OP_RANGE,
CPU_OP_EXPANDDIMS,
CPU_OP_TILE,
CPU_OP_CAST,
CPU_OP_CAFFECROP,
CPU_OP_PRESERVEED = 37
} CpuOpType_t;
} // namespace convert
} // namespace predict
} // namespace mindspore
#endif // MINDSPORE_CCSRC_PREDICT_CONVERTER_CPU_ATTR_UTILS_OP_ATTR_TYPE_H_

+0 -49 mindspore/ccsrc/predict/converter/executor_tensor.cc

@@ -1,49 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "predict/converter/executor_tensor.h"

namespace mindspore {
namespace executor {
int TensorCache::addExTensor(int tensor_key, const TensorPtr &tensor, int refCount, const std::vector<int> &host_shape,
ExTensorType stable, bool inc) {
MS_EXCEPTION_IF_NULL(tensor);
TensorPtr tmp_tensor = tensor;
ExTensorPtr ex_tensor_ptr =
std::make_shared<ExTensor>(tensor_key, tmp_tensor, refCount, nodeIndex, host_shape, stable);
int pre_index = ex_tensor_ptr->index_;
if (inc) {
nodeIndex++;
}
// no need to judge,just add to map directly
tensors[tensor_key].push_back(ex_tensor_ptr);
return pre_index;
}

std::vector<ExTensorPtr> TensorCache::findTensor(int key) {
std::vector<ExTensorPtr> ex_tensors;
auto iter = tensors.find(key);
if (iter != tensors.end()) {
return iter->second;
} else {
MS_LOG(INFO) << "can not find any tensorlist";
return ex_tensors;
}
}

void TensorCache::deleteTensor(int key) { (void)tensors.erase(key); }
} // namespace executor
} // namespace mindspore

+0 -70 mindspore/ccsrc/predict/converter/executor_tensor.h

@@ -1,70 +0,0 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_PREDICT_CONVERTER_EXECUTOR_TENSOR_H_
#define MINDSPORE_CCSRC_PREDICT_CONVERTER_EXECUTOR_TENSOR_H_

#include <vector>
#include <memory>
#include <unordered_map>
#include <utility>
#include "ir/tensor.h"

namespace mindspore {
namespace executor {
using TensorPtr = tensor::TensorPtr;
static constexpr int MS_MAX_REFCOUNT = 999;
enum ExTensorType { INPUTDATA, WEIGHTS, CONSTANT, KERNEL, OUTPUT };
class ExTensor {
public:
int key_;
TensorPtr device_tensor_ptr_;
int ref_count_;
int index_;
std::vector<int> host_shape_;
ExTensorType stable_;
ExTensor(int key, TensorPtr tensor_ptr, int ref_count, int index, std::vector<int> host_shape,
ExTensorType ex_tensor_type)
: key_(key),
device_tensor_ptr_(std::move(tensor_ptr)),
ref_count_(ref_count),
index_(index),
host_shape_(std::move(host_shape)),
stable_(ex_tensor_type) {}
~ExTensor() { host_shape_.clear(); }
};
using ExTensorPtr = std::shared_ptr<ExTensor>;
class TensorCache {
public:
TensorCache() = default;

~TensorCache() { tensors.clear(); }

int addExTensor(int tensor_key, const TensorPtr &tensor, int refCount, const std::vector<int> &host_shape,
ExTensorType stable, bool inc = true);
// just adjust for dynamic tensor
std::vector<ExTensorPtr> findTensor(int key);
void deleteTensor(int key);
const std::unordered_map<int, std::vector<ExTensorPtr>> &GetCachedTensor() const { return tensors; }

private:
std::unordered_map<int, std::vector<ExTensorPtr>> tensors;
int nodeIndex = 0;
};
using TensorCachePtr = std::shared_ptr<TensorCache>;
} // namespace executor
} // namespace mindspore
#endif // MINDSPORE_CCSRC_PREDICT_CONVERTER_EXECUTOR_TENSOR_H_

+0 -561 mindspore/ccsrc/predict/converter/kernel2ms.cc

@@ -1,561 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "predict/converter/kernel2ms.h"
#include <algorithm>
#include "ir/anf.h"
#include "predict/converter/lite_model/op_attr_packer.h"
#include "mindspore/ccsrc/frontend/operator/ops.h"

namespace mindspore {
namespace executor {
Kernel2Ms &Kernel2Ms::GetInstance() {
static Kernel2Ms instance;
return instance;
}

bool Kernel2Ms::SetMemResue() const {
MS_LOG(INFO) << "MemResue start";
return true;
}

bool Kernel2Ms::SetAllTensors(const TensorCachePtr &tensor_cache, SubGraphDefT *ms_graph) {
if (tensor_cache == nullptr || ms_graph == nullptr) {
return false;
}
const std::unordered_map<int, std::vector<ExTensorPtr>> &cachedTensors = tensor_cache->GetCachedTensor();
size_t total_size = 0;
if (cachedTensors.empty()) {
return false;
}
for (auto &iter : cachedTensors) {
auto ex_tensors = iter.second;
total_size += ex_tensors.size();
}
ms_graph->allTensors.resize(total_size);
for (auto &iter : cachedTensors) {
for (auto &ex_tensor : iter.second) {
std::unique_ptr<TensorDefT> ms_tensor(new TensorDefT());
auto device_tensor_tmp = ex_tensor->device_tensor_ptr_;
auto device_d_type = device_tensor_tmp->data_type();
ms_tensor->dataType = predict::utils::GetMSDataType(device_d_type);
auto device_shape = device_tensor_tmp->shape();
ms_tensor->dims.clear();
if (device_shape.empty()) {
ms_tensor->dims.push_back(1);
} else {
ms_tensor->dims.assign(device_shape.begin(), device_shape.end());
}
std::string format_str = device_tensor_tmp->device_info().format_;
ms_tensor->format = predict::utils::GetMsFormat(format_str);
ms_tensor->offset = 0;
auto stable = ex_tensor->stable_;
if (stable == INPUTDATA || stable == CONSTANT || stable == WEIGHTS) {
ms_tensor->refCount = MS_MAX_REFCOUNT;
} else {
ms_tensor->refCount = 0;
}
ms_graph->allTensors[IntToSize(ex_tensor->index_)] = std::move(ms_tensor);
}
}
return true;
}

bool Kernel2Ms::SetGraphOutputIdx(const KernelGraphPtr &kernel_graph_ptr, const TensorCachePtr &tensor_cache,
SubGraphDefT *ms_graph, AllOutputTensors *all_output_tensors) {
MS_EXCEPTION_IF_NULL(tensor_cache);
MS_EXCEPTION_IF_NULL(ms_graph);
MS_EXCEPTION_IF_NULL(all_output_tensors);
auto out_nodes = kernel_graph_ptr->outputs();
if (out_nodes.empty()) {
return false;
}
// maybe need to judge out_nodes is real && output must be CNode
for (size_t i = 0; i < out_nodes.size(); ++i) {
std::vector<AnfNodePtr> real_inputs_link;
std::vector<size_t> real_output_idx_link;
GetRealInpoutsPtr(out_nodes[i], &real_inputs_link, &real_output_idx_link);
if (real_inputs_link.empty()) {
MS_LOG(INFO) << "this graph output node is vitural node, has no real input";
continue;
}
for (size_t k = 0; k < real_inputs_link.size(); ++k) {
int key = node_indexs_[out_nodes[i].get()];
auto ex_tensor_list = tensor_cache->findTensor(key);
if (ex_tensor_list.empty()) {
MS_LOG(INFO) << "SetGraphOutputIdx do not add Extensor ";
continue;
}
auto ex_tensor = ex_tensor_list[real_output_idx_link[k]];
ex_tensor_list.clear();
ms_graph->outputIndex.push_back(ex_tensor->index_);
}
}
return true;
}

bool Kernel2Ms::SetOpOutputIdx(const CNodePtr &c_node_ptr, const TensorPtr &output_tensor,
const TensorCachePtr &tensor_cache, int ref_count, size_t order_index, OpDefT *ms_node) {
MS_EXCEPTION_IF_NULL(c_node_ptr);
MS_EXCEPTION_IF_NULL(output_tensor);
MS_EXCEPTION_IF_NULL(ms_node);
MS_EXCEPTION_IF_NULL(tensor_cache);
if (!predict::utils::FindNodeInMap(node_indexs_, c_node_ptr)) {
MS_LOG(ERROR) << "can not find any pk_key in inited node_indexs map";
return false;
}
int tensor_key = node_indexs_[c_node_ptr.get()];
auto host_shape = AnfAlgo::GetOutputInferShape(c_node_ptr, order_index);
std::vector<int> tensor_shape;
(void)std::transform(host_shape.begin(), host_shape.end(), std::back_inserter(tensor_shape), SizeToInt);
int outputIndex = tensor_cache->addExTensor(tensor_key, output_tensor, ref_count, tensor_shape, KERNEL);
ms_node->outputIndex.push_back(outputIndex);
return true;
}

void Kernel2Ms::GetRealInpoutsPtr(const AnfNodePtr &node, std::vector<AnfNodePtr> *real_inputs,
std::vector<size_t> *real_output_idx) {
MS_EXCEPTION_IF_NULL(real_inputs);
MS_EXCEPTION_IF_NULL(real_output_idx);
size_t default_idx = 0;
if (node->isa<CNode>()) {
auto c_node = node->cast<CNodePtr>();
MS_EXCEPTION_IF_NULL(c_node);
std::string c_node_name = GetCNodeFuncName(c_node);
if (c_node_name == prim::kPrimTupleGetItem->name()) {
auto v_node = c_node->inputs()[kTupleGetItemIndex]->cast<ValueNodePtr>();
MS_EXCEPTION_IF_NULL(v_node);
default_idx = IntToSize(GetValue<int>(v_node->value()));
real_inputs->push_back(c_node->inputs()[1]);
real_output_idx->push_back(default_idx);
return;
} else if (c_node_name == prim::kPrimDepend->name()) {
GetRealInpoutsPtr(c_node->inputs()[1], real_inputs, real_output_idx);
return;
} else if (c_node_name == prim::kPrimMakeTuple->name()) {
for (auto &in : c_node->inputs()) {
GetRealInpoutsPtr(in, real_inputs, real_output_idx);
}
return;
} else {
real_inputs->push_back(node);
real_output_idx->push_back(default_idx);
}
} else if (node->isa<Parameter>()) {
real_inputs->push_back(node);
real_output_idx->push_back(default_idx);
} else if (node->isa<ValueNode>()) {
real_inputs->push_back(node);
real_output_idx->push_back(default_idx);
}
}

bool Kernel2Ms::SetOpInputIdx(const CNodePtr &c_node_ptr, const TensorCachePtr &tensor_cache, OpDefT *ms_node) {
MS_EXCEPTION_IF_NULL(c_node_ptr);
MS_EXCEPTION_IF_NULL(tensor_cache);
MS_EXCEPTION_IF_NULL(ms_node);
for (size_t i = 1; i < c_node_ptr->inputs().size(); ++i) {
std::vector<AnfNodePtr> real_inputs;
std::vector<size_t> real_output_idx;
GetRealInpoutsPtr(c_node_ptr->inputs()[i], &real_inputs, &real_output_idx);
if (real_inputs.empty()) {
MS_LOG(INFO) << "kernel has no inputs: " << c_node_ptr.get() << " input size[%lu]" << c_node_ptr->inputs().size();
continue;
}
for (size_t j = 0; j < real_inputs.size(); ++j) {
int key = node_indexs_[real_inputs[j].get()];
std::vector<ExTensorPtr> ex_tensor_list = tensor_cache->findTensor(key);
if (ex_tensor_list.empty()) {
continue;
}
ExTensorPtr ex_tensor_ptr = ex_tensor_list[real_output_idx[j]];
ex_tensor_list.clear();
ms_node->inputIndex.push_back(ex_tensor_ptr->index_);
}
}
return true;
}

void Kernel2Ms::TransformGraphIndx() {
// transform index && anfnodeptr
if (node_indexs_.empty()) {
MS_LOG(EXCEPTION) << "node_indexs_ not ininted";
}
for (auto &item : node_indexs_) {
index_nodes_[item.second] = item.first;
}
}

bool Kernel2Ms::InitGraphInputsIndx(const KernelGraphPtr &kernel_graph_ptr) {
MS_EXCEPTION_IF_NULL(kernel_graph_ptr);
auto input_nodes = kernel_graph_ptr->inputs();
if (input_nodes.empty()) {
return false;
}
for (const auto &input_node : input_nodes) {
if (input_node->isa<Parameter>()) {
if (!predict::utils::FindNodeInMap(node_indexs_, input_node)) {
// init every parameter node
node_indexs_[input_node.get()] = graph_index_;
graph_index_++;
}
} else {
MS_LOG(INFO) << "This node is anfnode, no need to handle, continue. node info: " << input_node->ToString();
continue;
}
}
MS_LOG(DEBUG) << "inputs GraphIndex: " << graph_index_;
return true;
}

bool Kernel2Ms::InitGraphValueNodesIndx(const KernelGraphPtr &kernel_graph_ptr) {
MS_EXCEPTION_IF_NULL(kernel_graph_ptr);
if (kernel_graph_ptr->value_nodes().empty()) {
return false;
}
for (auto &item : kernel_graph_ptr->value_nodes()) {
if (item.first->isa<ValueNode>()) {
auto value_node = item.first->cast<ValueNodePtr>();
MS_EXCEPTION_IF_NULL(value_node);
if (value_node == nullptr) {
MS_LOG(WARNING) << "value_node is nullptr";
return false;
}
if (value_node->value() == nullptr) {
MS_LOG(ERROR) << "Constant value is null.";
return false;
}
if (!value_node->value()->isa<tensor::Tensor>()) {
continue;
}
if (!predict::utils::FindNodeInMap(node_indexs_, item.first)) {
// init node
auto node_ptr = item.first;
node_indexs_[node_ptr.get()] = graph_index_;
graph_index_++;
}
}
}
return true;
}

bool Kernel2Ms::InitGraphOpsIndx(const KernelGraphPtr &kernel_graph_ptr) {
MS_EXCEPTION_IF_NULL(kernel_graph_ptr);
auto kernels = kernel_graph_ptr->execution_order();
if (kernels.empty()) {
MS_LOG(WARNING) << "this graph has no kernel";
return false;
}
for (size_t i = 0; i < kernels.size(); ++i) {
// for each kernel's inputs foreach real_input
if (kernels[i]->isa<CNode>()) {
if (!predict::utils::FindNodeInMap(node_indexs_, kernels[i])) {
// init node
node_indexs_[kernels[i].get()] = graph_index_;
graph_index_++;
}
}
}
return true;
}

bool Kernel2Ms::InitGraphOutputsIndx(const KernelGraphPtr &kernel_graph_ptr) {
MS_EXCEPTION_IF_NULL(kernel_graph_ptr);
// graph output && their inputs should link together
auto out_nodes = kernel_graph_ptr->outputs();
if (out_nodes.empty()) {
MS_LOG(ERROR) << "this graph has no outputs";
return false;
}
for (auto &item : out_nodes) {
if (!predict::utils::FindNodeInMap(node_indexs_, item)) {
node_indexs_[item.get()] = graph_index_;
graph_index_++;
}
}
return true;
}

bool Kernel2Ms::InitGraphIndx(const KernelGraphPtr &kernel_graph_ptr) {
MS_EXCEPTION_IF_NULL(kernel_graph_ptr);
// only parameter
if (!InitGraphInputsIndx(kernel_graph_ptr)) {
return false;
}
// init value node
if (!InitGraphValueNodesIndx(kernel_graph_ptr)) {
return false;
}
// init op
if (!InitGraphOpsIndx(kernel_graph_ptr)) {
return false;
}
// init Graphoutput attention: out_put nodes have inputs
return InitGraphOutputsIndx(kernel_graph_ptr);
}

bool Kernel2Ms::SetGraphInputTensors(const KernelGraphPtr &kernel_graph_ptr, const TensorCachePtr &tensor_cache,
SubGraphDefT *ms_graph) {
MS_EXCEPTION_IF_NULL(tensor_cache);
MS_EXCEPTION_IF_NULL(ms_graph);
MS_EXCEPTION_IF_NULL(kernel_graph_ptr);
if (convert_mode_ == kConvertUnused) {
return false;
}
if (kernel_graph_ptr->inputs().empty()) {
return false;
}
for (const auto &input_node : kernel_graph_ptr->inputs()) {
if (input_node->isa<Parameter>()) {
ParameterPtr pk_node = std::dynamic_pointer_cast<Parameter>(input_node);
TensorPtr device_tensor;
if (convert_mode_ == kConvertCpuMode) {
device_tensor = predict::utils::GetParaCpuTensor(input_node);
} else {
device_tensor = predict::utils::GetParaAscendTensor(input_node);
}
if (device_tensor == nullptr) {
return false;
}
ExTensorType node_type;
if (AnfAlgo::IsParameterWeight(pk_node)) {
node_type = WEIGHTS;
} else {
node_type = INPUTDATA;
}
if (!predict::utils::FindNodeInMap(node_indexs_, input_node)) {
MS_LOG(WARNING) << "can not find any pk_key in inited node_indexs map";
return false;
}
auto pk_key = node_indexs_[input_node.get()];
all_output_tensors_[pk_key].push_back(device_tensor);
int nodeRefCount = SizeToInt(AnfAlgo::GetOutputTensorNum(input_node));
int nodeInputIdx =
tensor_cache->addExTensor(pk_key, device_tensor, nodeRefCount, device_tensor->shape(), node_type);
if (!AnfAlgo::IsParameterWeight(pk_node)) {
ms_graph->inputIndex.push_back(nodeInputIdx);
all_input_idxs_.push_back(nodeInputIdx);
} else {
input_weight_idxs_.push_back(nodeInputIdx);
all_input_idxs_.push_back(nodeInputIdx);
}
}
}
return true;
}

bool Kernel2Ms::SetGraphValueTensors(const KernelGraphPtr &kernel_graph_ptr, const TensorCachePtr &tensor_cache) {
MS_EXCEPTION_IF_NULL(kernel_graph_ptr);
MS_EXCEPTION_IF_NULL(tensor_cache);
for (auto &item : kernel_graph_ptr->value_nodes()) {
if (item.first->isa<ValueNode>()) {
auto const_node = item.first->cast<ValueNodePtr>();
auto tensor_constant = predict::utils::GetValueTensor(const_node);
if (tensor_constant == nullptr) {
continue;
}
if (!predict::utils::FindNodeInMap(node_indexs_, item.first)) {
MS_LOG(WARNING) << "can not find any pk_key in inited node_indexs map";
return false;
}
int constant_key = node_indexs_[(item.first).get()];
all_output_tensors_[constant_key].push_back(tensor_constant);
auto shape = tensor_constant->shape();
(void)tensor_cache->addExTensor(constant_key, tensor_constant, 0, shape, CONSTANT);
}
}
return true;
}

bool Kernel2Ms::SetGraphOpTensors(const KernelGraphPtr &kernel_graph_ptr, const TensorCachePtr &tensor_cache,
SubGraphDefT *ms_graph) {
MS_EXCEPTION_IF_NULL(kernel_graph_ptr);
MS_EXCEPTION_IF_NULL(tensor_cache);
MS_EXCEPTION_IF_NULL(ms_graph);
auto kernels = kernel_graph_ptr->execution_order();
if (kernels.empty()) {
MS_LOG(ERROR) << "this graph has no kernels";
return false;
}
for (auto &kernel : kernels) {
if (!predict::utils::FindNodeInMap(node_indexs_, kernel)) {
MS_LOG(ERROR) << "can not find any pk_key in inited node_indexs map";
return false;
}
auto kernel_key = node_indexs_[kernel.get()];
std::unique_ptr<OpDefT> ms_node(new OpDefT);
ms_node->name = kernel->fullname_with_scope();
ms_node->fmkType = mindspore::predict::FmkType_CAFFE;
auto c_name = AnfAlgo::GetCNodeName(kernel);
auto fun = predict::convert::OpAttrFactory::GetInstance()->GetPackFun(c_name);
if (fun == nullptr) {
MS_LOG(WARNING) << "get node [" << kernel->fullname_with_scope() << "] attr failed.";
} else if (!fun(kernel, ms_node.get())) {
MS_LOG(ERROR) << "set node [" << kernel->fullname_with_scope() << "] attr failed.";
return false;
}
auto output_size = AnfAlgo::GetOutputTensorNum(kernel);
int nodeRefCount = SizeToInt(output_size);
for (size_t j = 0; j < output_size; ++j) {
TensorPtr device_tensor;
if (convert_mode_ == kConvertCpuMode) {
device_tensor = predict::utils::GetKernelCpuTensor(kernel, j);
} else if (convert_mode_ == kConvertAscendMode) {
device_tensor = predict::utils::GetKernelAscendTensor(kernel, j);
}
if (device_tensor == nullptr) {
return false;
}
all_output_tensors_[kernel_key].push_back(device_tensor);
if (!SetOpOutputIdx(kernel, device_tensor, tensor_cache, nodeRefCount, j, ms_node.get())) {
return false;
}
}
tmp_op_nodes_.emplace_back(ms_node.release());
}
return true;
}

bool Kernel2Ms::KernelGraph2MsGraph(const KernelGraphPtr &kernel_graph_ptr) {
MS_EXCEPTION_IF_NULL(kernel_graph_ptr);
graph_index_ = 0;
all_output_tensors_.clear();
node_indexs_.clear();
index_nodes_.clear();
std::unique_ptr<SubGraphDefT> sub_ms_graph(new SubGraphDefT());
if (!InitGraphIndx(kernel_graph_ptr)) {
return false;
}
TransformGraphIndx();
tensor_cache_ptr_ = std::make_shared<TensorCache>();
// foreach node to init it's real output tensor
if (!SetGraphInputTensors(kernel_graph_ptr, tensor_cache_ptr_, sub_ms_graph.get())) {
return false;
}
// Get KernelGraph value node
if (!SetGraphValueTensors(kernel_graph_ptr, tensor_cache_ptr_)) {
return false;
}
// Get KernelGraph apply_kernel && add opNode
if (!SetGraphOpTensors(kernel_graph_ptr, tensor_cache_ptr_, sub_ms_graph.get())) {
return false;
}
// Get KernelGraph outputs
if (!SetGraphOutputIdx(kernel_graph_ptr, tensor_cache_ptr_, sub_ms_graph.get(), &all_output_tensors_)) {
return false;
}
auto kernels = kernel_graph_ptr->execution_order();
for (size_t i = 0; i < kernels.size(); ++i) {
auto ms_node = tmp_op_nodes_[i];
if (!SetOpInputIdx(kernels[i], tensor_cache_ptr_, ms_node)) {
return false;
}
std::unique_ptr<OpDefT> ms_node_tmp(ms_node);
sub_ms_graph->nodes.emplace_back(std::move(ms_node_tmp));
}
if (!SetAllTensors(tensor_cache_ptr_, sub_ms_graph.get())) {
return false;
}
if (!SetMemResue()) {
return false;
}
sub_ms_graph_ = std::move(sub_ms_graph);
sub_ms_graph_->name = "default_sub_graph";
return true;
}

bool Kernel2Ms::CheckInputSizes(const std::vector<TensorPtr> &input_tensors,
const std::vector<uint32_t> &all_input_idxs) {
if (input_tensors.size() != all_input_idxs.size()) {
MS_LOG(EXCEPTION) << "real input tensors size:" << input_tensors.size()
<< "not equal converted tesnors size:" << all_input_idxs.size() << "the graph has changed";
}
for (auto in : all_input_idxs) {
if (in < sub_ms_graph_->allTensors.size()) {
auto real_tensor = input_tensors[in];
auto convert_dims = sub_ms_graph_->allTensors[in]->dims;
auto real_dims = real_tensor->shape();
if (real_dims.size() != convert_dims.size()) {
return false;
} else {
for (size_t i = 0; i < convert_dims.size(); ++i) {
if (convert_dims[i] != real_dims[i]) {
return false;
}
}
}
} else {
MS_LOG(EXCEPTION) << "index: " << in << "in all_input_idxs is valid";
}
}
return true;
}

void Kernel2Ms::ReleaseContextRes() {
tmp_op_nodes_.clear();
node_indexs_.clear();
index_nodes_.clear();
tensor_cache_ptr_ = nullptr;
all_output_tensors_.clear();
}

bool Kernel2Ms::KernelInput2MS(const std::vector<TensorPtr> &input_tensors) {
const std::unordered_map<int, std::vector<ExTensorPtr>> &cache_tensors = tensor_cache_ptr_->GetCachedTensor();
if (cache_tensors.empty()) {
return false;
}
auto all_weights_idxs = GetAllInputWeightIdxs();
auto all_input_idxs = GetAllInputIdxs();
auto real_input_size = input_tensors.size();
// check tensor size
bool ret = CheckInputSizes(input_tensors, all_input_idxs);
std::vector<uint32_t> match_to_rel_idxs;
// indx order not matched,macth to it
if (!ret) {
for (auto idx : all_weights_idxs) {
auto macth_idx = real_input_size - idx;
match_to_rel_idxs.push_back(macth_idx);
}
} else {
match_to_rel_idxs = all_weights_idxs;
}
if (match_to_rel_idxs.size() == all_weights_idxs.size()) {
for (size_t j = 0; j < all_weights_idxs.size(); ++j) {
auto cache_idx = all_weights_idxs[j];
auto match_idx = match_to_rel_idxs[j];
auto real_tensor = input_tensors[match_idx];
auto real_size = LongToSize(real_tensor->data().nbytes());
auto real_data = real_tensor->data_c();
MS_EXCEPTION_IF_NULL(real_data);
if (sub_ms_graph_->allTensors[cache_idx] != nullptr) {
sub_ms_graph_->allTensors[cache_idx]->data.resize(real_size);
}
if (memcpy_s(sub_ms_graph_->allTensors[cache_idx]->data.data(), real_size, real_data, real_size) != 0) {
MS_LOG(ERROR) << "KernelInput2MS memcpy_s failed";
return false;
}
}
}
ReleaseContextRes();
return true;
}

bool Kernel2Ms::SaveDeviceModel(const std::shared_ptr<GraphDefT> &new_ms_graph_ptr, const std::string &save_path_name) {
MS_EXCEPTION_IF_NULL(new_ms_graph_ptr);
return predict::utils::SaveDeviceModelUtil(new_ms_graph_ptr, save_path_name, sub_ms_graph_.release());
}
} // namespace executor
} // namespace mindspore

+0 -118 mindspore/ccsrc/predict/converter/kernel2ms.h

@@ -1,118 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_PREDICT_CONVERTER_KERNEL_TO_MS_H_
#define MINDSPORE_CCSRC_PREDICT_CONVERTER_KERNEL_TO_MS_H_

#include <string>
#include <unordered_map>
#include <memory>
#include <vector>
#include <utility>
#include "backend/session/kernel_graph.h"
#include "predict/converter/executor_tensor.h"
#include "predict/schema/inner/ms_generated.h"
#include "predict/converter/attr_utils/convert_util.h"

static constexpr size_t kTupleGetItemIndex = 2;
namespace mindspore {
namespace executor {
using KernelGraphPtr = std::shared_ptr<mindspore::session::KernelGraph>;
enum ConvertMode { kConvertCpuMode, kConvertAscendMode, kConvertUnused };
enum TargetMode { kCPUTarget, kGPUTarget, kUnknowTarget };
class Kernel2Ms {
public:
static Kernel2Ms &GetInstance();

Kernel2Ms(const Kernel2Ms &) = delete;

Kernel2Ms &operator=(const Kernel2Ms &) = delete;

bool KernelGraph2MsGraph(const KernelGraphPtr &kernel_graph_ptr);

bool KernelInput2MS(const std::vector<TensorPtr> &input_tensors);

ConvertMode convert_mode() const { return convert_mode_; }

void set_convert_mode(ConvertMode convert_mode) { convert_mode_ = convert_mode; }

TargetMode device_target() const { return device_target_; }

void set_device_target(TargetMode device_target) { device_target_ = device_target; }

bool SaveDeviceModel(const std::shared_ptr<GraphDefT> &new_ms_graph_ptr, const std::string &save_path_name);

private:
Kernel2Ms() : graph_index_(0) {}

void ReleaseContextRes();

~Kernel2Ms() = default;

bool SetAllTensors(const TensorCachePtr &tensor_cache, SubGraphDefT *sub_graph_def_t);

bool SetOpInputIdx(const CNodePtr &c_node_ptr, const TensorCachePtr &tensor_cache, OpDefT *ms_node);

bool SetOpOutputIdx(const CNodePtr &c_node_ptr, const TensorPtr &output_tensor, const TensorCachePtr &tensor_cache,
int ref_count, size_t order_index, OpDefT *ms_node);

bool SetGraphOutputIdx(const KernelGraphPtr &kernel_graph_ptr, const TensorCachePtr &tensor_cache,
SubGraphDefT *sub_graph_def_t, AllOutputTensors *all_output_tensors);

void TransformGraphIndx();

void GetRealInpoutsPtr(const AnfNodePtr &node, std::vector<AnfNodePtr> *real_inputs,
std::vector<size_t> *real_output_idx);

bool InitGraphIndx(const KernelGraphPtr &kernel_graph_ptr);

bool InitGraphInputsIndx(const KernelGraphPtr &kernel_graph_ptr);

bool InitGraphValueNodesIndx(const KernelGraphPtr &kernel_graph_ptr);

bool InitGraphOpsIndx(const KernelGraphPtr &kernel_graph_ptr);

bool InitGraphOutputsIndx(const KernelGraphPtr &kernel_graph_ptr);

bool SetGraphInputTensors(const KernelGraphPtr &kernel_graph_ptr, const TensorCachePtr &tensor_cache,
SubGraphDefT *sub_graph_def_t);

bool SetGraphValueTensors(const KernelGraphPtr &kernel_graph_ptr, const TensorCachePtr &tensor_cache);

bool SetGraphOpTensors(const KernelGraphPtr &kernel_graph_ptr, const TensorCachePtr &tensor_cache,
SubGraphDefT *sub_graph_def_t);
std::vector<uint32_t> GetAllInputWeightIdxs() const { return input_weight_idxs_; }
std::vector<uint32_t> GetAllInputIdxs() const { return all_input_idxs_; }

bool CheckInputSizes(const std::vector<TensorPtr> &input_tensors, const std::vector<uint32_t> &all_input_idxs);

bool SetMemResue() const;
SubGraphPtr sub_ms_graph_;
AllOutputTensors all_output_tensors_;
std::vector<OpDefT *> tmp_op_nodes_;
std::unordered_map<MsKernelKey, int> node_indexs_;
std::unordered_map<int, MsKernelKey> index_nodes_;
int graph_index_ = 0;
TensorCachePtr tensor_cache_ptr_ = nullptr;
ConvertMode convert_mode_ = kConvertCpuMode;
TargetMode device_target_ = kCPUTarget;
std::vector<uint32_t> input_weight_idxs_;
std::vector<uint32_t> all_input_idxs_;
};
using Kernel2MsPtr = std::shared_ptr<Kernel2Ms>;
} // namespace executor
} // namespace mindspore
#endif // MINDSPORE_CCSRC_PREDICT_CONVERTER_KERNEL_TO_MS_H_
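The header above is the entire public surface of the removed converter. For orientation only, a minimal sketch of how a session-side caller would drive it; the ExportPredictModel wrapper and the exact call order are assumptions, not code from this change set:

#include <memory>
#include <string>
#include <vector>
#include "predict/converter/kernel2ms.h"

namespace mindspore {
namespace executor {
// Sketch: convert a kernel graph with the public Kernel2Ms API and save it.
bool ExportPredictModel(const KernelGraphPtr &kernel_graph_ptr, const std::vector<TensorPtr> &input_tensors,
                        const std::string &save_path_name) {
  auto &converter = Kernel2Ms::GetInstance();
  converter.set_convert_mode(kConvertCpuMode);
  if (!converter.KernelGraph2MsGraph(kernel_graph_ptr)) {
    return false;
  }
  if (!converter.KernelInput2MS(input_tensors)) {
    return false;
  }
  auto new_ms_graph_ptr = std::make_shared<GraphDefT>();
  return converter.SaveDeviceModel(new_ms_graph_ptr, save_path_name);
}
}  // namespace executor
}  // namespace mindspore

SaveDeviceModel then hands the accumulated sub-graph and the output path to SaveDeviceModelUtil, as in the implementation shown at the end of the kernel2ms.cc hunk above.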

+ 0
- 110
mindspore/ccsrc/predict/converter/lite_model/op_attr_packer.cc

@@ -1,110 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "predict/converter/lite_model/op_attr_packer.h"
#include "./securec.h"

namespace mindspore {
namespace predict {
namespace convert {
// forward declare
bool Conv2dPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op);
bool MatMulPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op);
bool BiasAddPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op);
bool ReshapePacker(const CNodePtr &c_node_ptr, OpDefT *ms_op);
bool ActivationPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op);
bool PoolingPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op);
bool FusedBatchNormPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op);
bool AddPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op);
bool CastPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op);
bool MeanPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op);
bool SoftmaxPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op);
bool ScalePacker(const CNodePtr &c_node_ptr, OpDefT *ms_op);
bool AddFoldPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op);
bool ArgMaxPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op);
bool BatchNormFoldPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op);
bool FakeQuantWithMinMaxPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op);
bool FakeQuantWithMinMaxPerChannelPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op);
bool MulPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op);
bool MulFoldPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op);
bool SqueezePacker(const CNodePtr &c_node_ptr, OpDefT *ms_op);

OpAttrFactory::OpAttrFactory() {
pack_funs_ = {{"Conv2D", Conv2dPacker},
{"MatMul", MatMulPacker},
{"BiasAdd", BiasAddPacker},
{"Reshape", ReshapePacker},
{"Activation", ActivationPacker},
{"ReLU", ActivationPacker},
{"ReLU6", ActivationPacker},
{"EReLU", ActivationPacker},
{"LeakyReLU", ActivationPacker},
{"Sigmoid", ActivationPacker},
{"Softsign", ActivationPacker},
{"Softplus", ActivationPacker},
{"Tanh", ActivationPacker},
{"HSwish", ActivationPacker},
{"HSigmoid", ActivationPacker},
{"MaxPool", PoolingPacker},
{"MaxPool2D", PoolingPacker},
{"MeanPool", PoolingPacker},
{"GlobalPool", PoolingPacker},
{"FusedBatchNorm", FusedBatchNormPacker},
{"FusedBatchNormGrad", FusedBatchNormPacker},
{"Cast", CastPacker},
{"TensorAdd", AddPacker},
{"SoftMax", SoftmaxPacker},
{"SimpleMean", MeanPacker},
{"ReduceMean", MeanPacker},
{"AddFold", AddFoldPacker},
{"ArgMax", ArgMaxPacker},
{"BatchNorm", BatchNormFoldPacker},
{"FakeQuantPerLayer", FakeQuantWithMinMaxPacker},
{"FakeQuantPerChannel", FakeQuantWithMinMaxPerChannelPacker},
{"Mul", MulPacker},
{"MulFold", MulFoldPacker},
{"Squeeze", SqueezePacker}};
}
OpAttrPackFun OpAttrFactory::GetPackFun(const std::string &opType) {
if (pack_funs_.find(opType) == pack_funs_.end()) {
MS_LOG(WARNING) << "Op Attr pack fun [" << opType << "] not found.";
return nullptr;
}
return pack_funs_[opType];
}

mindspore::predict::Format GetAttrFormat(const std::string &format) {
if (format == kOpFormat_NCHW) {
return predict::Format::Format_NCHW;
} else if (format == kOpFormat_NHWC) {
return predict::Format::Format_NHWC;
} else {
return predict::Format::Format_NUM_OF_FORMAT;
}
}

mindspore::predict::PadMode GetAttrPadMode(const std::string &pad_mode) {
if (pad_mode == "same") {
return mindspore::predict::PadMode::PadMode_SAME;
} else if (pad_mode == "valid") {
return mindspore::predict::PadMode::PadMode_VALID;
} else {
return mindspore::predict::PadMode::PadMode_NOTSET;
}
}
} // namespace convert
} // namespace predict
} // namespace mindspore

+ 0
- 58
mindspore/ccsrc/predict/converter/lite_model/op_attr_packer.h

@@ -1,58 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_PREDICT_CONVERTER_OP_ATTR_PACKER_H_
#define MINDSPORE_CCSRC_PREDICT_CONVERTER_OP_ATTR_PACKER_H_

#include <utility>
#include <string>
#include <unordered_map>
#include "backend/session/anf_runtime_algorithm.h"
#include "predict/schema/inner/ms_generated.h"

static constexpr size_t kNIndex = 0;
static constexpr size_t kCIndex = 1;
static constexpr size_t kHIndex = 2;
static constexpr size_t kWIndex = 3;
static constexpr size_t kNCHWSize = 4;
namespace mindspore {
namespace predict {
namespace convert {
using OpAttrPackFun = bool (*)(const CNodePtr &c_node_ptr, OpDefT *ms_op);
class OpAttrFactory {
public:
static OpAttrFactory *GetInstance() {
static OpAttrFactory instance;
return &instance;
}
OpAttrFactory(const OpAttrFactory &) = delete;
OpAttrFactory &operator=(const OpAttrFactory &) = delete;
OpAttrPackFun GetPackFun(const std::string &op_type);
~OpAttrFactory() { pack_funs_.clear(); }
OpAttrFactory();

private:
std::unordered_map<std::string, OpAttrPackFun> pack_funs_;
};

mindspore::predict::Format GetAttrFormat(const std::string &format);

mindspore::predict::PadMode GetAttrPadMode(const std::string &pad_mode);
} // namespace convert
} // namespace predict
} // namespace mindspore

#endif // MINDSPORE_CCSRC_PREDICT_CONVERTER_OP_ATTR_PACKER_H_
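The factory above is what the converter consults for each kernel node. As a rough illustration only (the PackNodeAttr helper below is hypothetical and not part of the removed sources), a caller would resolve and apply a packer like this:

#include "predict/converter/lite_model/op_attr_packer.h"

namespace mindspore {
namespace predict {
namespace convert {
// Sketch: look up the packer registered for a CNode's op type and apply it.
bool PackNodeAttr(const CNodePtr &c_node_ptr, OpDefT *ms_op) {
  auto op_type = AnfAlgo::GetCNodeName(c_node_ptr);
  OpAttrPackFun pack_fun = OpAttrFactory::GetInstance()->GetPackFun(op_type);
  if (pack_fun == nullptr) {
    return false;  // no attr packer registered for this op type
  }
  return pack_fun(c_node_ptr, ms_op);
}
}  // namespace convert
}  // namespace predict
}  // namespace mindspore

GetPackFun logs a warning and returns nullptr for unregistered op types, which the sketch surfaces as a false return.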

+ 0
- 59
mindspore/ccsrc/predict/converter/lite_model/operations/activation_packer.cc

@@ -1,59 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "predict/converter/lite_model/op_attr_packer.h"

namespace mindspore {
namespace predict {
namespace convert {
bool ActivationPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op) {
if (c_node_ptr == nullptr || ms_op == nullptr) {
return false;
}
std::unique_ptr<ActivationT> attr(new ActivationT());
MS_EXCEPTION_IF_NULL(attr);
auto node_name = AnfAlgo::GetCNodeName(c_node_ptr);
if (node_name == "ReLU") {
attr->type = predict::ActivationType::ActivationType_RELU;
} else if (node_name == "Sigmoid") {
attr->type = predict::ActivationType::ActivationType_SIGMOID;
} else if (node_name == "ReLU6") {
attr->type = predict::ActivationType::ActivationType_RELU6;
} else if (node_name == "ELU") {
attr->type = predict::ActivationType::ActivationType_ELU;
} else if (node_name == "Leaky_ReLU") {
attr->type = predict::ActivationType::ActivationType_LEAKY_RELU;
} else if (node_name == "ABS") {
attr->type = predict::ActivationType::ActivationType_ABS;
} else if (node_name == "ReLU1") {
attr->type = predict::ActivationType::ActivationType_RELU1;
} else if (node_name == "Softsign") {
attr->type = predict::ActivationType::ActivationType_SOFTSIGN;
} else if (node_name == "Softplus") {
attr->type = predict::ActivationType::ActivationType_SOFTPLUS;
} else if (node_name == "Tanh") {
attr->type = predict::ActivationType::ActivationType_TANH;
} else {
attr->type = predict::ActivationType::ActivationType_UNKNOW;
MS_LOG(WARNING) << "unknown activation type: " << node_name;
}
ms_op->name = c_node_ptr->fullname_with_scope();
ms_op->attr.type = OpT_Activation;
ms_op->attr.value = attr.release();
return true;
}
} // namespace convert
} // namespace predict
} // namespace mindspore

+ 0
- 35
mindspore/ccsrc/predict/converter/lite_model/operations/add_packer.cc

@@ -1,35 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "predict/converter/lite_model/op_attr_packer.h"

namespace mindspore {
namespace predict {
namespace convert {
bool AddPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op) {
if (c_node_ptr == nullptr || ms_op == nullptr) {
return false;
}
std::unique_ptr<AddT> attr(new AddT());
MS_EXCEPTION_IF_NULL(attr);
ms_op->name = c_node_ptr->fullname_with_scope();
ms_op->attr.type = OpT_Add;
ms_op->attr.value = attr.release();
return true;
}
} // namespace convert
} // namespace predict
} // namespace mindspore

+ 0
- 34
mindspore/ccsrc/predict/converter/lite_model/operations/addfold_packer.cc

@@ -1,34 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "predict/converter/lite_model/op_attr_packer.h"

namespace mindspore {
namespace predict {
namespace convert {
bool AddFoldPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op) {
if (c_node_ptr == nullptr || ms_op == nullptr) {
return false;
}
std::unique_ptr<AddFoldT> attr(new AddFoldT());
MS_EXCEPTION_IF_NULL(attr);
ms_op->attr.type = OpT_AddFold;
ms_op->attr.value = attr.release();
return true;
}
} // namespace convert
} // namespace predict
} // namespace mindspore

+ 0
- 34
mindspore/ccsrc/predict/converter/lite_model/operations/argmax_packer.cc

@@ -1,34 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "predict/converter/lite_model/op_attr_packer.h"

namespace mindspore {
namespace predict {
namespace convert {
bool ArgMaxPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op) {
if (c_node_ptr == nullptr || ms_op == nullptr) {
return false;
}
std::unique_ptr<ArgMaxT> attr(new ArgMaxT());
MS_EXCEPTION_IF_NULL(attr);
ms_op->attr.type = OpT_ArgMax;
ms_op->attr.value = attr.release();
return true;
}
} // namespace convert
} // namespace predict
} // namespace mindspore

+ 0
- 34
mindspore/ccsrc/predict/converter/lite_model/operations/batchnormfold_packer.cc

@@ -1,34 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "predict/converter/lite_model/op_attr_packer.h"

namespace mindspore {
namespace predict {
namespace convert {
bool BatchNormFoldPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op) {
if (c_node_ptr == nullptr || ms_op == nullptr) {
return false;
}
std::unique_ptr<BatchNormFoldT> attr(new BatchNormFoldT());
MS_EXCEPTION_IF_NULL(attr);
ms_op->attr.type = OpT_BatchNormFold;
ms_op->attr.value = attr.release();
return true;
}
} // namespace convert
} // namespace predict
} // namespace mindspore

+ 0
- 37
mindspore/ccsrc/predict/converter/lite_model/operations/biasadd_packer.cc

@@ -1,37 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <memory>
#include "predict/converter/lite_model/op_attr_packer.h"

namespace mindspore {
namespace predict {
namespace convert {
bool BiasAddPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op) {
if (c_node_ptr == nullptr || ms_op == nullptr) {
return false;
}
std::unique_ptr<BiasAddT> attr(new BiasAddT());
MS_EXCEPTION_IF_NULL(attr);
attr->axis = {1};
ms_op->name = c_node_ptr->fullname_with_scope();
ms_op->attr.type = OpT_BiasAdd;
ms_op->attr.value = attr.release();
return true;
}
} // namespace convert
} // namespace predict
} // namespace mindspore

+ 0
- 37
mindspore/ccsrc/predict/converter/lite_model/operations/cast_packer.cc

@@ -1,37 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "predict/converter/lite_model/op_attr_packer.h"

namespace mindspore {
namespace predict {
namespace convert {
bool CastPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op) {
if (c_node_ptr == nullptr || ms_op == nullptr) {
return false;
}
std::unique_ptr<CastT> attr(new CastT());
MS_EXCEPTION_IF_NULL(attr);
attr->srcT = 0;
attr->dstT = 0;
ms_op->name = c_node_ptr->fullname_with_scope();
ms_op->attr.type = OpT_Cast;
ms_op->attr.value = attr.release();
return true;
}
} // namespace convert
} // namespace predict
} // namespace mindspore

+ 0
- 63
mindspore/ccsrc/predict/converter/lite_model/operations/conv2d_packer.cc

@@ -1,63 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "predict/converter/lite_model/op_attr_packer.h"

namespace mindspore {
namespace predict {
namespace convert {
bool Conv2dPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op) {
if (c_node_ptr == nullptr || ms_op == nullptr) {
return false;
}
int kernel_group_value = AnfAlgo::GetNodeAttr<int>(c_node_ptr, "group");
int kernel_channel_value = AnfAlgo::GetNodeAttr<int>(c_node_ptr, "out_channel");
std::vector<int> kernel_size_value = AnfAlgo::GetNodeAttr<std::vector<int>>(c_node_ptr, "kernel_size");
std::string kernel_pad_mode_value = AnfAlgo::GetNodeAttr<std::string>(c_node_ptr, "pad_mode");
int kernel_pad_value = AnfAlgo::GetNodeAttr<int>(c_node_ptr, "pad");
auto kernel_stride_value = AnfAlgo::GetNodeAttr<std::vector<int>>(c_node_ptr, "stride");
auto kernel_dilation_value = AnfAlgo::GetNodeAttr<std::vector<int>>(c_node_ptr, "dilation");
std::string kernel_data_format_value = AnfAlgo::GetNodeAttr<std::string>(c_node_ptr, "data_format");
std::unique_ptr<Conv2DT> attr(new Conv2DT());
MS_EXCEPTION_IF_NULL(attr);
attr->format = GetAttrFormat(kernel_data_format_value);
attr->group = kernel_group_value;
auto in_shape = AnfAlgo::GetPrevNodeOutputInferShape(c_node_ptr, 1);
if (in_shape.size() != kNCHWSize) {
return false;
}
attr->channelIn = SizeToInt(in_shape[1]);
attr->channelOut = kernel_channel_value;
attr->kernelW = kernel_size_value[0];
attr->kernelH = kernel_size_value[1];
attr->strideW = kernel_stride_value[0];
attr->strideH = kernel_stride_value[1];
attr->padMode = GetAttrPadMode(kernel_pad_mode_value);
attr->padUp = kernel_pad_value;
attr->padDown = kernel_pad_value;
attr->padLeft = kernel_pad_value;
attr->padRight = kernel_pad_value;
attr->dilateW = kernel_dilation_value[0];
attr->dilateH = kernel_dilation_value[1];
attr->hasBias = false;
ms_op->name = c_node_ptr->fullname_with_scope();
ms_op->attr.type = OpT_Conv2D;
ms_op->attr.value = attr.release();
return true;
}
} // namespace convert
} // namespace predict
} // namespace mindspore

+ 0
- 34
mindspore/ccsrc/predict/converter/lite_model/operations/fakequantwithminmax_packer.cc

@@ -1,34 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "predict/converter/lite_model/op_attr_packer.h"

namespace mindspore {
namespace predict {
namespace convert {
bool FakeQuantWithMinMaxPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op) {
if (c_node_ptr == nullptr || ms_op == nullptr) {
return false;
}
std::unique_ptr<FakeQuantWithMinMaxT> attr(new FakeQuantWithMinMaxT());
MS_EXCEPTION_IF_NULL(attr);
ms_op->attr.type = OpT_FakeQuantWithMinMax;
ms_op->attr.value = attr.release();
return true;
}
} // namespace convert
} // namespace predict
} // namespace mindspore

+ 0
- 34
mindspore/ccsrc/predict/converter/lite_model/operations/fakequantwithminmaxperchannel_packer.cc

@@ -1,34 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "predict/converter/lite_model/op_attr_packer.h"

namespace mindspore {
namespace predict {
namespace convert {
bool FakeQuantWithMinMaxPerChannelPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op) {
if (c_node_ptr == nullptr || ms_op == nullptr) {
return false;
}
std::unique_ptr<FakeQuantWithMinMaxPerChannelT> attr(new FakeQuantWithMinMaxPerChannelT());
MS_EXCEPTION_IF_NULL(attr);
ms_op->attr.type = OpT_FakeQuantWithMinMaxPerChannel;
ms_op->attr.value = attr.release();
return true;
}
} // namespace convert
} // namespace predict
} // namespace mindspore

+ 0
- 37
mindspore/ccsrc/predict/converter/lite_model/operations/fusedbatchnorm_packer.cc

@@ -1,37 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "predict/converter/lite_model/op_attr_packer.h"

namespace mindspore {
namespace predict {
namespace convert {
bool FusedBatchNormPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op) {
if (c_node_ptr == nullptr || ms_op == nullptr) {
return false;
}
std::unique_ptr<FusedBatchNormT> attr(new FusedBatchNormT());
MS_EXCEPTION_IF_NULL(attr);
auto kernel_epsilon = AnfAlgo::GetNodeAttr<float>(c_node_ptr, "epsilon");
attr->epsilon = kernel_epsilon;
ms_op->name = c_node_ptr->fullname_with_scope();
ms_op->attr.type = OpT_FusedBatchNorm;
ms_op->attr.value = attr.release();
return true;
}
} // namespace convert
} // namespace predict
} // namespace mindspore

+ 0
- 39
mindspore/ccsrc/predict/converter/lite_model/operations/matmul_packer.cc

@@ -1,39 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "predict/converter/lite_model/op_attr_packer.h"

namespace mindspore {
namespace predict {
namespace convert {
bool MatMulPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op) {
if (c_node_ptr == nullptr || ms_op == nullptr) {
return false;
}
bool kernel_transpore_a = AnfAlgo::GetNodeAttr<bool>(c_node_ptr, "transpose_a");
bool kernel_transpore_b = AnfAlgo::GetNodeAttr<bool>(c_node_ptr, "transpose_b");
std::unique_ptr<MatMulT> attr(new MatMulT());
MS_EXCEPTION_IF_NULL(attr);
attr->transposeA = kernel_transpore_a;
attr->transposeB = kernel_transpore_b;
ms_op->name = c_node_ptr->fullname_with_scope();
ms_op->attr.type = OpT_MatMul;
ms_op->attr.value = attr.release();
return true;
}
} // namespace convert
} // namespace predict
} // namespace mindspore

+ 0
- 37
mindspore/ccsrc/predict/converter/lite_model/operations/mean_packer.cc

@@ -1,37 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "predict/converter/lite_model/op_attr_packer.h"

namespace mindspore {
namespace predict {
namespace convert {
bool MeanPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op) {
if (c_node_ptr == nullptr || ms_op == nullptr) {
return false;
}
std::unique_ptr<MeanT> attr(new MeanT());
MS_EXCEPTION_IF_NULL(attr);
attr->axis = {1};
attr->keepDims = false;
ms_op->name = c_node_ptr->fullname_with_scope();
ms_op->attr.type = OpT_Mean;
ms_op->attr.value = attr.release();
return true;
}
} // namespace convert
} // namespace predict
} // namespace mindspore

+ 0
- 34
mindspore/ccsrc/predict/converter/lite_model/operations/mul_packer.cc

@@ -1,34 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "predict/converter/lite_model/op_attr_packer.h"

namespace mindspore {
namespace predict {
namespace convert {
bool MulPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op) {
if (c_node_ptr == nullptr || ms_op == nullptr) {
return false;
}
std::unique_ptr<MulT> attr(new MulT());
MS_EXCEPTION_IF_NULL(attr);
ms_op->attr.type = OpT_Mul;
ms_op->attr.value = attr.release();
return true;
}
} // namespace convert
} // namespace predict
} // namespace mindspore

+ 0
- 35
mindspore/ccsrc/predict/converter/lite_model/operations/mulflod_packer.cc

@@ -1,35 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "predict/converter/lite_model/op_attr_packer.h"

namespace mindspore {
namespace predict {
namespace convert {
bool MulFoldPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op) {
if (c_node_ptr == nullptr || ms_op == nullptr) {
return false;
}
std::unique_ptr<MulFoldT> attr(new MulFoldT());
MS_EXCEPTION_IF_NULL(attr);
ms_op->name = c_node_ptr->fullname_with_scope();
ms_op->attr.type = OpT_MulFold;
ms_op->attr.value = attr.release();
return true;
}
} // namespace convert
} // namespace predict
} // namespace mindspore

+ 0
- 61
mindspore/ccsrc/predict/converter/lite_model/operations/pooling_packer.cc

@@ -1,61 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "predict/converter/lite_model/op_attr_packer.h"

namespace mindspore {
namespace predict {
namespace convert {
bool PoolingPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op) {
if (c_node_ptr == nullptr || ms_op == nullptr) {
return false;
}
std::unique_ptr<PoolingT> attr(new PoolingT());
MS_EXCEPTION_IF_NULL(attr);
std::string kernel_format_value = AnfAlgo::GetNodeAttr<std::string>(c_node_ptr, "data_format");
attr->format = GetAttrFormat(kernel_format_value);
auto c_name = AnfAlgo::GetCNodeName(c_node_ptr);
if (c_name == "MaxPool") {
ms_op->name = c_node_ptr->fullname_with_scope();
attr->poolingMode = mindspore::predict::PoolMode::PoolMode_MAX_POOLING;
} else if (c_name == "MeanPool") {
ms_op->name = c_node_ptr->fullname_with_scope();
attr->poolingMode = mindspore::predict::PoolMode::PoolMode_MEAN_POOLING;
} else if (c_name == "GlobalPool") {
ms_op->name = c_node_ptr->fullname_with_scope();
} else {
MS_LOG(ERROR) << "unknown pooling type.";
return false;
}
std::vector<int> kernel_ksize = AnfAlgo::GetNodeAttr<std::vector<int>>(c_node_ptr, "ksize");
attr->windowW = kernel_ksize[kHIndex];
attr->windowH = kernel_ksize[kWIndex];
std::vector<int> kernel_strides = AnfAlgo::GetNodeAttr<std::vector<int>>(c_node_ptr, "strides");
attr->strideW = kernel_strides[kHIndex];
attr->strideH = kernel_strides[kWIndex];
std::string kernel_pad_mode_value = AnfAlgo::GetNodeAttr<std::string>(c_node_ptr, "padding");
attr->padMode = GetAttrPadMode(kernel_pad_mode_value);
attr->padUp = 0;
attr->padDown = 0;
attr->padLeft = 0;
attr->padRight = 0;
ms_op->attr.type = OpT_Pooling;
ms_op->attr.value = attr.release();
return true;
}
} // namespace convert
} // namespace predict
} // namespace mindspore

+ 0
- 36
mindspore/ccsrc/predict/converter/lite_model/operations/reshape_packer.cc

@@ -1,36 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "predict/converter/lite_model/op_attr_packer.h"

namespace mindspore {
namespace predict {
namespace convert {
bool ReshapePacker(const CNodePtr &c_node_ptr, OpDefT *ms_op) {
if (c_node_ptr == nullptr || ms_op == nullptr) {
return false;
}
std::unique_ptr<ReshapeT> attr(new ReshapeT());
MS_EXCEPTION_IF_NULL(attr);
attr->format = predict::Format::Format_NCHW;
ms_op->name = c_node_ptr->fullname_with_scope();
ms_op->attr.type = OpT_Reshape;
ms_op->attr.value = attr.release();
return true;
}
} // namespace convert
} // namespace predict
} // namespace mindspore

+ 0
- 36
mindspore/ccsrc/predict/converter/lite_model/operations/scale_packer.cc

@@ -1,36 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "predict/converter/lite_model/op_attr_packer.h"

namespace mindspore {
namespace predict {
namespace convert {
bool ScalePacker(const CNodePtr &c_node_ptr, OpDefT *ms_op) {
if (c_node_ptr == nullptr || ms_op == nullptr) {
return false;
}
std::unique_ptr<ScaleT> attr(new ScaleT());
MS_EXCEPTION_IF_NULL(attr);
attr->format = predict::Format::Format_NCHW;
ms_op->name = c_node_ptr->fullname_with_scope();
ms_op->attr.type = OpT_Scale;
ms_op->attr.value = attr.release();
return true;
}
} // namespace convert
} // namespace predict
} // namespace mindspore

+ 0
- 36
mindspore/ccsrc/predict/converter/lite_model/operations/softmax_packer.cc

@@ -1,36 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "predict/converter/lite_model/op_attr_packer.h"

namespace mindspore {
namespace predict {
namespace convert {
bool SoftmaxPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op) {
if (c_node_ptr == nullptr || ms_op == nullptr) {
return false;
}
std::unique_ptr<SoftMaxT> attr(new SoftMaxT());
MS_EXCEPTION_IF_NULL(attr);
attr->axis = {1};
ms_op->name = c_node_ptr->fullname_with_scope();
ms_op->attr.type = OpT_SoftMax;
ms_op->attr.value = attr.release();
return true;
}
} // namespace convert
} // namespace predict
} // namespace mindspore

+ 0
- 38
mindspore/ccsrc/predict/converter/lite_model/operations/squeeze_packer.cc

@@ -1,38 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "predict/converter/lite_model/op_attr_packer.h"

namespace mindspore {
namespace predict {
namespace convert {
bool SqueezePacker(const CNodePtr &c_node_ptr, OpDefT *ms_op) {
if (c_node_ptr == nullptr || ms_op == nullptr) {
return false;
}
std::unique_ptr<SqueezeT> attr(new SqueezeT());
MS_EXCEPTION_IF_NULL(attr);

std::vector<int> kernel_axis_value = AnfAlgo::GetNodeAttr<std::vector<int>>(c_node_ptr, "axis");
attr->axis = kernel_axis_value;

ms_op->attr.type = OpT_Squeeze;
ms_op->attr.value = attr.release();
return true;
}
} // namespace convert
} // namespace predict
} // namespace mindspore

+ 0
- 31
mindspore/ccsrc/predict/generator/ir/ir_model.cc

@@ -1,31 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "predict/generator/ir/ir_model.h"

#include <utility>
#include <algorithm>

#include "utils/log_adapter.h"

namespace mindspore {
namespace generator {
IRModel::~IRModel() { ir_tasks_.clear(); }
void IRModel::SetIrTaskInfos(const std::vector<IRtaskInfoPtr> &ir_tasks) {
(void)std::copy(ir_tasks.begin(), ir_tasks.end(), std::back_inserter(ir_tasks_));
}
} // namespace generator
} // namespace mindspore

+ 0
- 37
mindspore/ccsrc/predict/generator/ir/ir_model.h

@@ -1,37 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_EXECUTOR_GENERATOR_IR_IR_MODEL_H_
#define MINDSPORE_CCSRC_EXECUTOR_GENERATOR_IR_IR_MODEL_H_
#include <string>
#include <vector>
#include <memory>
#include "predict/generator/ir/ir_task_info.h"
namespace mindspore {
namespace generator {
class IRModel {
public:
void SetIrTaskInfos(const std::vector<IRtaskInfoPtr> &ir_tasks);
IRModel() = default;
~IRModel();

private:
std::vector<IRtaskInfoPtr> ir_tasks_;
};
using IrModelPtr = std::shared_ptr<IRModel>;
} // namespace generator
} // namespace mindspore

#endif // MINDSPORE_CCSRC_EXECUTOR_GENERATOR_IR_IR_MODEL_H_

+ 0
- 244
mindspore/ccsrc/predict/generator/ir/ir_task_info.cc

@@ -1,244 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "predict/generator/ir/ir_task_info.h"
#include "utils/log_adapter.h"

namespace mindspore {
namespace generator {
bool CceIRTaskInfo::SerializeIRToProto() {
auto cce_task_def_ptr = std::make_unique<ge::model_runner::CceTaskDef>();
auto kernel_context_ptr = std::make_unique<ge::model_runner::KernelContext>();
MS_EXCEPTION_IF_NULL(cce_task_def_ptr);
MS_EXCEPTION_IF_NULL(kernel_context_ptr);
kernel_context_ptr->set_kernel_type(k_ctx_.kernel_type);
kernel_context_ptr->set_op_id(k_ctx_.op_id);
kernel_context_ptr->set_kernel_func_id(k_ctx_.kernel_func_id);
kernel_context_ptr->set_op_index(k_ctx_.op_index);
kernel_context_ptr->set_is_flowtable(k_ctx_.is_flowtable);
kernel_context_ptr->set_args_count(k_ctx_.args_count);
for (unsigned int i : k_ctx_.origin_op_index) {
kernel_context_ptr->add_origin_op_index(i);
}
void *tmp_args_offset = static_cast<void *>((k_ctx_.args_offset).data());
if (tmp_args_offset == nullptr) {
MS_LOG(WARNING) << "tmp_args_offset has no data";
return false;
}
kernel_context_ptr->set_args_offset(tmp_args_offset, k_ctx_.args_offset.size());
cce_task_def_ptr->set_allocated_kernel_context(kernel_context_ptr.release());
cce_task_def_ptr->set_stub_func(stub_func_);
cce_task_def_ptr->set_block_dim(block_dim_);
cce_task_def_ptr->set_args_size(args_size_);
void *tmp_sm_desc = static_cast<void *>(sm_desc_.data());
if (tmp_sm_desc == nullptr) {
MS_LOG(WARNING) << "tmp_sm_desc has no data";
return false;
}
cce_task_def_ptr->set_sm_desc(tmp_sm_desc, sm_desc_.size());

void *tmp_flow_table = static_cast<void *>(flow_table_.data());
if (tmp_flow_table == nullptr) {
MS_LOG(WARNING) << "tmp_flow_table has no data";
return false;
}
cce_task_def_ptr->set_flow_table(tmp_flow_table, flow_table_.size());
return true;
}

CceIRTaskInfo::~CceIRTaskInfo() {
args_.clear();
sm_desc_.clear();
flow_table_.clear();
}

bool TbeIRTaskInfo::SerializeIRToProto() {
auto tbe_task_def_ptr = std::make_unique<ge::model_runner::TbeTaskDef>();
MS_EXCEPTION_IF_NULL(tbe_task_def_ptr);
tbe_task_def_ptr->set_stub_func(stub_func_);
tbe_task_def_ptr->set_block_dim(block_dim_);
tbe_task_def_ptr->set_args_size(args_size_);
void *tmp_args = static_cast<void *>(args_.data());
if (tmp_args == nullptr) {
MS_LOG(WARNING) << "tmp_args has no data";
return false;
}
tbe_task_def_ptr->set_args(tmp_args, args_.size());
void *tmp_sm_desc = static_cast<void *>(sm_desc_.data());
if (tmp_sm_desc == nullptr) {
MS_LOG(WARNING) << "tmp_sm_desc has no data";
return false;
}
tbe_task_def_ptr->set_sm_desc(tmp_sm_desc, sm_desc_.size());
void *tmp_meta_data = static_cast<void *>(meta_data_.data());
if (tmp_meta_data == nullptr) {
MS_LOG(WARNING) << "tmp_meta_data has no data";
return false;
}
tbe_task_def_ptr->set_meta_data(tmp_meta_data, meta_data_.size());
for (auto &in : input_data_addrs_) {
tbe_task_def_ptr->add_input_addrs(in);
}
for (auto &ou : output_data_addrs_) {
tbe_task_def_ptr->add_output_addrs(ou);
}
for (auto &wk : workspace_addrs_) {
tbe_task_def_ptr->add_workspace_addrs(wk);
}
return true;
}

TbeIRTaskInfo::~TbeIRTaskInfo() {
args_.clear();
sm_desc_.clear();
meta_data_.clear();
input_data_addrs_.clear();
output_data_addrs_.clear();
workspace_addrs_.clear();
}

bool AicpuIRTaskInfo::SerializeIRToProto() {
auto aicpu_task_def_ptr = std::make_unique<ge::model_runner::AicpuTaskDef>();
MS_EXCEPTION_IF_NULL(aicpu_task_def_ptr);
aicpu_task_def_ptr->set_op_type(op_type_);
aicpu_task_def_ptr->set_flag(flag_);
for (auto &shape : input_data_shapes_) {
auto in_shape_ptr = aicpu_task_def_ptr->add_input_shapes();
for (auto &in_sh : shape) {
in_shape_ptr->add_shape(static_cast<uint32_t>(in_sh));
}
}
for (auto &shape : output_data_shapes_) {
auto ou_shape_ptr = aicpu_task_def_ptr->add_output_shapes();
for (auto &ou_sh : shape) {
ou_shape_ptr->add_shape(static_cast<uint32_t>(ou_sh));
}
}
for (auto &in_type : input_data_types_) {
aicpu_task_def_ptr->add_input_types(in_type);
}
for (auto &ou_type : output_data_types_) {
aicpu_task_def_ptr->add_output_types(ou_type);
}
for (auto &in_addr : input_data_addrs_) {
aicpu_task_def_ptr->add_input_addrs(in_addr);
}
for (auto &ou_addr : output_data_addrs_) {
aicpu_task_def_ptr->add_output_addrs(ou_addr);
}
void *tmp_node_def = static_cast<void *>(node_def_.data());
if (tmp_node_def == nullptr) {
MS_LOG(WARNING) << "tmp_node_def has no data";
return false;
}
aicpu_task_def_ptr->set_node_def(tmp_node_def, node_def_.size());
void *tmp_func_def = static_cast<void *>(func_def_.data());
if (tmp_func_def == nullptr) {
MS_LOG(WARNING) << "tmp_func_def has no data";
return false;
}
aicpu_task_def_ptr->set_func_def(tmp_func_def, func_def_.size());
return true;
}

AicpuIRTaskInfo::~AicpuIRTaskInfo() {
input_data_types_.clear();
input_data_shapes_.clear();
input_data_addrs_.clear();
output_data_types_.clear();
output_data_shapes_.clear();
output_data_addrs_.clear();
node_def_.clear();
func_def_.clear();
}

bool LabelIRTaskInfo::SerializeIRToProto() {
auto label_task_def_ptr = std::make_unique<ge::model_runner::LabelTaskDef>();
MS_EXCEPTION_IF_NULL(label_task_def_ptr);
label_task_def_ptr->set_label_id(label_id_);
return true;
}

bool EventIRTaskInfo::SerializeIRToProto() {
auto event_task_def_ptr = std::make_unique<ge::model_runner::EventTaskDef>();
MS_EXCEPTION_IF_NULL(event_task_def_ptr);
event_task_def_ptr->set_event_id(event_id_);
return true;
}

bool HcclIRTaskInfo::SerializeIRToProto() {
auto hccl_task_def_ptr = std::make_unique<ge::model_runner::HcclTaskDef>();
MS_EXCEPTION_IF_NULL(hccl_task_def_ptr);
hccl_task_def_ptr->set_hccl_type(hccl_type_);
hccl_task_def_ptr->set_input_addr(input_data_addr_);
hccl_task_def_ptr->set_output_addr(output_data_addr_);
auto tmp_wk = static_cast<void *>(workspace_.data());
hccl_task_def_ptr->set_workspace(tmp_wk, workspace_.size());
hccl_task_def_ptr->set_workspace_num(workspace_num_);
auto tmp_pri_def = static_cast<void *>(private_def_.data());
hccl_task_def_ptr->set_private_def(tmp_pri_def, private_def_.size());
hccl_task_def_ptr->set_ops_kernel_store(ops_kernel_store_);
hccl_task_def_ptr->set_count(count_);
hccl_task_def_ptr->set_root_id(root_id_);
hccl_task_def_ptr->set_op_type(op_type_);
hccl_task_def_ptr->set_data_type(data_type_);
return true;
}

HcclIRTaskInfo::~HcclIRTaskInfo() {
workspace_.clear();
private_def_.clear();
}

bool ProfilerIRTaskInfo::SerializeIRToProto() {
auto profiler_task_def_ptr = std::make_unique<ge::model_runner::ProfilerTaskDef>();
MS_EXCEPTION_IF_NULL(profiler_task_def_ptr);
profiler_task_def_ptr->set_log_id(log_id_);
profiler_task_def_ptr->set_flat(flat_);
profiler_task_def_ptr->set_notify(notify_);
return true;
}

bool MemcpyAsyncIRTaskInfo::SerializeIRToProto() {
auto mem_task_def_ptr = std::make_unique<ge::model_runner::MemcpyAsyncTaskDef>();
MS_EXCEPTION_IF_NULL(mem_task_def_ptr);
mem_task_def_ptr->set_dst(dst_);
mem_task_def_ptr->set_dst_max(dst_max_);
mem_task_def_ptr->set_src(src_);
mem_task_def_ptr->set_count(count_);
mem_task_def_ptr->set_kind(kind_);
return true;
}

bool StreamSwitchIRTaskInfo::SerializeIRToProto() {
auto stream_switch_task_def_ptr = std::make_unique<ge::model_runner::StreamSwitchTaskDef>();
MS_EXCEPTION_IF_NULL(stream_switch_task_def_ptr);
stream_switch_task_def_ptr->set_true_stream_id(true_stream_id_);
stream_switch_task_def_ptr->set_input_addr(input_addr_);
stream_switch_task_def_ptr->set_value_addr(value_addr_);
stream_switch_task_def_ptr->set_cond(cond_);
stream_switch_task_def_ptr->set_data_type(data_type_);
return true;
}

bool StreamActiveIRTaskInfo::SerializeIRToProto() {
auto stream_active_task_def_ptr = std::make_unique<ge::model_runner::StreamActiveTaskDef>();
MS_EXCEPTION_IF_NULL(stream_active_task_def_ptr);
stream_active_task_def_ptr->set_active_stream_id(active_stream_id_);
return true;
}
} // namespace generator
} // namespace mindspore

+ 0
- 295
mindspore/ccsrc/predict/generator/ir/ir_task_info.h

@@ -1,295 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_EXECUTOR_GENERATOR_IR_IR_TASK_H_
#define MINDSPORE_CCSRC_EXECUTOR_GENERATOR_IR_IR_TASK_H_
#include <cstdint>
#include <utility>
#include <memory>
#include <string>
#include <vector>
#include "proto/ge_runtime_taskinfo.pb.h"

namespace mindspore {
namespace generator {
using TaskType = ::ge::model_runner::TaskDef_TaskType;
enum TaskTmpType {
CCE_TMP_DEF = 0,
TBE_TMP_DEF = 1,
AICPU_TMP_DEF = 2,
LABEL_TMP_DEF = 3,
EVENT_TMP_DEF = 4,
HCCL_TMP_DEF = 5,
PROFILER_TRACE_TMP_DEF = 6,
MEMCPY_ASYNC_TMP_DEF = 7,
STREAM_SWITCH_TMP_DEF = 8,
STREAM_ACTIVE_TMP_DEF = 9
};

struct KernelContext {
uint32_t kernel_type = 0;
uint32_t op_id = 0;
uint32_t kernel_func_id = 0;
uint32_t op_index = 0;
bool is_flowtable = false;
std::vector<uint8_t> args_offset;
uint32_t args_count = 0;
std::vector<uint32_t> origin_op_index;
};

class IRtaskInfo {
public:
virtual ~IRtaskInfo() = default;
virtual bool SerializeIRToProto() = 0;

protected:
IRtaskInfo(TaskType task_type, TaskTmpType task_tmp_type, uint64_t stream_id)
: task_type_(task_type), task_tmp_type_(task_tmp_type), stream_id_(stream_id) {}

public:
uint64_t GetStreamId() const { return stream_id_; }
TaskType GetTaskType() const { return task_type_; }
TaskTmpType GetTaskTmpType() const { return task_tmp_type_; }

private:
TaskType task_type_;
TaskTmpType task_tmp_type_;
uint64_t stream_id_ = 0;
};

using IRtaskInfoPtr = std::shared_ptr<IRtaskInfo>;

class CceIRTaskInfo : public IRtaskInfo {
public:
CceIRTaskInfo(TaskType task_type, uint64_t stream_id, KernelContext k_ctx, std::string stub_func, uint32_t block_dim,
std::vector<uint8_t> args, uint32_t args_size, std::vector<uint8_t> sm_desc,
std::vector<uint8_t> flow_table)
: IRtaskInfo(task_type, CCE_TMP_DEF, stream_id),
k_ctx_(std::move(k_ctx)),
stub_func_(std::move(stub_func)),
block_dim_(block_dim),
args_(std::move(args)),
args_size_(args_size),
sm_desc_(std::move(sm_desc)),
flow_table_(std::move(flow_table)) {}
~CceIRTaskInfo() override;
bool SerializeIRToProto() override;

private:
KernelContext k_ctx_;
std::string stub_func_;
uint32_t block_dim_ = 0;
std::vector<uint8_t> args_;
// uintptr_t args_addr_;
uint32_t args_size_ = 0;
std::vector<uint8_t> sm_desc_;
std::vector<uint8_t> flow_table_;
};

class TbeIRTaskInfo : public IRtaskInfo {
public:
TbeIRTaskInfo(TaskType task_type, uint64_t stream_id, std::string stub_func, uint32_t block_dim,
std::vector<uint8_t> args, uint32_t args_size, std::vector<uint8_t> sm_desc,
std::vector<uint8_t> meta_data, std::vector<uintptr_t> input_data_addrs,
std::vector<uintptr_t> output_data_addrs, std::vector<uintptr_t> workspace_addrs)
: IRtaskInfo(task_type, TBE_TMP_DEF, stream_id),
stub_func_(std::move(stub_func)),
block_dim_(block_dim),
args_(std::move(args)),
args_size_(args_size),
sm_desc_(std::move(sm_desc)),
meta_data_(std::move(meta_data)),
input_data_addrs_(std::move(input_data_addrs)),
output_data_addrs_(std::move(output_data_addrs)),
workspace_addrs_(std::move(workspace_addrs)) {}
~TbeIRTaskInfo() override;
bool SerializeIRToProto() override;

private:
std::string stub_func_;
uint32_t block_dim_ = 0;
std::vector<uint8_t> args_;
uint32_t args_size_ = 0;
std::vector<uint8_t> sm_desc_;
// uintptr_t binary_;
// uint32_t binary_size_;
std::vector<uint8_t> meta_data_;
std::vector<uintptr_t> input_data_addrs_;
std::vector<uintptr_t> output_data_addrs_;
std::vector<uintptr_t> workspace_addrs_;
// std::vector<uint8_t> flow_table_;
};

class AicpuIRTaskInfo : public IRtaskInfo {
public:
AicpuIRTaskInfo(TaskType task_type, uint64_t stream_id, std::string op_type, uint32_t flag,
std::vector<uint32_t> input_data_types, std::vector<std::vector<size_t>> input_data_shapes,
std::vector<uintptr_t> input_data_addrs, std::vector<uint32_t> output_data_types,
std::vector<std::vector<size_t>> output_data_shapes, std::vector<uintptr_t> output_data_addrs,
std::vector<uint8_t> node_def, std::vector<uint8_t> func_def)
: IRtaskInfo(task_type, AICPU_TMP_DEF, stream_id),
op_type_(std::move(op_type)),
flag_(flag),
input_data_types_(std::move(input_data_types)),
input_data_shapes_(std::move(input_data_shapes)),
input_data_addrs_(std::move(input_data_addrs)),
output_data_types_(std::move(output_data_types)),
output_data_shapes_(std::move(output_data_shapes)),
output_data_addrs_(std::move(output_data_addrs)),
node_def_(std::move(node_def)),
func_def_(std::move(func_def)) {}
~AicpuIRTaskInfo() override;
bool SerializeIRToProto() override;

private:
std::string op_type_;
uint32_t flag_ = 0;
std::vector<uint32_t> input_data_types_;
std::vector<std::vector<size_t>> input_data_shapes_;
std::vector<uintptr_t> input_data_addrs_;
std::vector<uint32_t> output_data_types_;
std::vector<std::vector<size_t>> output_data_shapes_;
std::vector<uintptr_t> output_data_addrs_;
std::vector<uint8_t> node_def_;
std::vector<uint8_t> func_def_;
};

class LabelIRTaskInfo : public IRtaskInfo {
public:
LabelIRTaskInfo(TaskType task_type, uint64_t stream_id, uint32_t label_id)
: IRtaskInfo(task_type, LABEL_TMP_DEF, stream_id), label_id_(label_id) {}
~LabelIRTaskInfo() override {}
bool SerializeIRToProto() override;

private:
uint32_t label_id_ = 0;
};

class EventIRTaskInfo : public IRtaskInfo {
public:
EventIRTaskInfo(TaskType task_type, uint64_t stream_id, uint32_t event_id)
: IRtaskInfo(task_type, EVENT_TMP_DEF, stream_id), event_id_(event_id) {}
~EventIRTaskInfo() override {}
bool SerializeIRToProto() override;

private:
uint32_t event_id_ = 0;
};

class HcclIRTaskInfo : public IRtaskInfo {
public:
HcclIRTaskInfo(TaskType task_type, uint64_t stream_id, std::string hccl_type, uintptr_t input_data_addr,
uintptr_t output_data_addr, std::vector<uint8_t> workspace, int64_t workspace_num,
std::vector<uint8_t> private_def, uintptr_t ops_kernel_store, int32_t count, int64_t root_id,
int64_t op_type, int64_t data_type)
: IRtaskInfo(task_type, HCCL_TMP_DEF, stream_id),
hccl_type_(std::move(hccl_type)),
input_data_addr_(input_data_addr),
output_data_addr_(output_data_addr),
workspace_(std::move(workspace)),
workspace_num_(workspace_num),
private_def_(std::move(private_def)),
ops_kernel_store_(ops_kernel_store),
count_(count),
root_id_(root_id),
op_type_(op_type),
data_type_(data_type) {}
~HcclIRTaskInfo() override;
bool SerializeIRToProto() override;

private:
std::string hccl_type_;
uintptr_t input_data_addr_ = 0;
uintptr_t output_data_addr_ = 0;
std::vector<uint8_t> workspace_;
int64_t workspace_num_ = 0;
std::vector<uint8_t> private_def_;
uintptr_t ops_kernel_store_ = 0;
int32_t count_ = 0;
int64_t root_id_ = 0;
int64_t op_type_ = 0;
int64_t data_type_ = 0;
};

class ProfilerIRTaskInfo : public IRtaskInfo {
public:
ProfilerIRTaskInfo(TaskType task_type, uint64_t stream_id, uint64_t log_id, bool notify, uint32_t flat)
: IRtaskInfo(task_type, PROFILER_TRACE_TMP_DEF, stream_id), log_id_(log_id), notify_(notify), flat_(flat) {}
~ProfilerIRTaskInfo() override {}
bool SerializeIRToProto() override;

private:
uint64_t log_id_ = 0;
bool notify_ = false;
uint32_t flat_ = 0;
};

class MemcpyAsyncIRTaskInfo : public IRtaskInfo {
public:
MemcpyAsyncIRTaskInfo(TaskType task_type, uint32_t stream_id, uint64_t dst, uint64_t dst_max, uint64_t src,
uint64_t count, int64_t kind)
: IRtaskInfo(task_type, MEMCPY_ASYNC_TMP_DEF, stream_id),
dst_(dst),
dst_max_(dst_max),
src_(src),
count_(count),
kind_(kind) {}
~MemcpyAsyncIRTaskInfo() override {}
bool SerializeIRToProto() override;

private:
uint64_t dst_ = 0;
uint64_t dst_max_ = 0;
uint64_t src_ = 0;
uint64_t count_ = 0;
uint32_t kind_ = 0;
};

class StreamSwitchIRTaskInfo : public IRtaskInfo {
public:
StreamSwitchIRTaskInfo(TaskType task_type, uint64_t stream_id, uint32_t true_stream_id, uintptr_t input_addr,
uintptr_t value_addr, uint32_t cond, int64_t data_type)
: IRtaskInfo(task_type, STREAM_SWITCH_TMP_DEF, stream_id),
true_stream_id_(true_stream_id),
input_addr_(input_addr),
value_addr_(value_addr),
cond_(cond),
data_type_(data_type) {}
~StreamSwitchIRTaskInfo() override {}
bool SerializeIRToProto() override;

private:
uint32_t true_stream_id_ = 0;
uintptr_t input_addr_ = 0;
uintptr_t value_addr_ = 0;
uint32_t cond_ = 0;
int64_t data_type_ = 0;
};

class StreamActiveIRTaskInfo : public IRtaskInfo {
public:
StreamActiveIRTaskInfo(TaskType task_type, uint64_t stream_id, uint32_t active_stream_id)
: IRtaskInfo(task_type, STREAM_ACTIVE_TMP_DEF, stream_id), active_stream_id_(active_stream_id) {}
~StreamActiveIRTaskInfo() override {}
bool SerializeIRToProto() override;

private:
uint32_t active_stream_id_ = 0;
};
} // namespace generator
} // namespace mindspore

#endif // MINDSPORE_CCSRC_EXECUTOR_GENERATOR_IR_IR_TASK_H_
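Together with ir_model.h above, these task-info classes formed the generator's in-memory IR. A minimal sketch of how they would be combined follows; BuildEventTasks is hypothetical, and the TaskType value is assumed to be supplied by the caller from the runtime task definition:

#include <memory>
#include <vector>
#include "predict/generator/ir/ir_model.h"
#include "predict/generator/ir/ir_task_info.h"

namespace mindspore {
namespace generator {
// Sketch: wrap event ids as IR task infos and register them on an IRModel.
IrModelPtr BuildEventTasks(TaskType task_type, uint64_t stream_id, const std::vector<uint32_t> &event_ids) {
  std::vector<IRtaskInfoPtr> ir_tasks;
  for (auto event_id : event_ids) {
    auto task = std::make_shared<EventIRTaskInfo>(task_type, stream_id, event_id);
    if (task->SerializeIRToProto()) {
      ir_tasks.push_back(task);
    }
  }
  auto ir_model = std::make_shared<IRModel>();
  ir_model->SetIrTaskInfos(ir_tasks);
  return ir_model;
}
}  // namespace generator
}  // namespace mindspore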

+ 0
- 43
mindspore/ccsrc/predict/generator/utils/ir_model_util.cc

@@ -1,43 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "predict/generator/utils/ir_model_util.h"
namespace mindspore {
namespace generator {
IRModelUtil &IRModelUtil::GetInstance() {
static IRModelUtil instance;
return instance;
}

void IRModelUtil::Init() {
MS_LOG(INFO) << "IRModel init success";
version_ = "defaultVersion";
stream_num_ = 0;
event_num_ = 0;
batch_num_ = 0;
memory_size_ = 0;
weight_size_ = 0;
var_size_ = 0;
logic_mem_base_ = 0;
logic_weight_base_ = 0;
logic_var_base_ = 0;
priority_ = 0;
is_enable_save_model_ = false;
min_static_offset_ = 0;
max_dynamic_offset_ = 0;
}
} // namespace generator
} // namespace mindspore

+ 0
- 92
mindspore/ccsrc/predict/generator/utils/ir_model_util.h View File

@@ -1,92 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_PREDICT_GENERATOR_IR_IR_MODEL_UTIL_H_
#define MINDSPORE_CCSRC_PREDICT_GENERATOR_IR_IR_MODEL_UTIL_H_
#include <string>
#include <vector>
#include <memory>
#include <utility>
#include <algorithm>
#include "utils/log_adapter.h"

namespace mindspore {
namespace generator {
class IRModelUtil {
public:
static IRModelUtil &GetInstance();
IRModelUtil(const IRModelUtil &) = delete;
IRModelUtil &operator=(const IRModelUtil &) = delete;
void Init();

void set_version(const std::string &version) { version_ = version; }
void set_stream_num(uint32_t stream_num) { stream_num_ = stream_num; }
void set_event_num(uint32_t event_num) { event_num_ = event_num; }
void set_batch_num(uint32_t batch_num) { batch_num_ = batch_num; }
void set_memory_size(uint32_t memory_size) { memory_size_ = memory_size; }
void set_weight_size(uint32_t weight_size) { weight_size_ = weight_size; }
void set_var_size(uint32_t var_size) { var_size_ = var_size; }
void set_logic_mem_base(uint32_t logic_mem_base) { logic_mem_base_ = logic_mem_base; }
void set_logic_weight_base(uint32_t logic_weight_base) { logic_weight_base_ = logic_weight_base; }
void set_logic_var_base(uint32_t logic_var_base) { logic_var_base_ = logic_var_base; }
void set_priority(uint32_t priority) { priority_ = priority; }
void set_is_enable_save_model(bool is_enable_save_model) { is_enable_save_model_ = is_enable_save_model; }
void set_min_static_offset(uint64_t min_static_offset) { min_static_offset_ = min_static_offset; }
void set_max_dynamic_offset(uint64_t max_dynamic_offset) { max_dynamic_offset_ = max_dynamic_offset; }
void set_max_mem_size(uint64_t max_mem_size) { max_mem_size_ = max_mem_size; }
void set_irmodel_mem_base(uint8_t irmodel_mem_base) { irmodel_mem_base_ = irmodel_mem_base; }

std::string version() const { return version_; }
uint32_t stream_num() const { return stream_num_; }
uint32_t event_num() const { return event_num_; }
uint32_t batch_num() const { return batch_num_; }
uint64_t memory_size() const { return memory_size_; }
uint64_t weight_size() const { return weight_size_; }
uint64_t var_size() const { return var_size_; }
uint64_t logic_mem_base() const { return logic_mem_base_; }
uint64_t logic_weight_base() const { return logic_weight_base_; }
uint64_t logic_var_base() const { return logic_var_base_; }
uint32_t priority() const { return priority_; }
bool is_enable_save_model() const { return is_enable_save_model_; }
uint64_t min_static_offset() const { return min_static_offset_; }
uint64_t max_dynamic_offset() const { return max_dynamic_offset_; }
uint64_t max_mem_size() const { return max_mem_size_; }
uint8_t irmodel_mem_base() const { return irmodel_mem_base_; }

private:
IRModelUtil() = default;
~IRModelUtil() = default;
std::string version_;
uint32_t stream_num_ = 0;
uint32_t event_num_ = 0;
uint32_t batch_num_ = 0;
uint64_t memory_size_ = 0;
uint64_t weight_size_ = 0;
uint64_t var_size_ = 0;
uint64_t logic_mem_base_ = 0;
uint64_t logic_weight_base_ = 0;
uint64_t logic_var_base_ = 0;
uint32_t priority_ = 0;
bool is_enable_save_model_ = false;
uint64_t min_static_offset_ = 0;
uint64_t max_dynamic_offset_ = 0;
uint64_t max_mem_size_ = 0;
uint8_t irmodel_mem_base_ = 0;
};
} // namespace generator
} // namespace mindspore

#endif // MINDSPORE_CCSRC_PREDICT_GENERATOR_IR_IR_MODEL_UTIL_H_
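
A short sketch of driving the IRModelUtil singleton declared above; the literal values and the caller are placeholders, and the setter pattern mirrors the calls removed from ascend_stream_assign.cc further down in this change.

#include <cstdint>
#include "predict/generator/utils/ir_model_util.h"

void RecordModelResources(uint32_t stream_num, uint32_t event_num) {
  auto &ir_model = mindspore::generator::IRModelUtil::GetInstance();
  ir_model.Init();  // resets every field to its default
  ir_model.set_version("defaultVersion");
  ir_model.set_stream_num(stream_num);
  ir_model.set_event_num(event_num);
  ir_model.set_batch_num(1);
  ir_model.set_is_enable_save_model(true);
  // any later consumer reads the same singleton back
  MS_LOG(INFO) << "streams=" << ir_model.stream_num() << ", events=" << ir_model.event_num();
}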

+ 0
- 69
mindspore/ccsrc/predict/predict.cc View File

@@ -1,69 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "predict/predict.h"

#include <memory>
#include <vector>
#include <string>

namespace mindspore {
namespace predictmodel {
void StepConvertGraph(const KernelGraphPtr &kernel_graph_ptr) {
MS_LOG(INFO) << "start convert_graph step";
// get kernel_graph. this graph can be origin or device, depends on which steps to persistence
MS_EXCEPTION_IF_NULL(kernel_graph_ptr);
bool save_ms_model = MsContext::GetInstance()->save_ms_model_flag();
if (save_ms_model) {
if (kernel_graph_ptr->inputs().empty()) {
return;
}
// set convert_mode: convert cpu info or convert Davnici
executor::Kernel2Ms::GetInstance().set_convert_mode(executor::kConvertCpuMode);
// convert kernel_graph to sub_ms_graph
bool ret = executor::Kernel2Ms::GetInstance().KernelGraph2MsGraph(kernel_graph_ptr);
if (!ret) {
MS_LOG(WARNING) << "convert to mindsporeGraph failed";
} else {
MS_LOG(INFO) << "convert to Graph success";
}
}
}

void StepConvertWeight(const std::vector<tensor::TensorPtr> &inputs) {
MS_LOG(INFO) << "start convert_input step";
// get all inputs tensor
bool save_ms_model = MsContext::GetInstance()->save_ms_model_flag();
std::string save_path = MsContext::GetInstance()->save_ms_model_path();
if (save_ms_model) {
if (inputs.empty()) {
return;
}
MS_LOG(INFO) << "save ms model is true to path " << save_path;
if (!executor::Kernel2Ms::GetInstance().KernelInput2MS(inputs)) {
MS_LOG(WARNING) << "convert mindspore kernel input failed";
}
auto new_ms_graph_ptr = std::make_shared<mindspore::predict::GraphDefT>();
bool ret = executor::Kernel2Ms::GetInstance().SaveDeviceModel(new_ms_graph_ptr, save_path);
if (!ret) {
MS_LOG(WARNING) << "convert to mindsporeGraph failed";
} else {
MS_LOG(INFO) << "save ms model success";
}
}
}
} // namespace predictmodel
} // namespace mindspore

+ 0
- 32
mindspore/ccsrc/predict/predict.h View File

@@ -1,32 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_PREDICT_H_
#define MINDSPORE_CCSRC_PREDICT_H_

#include <memory>
#include <vector>
#include "backend/session/session_basic.h"
#include "predict/converter/kernel2ms.h"

namespace mindspore {
namespace predictmodel {
using KernelGraphPtr = std::shared_ptr<mindspore::session::KernelGraph>;
void StepConvertGraph(const KernelGraphPtr &kernel_graph_ptr);
void StepConvertWeight(const std::vector<tensor::TensorPtr> &inputs);
} // namespace predictmodel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_PREDICT_H_
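
A sketch of the call order the two hooks above were invoked in by the sessions (graph conversion at compile time, weight conversion once real inputs exist); the driver function and its arguments are illustrative, not part of the removed code.

#include <vector>
#include "predict/predict.h"

// Hypothetical driver; kernel_graph and inputs would come from the executing session.
void SaveLiteModel(const mindspore::predictmodel::KernelGraphPtr &kernel_graph,
                   const std::vector<mindspore::tensor::TensorPtr> &inputs) {
  // 1. translate the compiled kernel graph into the lite GraphDef layout
  mindspore::predictmodel::StepConvertGraph(kernel_graph);
  // 2. attach the weight/input tensors and write the .ms file
  //    (both steps early-return unless save_ms_model is set in MsContext)
  mindspore::predictmodel::StepConvertWeight(inputs);
}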

+ 0
- 42
mindspore/ccsrc/predict/proto/DModel_ir.proto View File

@@ -1,42 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

syntax = "proto3";
import public "Graph_ir.proto";
import public "ge_runtime_taskinfo.proto";
package ge.model_runner;
option cc_enable_arenas = true;

message ModelTaskDef {

string version = 1;

repeated TaskDef task = 10;

uint32 stream_num = 11;
uint32 event_num = 12;
uint32 batch_num_ = 13;

uint64 memory_size = 14;
uint64 weight_size = 15;
uint64 var_size_ = 16;

uint64 logic_mem_base_ = 17;
uint64 logic_weight_base_ = 18;
uint64 logic_var_base_ = 19;

uint32 priority_ = 20;
}
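
Assuming protoc has generated C++ classes from this file (the header name below is illustrative), a ModelTaskDef would be populated roughly as follows; the values are placeholders.

#include "DModel_ir.pb.h"  // hypothetical name of the generated header

ge::model_runner::ModelTaskDef BuildModelTaskDef() {
  ge::model_runner::ModelTaskDef model_task;
  model_task.set_version("defaultVersion");
  model_task.set_stream_num(2);
  model_task.set_event_num(1);
  model_task.set_memory_size(1 << 20);
  model_task.set_weight_size(1 << 16);
  // every entry of `task` is a TaskDef defined in ge_runtime_taskinfo.proto
  ge::model_runner::TaskDef *task = model_task.add_task();
  task->set_stream_id(0);
  return model_task;
}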

+ 0
- 125
mindspore/ccsrc/predict/proto/Graph_ir.proto View File

@@ -1,125 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

syntax = "proto3";

package mindspore;

// Data type definition
enum DataType {
DT_UNDEFINED = 0;
// Basic types.
DT_BOOL = 1; // bool

DT_INT8 = 2; // int8_t
DT_INT16 = 3; // int16_t
DT_INT32 = 4; // int32_t
DT_INT64 = 5; // int64_t

DT_UINT8 = 6; // uint8_t
DT_UINT16 = 7; // uint16_t
DT_UINT32 = 8; // uint32_t
DT_UINT64 = 9; // uint64_t

DT_FLOAT16 = 10; // float 16
DT_FLOAT32 = 11; // float 32
DT_FLOAT64 = 12; // float 64

DT_STRING = 13; // string
DT_TENSOR = 14; // tensor
DT_GRAPH = 15; // graph

// list type
DT_BOOLS = 16; // list of bool

DT_INTS8 = 17; // list of int8_t
DT_INTS16 = 18; // list of int16_t
DT_INTS32 = 19; // list of int32_t
DT_INTS64 = 20; // list of int64_t

DT_UINTS8 = 21; // list of uint8_t
DT_UINTS16 = 22; // list of uint16_t
DT_UINTS32 = 23; // list of uint32_t
DT_UINTS64 = 24; // list of uint64_t

DT_FLOATS16 = 25; // list of float16
DT_FLOATS32 = 26; // list of float32
DT_FLOATS64 = 27; // list of float64

DT_STRINGS = 28; // list of string
DT_TENSORS = 29; // list of tensor
DT_GRAPHS = 30; // list of graph

DT_TUPLE = 31; // tuple
DT_LIST = 32; // list
DT_DICT = 33; // dictionary

// other types
DT_NONE = 34; // None
DT_SYM_INST = 35; // Symbolic Key Instance

// type related type
DT_BASE_INT = 36; // type generic int
DT_BASE_UINT = 37; // type generate unsigned int
DT_BASE_FLOAT = 38; // type generate float
DT_TYPE = 39; // type type
DT_ANYTHING = 40; // type anything
};

enum MSConst {
DEFAULT_REFCOUNT = 0;
WEIGHT_REFCOUNT = 999;
};

message TensorDef {
DataType data_type = 1;

repeated int64 dims = 2;

string format = 3;
string layout = 4;
uint32 refCount = 5;
uint64 offset = 6;
uint64 size = 7;
uint64 weight_size = 8;
bytes data = 9;
}

message OpDef {
string name = 1;
string type = 2;

string fwk_type = 3;
string opAttr = 4;
repeated int64 input_index = 5;
repeated int64 output_index = 6;
}

message GraphDef {
string name = 1;

repeated int64 input_index = 2;

repeated int64 output_index = 3;
uint64 mempool_size = 4;

repeated OpDef opdefs = 5;

repeated TensorDef alltensors = 6;
}
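
Under the same assumption of protoc-generated C++ classes, the TensorDef/OpDef/GraphDef messages above compose like this; names, shapes, and indices are made up.

#include "Graph_ir.pb.h"  // hypothetical name of the generated header

mindspore::GraphDef BuildTinyGraph() {
  mindspore::GraphDef graph;
  graph.set_name("tiny_graph");
  graph.add_input_index(0);
  graph.add_output_index(1);

  mindspore::TensorDef *weight = graph.add_alltensors();
  weight->set_data_type(mindspore::DT_FLOAT32);
  weight->add_dims(16);
  weight->add_dims(16);
  weight->set_format("NCHW");
  weight->set_refcount(mindspore::WEIGHT_REFCOUNT);  // 999 marks a weight tensor

  mindspore::OpDef *op = graph.add_opdefs();
  op->set_name("matmul_0");
  op->set_type("MatMul");
  op->add_input_index(0);
  op->add_output_index(1);
  return graph;
}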




+ 0
- 155
mindspore/ccsrc/predict/proto/ge_runtime_taskinfo.proto View File

@@ -1,155 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

syntax = "proto3";

package ge.model_runner;
option cc_enable_arenas = true;

message TaskDef {
enum TaskType {
CCE = 0;
TBE = 1;
AICPU = 2;
LABEL_SET = 3;
LABEL_SWITCH = 4;
LABEL_GOTO = 5;
EVENT_RECORD = 6;
EVENT_WAIT = 7;
FUSION_START = 8;
FUSION_END = 9;
HCCL = 10;
PROFILER_TRACE = 11;
MEMCPY_ASYNC = 12;
STREAM_SWITCH = 13;
STREAM_ACTIVE = 14;
// insert new task type here
REVSERVED = 23;
};

TaskType task_type = 1;
uint64 stream_id = 2;
oneof subclass {
CceTaskDef cce_task_def = 3;
TbeTaskDef tbe_task_def = 4;
AicpuTaskDef aicpu_task_def = 5;
LabelTaskDef label_task_def = 6;
EventTaskDef event_task_def = 7;
HcclTaskDef hccl_task_def = 8;
ProfilerTaskDef profiler_task_def = 9;
MemcpyAsyncTaskDef memcpy_async_task_def = 10;
StreamSwitchTaskDef stream_switch_task_def = 11;
StreamActiveTaskDef stream_active_task_def = 12;
}
}

message CceTaskDef {
KernelContext kernel_context = 1;
string stub_func = 2;
uint32 block_dim = 3;
bytes args = 4;
uint32 args_size = 5;
bytes sm_desc = 6;
bytes flow_table = 7;
}

message TbeTaskDef {
string stub_func = 1;
uint32 block_dim = 2;
bytes args = 3;
uint32 args_size = 4;
bytes sm_desc = 5;
bytes meta_data = 8;
repeated uint64 input_addrs = 9;
repeated uint64 output_addrs = 10;
repeated uint64 workspace_addrs = 11;
}

message AicpuTaskDef {
string op_type = 1;
uint32 flag = 2;
repeated uint32 input_types = 3;
repeated Shape input_shapes = 4;
repeated uint64 input_addrs = 5;
repeated uint32 output_types = 6;
repeated Shape output_shapes = 7;
repeated uint64 output_addrs = 8;
bytes node_def = 9;
bytes func_def = 10;
}

message Shape {
repeated uint32 shape = 1;
}

message LabelTaskDef {
uint32 label_id = 1;
}

message EventTaskDef {
uint32 event_id = 1;
}

message HcclTaskDef {
string hccl_type = 1;
uint64 input_addr = 2;
uint64 output_addr = 3;
bytes workspace = 4;
int64 workspace_num = 5;
bytes private_def = 6;
uint64 ops_kernel_store = 7;
int32 count = 8;
int64 root_id = 9;
int64 op_type = 10;
int64 data_type = 11;
}

message ProfilerTaskDef {
uint64 log_id = 1;
bool notify = 2;
uint32 flat = 3;
}

message MemcpyAsyncTaskDef {
uint64 dst = 1;
uint64 dst_max = 2;
uint64 src = 3;
uint64 count = 4;
uint32 kind = 5;
}

message StreamSwitchTaskDef {
uint32 true_stream_id = 1;
uint64 input_addr = 2;
uint64 value_addr = 3;
int64 cond = 4;
int64 data_type = 5;
}

message StreamActiveTaskDef {
uint32 active_stream_id = 1;
}

message KernelContext {
uint32 kernel_type = 1;
uint32 op_id = 2;
uint32 kernel_func_id = 3;
uint32 op_index = 4;
bool is_flowtable = 5;
bytes args_offset = 6;
uint32 args_count = 7;
repeated uint32 origin_op_index = 8;
}
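
For the TaskDef oneof above, a branch is selected through its mutable_* accessor; a memcpy-async task could be filled in like this (addresses and sizes are placeholders, and the header name is assumed).

#include <cstdint>
#include "ge_runtime_taskinfo.pb.h"  // hypothetical name of the generated header

ge::model_runner::TaskDef BuildMemcpyTask(uint64_t dst, uint64_t src, uint64_t bytes) {
  ge::model_runner::TaskDef task;
  task.set_task_type(ge::model_runner::TaskDef::MEMCPY_ASYNC);
  task.set_stream_id(0);
  // calling mutable_memcpy_async_task_def() selects that branch of the oneof
  ge::model_runner::MemcpyAsyncTaskDef *memcpy_def = task.mutable_memcpy_async_task_def();
  memcpy_def->set_dst(dst);
  memcpy_def->set_dst_max(bytes);
  memcpy_def->set_src(src);
  memcpy_def->set_count(bytes);
  memcpy_def->set_kind(0);
  return task;
}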

+ 0
- 17
mindspore/ccsrc/predict/readme.txt View File

@@ -1,17 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

this is a directory for predict, including saving the model and saving task infos.

+ 0
- 1
mindspore/ccsrc/predict/schema/inner/readme.txt View File

@@ -1 +0,0 @@
this is a directory for predict to generate fbs headers

+ 0
- 212
mindspore/ccsrc/predict/schema/ms.fbs View File

@@ -1,212 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

include "op.fbs";

namespace mindspore.predict;

enum MSCONST: int {
WEIGHT_REFCOUNT = 999
}

table QuantParam {
scale: double;
zeroPoint: int;
min: double = 0;
max: double = 0;
narrowRange: bool = true;
numBits: int = 8;
}

table QuantParamArray {
param: [QuantParam]; //pre-channel
}

table TensorDef {
// data type
dataType: DataType;
// shape
dims: [int];
format: Format;
refCount: int;
offset: int;
data: [ubyte];
}

union OpT {
Concat,
SoftMax,
Activation,
Conv2D,
FusedBatchNorm,
CaffeBatchNorm,
BiasAdd,
Pooling,
DepthwiseConv2D,
DeDepthwiseConv2D,
Resize,
DetectionPostProcess,
FullConnection,
Mean,
DeConv2D,
Scale,
Reshape,
Eltwise,
NetOutput,
Add,
Sub,
MatMul,
StridedSlice,
Power,
Slice,
Stack,
Mul,
RealDiv,
Pad,
Maximum,
Minimum,
CaffePReLU,
LeakyReLU,
ArgMax,
ArgMin,
Exp,
CaffeCrop,
Range,
Rsqrt,
ExpandDims,
Tile,
Cast,
Shape,
Nchw2Nhwc,
Nhwc2Nchw,
QuantDTypeCast,
Split,
Permute,
FakeQuantWithMinMaxVars,
Equal,
Less,
Greater,
Min,
Floor,
Abs,
Neg,
Cos,
Sin,
Sqrt,
Square,
Constant,
Log,
Tan,
Atan,
Asin,
Clip,
Transpose,
Squeeze,
Unsqueeze,
Upsample,
Dropout,
Broadcast,
Lrn,
Prelu,
ZerosLike,
TopK,
SpaceToDepth,
SpaceToBatch,
SparseToDense,
ReverseSequence,
Rank,
Gather,
GatherNd,
Fill,
Elu,
DepthToSpace,
BatchToSpace,
AddN,
Ceil,
EmbeddingLookup,
EmbeddingLookupSparse,
FloorDiv,
FloorMod,
L2Norm,
LocalResponseNormalization,
MatrixDiag,
Reduce,
Reverse,
Round,
Select,
Scatter,
Unique,
Unstack,
LogicalAnd,
LogicalOr,
LogicalXor,
LogicalNot,
OnnxInt8Quantize,
OnnxInt8Dequantize,
FakeQuantWithMinMax,
FakeQuantWithMinMaxPerChannel,
BatchNormFold,
MulFold,
AddFold,
SquaredDifference
}

enum QuantType: int {
QUANT_NONE,
AwareTrainning,
WeightQuant,
PostTraining
}

enum FmkType: int {
TF,
CAFFE,
ONNX,
MS,
TFLITE
}

table OpDef {
name: string;
fmkType: FmkType;
attr: OpT;
inputIndex: [uint];
outputIndex: [uint];
quantType: QuantType = QUANT_NONE;
quantParam: [QuantParamArray];
}

table SubGraphDef {
name: string;
inputIndex: [uint];
outputIndex: [uint];
mempoolSize: uint;
nodes: [OpDef];
allTensors: [TensorDef]; // weight + input + output
}

table MempoolCfg {
size: uint;
shiftFactor: uint;
}

table GraphDef {
name: string;
mempoolCfg: MempoolCfg;
subgraphs: [SubGraphDef];
}

root_type GraphDef;
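
Assuming the schema is compiled with flatc --cpp, a serialized buffer is read back through the root accessor generated for root_type GraphDef; the include path matches the one used in the removed benchmark sources, but the function below is only a sketch.

#include <cstdint>
#include <vector>
#include "schema/inner/ms_generated.h"  // assumed output location of `flatc --cpp ms.fbs`

void DumpSubgraphs(const std::vector<uint8_t> &buffer) {
  // GetGraphDef() is the accessor flatc emits for `root_type GraphDef`
  const mindspore::predict::GraphDef *graph = mindspore::predict::GetGraphDef(buffer.data());
  if (graph == nullptr || graph->subgraphs() == nullptr) {
    return;
  }
  for (const mindspore::predict::SubGraphDef *subgraph : *graph->subgraphs()) {
    // each subgraph carries its op list and the flat tensor pool (weights + inputs + outputs)
    const auto *nodes = subgraph->nodes();
    const auto *tensors = subgraph->allTensors();
    (void)nodes;
    (void)tensors;
  }
}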

+ 0
- 699
mindspore/ccsrc/predict/schema/op.fbs View File

@@ -1,699 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

namespace mindspore.predict;

enum ResizeMethod: byte {
UNKNOW = -1,
BILINEAR = 0,
NEAREST_NEIGHBOR = 1
}

enum DataType : int {
DT_FLOAT = 0,
DT_FLOAT16 = 1,
DT_INT8 = 2,
DT_INT32 = 3,
DT_UINT8 = 4,
DT_INT16 = 5,
DT_UINT32 = 8,
DT_INT64 = 9,
DT_UINT16 = 10,
DT_UNDEFINED = 16
}

enum Format : int {
NCHW = 0,
NHWC,
HWKC,
HWCK,
KCHW,
CKHW,
KHWC,
CHWK,
NC4HW4 = 100,
NUM_OF_FORMAT
}

enum ActivationType : byte {
NO_ACTIVATION = 0,
RELU = 1,
SIGMOID = 2,
RELU6 = 3,
ELU = 4,
LEAKY_RELU = 5,
ABS = 6,
RELU1 = 7,
SOFTSIGN = 8,
SOFTPLUS = 9,
TANH = 10,
SELU = 11,
HSWISH = 12,
HSIGMOID = 13,
THRESHOLDRELU = 14,
LINEAR = 15,
UNKNOW = 16
}

enum ReduceType : byte {
REDUCE_MAX = 0,
REDUCE_MEAN = 1,
REDUCE_ALL = 2,
REDUCE_ANY = 3,
REDUCE_LOG_SUM_EXP = 4,
REDUCE_PROD = 5,
REDUCE_SUM = 6,
UNKNOW = 7
}

enum PoolMode : byte {
MAX_POOLING = 0,
MEAN_POOLING = 1,
}

enum EltwiseMode : byte {
PROD = 0,
SUM = 1,
MAXIMUM = 2,
UNKNOW = 3
}

enum PadMode : byte {
NOTSET = 0,
SAME = 1,
VALID = 2,
CAFFE = 4
}

enum RoundMode : byte {
FLOOR = 0,
CEIL = 1
}

enum PaddingMode : byte {
CONSTANT = 0,
REFLECT = 1,
SYMMETRIC = 2,
MODE_RESERVED = 3
}

table Pad {
paddingmode: PaddingMode;
paddings: [int];
}

table Maximum {
}

table Minimum {
}

table Concat {
axis: int;
n: int;
}

table SoftMax {
axis: [int];
}

table Activation {
type: ActivationType = 0;
}

table Conv2D {
format: Format = 0;
group: int;
channelIn: int;
channelOut: int;
kernelW: int;
kernelH: int;
strideW: int;
strideH: int;
padMode: PadMode;
padUp: int;
padDown: int;
padLeft: int;
padRight: int;
dilateW: int;
dilateH: int;
hasBias: bool = false;
activationType: ActivationType = 0;
}

table FusedBatchNorm {
epsilon: float = 0.00001; // eg. epsilon=0.001
momentum: float = 0.9;
spatial: int = 1;
}

table CaffeBatchNorm {
epsilon: float; // eg. epsilon=0.001
}

table Shape {
}

table Nchw2Nhwc {

}

table Nhwc2Nchw {

}

table FakeQuantWithMinMaxVars {
narrowRange: bool;
numBits: int;
}

table BiasAdd {
axis: [int];
}

table Pooling {
format: Format = 0;
poolingMode: PoolMode;
global: bool = false;
windowW: int;
windowH: int;
strideW: int;
strideH: int;
padMode: PadMode;
padUp: int;
padDown: int;
padLeft: int;
padRight: int;
roundMode: RoundMode;
}

table DepthwiseConv2D {
format: Format = 0;
channelIn: int;
channelMultiplier: int;
kernelW: int;
kernelH: int;
strideW: int;
strideH: int;
padMode: PadMode;
padUp: int;
padDown: int;
padLeft: int;
padRight: int;
dilateW: int;
dilateH: int;
hasBias: bool = false;
activationType: ActivationType = 0;
}

table DeDepthwiseConv2D {
format: Format = 0;
channelIn: int;
channelMultiplier: int;
kernelW: int;
kernelH: int;
strideW: int;
strideH: int;
padMode: PadMode;
padUp: int;
padDown: int;
padLeft: int;
padRight: int;
dilateW: int;
dilateH: int;
hasBias: bool = false;
activationType: ActivationType = 0;
}


table Resize {
format: Format = 0;
method: ResizeMethod;
newHeight: long;
newWidth: long;
alignCorners: bool = false;
preserveAspectRatio: bool = false;
}

table DetectionPostProcess {
format: Format = 0;
inputSize: int;
hScale: float;
wScale: float;
xScale: float;
yScale: float;
NmsIouThreshold: float;
NmsScoreThreshold: float;
MaxDetections: long;
DetectionsPreClass: long;
MaxClassesPreDetection: long;
NumClasses: long;
UseRegularNms: bool;
}

table FullConnection {
hasBias: bool;
axis: int;
}

// Mean(input_tensor, axis, keep_dims)
table Mean {
axis: [int];
keepDims: bool = false;
}

table DeConv2D {
format: Format = 0;
group: int;
channelIn: int;
channelOut: int;
kernelW: int;
kernelH: int;
strideW: int;
strideH: int;
padMode: PadMode;
padUp: int;
padDown: int;
padLeft: int;
padRight: int;
dilateW: int;
dilateH: int;
hasBias: bool = false;
activationType: ActivationType = 0;
}

table Scale {
format: Format = 0;
}

table Eltwise {
mode: EltwiseMode;
}

table Add {
}

table Sub {
}

table Mul {
}

table RealDiv {
}

table Rsqrt {
}

table Equal {
}

table Less {
}

table Greater {
}

table Min {
}

table Slice {
format: Format = 0;
begin: [int];
size: [int];
}

table Floor {
}

table Abs {
}

table Neg {
}

table Exp {
}

table Cos {
}

table Sin {
}

table Sqrt {
}

table Square {
}

table Ceil {
}

table Log {
}

table Tan {
}

table Atan {
}

table Asin {
}

table Reshape {
format: Format = 0;
shape: [long];
}

table Power {
power: float;
scale: float;
shift: float;
}

table ArgMax {
axis: int;
outMaxValue: bool;
topK: int = 1;
keepDims: bool;
axisType: int;
}

table ArgMin {
axis: int;
outMaxValue: bool;
topK: int = 1;
keepDims: bool;
axisType: int;
}

table NetOutput {
}

table MatMul {
transposeA : bool = false;
transposeB : bool = false;
}

table CaffePReLU {
channelShared : bool = false;
}

table LeakyReLU {
negativeSlope: float;
}

table StridedSlice {
beginMask: int;
endMask: int;
ellipsisMask: int;
newAxisMask: int;
shrinkAxisMask: int;
begin: [int];
end: [int];
stride: [int];
isScale: [int];
}

table Stack {
axis: int;
n: int;
isScale: [int];
}

table Range {
dType: DataType;
start: int;
limit: int;
delta: int;
}

table ExpandDims {
dim: int;
}

table Tile {
multiples: [int];
}

table Cast {
srcT: int;
dstT: int;
}

table QuantDTypeCast {
srcT: DataType;
dstT: DataType;
}

table Split {
numberSplit: int;
sizeSplits: [int];
splitDim: int;
}

table CaffeCrop {
axis : long;
offsets : [long];
}

table Permute {
order: [long];
}

table Clip {
max: float;
min: float;
}

table Constant {
}


table Elu {
alpha: float = 1.0;
}

table Broadcast {
}

table Lrn {
alpha: float = 0.0001;
beta: float = 0.75;
bias: float = 1.0;
size: int;
}

enum ReduceMode : byte {
ReduceMean = 0,
ReduceMax = 1,
ReduceMin = 2,
ReduceProd = 3,
ReduceSum = 4,
ReduceSumSquare = 5
}

table Reduce {
axes: [int];
keepDims: int;
mode: ReduceMode;
}

table Prelu {
slope: [float];
}

table Transpose {
perm: [int];
conjugate: bool = false;
}

table Squeeze {
axis: [int];
}

table Unsqueeze {
axis: [int];
}

table Upsample {
mode: string;
scales: [float];
}

table Dropout {
ratio : float = 0.5;
}

table LocalResponseNormalization {
depth_radius: int;
bias: float;
alpha: float;
beta: float;
}

table ZerosLike {
}

table TopK {
k : int;
sorted : bool = true;
}

table SpaceToDepth {
blockSize : int;
format: Format = 0;
}

table SpaceToBatch {
blockShape : [int];
paddings : [int];
}

table SparseToDense {
validateIndices: bool;
}

table ReverseSequence {
seqAxis: int;
batchAxis: int;
}

table Rank {
}


table Gather {
axis: int;
batchDims: int;
}

table GatherNd {
batchDims: int;
}

table Fill {
dims: [int];
}

table DepthToSpace {
blockSize: int;
format: Format = 0;
}


table BatchToSpace {
blockShape: [int];
crops: [int];
}

table AddN {
N: int;
}


table EmbeddingLookup {
ids: [int];
maxNorm: float;
}

table EmbeddingLookupSparse {
spIds: [int];
spWeights: [float];
//combiner: Combiner=0;
maxNortm: float;
}

table FloorDiv {
}

table FloorMod {
}

table L2Norm {
axis: [int];
epsilon: float;
}

table LogicalAnd {
}

table LogicalOr {
}

table LogicalXor {
}

table LogicalNot {
}

table MatrixDiag {
k: int;
numRows: int;
numCols: int;
paddingValue: float;
}

table Select {
}

table TfReduce {
type: ReduceType = 7;
}

table Reverse {
axis: [int];
}

table Round {
}

table Scatter {
}

table Unique {
}

table Unstack {
num: int;
axis: int;
}

table OnnxInt8Quantize {
}

table OnnxInt8Dequantize {
}

table FakeQuantWithMinMax {
}

table FakeQuantWithMinMaxPerChannel {
}

table BatchNormFold {
}

table MulFold {
}

table AddFold {
}

table SquaredDifference {
}
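
With the op tables compiled the same way, a single attribute table is assembled through its generated *Builder; the Conv2D values below are arbitrary and the header name is an assumption.

#include "flatbuffers/flatbuffers.h"
#include "schema/inner/op_generated.h"  // assumed output of `flatc --cpp op.fbs`

flatbuffers::Offset<mindspore::predict::Conv2D> PackConv2D(flatbuffers::FlatBufferBuilder *fbb) {
  // fields left unset keep the defaults declared in the schema
  mindspore::predict::Conv2DBuilder builder(*fbb);
  builder.add_format(mindspore::predict::Format_NCHW);
  builder.add_kernelW(3);
  builder.add_kernelH(3);
  builder.add_strideW(1);
  builder.add_strideH(1);
  builder.add_padMode(mindspore::predict::PadMode_SAME);
  builder.add_activationType(mindspore::predict::ActivationType_RELU);
  return builder.Finish();
}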

+ 0
- 8
mindspore/ccsrc/runtime/device/ascend/ascend_stream_assign.cc View File

@@ -24,7 +24,6 @@
#include "common/utils.h" #include "common/utils.h"
#include "backend/session/anf_runtime_algorithm.h" #include "backend/session/anf_runtime_algorithm.h"
#include "runtime/device/kernel_adjust.h" #include "runtime/device/kernel_adjust.h"
#include "predict/generator/utils/ir_model_util.h"
#include "backend/optimizer/common/helper.h" #include "backend/optimizer/common/helper.h"
#include "utils/utils.h" #include "utils/utils.h"


@@ -53,13 +52,6 @@ void AscendStreamAssign::AssignStream(const NotNull<KernelGraphPtr> &graph_ptr)
GetStreamRelations();
PrintStreamGroups();
FindEventRelations(graph_ptr);

// Get info for D Model
AscendResourceMng &resource_manager = AscendResourceMng::GetInstance();
generator::IRModelUtil::GetInstance().set_event_num(resource_manager.get_cur_event_num());
generator::IRModelUtil::GetInstance().set_stream_num(resource_manager.get_cur_stream_num());
// Init to 1,temporarily
generator::IRModelUtil::GetInstance().set_batch_num(1);
}
}




+ 0
- 1
mindspore/ccsrc/runtime/device/kernel_runtime.h View File

@@ -24,7 +24,6 @@
#include "runtime/device/device_address.h" #include "runtime/device/device_address.h"
#include "ir/tensor.h" #include "ir/tensor.h"
#include "utils/convert_utils.h" #include "utils/convert_utils.h"
#include "predict/generator/utils/ir_model_util.h"
#ifdef ENABLE_DUMP_E2E #ifdef ENABLE_DUMP_E2E
#include "debug/e2e_dump.h" #include "debug/e2e_dump.h"
#endif #endif


+ 0
- 2
mindspore/ccsrc/utils/context/ms_context.cc View File

@@ -50,8 +50,6 @@ std::map<std::string, MsBackendPolicy> MsContext::policy_map_ = {{"ge", kMsBacke
MsContext::MsContext(const std::string &policy, const std::string &target) {
save_graphs_flag_ = false;
save_graphs_path_ = ".";
save_ms_model_flag_ = false;
save_ms_model_path_ = "./model.ms";
enable_dump_ = false;
save_dump_path_ = ".";
tsd_ref_ = 0;


+ 0
- 8
mindspore/ccsrc/utils/context/ms_context.h View File

@@ -102,12 +102,6 @@ class MsContext {
void set_enable_mem_reuse(bool enable_mem_reuse) { enable_mem_reuse_ = enable_mem_reuse; }
bool enable_mem_reuse() const { return enable_mem_reuse_; }


bool save_ms_model_flag() const { return save_ms_model_flag_; }
void set_save_ms_model_flag(bool save_ms_model_flag) { save_ms_model_flag_ = save_ms_model_flag; }

std::string save_ms_model_path() const { return save_ms_model_path_; }
void set_save_ms_model_path(const std::string &save_ms_model_path) { save_ms_model_path_ = save_ms_model_path; }

void set_enable_gpu_summary(bool enable_gpu_summary) { enable_gpu_summary_ = enable_gpu_summary; }
bool enable_gpu_summary() const { return enable_gpu_summary_; }


@@ -190,8 +184,6 @@ class MsContext {
bool enable_reduce_precision_;
bool enable_loop_sink_;
bool enable_mem_reuse_;
std::string save_ms_model_path_;
bool save_ms_model_flag_;
bool enable_gpu_summary_;
bool enable_dump_;
std::string save_dump_path_;


+ 1
- 20
mindspore/context.py View File

@@ -234,22 +234,6 @@ class _Context:
if not success:
raise RuntimeError("Device id set failed!!!")


@property
def save_ms_model(self):
return self._context_handle.get_save_ms_model_flag()

@save_ms_model.setter
def save_ms_model(self, save_ms_model_flag):
self._context_handle.set_save_ms_model_flag(save_ms_model_flag)

@property
def save_ms_model_path(self):
return self._context_handle.get_save_ms_model_path()

@save_ms_model_path.setter
def save_ms_model_path(self, save_ms_model_path):
self._context_handle.set_save_ms_model_path(save_ms_model_path)

@property
def enable_auto_mixed_precision(self):
return self._context_handle.get_auto_mixed_precision_flag()
@@ -541,7 +525,7 @@ def reset_auto_parallel_context():




@args_type_check(mode=int, precompile_only=bool, device_target=str, device_id=int, save_graphs=bool,
save_graphs_path=str, save_ms_model=bool, save_ms_model_path=str, enable_dump=bool,
save_graphs_path=str, enable_dump=bool,
save_dump_path=str, enable_reduce_precision=bool, variable_memory_max_size=str,
enable_profiling=bool, profiling_options=str, enable_auto_mixed_precision=bool,
enable_graph_kernel=bool, check_bprop=bool, max_device_memory=str, print_file_path=str,
@@ -569,8 +553,6 @@ def set_context(**kwargs):
device_id (int): Id of target device, the value must be in [0, device_num_per_host-1],
while device_num_per_host should no more than 4096. Default: 0.
save_graphs (bool): Whether to save graphs. Default: False.
save_ms_model (bool): Whether to save lite model converted by graph. Default: False.
save_ms_model_path (str): Path to save converted lite model. Default: "."
save_graphs_path (str): Path to save graphs. Default: "."
enable_auto_mixed_precision (bool): Whether to enable auto mixed precision. Default: True.
enable_graph_kernel (bool): Whether to enable composition of basic primitives. These primitives would be
@@ -615,7 +597,6 @@ def set_context(**kwargs):
>>> context.set_context(device_id=0)
>>> context.set_context(save_graphs=True, save_graphs_path="./model.ms")
>>> context.set_context(enable_reduce_precision=True)
>>> context.set_context(save_ms_model=True, save_ms_model_path=".")
>>> context.set_context(enable_dump=True, save_dump_path=".")
>>> context.set_context(reserve_class_name_in_scope=True)
>>> context.set_context(variable_memory_max_size="6GB")


+ 2
- 7
mindspore/train/serialization.py View File

@@ -20,7 +20,6 @@ from threading import Thread, Lock
import numpy as np


import mindspore.nn as nn
import mindspore.context as context
from mindspore import log as logger
from mindspore.train.checkpoint_pb2 import Checkpoint
from mindspore.train.print_pb2 import Print
@@ -457,18 +456,17 @@ def export(net, *inputs, file_name, file_format='GEIR'):
net (Cell): MindSpore network.
inputs (Tensor): Inputs of the `net`.
file_name (str): File name of model to export.
file_format (str): MindSpore currently supports 'GEIR', 'ONNX' 'LITE' and 'BINARY' format for exported model.
file_format (str): MindSpore currently supports 'GEIR', 'ONNX' and 'BINARY' format for exported model.


- GEIR: Graph Engine Intermidiate Representation. An intermidiate representation format of
Ascend model.
- ONNX: Open Neural Network eXchange. An open format built to represent machine learning models.
- LITE: Huawei model format for mobile. A lite model only for the MindSpore Lite
- BINARY: Binary format for model. An intermidiate representation format for models.
"""
logger.info("exporting model file:%s format:%s.", file_name, file_format)
check_input_data(*inputs, data_class=Tensor)


supported_formats = ['GEIR', 'ONNX', 'LITE', 'BINARY']
supported_formats = ['GEIR', 'ONNX', 'BINARY']
if file_format not in supported_formats:
raise ValueError(f'Illegal file format {file_format}, it must be one of {supported_formats}')
# switch network mode to infer when it is training
@@ -497,9 +495,6 @@ def export(net, *inputs, file_name, file_format='GEIR'):
with open(file_name, 'wb') as f:
os.chmod(file_name, stat.S_IWUSR | stat.S_IRUSR)
f.write(onnx_stream)
elif file_format == 'LITE': # file_format is 'LITE'
context.set_context(save_ms_model=True, save_ms_model_path=file_name)
net(*inputs)
# restore network training mode
if is_training:
net.set_train(mode=True)


+ 0
- 14
predict/.gitignore View File

@@ -1,14 +0,0 @@
# git ignore file for predict

#flatbuf generated file
schema/*_generated.h
schema/inner/*_generated.h
module/tvm_module/lite/include/*_generated.h

#tvm fbs files
module/tvm_module/lite/tune/convert/*.fbs

#doTest dir
test/doTest/



+ 0
- 79
predict/CMakeLists.txt View File

@@ -1,79 +0,0 @@
cmake_minimum_required(VERSION 3.12.1)
project (mindspore-predict)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g")
set(CMAKE_BUILD_TYPE "Release")

set(CMAKE_CXX_STANDARD 11)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fvisibility=hidden")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility=hidden")
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -s")

option(ENABLE_ASAN "Enable Google Sanitizer to find memory bugs" OFF)
option(ENABLE_PREDICT_ARM64 "predict arm64" OFF)
option(ENABLE_PREDICT_ARM32 "predict arm32" OFF)

set(PREDICT_DIR ${CMAKE_CURRENT_SOURCE_DIR})
set(PREDICT_BUILD_DIR ${CMAKE_CURRENT_SOURCE_DIR}/build)
set(3RD_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../third_party)
set(DOTEST_DIR ${PREDICT_BUILD_DIR}/test/doTest)

include_directories(${3RD_DIR})
include_directories(${3RD_DIR}/flatbuffers/include/)
include_directories(${3RD_DIR}/protobuf/build/include/)
include_directories(${3RD_DIR}/googletest/googletest/include/)
include_directories(${3RD_DIR}/googletest/googlemock/include/)
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/module/tvm_kernel/lite/include/)
include_directories(${PREDICT_DIR}/module/tvm_kernel/incubator-tvm/3rdparty/dlpack/include)
include_directories(common)

if(ENABLE_PREDICT_ARM64 OR ENABLE_PREDICT_ARM32)
message("*********************predict compile arm*********************")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DMS_USE_ARM=1")
set(ANDROID_NDK $ENV{ANDROID_NDK})
if(ANDROID_NDK)
add_subdirectory(${3RD_DIR}/googletest ${CMAKE_BINARY_DIR}/googletest)
link_directories(${PREDICT_BUILD_DIR}/googletest/googlemock/gtest)

add_subdirectory(${3RD_DIR}/securec ${CMAKE_BINARY_DIR}/securec)
link_directories(${PREDICT_BUILD_DIR}/securec/src)
else()
message(FATAL_ERROR "please set ANDROID_NDK in environment variable for example: export ANDROID_NDK=/root/usr/android-ndk-r16b/")
endif()

include_directories(${ANDROID_SYSROOT}/usr/include/)
if(${ANDROID_ABI} STREQUAL "armeabi-v7a")
include_directories(${ANDROID_SYSROOT}/usr/include/arm-linux-androideabi)
elseif(${ANDROID_ABI} STREQUAL "arm64-v8a")
include_directories(${ANDROID_SYSROOT}/usr/include/aarch64-linux-android)
else()
include_directories(${ANDROID_SYSROOT}/usr/include/arm-linux-androideabi)
endif()

else()
# include libsecurec.a x86
message("*********************predict compile x86*********************")
if(EXISTS "${PREDICT_DIR}/../build/mindspore/securec/src/libsecurec.a")
link_directories(${PREDICT_DIR}/../build/mindspore/securec/src)
else()
include(${PREDICT_DIR}/../cmake/dependency_securec.cmake)
link_directories(${PREDICT_BUILD_DIR}/securec/src)
endif()

# include libgtest.so x86
if(EXISTS "${PREDICT_DIR}/../build/googletest/googlemock/gtest/libgtest.so")
link_directories(${PREDICT_DIR}/../build/googletest/googlemock/gtest)
else()
include(${PREDICT_DIR}/../cmake/dependency_gtest.cmake)
link_directories(${PREDICT_BUILD_DIR}/googletest/googlemock/gtest)
endif()
endif()

if (CODE_COVERAGE)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fprofile-arcs -ftest-coverage -O0")
endif()

add_subdirectory(common)
add_subdirectory(src)
add_subdirectory(benchmark)
add_subdirectory(test)
add_subdirectory(module)

+ 0
- 38
predict/benchmark/CMakeLists.txt View File

@@ -1,38 +0,0 @@

cmake_minimum_required(VERSION 3.12)
project(benchmark)

set(CMAKE_CXX_STANDARD 14)
set(CMAKE_BUILD_TYPE "Debug")

#include 3rd
include_directories(${3RD_DIR}/protobuf/build/include)
include_directories(${3RD_DIR}/securec/include)
include_directories(${3RD_DIR}/flatbuffers/include)
include_directories(${3RD_DIR}/googletest/googletest/include)
include_directories(${3RD_DIR}/googletest/googlemock/include)
include_directories(${PREDICT_DIR}/module/tvm_kernel/incubator-tvm/3rdparty/dlpack/include)
include_directories(${3RD_DIR}/flatbuffers/include)
include_directories(${3RD_DIR}/securec/include)

#include ms
include_directories(.)
include_directories(${PREDICT_DIR})

set(COMMON_SRC ${PREDICT_DIR}/common/flag_parser.cc
${PREDICT_DIR}/common/file_utils.cc
${PREDICT_DIR}/common/func_utils.cc
${PREDICT_DIR}/common/mslog.cc
${PREDICT_DIR}/common/utils.cc)

link_directories(${CMAKE_CURRENT_SOURCE_DIR}/../output/lib/)

add_executable(benchmark main.cc benchmark.cc ${COMMON_SRC})

target_link_libraries(benchmark mspredict libsecurec.a)
add_dependencies(benchmark tvm_kernel)
add_dependencies(benchmark securec)

add_custom_command(TARGET benchmark POST_BUILD
COMMAND mkdir -pv ${DOTEST_DIR}
COMMAND cp ${PREDICT_BUILD_DIR}/benchmark/benchmark ${DOTEST_DIR})

+ 0
- 0
predict/benchmark/README.md View File


+ 0
- 451
predict/benchmark/benchmark.cc View File

@@ -1,451 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "benchmark/benchmark.h"
#include <random>
#include <limits>
#include <algorithm>
#include <utility>
#include <memory>
#include "include/session.h"

namespace mindspore {
namespace predict {
STATUS Benchmark::GenerateRandomData(size_t size, void *data) {
MS_ASSERT(data != nullptr);
char *castedData = static_cast<char *>(data);
for (size_t i = 0; i < size; i++) {
castedData[i] = static_cast<char>(i);
}
return RET_OK;
}

STATUS Benchmark::GenerateInputData() {
for (Tensor *tensor : msInputs) {
MS_ASSERT(tensor != nullptr);
auto ret = tensor->MallocData();
if (ret != RET_OK) {
MS_LOGE("MallocData for inTensor failed %d", ret);
return ret;
}
MS_ASSERT(tensor->GetData() != nullptr);
auto tensorByteSize = tensor->GetDataSize();
auto status = GenerateRandomData(tensorByteSize, tensor->GetData());
if (status != RET_OK) {
MS_LOGE("GenerateRandomData for inTensor failed %d", status);
return status;
}
}
return RET_OK;
}

STATUS Benchmark::LoadInput() {
size_t size = 0;
char *graphBuf = ReadFile(_flags->modelPath.c_str(), &size);
if (graphBuf == nullptr) {
MS_LOGE("Load graph failed, path %s", _flags->modelPath.c_str());
return RET_ERROR;
}

this->msInputs = session->GetInput();

if (_flags->inDataPath.empty()) {
auto status = GenerateInputData();
if (status != RET_OK) {
delete graphBuf;
MS_LOGE("Generate input data error %d", status);
return status;
}
} else {
auto status = ReadInputFile();
if (status != RET_OK) {
delete graphBuf;
MS_LOGE("ReadInputFile error, %d", status);
return status;
}
}
delete graphBuf;
return RET_OK;
}

STATUS Benchmark::ReadInputFile() {
MS_ASSERT(msInputs.size() <= 1);
if (msInputs.empty()) {
return RET_OK;
}
Tensor *inTensor = msInputs.at(0);
MS_ASSERT(inTensor != nullptr);

size_t size;
char *binBuf = ReadFile(_flags->inDataPath.c_str(), &size);
if (binBuf == nullptr) {
return RET_ERROR;
}
auto tensorDataSize = inTensor->GetDataSize();
if (size != tensorDataSize) {
MS_LOGE("Input binary file size error, required: %zu, in fact: %zu", tensorDataSize, size);
delete binBuf;
return RET_ERROR;
}
inTensor->SetData(binBuf);
binBuf = nullptr;

return RET_OK;
}

// calibData is FP32
STATUS Benchmark::ReadCalibData() {
const char *calibDataPath = _flags->calibDataPath.c_str();
// read calib data
std::ifstream inFile(calibDataPath);
if (!inFile.good()) {
MS_LOGE("file: %s is not exist", calibDataPath);
return RET_PARAM_INVALID;
}

if (!inFile.is_open()) {
MS_LOGE("file: %s open failed", calibDataPath);
inFile.close();
return RET_PARAM_INVALID;
}

std::string line;
MS_LOGI("Start reading calibData file");
std::string tensorName;
while (!inFile.eof()) {
getline(inFile, line);
std::stringstream stringLine1(line);
size_t dim = 0;
stringLine1 >> tensorName >> dim;
std::vector<size_t> dims;
size_t shapeSize = 1;
for (size_t i = 0; i < dim; i++) {
size_t tmpDim;
stringLine1 >> tmpDim;
dims.push_back(tmpDim);
shapeSize *= tmpDim;
}

getline(inFile, line);
std::stringstream stringLine2(line);
std::vector<float> tensorData;
for (size_t i = 0; i < shapeSize; i++) {
float tmpData;
stringLine2 >> tmpData;
tensorData.push_back(tmpData);
}

std::unique_ptr<CheckTensor> checkTensor(new CheckTensor(dims, tensorData));
this->calibData.insert(std::make_pair(tensorName, checkTensor.release()));
}
inFile.close();
MS_LOGI("Finish reading calibData file");
return RET_OK;
}

// tensorData need to be converter first
float Benchmark::CompareData(const std::string &nodeName, std::vector<int64_t> msShape, float *msTensorData) {
auto iter = this->calibData.find(nodeName);
if (iter != this->calibData.end()) {
std::vector<size_t> castedMSShape;
size_t shapeSize = 1;
for (int64_t dim : msShape) {
castedMSShape.push_back(size_t(dim));
shapeSize *= dim;
}

CheckTensor *calibTensor = iter->second;
if (calibTensor->shape != castedMSShape) {
std::ostringstream oss;
oss << "Shape of mslite output(";
for (auto dim : castedMSShape) {
oss << dim << ",";
}
oss << ") and shape source model output(";
for (auto dim : calibTensor->shape) {
oss << dim << ",";
}
oss << ") are different";
MS_LOGE("%s", oss.str().c_str());
return -1;
}

float meanBias = 0;
std::ostringstream outputData;
outputData << "Data of node " << nodeName << " : ";
for (size_t j = 0; j < shapeSize; j++) {
if (j < printNum) {
outputData << msTensorData[j] << " ";
}
if (fabs(calibTensor->data.at(j)) > minFloatThr) {
double bias = fabs(msTensorData[j] - calibTensor->data.at(j)) / fabs(calibTensor->data.at(j));
meanBias += bias;
}
}
meanBias /= shapeSize;
MS_LOGI("%s", outputData.str().c_str());

if (meanBias <= minFloatThr) {
MS_LOGI("Mean bias of node %s : 0%%", nodeName.c_str());
} else {
MS_LOGI("Mean bias of node %s : %f%%", nodeName.c_str(), meanBias * percentage);
}
return meanBias;
} else {
MS_LOGI("%s is not in Source Model output", nodeName.c_str());
return -1;
}
}

STATUS Benchmark::CompareOutput(const std::map<NODE_ID, std::vector<Tensor *>> &msOutputs) {
float totalBias = 0;
int totalSize = 0;
bool hasError = false;
for (const auto &msOutput : msOutputs) {
std::string nodeName = msOutput.first;
auto tensors = msOutput.second;
for (auto tensor : tensors) {
MS_ASSERT(tensor->GetData() != nullptr);
float bias = CompareData(nodeName, tensor->GetDims(), static_cast<float *>(tensor->GetData()));
if (bias >= 0) {
totalBias += bias;
totalSize++;
} else {
hasError = true;
break;
}
}
}

if (!hasError) {
float meanBias;
if (totalSize != 0) {
meanBias = totalBias / totalSize * percentage;
} else {
meanBias = 0;
}

MS_LOGI("Mean bias all node : %f%%", meanBias);

if (meanBias > 1) {
MS_LOGE("Mean bias of all nodes is too big: %f%%", meanBias);
return RET_ERROR;
} else {
return RET_OK;
}
} else {
MS_LOGE("Error in CompareData");
return RET_ERROR;
}
}

STATUS Benchmark::MarkPerformance() {
MS_LOGI("Running warm up loops...");
for (int i = 0; i < _flags->warmUpLoopCount; i++) {
auto status = session->Run(msInputs);
if (status != RET_OK) {
MS_LOGE("Inference error %d", status);
return status;
}
}

MS_LOGI("Running benchmark loops...");
uint64_t timeMin = maxTimeThr;
uint64_t timeMax = 0;
uint64_t timeAvg = 0;
for (int i = 0; i < _flags->loopCount; i++) {
uint64_t start = GetTimeUs();
auto status = session->Run(msInputs);
if (status != RET_OK) {
MS_LOGE("Inference error %d", status);
return status;
}

uint64_t end = GetTimeUs();
uint64_t time = end - start;
timeMin = std::min(timeMin, time);
timeMax = std::max(timeMax, time);
timeAvg += time;

msOutputs = session->GetAllOutput();
if (cleanData) {
for (auto &msOutput : msOutputs) {
for (auto &outputTensor : msOutput.second) {
delete outputTensor;
}
}
msOutputs.clear();
}
}
if (_flags->loopCount > 0) {
timeAvg /= _flags->loopCount;
MS_LOGI("MinRunTime = %f ms, MaxRuntime = %f ms, AvgRunTime = %f ms", timeMin / US2MS, timeMax / US2MS,
timeAvg / US2MS);
}
return RET_OK;
}

STATUS Benchmark::MarkAccuracy() {
MS_LOGI("MarkAccuracy");

auto status = session->Run(msInputs);
if (status != RET_OK) {
MS_LOGE("Inference error %d", status);
return status;
}
msOutputs = session->GetAllOutput();

ReadCalibData();
status = CompareOutput(msOutputs);
if (cleanData) {
for (auto &msOutput : msOutputs) {
for (auto &outputTensor : msOutput.second) {
delete outputTensor;
}
}
msOutputs.clear();
}
return status;
}

STATUS Benchmark::CleanData() {
if (cleanData) {
for (auto &msInput : msInputs) {
delete msInput;
}
msInputs.clear();
for (auto &data : calibData) {
data.second->shape.clear();
data.second->data.clear();
delete data.second;
}
calibData.clear();
}
return RET_OK;
}

STATUS Benchmark::RunBenchmark() {
// Load graph
std::string comment = modelName;

MS_LOGI("start reading model file");
size_t size = 0;
char *graphBuf = ReadFile(_flags->modelPath.c_str(), &size);
if (graphBuf == nullptr) {
MS_LOGE("Load graph failed while running %s", comment.c_str());
return RET_ERROR;
}

uint64_t startPrepareTime = GetTimeUs();
session = CreateSession(graphBuf, size, ctx);
if (session == nullptr) {
delete graphBuf;
MS_LOGE("new session failed while running %s", comment.c_str());
return RET_ERROR;
}
uint64_t endPrepareTime = GetTimeUs();
MS_LOGI("PrepareTime = %f ms, ", (endPrepareTime - startPrepareTime) / US2MS);

// Load input
MS_LOGI("start generate input data");
auto status = LoadInput();
if (status != RET_OK) {
delete graphBuf;
MS_LOGE("Generate input data error");
return status;
}

if (!_flags->calibDataPath.empty()) {
status = MarkAccuracy();
if (status != RET_OK) {
delete graphBuf;
MS_LOGE("Run MarkAccuracy error: %d", status);
return status;
}
} else {
status = MarkPerformance();
if (status != RET_OK) {
delete graphBuf;
MS_LOGE("Run MarkPerformance error: %d", status);
return status;
}
}

CleanData();
delete graphBuf;
return RET_OK;
}

STATUS Benchmark::Init() {
if (this->_flags == nullptr) {
return RET_ERROR;
}
MS_LOGI("ModelPath = %s", this->_flags->modelPath.c_str());
MS_LOGI("InDataPath = %s", this->_flags->inDataPath.c_str());
MS_LOGI("TensorDataType = %s", this->_flags->tensorDataTypeIn.c_str());
MS_LOGI("LoopCount = %d", this->_flags->loopCount);
MS_LOGI("WarmUpLoopCount = %d", this->_flags->warmUpLoopCount);
MS_LOGI("NumThreads = %d", this->_flags->numThreads);
MS_LOGI("calibDataPath = %s", this->_flags->calibDataPath.c_str());

this->_flags->inDataType = this->_flags->inDataTypeIn == "img" ? kImage : kBinary;
if (this->_flags->tensorDataTypeIn == "float") {
this->_flags->tensorDataType = DataType_DT_FLOAT;
}

if (_flags->modelPath.empty()) {
MS_LOGE("modelPath is required");
return RET_ERROR;
}

modelName = _flags->modelPath.substr(_flags->modelPath.find_last_of("/") + 1);

return RET_OK;
}

int RunBenchmark(int argc, const char **argv) {
BenchmarkFlags flags;
Option<std::string> err = flags.ParseFlags(argc, argv);

if (err.IsSome()) {
std::cerr << err.Get() << std::endl;
std::cerr << flags.Usage() << std::endl;
return -1;
}

if (flags.help) {
std::cerr << flags.Usage() << std::endl;
return 0;
}

Benchmark mBenchmark(&flags);
auto status = mBenchmark.Init();
if (status != RET_OK) {
MS_LOGE("Benchmark init Error : %d", status);
return 1;
}

status = mBenchmark.RunBenchmark();
if (status != RET_OK) {
MS_LOGE("Run Benchmark Error : %d", status);
return 1;
}

MS_LOGI("end of benchmark");
return 0;
}
} // namespace predict
} // namespace mindspore

+ 0
- 142
predict/benchmark/benchmark.h View File

@@ -1,142 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef PREDICT_BENCHMARK_BENCHMARK_H_
#define PREDICT_BENCHMARK_BENCHMARK_H_

#include <getopt.h>
#include <signal.h>
#include <fstream>
#include <iostream>
#include <map>
#include <string>
#include <vector>
#include <memory>
#include <unordered_map>

#include "common/flag_parser.h"
#include "common/file_utils.h"
#include "common/func_utils.h"
#include "common/mslog.h"
#include "common/utils.h"
#include "include/errorcode.h"
#include "include/session.h"
#include "include/tensor.h"
#include "schema/inner/ms_generated.h"
#include "src/graph.h"
#include "src/graph_execution.h"
#include "src/op.h"

namespace mindspore {
namespace predict {
enum InDataType { kImage = 0, kBinary = 1 };

struct CheckTensor {
CheckTensor(const std::vector<size_t> &shape, const std::vector<float> &data) {
this->shape = shape;
this->data = data;
}
std::vector<size_t> shape;
std::vector<float> data;
};

class BenchmarkFlags : public virtual FlagParser {
public:
BenchmarkFlags() {
// common
AddFlag(&BenchmarkFlags::modelPath, "modelPath", "Input model path", "");
AddFlag(&BenchmarkFlags::tensorDataTypeIn, "tensorDataType", "Data type of input Tensor. float", "float");
AddFlag(&BenchmarkFlags::inDataPath, "inDataPath", "Input data path, if not set, use random input", "");
// MarkPerformance
AddFlag(&BenchmarkFlags::loopCount, "loopCount", "Run loop count", 10);
AddFlag(&BenchmarkFlags::numThreads, "numThreads", "Run threads number", 2);
AddFlag(&BenchmarkFlags::warmUpLoopCount, "warmUpLoopCount", "Run warm up loop", 3);
// MarkAccuracy
AddFlag(&BenchmarkFlags::calibDataPath, "calibDataPath", "Calibration data file path", "");
}

~BenchmarkFlags() override = default;

public:
// common
std::string modelPath;
std::string inDataPath;
InDataType inDataType;
std::string inDataTypeIn;
DataType tensorDataType;
std::string tensorDataTypeIn;
// MarkPerformance
int loopCount;
int numThreads;
int warmUpLoopCount;
// MarkAccuracy
std::string calibDataPath;
};

class Benchmark {
public:
explicit Benchmark(BenchmarkFlags *flags) : _flags(flags) {}

virtual ~Benchmark() = default;

STATUS Init();
STATUS RunBenchmark();

private:
// call GenerateInputData or ReadInputFile to init inputTensors
STATUS LoadInput();

// call GenerateRandomData to fill inputTensors
STATUS GenerateInputData();

STATUS GenerateRandomData(size_t size, void *data);

STATUS ReadInputFile();

STATUS ReadCalibData();

STATUS CleanData();

STATUS CompareOutput(const std::map<NODE_ID, std::vector<Tensor *>> &msOutputs);

float CompareData(const std::string &nodeName, std::vector<int64_t> msShape, float *msTensorData);

STATUS MarkPerformance();

STATUS MarkAccuracy();

private:
BenchmarkFlags *_flags;
std::shared_ptr<Session> session;
Context ctx;
std::vector<Tensor *> msInputs;
std::map<std::string, std::vector<Tensor *>> msOutputs;
std::unordered_map<std::string, CheckTensor *> calibData;
std::string modelName = "";
bool cleanData = true;

const float US2MS = 1000.0f;
const float percentage = 100.0f;
const int printNum = 50;
const float minFloatThr = 0.0000001f;

const uint64_t maxTimeThr = 1000000;
};

int RunBenchmark(int argc, const char **argv);
} // namespace predict
} // namespace mindspore
#endif // PREDICT_BENCHMARK_BENCHMARK_H_

+ 0
- 24
predict/benchmark/main.cc

@@ -1,24 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <random>
#include <limits>
#include "benchmark/benchmark.h"

int main(int argc, const char **argv) {
signal(SIGSEGV, mindspore::predict::CoreDumpTraceFunc);
return mindspore::predict::RunBenchmark(argc, argv);
}

+ 0
- 17
predict/common/CMakeLists.txt

@@ -1,17 +0,0 @@
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../include)
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../)
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../../third_party)

add_compile_options(-fPIC)

add_library(common_mid OBJECT
${CMAKE_CURRENT_SOURCE_DIR}/common.h
${CMAKE_CURRENT_SOURCE_DIR}/graph_util.cc
${CMAKE_CURRENT_SOURCE_DIR}/file_utils.cc
${CMAKE_CURRENT_SOURCE_DIR}/flag_parser.cc
${CMAKE_CURRENT_SOURCE_DIR}/func_utils.cc
${CMAKE_CURRENT_SOURCE_DIR}/module_registry.cc
${CMAKE_CURRENT_SOURCE_DIR}/mslog.cc
${CMAKE_CURRENT_SOURCE_DIR}/storage.cc
${CMAKE_CURRENT_SOURCE_DIR}/utils.cc)

+ 0
- 57
predict/common/common.h

@@ -1,57 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef PREDICT_COMMON_COMMON_H_
#define PREDICT_COMMON_COMMON_H_

#include <string>
#include "schema/inner/ms_generated.h"

namespace mindspore {
namespace predict {
enum NCHW_SHAPE { NCHW_N = 0, NCHW_C = 1, NCHW_H = 2, NCHW_W = 3 };
enum NHWC_SHAPE { NHWC_N = 0, NHWC_H = 1, NHWC_W = 2, NHWC_C = 3 };
enum HWCK_SHAPE { HWCK_H = 0, HWCK_W = 1, HWCK_C = 2, HWCK_K = 3 };
enum KCHW_SHAPE { KCHW_K = 0, KCHW_C = 1, KCHW_H = 2, KCHW_W = 3 };
enum CHW_SHAPE { CHW_C = 0, CHW_H = 1, CHW_W = 2 };
enum HWC_SHAPE { HWC_H = 0, HWC_W = 1, HWC_C = 2 };

static constexpr int TENSOR_MAX_REFCOUNT = 999;

static const char *DELIM_COLON = ":";
static const char *DELIM_COMMA = ",";
static const char *DELIM_SLASH = "/";
static const char *DELIM_DOUBLE_BACKSLASH = "\\";

// quantization related
static const char QUANTIZED_UINT8[] = "QUANTIZED_UINT8";
static const char QUANTIZED_INT8[] = "QUANTIZED_INT8";
static const char QUANTIZED_INT16[] = "QUANTIZED_INT16";
static const char QUANTIZED_UINT16[] = "QUANTIZED_UINT16";
static const char QUANTIZED_FLOAT16[] = "FLOAT16";
static const char QUANTIZED_FLOAT32[] = "FLOAT32";
static const char QUANTIZATION_TYPE_DYNAMIC[] = "DYNAMIC";
static const char QUANTIZATION_TYPE_STATIC[] = "STATIC";
static const char CALIB_NORM[] = "NORM";

// dims
static const int32_t DIM_DEFAULT_SIZE = 4;

static const Format DEFAULT_FORMAT = Format_NCHW;
} // namespace predict
} // namespace mindspore

#endif // PREDICT_COMMON_COMMON_H_
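
The layout enums above are plain indices into a shape vector stored in N/H/W/C (or similar) order. A minimal sketch of how they might be used; PrintNhwc and the long-long casts are illustrative assumptions, not part of the removed sources:

#include <cstdio>
#include <cstdint>
#include <vector>
#include "common/common.h"

// Hypothetical helper: index an NHWC shape with the layout enums above.
void PrintNhwc(const std::vector<int64_t> &shape) {
  using namespace mindspore::predict;
  std::printf("n=%lld h=%lld w=%lld c=%lld\n",
              static_cast<long long>(shape[NHWC_N]), static_cast<long long>(shape[NHWC_H]),
              static_cast<long long>(shape[NHWC_W]), static_cast<long long>(shape[NHWC_C]));
}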

+ 0
- 79
predict/common/file_utils.cc

@@ -1,79 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "common/file_utils.h"
#include <climits>

namespace mindspore {
namespace predict {
char *ReadFile(const char *file, size_t *size) {
if (file == nullptr) {
MS_LOGE("file is nullptr");
return nullptr;
}
MS_ASSERT(size != nullptr);
std::ifstream ifs(RealPath(file));
if (!ifs.good()) {
MS_LOGE("file: %s is not exist", file);
return nullptr;
}

if (!ifs.is_open()) {
MS_LOGE("file: %s open failed", file);
return nullptr;
}

ifs.seekg(0, std::ios::end);
*size = ifs.tellg();
std::unique_ptr<char> buf(new (std::nothrow) char[*size]);
if (buf == nullptr) {
MS_LOGE("malloc buf failed, file:%s", file);
ifs.close();
return nullptr;
}

ifs.seekg(0, std::ios::beg);
ifs.read(buf.get(), *size);
ifs.close();

return buf.release();
}

std::string RealPath(const char *path) {
if (path == nullptr) {
MS_LOGE("path is nullptr");
return "";
}
if ((strlen(path)) >= PATH_MAX) {
MS_LOGE("path is too long");
return "";
}

std::shared_ptr<char> resolvedPath(new (std::nothrow) char[PATH_MAX]{0});
if (resolvedPath == nullptr) {
MS_LOGE("new resolvedPath failed");
return "";
}

auto ret = realpath(path, resolvedPath.get());
if (ret == nullptr) {
MS_LOGE("realpath failed");
return "";
}
return resolvedPath.get();
}
} // namespace predict
} // namespace mindspore

+ 0
- 39
predict/common/file_utils.h

@@ -1,39 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef PREDICT_COMMON_FILE_UTILS_H_
#define PREDICT_COMMON_FILE_UTILS_H_

#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <string>
#include <iostream>
#include <memory>
#include <fstream>
#include "common/utils.h"
#include "common/mslog.h"
#include "include/tensor.h"

namespace mindspore {
namespace predict {
char *ReadFile(const char *file, size_t *size);

std::string RealPath(const char *path);
} // namespace predict
} // namespace mindspore

#endif // PREDICT_COMMON_FILE_UTILS_H_
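
ReadFile returns a buffer allocated with new[] and reports its size through the out-parameter, while RealPath canonicalizes a path before use. A minimal sketch of the expected call pattern; LoadModelBuffer is a made-up helper and the caller is assumed to own (and delete[]) the returned buffer:

#include <cstddef>
#include "common/file_utils.h"
#include "common/mslog.h"

bool LoadModelBuffer(const char *path) {  // hypothetical helper
  size_t size = 0;
  char *buf = mindspore::predict::ReadFile(path, &size);
  if (buf == nullptr) {
    MS_LOGE("read %s failed", path);
    return false;
  }
  MS_LOGI("read %zu bytes from %s", size, mindspore::predict::RealPath(path).c_str());
  delete[] buf;  // ReadFile allocates with new[], so the caller releases it
  return true;
}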

+ 0
- 179
predict/common/flag_parser.cc

@@ -1,179 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "common/flag_parser.h"

namespace mindspore {
namespace predict {
// parse flags read from command line
Option<std::string> FlagParser::ParseFlags(int argc, const char *const *argv, bool supportUnknown,
bool supportDuplicate) {
MS_ASSERT(argv != nullptr);
const int FLAG_PREFIX_LEN = 2;
// Get binary name
binName = GetFileName(argv[0]);

std::multimap<std::string, Option<std::string>> keyValues;
for (int i = 1; i < argc; i++) {
std::string tmp = argv[i];
Trim(&tmp);
const std::string flagItem(tmp);

if (flagItem == "--") {
break;
}

if (flagItem.find("--") == std::string::npos) {
continue;
}

std::string key;
Option<std::string> value = Option<std::string>(None());

size_t pos = flagItem.find_first_of("=");
if (pos == std::string::npos && flagItem.find("--no-") != std::string::npos) {
key = flagItem.substr(FLAG_PREFIX_LEN);
} else if (pos == std::string::npos) {
key = flagItem.substr(FLAG_PREFIX_LEN);
} else {
key = flagItem.substr(FLAG_PREFIX_LEN, pos - FLAG_PREFIX_LEN);
value = Option<std::string>(flagItem.substr(pos + 1));
}

keyValues.insert(std::pair<std::string, Option<std::string>>(key, value));
}

Option<std::string> ret = Option<std::string>(InnerParseFlags(&keyValues));
if (ret.IsSome()) {
return Option<std::string>(ret.Get());
}

return Option<std::string>(None());
}

bool FlagParser::GetRealFlagName(const std::string &oriFlagName, std::string *flagName) {
MS_ASSERT(flagName != nullptr);
const int BOOL_TYPE_FLAG_PREFIX_LEN = 3;
bool opaque = false;
if (StartsWithPrefix(oriFlagName, "no-")) {
*flagName = oriFlagName.substr(BOOL_TYPE_FLAG_PREFIX_LEN);
opaque = true;
} else {
*flagName = oriFlagName;
}
return opaque;
}

// Inner parse function
Option<std::string> FlagParser::InnerParseFlags(std::multimap<std::string, Option<std::string>> *keyValues) {
MS_ASSERT(keyValues != nullptr);
for (auto it = keyValues->begin(); it != keyValues->end(); ++it) {
std::string flagName;
bool opaque = GetRealFlagName((*it).first, &flagName);
Option<std::string> flagValue = (*it).second;

auto item = flags.find(flagName);
if (item == flags.end()) {
return Option<std::string>(std::string(flagName + " is not a valid flag"));
}
FlagInfo *flag = &(item->second);
if (flag == nullptr) {
return Option<std::string>("Failed: flag is nullptr");
}
if (flag->isParsed) {
return Option<std::string>("Failed: already parsed flag: " + flagName);
}
std::string tmpValue;
if (!flag->isBoolean) {
if (opaque) {
return Option<std::string>(flagName + " is not a boolean type");
}
if (flagValue.IsNone()) {
return Option<std::string>("No value provided for non-boolean type: " + flagName);
}
tmpValue = flagValue.Get();
} else {
if (flagValue.IsNone() || flagValue.Get().empty()) {
tmpValue = !opaque ? "true" : "false";
} else if (!opaque) {
tmpValue = flagValue.Get();
} else {
return Option<std::string>(std::string("Boolean flag can not have non-empty value"));
}
}
// begin to parse value
Option<Nothing> ret = flag->parse(this, tmpValue);
if (ret.IsNone()) {
return Option<std::string>("Failed to parse value for: " + flag->flagName);
}
flag->isParsed = true;
}

// check for required flags that were registered in the constructor but not provided on the command line
for (auto &flag : flags) {
if (flag.second.isRequired && !flag.second.isParsed) {
return Option<std::string>("Error, value of '" + flag.first + "' not provided");
}
}

return Option<std::string>(None());
}

void Replaceall(std::string *str, const std::string &oldValue, const std::string &newValue) {
if (str == nullptr) {
MS_LOGE("Input str is nullptr");
return;
}
while (true) {
std::string::size_type pos(0);
if ((pos = str->find(oldValue)) != std::string::npos) {
str->replace(pos, oldValue.length(), newValue);
} else {
break;
}
}
}

std::string FlagParser::Usage(const Option<std::string> &usgMsg) const {
// first line, brief of the usage
std::string usageString = usgMsg.IsSome() ? usgMsg.Get() + "\n" : "";
// usage of bin name
usageString += usageMsg.IsNone() ? "usage: " + binName + " [options]\n" : usageMsg.Get() + "\n";
// helpLine: brief help message; usageLine: help message for each parameter
std::string helpLine = "";
std::string usageLine = "";
uint32_t i = 0;
for (auto flag = flags.begin(); flag != flags.end(); flag++) {
std::string flagName = flag->second.flagName;
std::string helpInfo = flag->second.helpInfo;
// parameter line
std::string thisLine = flag->second.isBoolean ? " --[no-]" + flagName : " --" + flagName + "=VALUE";
if (++i < flags.size()) {
// append this parameter's help message to its usage line
thisLine += " " + helpInfo;
Replaceall(&helpInfo, "\n\r", "\n");
usageLine += thisLine + "\n";
} else {
// brief help message
helpLine = thisLine + " " + helpInfo + "\n";
}
}
// total usage = usage brief + binary usage line + help message + per-parameter briefs
return usageString + helpLine + usageLine;
}
} // namespace predict
} // namespace mindspore

+ 0
- 291
predict/common/flag_parser.h

@@ -1,291 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef PREDICT_COMMON_FLAG_PARSER_H_
#define PREDICT_COMMON_FLAG_PARSER_H_

#include <functional>
#include <map>
#include <utility>
#include <string>

#include "common/utils.h"
#include "common/option.h"

namespace mindspore {
namespace predict {
struct FlagInfo;

struct Nothing {};

class FlagParser {
public:
FlagParser() { AddFlag(&FlagParser::help, "help", "print usage message", false); }

virtual ~FlagParser() = default;

// only support read flags from command line
virtual Option<std::string> ParseFlags(int argc, const char *const *argv, bool supportUnknown = false,
bool supportDuplicate = false);
std::string Usage(const Option<std::string> &usgMsg = Option<std::string>(None())) const;

template <typename Flags, typename T1, typename T2>
void AddFlag(T1 *t1, const std::string &flagName, const std::string &helpInfo, const T2 &t2);

template <typename Flags, typename T1, typename T2>
void AddFlag(T1 Flags::*t1, const std::string &flagName, const std::string &helpInfo, const T2 &t2);

template <typename Flags, typename T>
void AddFlag(T Flags::*t, const std::string &flagName, const std::string &helpInfo);

// Option-type fields
template <typename Flags, typename T>
void AddFlag(Option<T> Flags::*t, const std::string &flagName, const std::string &helpInfo);
bool help;

protected:
std::string binName;
Option<std::string> usageMsg;

private:
struct FlagInfo {
std::string flagName;
bool isRequired;
bool isBoolean;
std::string helpInfo;
bool isParsed;
std::function<Option<Nothing>(FlagParser *, const std::string &)> parse;
};

inline void AddFlag(const FlagInfo &flag);

// construct a temporary flag
template <typename Flags, typename T>
void ConstructFlag(Option<T> Flags::*t, const std::string &flagName, const std::string &helpInfo, FlagInfo *flag);

// construct a temporary flag
template <typename Flags, typename T1>
void ConstructFlag(T1 Flags::*t1, const std::string &flagName, const std::string &helpInfo, FlagInfo *flag);

Option<std::string> InnerParseFlags(std::multimap<std::string, Option<std::string>> *values);

bool GetRealFlagName(const std::string &oriFlagName, std::string *flagName);

std::map<std::string, FlagInfo> flags;
};

// convert to std::string
template <typename Flags, typename T>
Option<std::string> ConvertToString(T Flags::*t, const FlagParser &baseFlag) {
const Flags *flag = dynamic_cast<Flags *>(&baseFlag);
if (flag != nullptr) {
return std::to_string(flag->*t);
}

return Option<std::string>(None());
}

// construct for a Option-type flag
template <typename Flags, typename T>
void FlagParser::ConstructFlag(Option<T> Flags::*t1, const std::string &flagName, const std::string &helpInfo,
FlagInfo *flag) {
if (flag == nullptr) {
MS_LOGE("FlagInfo is nullptr");
return;
}
flag->flagName = flagName;
flag->helpInfo = helpInfo;
flag->isBoolean = typeid(T) == typeid(bool);
flag->isParsed = false;
}

// construct a temporary flag
template <typename Flags, typename T>
void FlagParser::ConstructFlag(T Flags::*t1, const std::string &flagName, const std::string &helpInfo, FlagInfo *flag) {
if (flag == nullptr) {
MS_LOGE("FlagInfo is nullptr");
return;
}
if (t1 == nullptr) {
MS_LOGE("t1 is nullptr");
return;
}
flag->flagName = flagName;
flag->helpInfo = helpInfo;
flag->isBoolean = typeid(T) == typeid(bool);
flag->isParsed = false;
}

inline void FlagParser::AddFlag(const FlagInfo &flagItem) { flags[flagItem.flagName] = flagItem; }

template <typename Flags, typename T>
void FlagParser::AddFlag(T Flags::*t, const std::string &flagName, const std::string &helpInfo) {
if (t == nullptr) {
MS_LOGE("t1 is nullptr");
return;
}

Flags *flag = dynamic_cast<Flags *>(this);
if (flag == nullptr) {
MS_LOGI("dynamic_cast failed");
return;
}

FlagInfo flagItem;

// flagItem is used as an output parameter
ConstructFlag(t, flagName, helpInfo, &flagItem);
flagItem.parse = [t](FlagParser *base, const std::string &value) -> Option<Nothing> {
Flags *flag = dynamic_cast<Flags *>(base);
if (base != nullptr) {
Option<T> ret = Option<T>(GenericParseValue<T>(value));
if (ret.IsNone()) {
return Option<Nothing>(None());
} else {
flag->*t = ret.Get();
}
}

return Option<Nothing>(Nothing());
};

flagItem.isRequired = true;
flagItem.helpInfo +=
!helpInfo.empty() && helpInfo.find_last_of("\n\r") != helpInfo.size() - 1 ? " (default: " : "(default: ";
flagItem.helpInfo += ")";

// add this flag to a std::map
AddFlag(flagItem);
}

template <typename Flags, typename T1, typename T2>
void FlagParser::AddFlag(T1 *t1, const std::string &flagName, const std::string &helpInfo, const T2 &t2) {
if (t1 == nullptr) {
MS_LOGE("t1 is nullptr");
return;
}

FlagInfo flagItem;

// flagItem is used as an output parameter
ConstructFlag(t1, flagName, helpInfo, &flagItem);
flagItem.parse = [t1](FlagParser *base, const std::string &value) -> Option<Nothing> {
if (base != nullptr) {
Option<T1> ret = Option<T1>(GenericParseValue<T1>(value));
if (ret.IsNone()) {
return Option<Nothing>(None());
} else {
*t1 = ret.Get();
}
}

return Option<Nothing>(Nothing());
};

flagItem.isRequired = false;
*t1 = t2;

flagItem.helpInfo +=
!helpInfo.empty() && helpInfo.find_last_of("\n\r") != helpInfo.size() - 1 ? " (default: " : "(default: ";
flagItem.helpInfo += ToString(t2).Get();
flagItem.helpInfo += ")";

// add this flag to a std::map
AddFlag(flagItem);
}

template <typename Flags, typename T1, typename T2>
void FlagParser::AddFlag(T1 Flags::*t1, const std::string &flagName, const std::string &helpInfo, const T2 &t2) {
if (t1 == nullptr) {
MS_LOGE("t1 is nullptr");
return;
}

Flags *flag = dynamic_cast<Flags *>(this);
if (flag == nullptr) {
MS_LOGI("dynamic_cast failed");
return;
}

FlagInfo flagItem;

// flagItem is used as an output parameter
ConstructFlag(t1, flagName, helpInfo, &flagItem);
flagItem.parse = [t1](FlagParser *base, const std::string &value) -> Option<Nothing> {
Flags *flag = dynamic_cast<Flags *>(base);
if (base != nullptr) {
Option<T1> ret = Option<T1>(GenericParseValue<T1>(value));
if (ret.IsNone()) {
return Option<Nothing>(None());
} else {
flag->*t1 = ret.Get();
}
}

return Option<Nothing>(Nothing());
};

flagItem.isRequired = false;
flag->*t1 = t2;

flagItem.helpInfo +=
!helpInfo.empty() && helpInfo.find_last_of("\n\r") != helpInfo.size() - 1 ? " (default: " : "(default: ";
flagItem.helpInfo += ToString(t2).Get();
flagItem.helpInfo += ")";

// add this flag to a std::map
AddFlag(flagItem);
}

// option-type add flag
template <typename Flags, typename T>
void FlagParser::AddFlag(Option<T> Flags::*t, const std::string &flagName, const std::string &helpInfo) {
if (t == nullptr) {
MS_LOGE("t is nullptr");
return;
}

Flags *flag = dynamic_cast<Flags *>(this);
if (flag == nullptr) {
MS_LOGE("dynamic_cast failed");
return;
}

FlagInfo flagItem;
// flagItem is used as an output parameter
ConstructFlag(t, flagName, helpInfo, &flagItem);
flagItem.isRequired = false;
flagItem.parse = [t](FlagParser *base, const std::string &value) -> Option<Nothing> {
Flags *flag = dynamic_cast<Flags *>(base);
if (base != nullptr) {
Option<T> ret = Option<T>(GenericParseValue<T>(value));
if (ret.IsNone()) {
return Option<Nothing>(None());
} else {
flag->*t = Option<T>(Some(ret.Get()));
}
}

return Option<Nothing>(Nothing());
};

// add this flag to a std::map
AddFlag(flagItem);
}
} // namespace predict
} // namespace mindspore

#endif // PREDICT_COMMON_FLAG_PARSER_H_
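
Flags are declared by deriving from FlagParser and registering member fields in the constructor, exactly as BenchmarkFlags does earlier in this diff; ParseFlags returns Some(errorMessage) on failure and None() on success. A minimal sketch with a made-up DemoFlags class and entry point:

#include <iostream>
#include <string>
#include "common/flag_parser.h"

class DemoFlags : public mindspore::predict::FlagParser {  // hypothetical flags class
 public:
  DemoFlags() {
    AddFlag(&DemoFlags::modelPath, "modelPath", "Input model path", "");
    AddFlag(&DemoFlags::loopCount, "loopCount", "Run loop count", 10);
  }
  std::string modelPath;
  int loopCount = 0;
};

int RunDemo(int argc, const char **argv) {  // hypothetical entry point
  DemoFlags flags;
  auto err = flags.ParseFlags(argc, argv);
  if (err.IsSome() || flags.help) {  // parse error or --help requested
    std::cerr << flags.Usage() << std::endl;
    return err.IsSome() ? 1 : 0;
  }
  std::cout << "model: " << flags.modelPath << ", loops: " << flags.loopCount << std::endl;
  return 0;
}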

+ 0
- 77
predict/common/func_utils.cc

@@ -1,77 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "common/func_utils.h"

namespace mindspore {
namespace predict {
#if MS_USE_ARM
_Unwind_Reason_Code PrintTraceArm(_Unwind_Context *ctx, void *d) {
MS_ASSERT(ctx != nullptr);
MS_ASSERT(d != nullptr);
Dl_info info;
int *depth = static_cast<int *>(d);
auto ipAddr = static_cast<int64_t>(_Unwind_GetIP(ctx));
if (dladdr(reinterpret_cast<void *>(ipAddr), &info)) {
const char *symbol = "";
const char *dlfile = "";
if (info.dli_sname) {
symbol = info.dli_sname;
}
if (info.dli_fname) {
dlfile = info.dli_fname;
}
MS_PRINT_ERROR("#%d: (%08lx) %s %s ", *depth, ipAddr, dlfile, symbol);
}

(*depth)++;
return _URC_NO_REASON;
}
#endif

void CoreDumpTraceFunc(int iSignum) {
MS_PRINT_ERROR("----- start get backtrace info -----");
#if MS_USE_ARM
int depth = 0;
_Unwind_Backtrace(&PrintTraceArm, &depth);
#else
const auto maxDeep = 32;
const auto maxStringLen = 100;
void *apBuffer[maxStringLen];
char **ppStrings;

auto iStackDepth = backtrace(apBuffer, maxDeep);
if (0 > iStackDepth) {
KillProcess("Get backtrace depth failed");
return;
}
MS_PRINT_ERROR("Current stack depth is %d", iStackDepth);
ppStrings = backtrace_symbols(apBuffer, iStackDepth);
if (nullptr == ppStrings) {
KillProcess("Get backtrace_symbols failed");
return;
}

for (int iLoop = 0; iLoop < iStackDepth; iLoop++) {
MS_PRINT_ERROR("%s \n", ppStrings[iLoop]);
}
#endif
MS_PRINT_ERROR("----- finish get backtrace info -----");
KillProcess("Exit after core dump");
return; // try exit 1
}
} // namespace predict
} // namespace mindspore

+ 0
- 35
predict/common/func_utils.h

@@ -1,35 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef PREDICT_COMMON_FUNC_UTILS_H_
#define PREDICT_COMMON_FUNC_UTILS_H_

#if MS_USE_ARM
#include <dlfcn.h>
#include <unwind.h>
#else
#include <execinfo.h>
#endif
#include "include/errorcode.h"
#include "common/mslog.h"

namespace mindspore {
namespace predict {
void CoreDumpTraceFunc(int iSignum);
} // namespace predict
} // namespace mindspore

#endif // PREDICT_COMMON_FUNC_UTILS_H_

+ 0
- 167
predict/common/graph_util.cc

@@ -1,167 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "common/graph_util.h"
#include <fstream>
#include <sstream>
#include "common/mslog.h"
#include "include/errorcode.h"

namespace mindspore {
namespace predict {
OpGraph *OpGraph::Build(const SubGraphDef &subGraphDef) {
auto graph = std::unique_ptr<OpGraph>(new OpGraph());
if (graph == nullptr) {
MS_LOGE("malloc opgraph failed");
return nullptr;
}

auto nodeDefs = subGraphDef.nodes();
if (nodeDefs == nullptr) {
MS_LOGE("nodeDefs from subGraphDef is nullptr");
return nullptr;
}

uint32_t opCount = nodeDefs->size();
for (uint32_t i = 0; i < opCount; i++) {
auto nodeDef = nodeDefs->GetAs<NodeDef>(i);
MS_ASSERT(nodeDef != nullptr);
auto ret = graph->AddEdge(*nodeDef, *nodeDefs);
if (ret != RET_OK) {
MS_LOGE("%s add edge failed. ret:%d", nodeDef->opDef()->name()->c_str(), ret);
return nullptr;
}
}

return graph.release();
}

int OpGraph::AddEdge(const NodeDef &srcNodeDef, const flatbuffers::Vector<flatbuffers::Offset<NodeDef>> &nodeDefs) {
MS_ASSERT(srcNodeDef.opDef() != nullptr);
MS_ASSERT(srcNodeDef.opDef()->name() != nullptr);
NODE_ID srcId = std::string(srcNodeDef.opDef()->name()->c_str());
uint32_t opCount = nodeDefs.size();

MS_ASSERT(srcNodeDef.opDef()->outputIndex() != nullptr);
for (auto index : *(srcNodeDef.opDef()->outputIndex())) {
for (uint32_t i = 0; i < opCount; i++) {
auto dstNodeDef = nodeDefs.GetAs<NodeDef>(i);
bool find = false;
MS_ASSERT(dstNodeDef != nullptr);
MS_ASSERT(dstNodeDef->opDef() != nullptr);
auto inputIndex = dstNodeDef->opDef()->inputIndex();
MS_ASSERT(inputIndex != nullptr);
if (std::any_of(inputIndex->begin(), inputIndex->end(), [&index](int i) { return i == index; })) {
find = true;
}

if (!find) {
continue;
}
MS_ASSERT(dstNodeDef->opDef()->name() != nullptr);
NODE_ID dstId = std::string(dstNodeDef->opDef()->name()->c_str());
auto ret = AddEdge(srcId, dstId);
if (ret != RET_OK) {
return ret;
}
}
}

return RET_OK;
}

int OpGraph::AddEdge(const NODE_ID &srcId, const NODE_ID &dstId) {
auto srcNode = AddNode(srcId);
if (srcNode == nullptr) {
MS_LOGE("add srcNode failed");
return RET_ERROR;
}
srcNode->AddOutEdge(dstId);
auto dstNode = AddNode(dstId);
if (dstNode == nullptr) {
MS_LOGE("add dstNode failed");
return RET_ERROR;
}
dstNode->AddInEdge(srcId);
return RET_OK;
}

OpNode *OpGraph::GetNode(const NODE_ID &nodeId) {
auto node = nodes.find(nodeId);
if (node == nodes.end()) {
return nullptr;
}
return node->second;
}

OpNode *OpGraph::AddNode(const NODE_ID &nodeId) {
auto node = GetNode(nodeId);
if (node != nullptr) {
return node;
}
node = new (std::nothrow) OpNode(nodeId);
if (node == nullptr) {
MS_LOGE("new node failed");
return nullptr;
}
nodes[nodeId] = node;
return node;
}

std::unordered_set<NODE_ID> OpGraph::GetInputNode() {
std::unordered_set<NODE_ID> inputNodes;
for (const auto &iter : nodes) {
auto node = iter.second;
MS_ASSERT(node != nullptr);
if (node->GetAllInEdge().empty()) {
inputNodes.insert(node->ID());
}
}
return inputNodes;
}

std::unordered_set<NODE_ID> OpGraph::GetOutputNode() {
std::unordered_set<NODE_ID> outputNodes;
for (const auto &iter : nodes) {
auto node = iter.second;
MS_ASSERT(node != nullptr);
if (node->GetAllOutEdge().empty()) {
outputNodes.insert(node->ID());
}
}
return outputNodes;
}

OpGraph::~OpGraph() {
for (auto iter : nodes) {
if (iter.second != nullptr) {
delete iter.second;
}
}
nodes.clear();
}

NODE_ID OpNode::ID() { return id; }

void OpNode::AddInEdge(const NODE_ID &nodeId) { inEdges.insert(nodeId); }

void OpNode::AddOutEdge(const NODE_ID &nodeId) { outEdges.insert(nodeId); }

std::unordered_set<NODE_ID> OpNode::GetAllInEdge() { return inEdges; }

std::unordered_set<NODE_ID> OpNode::GetAllOutEdge() { return outEdges; }
} // namespace predict
} // namespace mindspore

+ 0
- 71
predict/common/graph_util.h

@@ -1,71 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef PREDICT_COMMON_GRAPH_UTIL_H_
#define PREDICT_COMMON_GRAPH_UTIL_H_

#include <string>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>
#include <memory>
#include "common/utils.h"
#include "schema/inner/ms_generated.h"

namespace mindspore {
namespace predict {
using NODE_ID = std::string;

class OpNode {
public:
explicit OpNode(NODE_ID nodeId) : id(std::move(nodeId)) {}
NODE_ID ID();
void AddInEdge(const NODE_ID &nodeId);
void AddOutEdge(const NODE_ID &nodeId);
std::unordered_set<NODE_ID> GetAllInEdge();
std::unordered_set<NODE_ID> GetAllOutEdge();

protected:
NODE_ID id;
std::unordered_set<NODE_ID> inEdges;
std::unordered_set<NODE_ID> outEdges;
};

class OpGraph {
public:
OpGraph() = default;

~OpGraph();

static OpGraph *Build(const SubGraphDef &subGraphDef);

OpNode *GetNode(const NODE_ID &nodeId);
OpNode *AddNode(const NODE_ID &nodeId);
std::unordered_set<NODE_ID> GetInputNode();
std::unordered_set<NODE_ID> GetOutputNode();

private:
int AddEdge(const NODE_ID &srcId, const NODE_ID &dstId);
int AddEdge(const NodeDef &srcNodeDef, const flatbuffers::Vector<flatbuffers::Offset<NodeDef>> &nodeDefs);

protected:
std::unordered_map<NODE_ID, OpNode *> nodes;
};
} // namespace predict
} // namespace mindspore

#endif // PREDICT_COMMON_GRAPH_UTIL_H_
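
OpGraph::Build walks the flatbuffer node definitions and wires up in/out edges, so the graph's inputs and outputs are simply the nodes with no incoming or outgoing edges. A minimal sketch of querying that boundary; DumpGraphBoundary is a made-up helper and the fully qualified SubGraphDef name is assumed from the generated schema headers:

#include <memory>
#include "common/graph_util.h"
#include "common/mslog.h"

void DumpGraphBoundary(const mindspore::predict::SubGraphDef &subGraphDef) {  // hypothetical helper
  std::unique_ptr<mindspore::predict::OpGraph> graph(mindspore::predict::OpGraph::Build(subGraphDef));
  if (graph == nullptr) {
    MS_LOGE("build op graph failed");
    return;
  }
  for (const auto &id : graph->GetInputNode()) {
    MS_LOGI("graph input node: %s", id.c_str());  // NODE_ID is a std::string
  }
  for (const auto &id : graph->GetOutputNode()) {
    MS_LOGI("graph output node: %s", id.c_str());
  }
}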

+ 0
- 26
predict/common/module_registry.cc

@@ -1,26 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "common/module_registry.h"

namespace mindspore {
namespace predict {
ModuleRegistry *GetRegistryInstance() {
static ModuleRegistry registry;
return &registry;
}
} // namespace predict
} // namespace mindspore

+ 0
- 97
predict/common/module_registry.h

@@ -1,97 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef PREDICT_COMMON_MODULE_REGISTRY_H_
#define PREDICT_COMMON_MODULE_REGISTRY_H_
#include <memory>
#include <string>
#include <unordered_map>
#include "common/mslog.h"

#define MSPREDICT_API __attribute__((visibility("default")))

namespace mindspore {
namespace predict {
class ModuleBase {
public:
virtual ~ModuleBase() = default;
};

template <typename T>
class Module;

class ModuleRegistry {
public:
ModuleRegistry() = default;

virtual ~ModuleRegistry() = default;

template <class T>
bool Register(const std::string &name, const T &t) {
modules[name] = &t;
return true;
}

template <class T>
std::shared_ptr<T> Create(const std::string &name) {
auto it = modules.find(name);
if (it == modules.end()) {
return nullptr;
}
auto *module = (Module<T> *)it->second;
if (module == nullptr) {
return nullptr;
} else {
return module->Create();
}
}

template <class T>
T *GetInstance(const std::string &name) {
auto it = modules.find(name);
if (it == modules.end()) {
return nullptr;
}
auto *module = (Module<T> *)it->second;
if (module == nullptr) {
return nullptr;
} else {
return module->GetInstance();
}
}

protected:
std::unordered_map<std::string, const ModuleBase *> modules;
};

ModuleRegistry *GetRegistryInstance() MSPREDICT_API;

template <class T>
class ModuleRegistrar {
public:
ModuleRegistrar(const std::string &name, const T &module) {
auto registryInstance = GetRegistryInstance();
if (registryInstance == nullptr) {
MS_LOGW("registryInstance is nullptr.");
} else {
registryInstance->Register(name, module);
}
}
};
} // namespace predict
} // namespace mindspore

#endif // PREDICT_COMMON_MODULE_REGISTRY_H_

+ 0
- 47
predict/common/mslog.cc

@@ -1,47 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "common/mslog.h"
#include <iostream>
#include <cstdlib>
#include <climits>
#include <string>
#include "include/errorcode.h"

namespace mindspore {
namespace predict {
std::string GetEnv(const std::string &envvar) {
const char *value = std::getenv(envvar.c_str());
if (value == nullptr) {
return std::string();
}
return std::string(value);
}

bool IsPrint(int level) {
auto envString = GetEnv("MSLOG");
static int env = static_cast<int>(std::strtol(!envString.empty() ? envString.c_str() : "3", nullptr, 0));
if (env == INT_MIN || env == INT_MAX) {
env = WARN;
// enable the SP for binscope checking
std::string errorStr = "env exceeded the value that type int is able to represent";
MS_LOGE("%s", errorStr.c_str());
}

return level >= env;
}
} // namespace predict
} // namespace mindspore

+ 0
- 230
predict/common/mslog.h

@@ -1,230 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef PREDICT_COMMON_MSLOG_H_
#define PREDICT_COMMON_MSLOG_H_

#include <syslog.h>
#include <unistd.h>
#include <csignal>
#include <iostream>
#include <sstream>
#include <string>

#if defined(__ANDROID__) || defined(ANDROID)
#include <android/log.h>
#endif
namespace mindspore {
namespace predict {
constexpr const char *TAG = "MS_PREDICT";

constexpr int DEBUG = 1;
constexpr int INFO = 2;
constexpr int WARN = 3;
constexpr int ERROR = 4;

#define MSPREDICT_API __attribute__((visibility("default")))

bool MSPREDICT_API IsPrint(int level);

#if !defined(__ANDROID__) && !defined(ANDROID)

#if LOG_TO_FILE
#define MS_LOGD(fmt, args...) \
{ \
if (mindspore::predict::IsPrint(mindspore::predict::DEBUG)) { \
syslog(LOG_DEBUG, "%s|%d|%s[%d]|: " #fmt, mindspore::predict::TAG, getpid(), __func__, __LINE__, ##args); \
} \
}
#define MS_LOGI(fmt, args...) \
{ \
if (mindspore::predict::IsPrint(mindspore::predict::INFO)) { \
syslog(LOG_INFO, "%s|%d|%s[%d]|: " #fmt, mindspore::predict::TAG, getpid(), __func__, __LINE__, ##args); \
} \
}
#define MS_LOGW(fmt, args...) \
{ \
if (mindspore::predict::IsPrint(mindspore::predict::WARN)) { \
syslog(LOG_WARNING, "%s|%d|%s[%d]|: " #fmt, mindspore::predict::TAG, getpid(), __func__, __LINE__, ##args); \
} \
}
#define MS_LOGE(fmt, args...) \
{ \
if (mindspore::predict::IsPrint(mindspore::predict::ERROR)) { \
syslog(LOG_ERR, "%s|%d|%s[%d]|: " #fmt, mindspore::predict::TAG, getpid(), __func__, __LINE__, ##args); \
} \
}
#else

#define MS_LOGD(fmt, args...) \
{ \
if (mindspore::predict::IsPrint(mindspore::predict::DEBUG)) { \
printf("[DEBUG] %s|%d|%s|%s[%d]|: " #fmt "\r\n", mindspore::predict::TAG, getpid(), __FILE__, __func__, \
__LINE__, ##args); \
} \
}
#define MS_LOGI(fmt, args...) \
{ \
if (mindspore::predict::IsPrint(mindspore::predict::INFO)) { \
printf("[INFO] %s|%d|%s|%s[%d]|: " #fmt "\r\n", mindspore::predict::TAG, getpid(), __FILE__, __func__, \
__LINE__, ##args); \
} \
}
#define MS_LOGW(fmt, args...) \
{ \
if (mindspore::predict::IsPrint(mindspore::predict::WARN)) { \
printf("[WARN] %s|%d|%s|%s[%d]|: " #fmt "\r\n", mindspore::predict::TAG, getpid(), __FILE__, __func__, \
__LINE__, ##args); \
} \
}
#define MS_LOGE(fmt, args...) \
{ \
if (mindspore::predict::IsPrint(mindspore::predict::ERROR)) { \
printf("[ERROR] %s|%d|%s|%s[%d]|: " #fmt "\r\n", mindspore::predict::TAG, getpid(), __FILE__, __func__, \
__LINE__, ##args); \
} \
}
#endif

#else

#define MS_LOGD(fmt, args...) \
{ \
if (mindspore::predict::IsPrint(mindspore::predict::DEBUG)) \
__android_log_print(ANDROID_LOG_DEBUG, mindspore::predict::TAG, "|%d|%s[%d]|: " fmt, getpid(), __func__, \
__LINE__, ##args); \
}

#define MS_LOGI(fmt, args...) \
{ \
if (mindspore::predict::IsPrint(mindspore::predict::INFO)) \
__android_log_print(ANDROID_LOG_INFO, mindspore::predict::TAG, "|%d|%s[%d]|: " fmt, getpid(), __func__, \
__LINE__, ##args); \
}

#define MS_LOGW(fmt, args...) \
{ \
if (mindspore::predict::IsPrint(mindspore::predict::WARN)) \
__android_log_print(ANDROID_LOG_WARN, mindspore::predict::TAG, "|%d|%s[%d]|: " fmt, getpid(), __func__, \
__LINE__, ##args); \
}

#define MS_LOGE(fmt, args...) \
{ \
if (mindspore::predict::IsPrint(mindspore::predict::ERROR)) \
__android_log_print(ANDROID_LOG_ERROR, mindspore::predict::TAG, "|%d|%s[%d]|: " fmt, getpid(), __func__, \
__LINE__, ##args); \
}

#endif

#define MS_LOG(severity) std::cout << std::endl
#define MS_DLOG(verboselevel) std::cout << std::endl
// Kill the process for safe exiting.
inline void KillProcess(const std::string &ret) {
MS_LOG(ERROR) << "mindspore Exit Tip:" << ret;
if (raise(SIGKILL) != 0) {
MS_LOGE("Send SIGKILL to kill process failed");
}
}
} // namespace predict
} // namespace mindspore

#define MS_ASSERT(expression) \
do { \
if (!(expression)) { \
std::stringstream ss; \
ss << "Assertion failed: " << #expression << ", file: " << __FILE__ << ", line: " << __LINE__; \
mindspore::predict::KillProcess(ss.str()); \
} \
} while (0)

#define MS_EXIT(ret) \
do { \
std::stringstream ss; \
ss << (ret) << " ( file: " << __FILE__ << ", line: " << __LINE__ << " )."; \
mindspore::predict::KillProcess(ss.str()); \
} while (0)

#define MS_PRINT_ERROR(fmt, args...) \
printf(#fmt "\n", ##args); \
MS_LOGE(fmt, ##args);

#define MS_PRINT_INFO(fmt, args...) \
printf(fmt "\n", ##args); \
MS_LOGI(fmt, ##args);

constexpr int LOG_CHECK_EVERY_FIRSTNUM = 10;
constexpr int LOG_CHECK_EVERY_NUM1 = 10;
constexpr int LOG_CHECK_EVERY_NUM2 = 100;
constexpr int LOG_CHECK_EVERY_NUM3 = 1000;
constexpr int LOG_CHECK_EVERY_NUM4 = 10000;

#define LOG_CHECK_ID_CONCAT(word1, word2) word1##word2

#define LOG_CHECK_ID LOG_CHECK_ID_CONCAT(__FUNCTION__, __LINE__)

#define LOG_CHECK_FIRST_N \
[](uint32_t firstNum) { \
static uint32_t LOG_CHECK_ID = 0; \
++LOG_CHECK_ID; \
return (LOG_CHECK_ID <= firstNum); \
}

#define LOG_CHECK_EVERY_N1 \
[](uint32_t firstNum, uint32_t num) { \
static uint32_t LOG_CHECK_ID = 0; \
++LOG_CHECK_ID; \
return ((LOG_CHECK_ID <= firstNum) || (LOG_CHECK_ID % num == 0)); \
}

#define LOG_CHECK_EVERY_N2 \
[](uint32_t firstNum, uint32_t num1, uint32_t num2) { \
static uint32_t LOG_CHECK_ID = 0; \
++LOG_CHECK_ID; \
return ((LOG_CHECK_ID <= firstNum) || (LOG_CHECK_ID < num2 && LOG_CHECK_ID % num1 == 0) || \
(LOG_CHECK_ID % num2 == 0)); \
}

#define LOG_CHECK_EVERY_N3 \
[](uint32_t firstNum, uint32_t num1, uint32_t num2, uint32_t num3) { \
static uint32_t LOG_CHECK_ID = 0; \
++LOG_CHECK_ID; \
return ((LOG_CHECK_ID <= firstNum) || (LOG_CHECK_ID < num2 && LOG_CHECK_ID % num1 == 0) || \
(LOG_CHECK_ID < num3 && LOG_CHECK_ID % num2 == 0) || (LOG_CHECK_ID % num3 == 0)); \
}

#define LOG_CHECK_EVERY_N4 \
[](uint32_t firstNum, uint32_t num1, uint32_t num2, uint32_t num3, uint32_t num4) { \
static uint32_t LOG_CHECK_ID = 0; \
++LOG_CHECK_ID; \
return ((LOG_CHECK_ID <= firstNum) || (LOG_CHECK_ID < num2 && LOG_CHECK_ID % num1 == 0) || \
(LOG_CHECK_ID < num3 && LOG_CHECK_ID % num2 == 0) || (LOG_CHECK_ID < num4 && LOG_CHECK_ID % num3 == 0) || \
(LOG_CHECK_ID % num4 == 0)); \
}

#define LOG_CHECK_EVERY_N \
[]() { \
static uint32_t LOG_CHECK_ID = 0; \
++LOG_CHECK_ID; \
return ((LOG_CHECK_ID <= LOG_CHECK_EVERY_FIRSTNUM) || \
(LOG_CHECK_ID < LOG_CHECK_EVERY_NUM2 && LOG_CHECK_ID % LOG_CHECK_EVERY_NUM1 == 0) || \
(LOG_CHECK_ID < LOG_CHECK_EVERY_NUM3 && LOG_CHECK_ID % LOG_CHECK_EVERY_NUM2 == 0) || \
(LOG_CHECK_ID < LOG_CHECK_EVERY_NUM4 && LOG_CHECK_ID % LOG_CHECK_EVERY_NUM3 == 0) || \
(LOG_CHECK_ID % LOG_CHECK_EVERY_NUM4 == 0)); \
}

#endif // PREDICT_COMMON_MSLOG_H_
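
The MS_LOG* macros above are printf-style, gated at runtime by IsPrint and the MSLOG environment variable, and the LOG_CHECK_* lambdas throttle repeated messages; MS_ASSERT kills the process when its condition fails. A minimal sketch; LogExample and its argument are made up:

#include "common/mslog.h"

void LogExample(int tensorCount) {  // hypothetical helper
  MS_ASSERT(tensorCount >= 0);  // kills the process with a message if the condition fails
  MS_LOGI("preparing %d tensors", tensorCount);
  // Log the first few occurrences, then progressively less often.
  if (LOG_CHECK_EVERY_N()) {
    MS_LOGW("this message is rate limited");
  }
}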

+ 0
- 44
predict/common/op_utils.h

@@ -1,44 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef PREDICT_COMMON_OP_UTILS_H_
#define PREDICT_COMMON_OP_UTILS_H_

#include <functional>
#include <string>
#include "schema/inner/ms_generated.h"

namespace mindspore {
namespace predict {
inline OpT GetOpType(const OpDef &opDef) { return opDef.attr_type(); }

inline OpT GetOpType(const NodeDef &nodeDef) { return GetOpType(*(nodeDef.opDef())); }

inline std::string GetOpTypeName(const NodeDef &nodeDef) { return EnumNameOpT(GetOpType(nodeDef)); }

inline std::string GetOpTypeName(const OpDef &opDef) { return EnumNameOpT(GetOpType(opDef)); }

inline OpT GetOpType(const OpDefT &opDefT) { return opDefT.attr.type; }

inline OpT GetOpType(const NodeDefT &nodeDefT) { return GetOpType(*(nodeDefT.opDef.get())); }

inline std::string GetOpTypeName(const NodeDefT &nodeDefT) { return EnumNameOpT(GetOpType(nodeDefT)); }

inline std::string GetOpTypeName(const OpDefT &opDefT) { return EnumNameOpT(GetOpType(opDefT)); }
} // namespace predict
} // namespace mindspore

#endif // PREDICT_COMMON_OP_UTILS_H_

+ 0
- 119
predict/common/option.h

@@ -1,119 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef PREDICT_COMMON_OPTION_H_
#define PREDICT_COMMON_OPTION_H_

#include <type_traits>
#include <utility>
#include "common/mslog.h"

namespace mindspore {
namespace predict {
template <typename T>
struct InnerSome {
explicit InnerSome(const T &t) : _t(std::move(t)) {}

T _t;
};

template <typename T>
InnerSome<typename std::decay<T>::type> Some(T &&t) {
return InnerSome<typename std::decay<T>::type>(std::forward<T>(t));
}

struct None {};

template <typename T>
class Option {
public:
Option() : state(NONE) {}

explicit Option(const T &t) : data(t), state(SOME) {}

explicit Option(T &&t) : data(std::move(t)), state(SOME) {}

explicit Option(const InnerSome<T> &some) : data(some._t), state(SOME) {}

explicit Option(const None &none) : state(NONE) {}

Option(const Option<T> &that) : state(that.state) {
if (that.IsSome()) {
new (&data) T(that.data);
}
}

virtual ~Option() = default;

bool IsNone() const { return state == NONE; }

bool IsSome() const { return state == SOME; }

const T &Get() const & {
MS_ASSERT(IsSome());
return data;
}

T &Get() & {
MS_ASSERT(IsSome());
return data;
}

T &&Get() && {
MS_ASSERT(IsSome());
return std::move(data);
}

const T &&Get() const && {
MS_ASSERT(IsSome());
return std::move(data);
}

// operator overloads
Option<T> &operator=(const Option<T> &that) {
if (&that != this) {
if (IsSome()) {
data.~T();
}
state = that.state;
if (that.IsSome()) {
new (&data) T(that.data);
}
}

return *this;
}

bool operator==(const Option<T> &that) const {
return (IsNone() && that.IsNone()) || (IsSome() && that.IsSome() && data == that.data);
}

bool operator!=(const Option<T> &that) const { return !(*this == that); }

bool operator==(const T &that) const { return IsSome() && data == that; }

bool operator!=(const T &that) const { return !(*this == that); }

private:
enum State { NONE = 0, SOME = 1 };

T data;
State state;
};
} // namespace predict
} // namespace mindspore

#endif // PREDICT_COMMON_OPTION_H_
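
Option<T> is the small maybe-type the flag parser builds on: present values are wrapped with Some, absence is signalled with None, and Get should only be called after IsSome. A minimal sketch with made-up helpers:

#include "common/option.h"

using mindspore::predict::None;
using mindspore::predict::Option;
using mindspore::predict::Some;

Option<int> ParsePositive(int value) {  // hypothetical helper
  if (value > 0) {
    return Option<int>(Some(value));  // wrap a present value
  }
  return Option<int>(None());  // signal absence
}

int GetOrDefault(const Option<int> &opt, int fallback) {  // hypothetical helper
  return opt.IsSome() ? opt.Get() : fallback;
}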

+ 0
- 50
predict/common/storage.cc

@@ -1,50 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "common/storage.h"
#include "flatbuffers/flatbuffers.h"
#include "common/mslog.h"
#include "common/file_utils.h"

namespace mindspore {
namespace predict {
int Storage::Save(const GraphDefT &graph, const std::string &outputPath) {
flatbuffers::FlatBufferBuilder builder(flatSize);
auto offset = GraphDef::Pack(builder, &graph);
builder.Finish(offset);
int size = builder.GetSize();
auto content = builder.GetBufferPointer();
if (content == nullptr) {
MS_LOGE("GetBufferPointer nullptr");
return RET_ERROR;
}
std::string realPath = RealPath(outputPath.c_str());
if (realPath.empty()) {
MS_LOGE("Output file path '%s' is not valid", outputPath.c_str());
return RET_ERROR;
}

std::ofstream output(realPath, std::ofstream::binary);
if (!output.is_open()) {
MS_LOGE("ofstream open failed");
return RET_ERROR;
}
output.write((const char *)content, size);
output.close();
return RET_OK;
}
} // namespace predict
} // namespace mindspore

+ 0
- 36
predict/common/storage.h

@@ -1,36 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef PREDICT_COMMON_STORAGE_H_
#define PREDICT_COMMON_STORAGE_H_

#include <fstream>
#include <string>
#include "include/errorcode.h"
#include "flatbuffers/flatbuffers.h"
#include "schema/inner/ms_generated.h"

namespace mindspore {
namespace predict {
class Storage {
public:
int Save(const GraphDefT &graph, const std::string &outputPath);
const int flatSize = 1024;
};
} // namespace predict
} // namespace mindspore

#endif // PREDICT_COMMON_STORAGE_H_
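
Storage::Save packs a GraphDefT object back into a flatbuffer and writes it to the resolved output path. A minimal sketch; SaveGraph, the output path, and the fully qualified GraphDefT/RET_OK names are assumptions based on the headers above:

#include "common/storage.h"
#include "common/mslog.h"
#include "include/errorcode.h"

int SaveGraph(const mindspore::predict::GraphDefT &graph) {  // hypothetical helper
  mindspore::predict::Storage storage;
  int ret = storage.Save(graph, "./model_out.ms");  // assumed output path
  if (ret != mindspore::predict::RET_OK) {
    MS_LOGE("save graph failed: %d", ret);
  }
  return ret;
}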

+ 0
- 228
predict/common/utils.cc

@@ -1,228 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "common/utils.h"

namespace mindspore {
namespace predict {
uint64_t GetTimeUs() {
struct timespec ts = {0, 0};
if (clock_gettime(CLOCK_MONOTONIC, &ts) != 0) {
return 0;
}
// seconds to microseconds plus nanoseconds to microseconds
auto retval = static_cast<uint64_t>((ts.tv_sec * USEC) + (ts.tv_nsec / MSEC));
return retval;
}

static const unsigned int FP32_BIT_SIZE = 32;
static const unsigned int FP32_EXPONENT_BIAS = 127;
static const unsigned int FP32_SIGNIFICAND = 23;

static const unsigned int FP32_EXPONENT_MAX = 255;

static const unsigned int FP16_BIT_SIZE = 16;
static const unsigned int FP16_EXPONENT_BIAS = 15;
static const unsigned int FP16_SIGNIFICAND = 10;

static const int FP16_EXPONENT_MAX = 30;
static const int FP16_EXPONENT_MIN = -10;

float ShortToFloat32(int16_t srcValue) {
uint16_t expHalf16 = srcValue & 0x7C00;
int exp1 = static_cast<int>(expHalf16);
uint16_t mantissa16 = srcValue & 0x03FF;
int mantissa1 = static_cast<int>(mantissa16);
int sign = static_cast<int>(srcValue & 0x8000);
sign = sign << FP16_BIT_SIZE;

// nan or inf
if (expHalf16 == 0x7C00) {
// nan
if (mantissa16 > 0) {
int res = (0x7FC00000 | sign);
int *iRes = &res;
MS_ASSERT(iRes != nullptr);
auto fres = static_cast<float>(*iRes);
return fres;
}
// inf
int res = (0x7F800000 | sign);
int *iRes = &res;
MS_ASSERT(iRes != nullptr);
auto fres = static_cast<float>(*iRes);
return fres;
}
if (expHalf16 != 0) {
exp1 += ((FP32_EXPONENT_BIAS - FP16_EXPONENT_BIAS) << FP16_SIGNIFICAND); // exponents converted to float32 bias
int res = (exp1 | mantissa1);
res = res << (FP32_SIGNIFICAND - FP16_SIGNIFICAND);
res = (res | sign);
int *iRes = &res;

auto fres = static_cast<float>(*iRes);
return fres;
}

int xmm1 = exp1 > (1 << FP16_SIGNIFICAND) ? exp1 : (1 << FP16_SIGNIFICAND);
xmm1 = (xmm1 << (FP32_SIGNIFICAND - FP16_SIGNIFICAND));
xmm1 += ((FP32_EXPONENT_BIAS - FP16_EXPONENT_BIAS - FP16_SIGNIFICAND)
<< FP32_SIGNIFICAND); // add the bias difference to xmm1
xmm1 = xmm1 | sign; // Combine with the sign mask

auto res = static_cast<float>(mantissa1); // Convert mantissa to float
res *= static_cast<float>(xmm1);

return res;
}

int16_t Float32ToShort(float srcValue) {
auto srcValueBit = static_cast<unsigned int>(srcValue);
int sign = srcValueBit >> (FP32_BIT_SIZE - 1);
int mantissa = srcValueBit & 0x007FFFFF;
// exponent
int exp = ((srcValueBit & 0x7F800000) >> FP32_SIGNIFICAND) + FP16_EXPONENT_BIAS - FP32_EXPONENT_BIAS;
int16_t res;
if (exp > 0 && exp < FP16_EXPONENT_MAX) {
// use rte rounding mode, round the significand, combine sign, exponent and significand into a short.
res = (sign << (FP16_BIT_SIZE - 1)) | (exp << FP16_SIGNIFICAND) |
((mantissa + 0x00001000) >> (FP32_SIGNIFICAND - FP16_SIGNIFICAND));
} else if (srcValueBit == 0) {
res = 0;
} else {
if (exp <= 0) {
if (exp < FP16_EXPONENT_MIN) {
// value is less than min half float point
res = 0;
} else {
// normalized single, magnitude is less than min normal half float point.
mantissa = (mantissa | 0x00800000) >> (1 - exp);
// round to nearest
if ((mantissa & 0x00001000) > 0) {
mantissa = mantissa + 0x00002000;
}
// combine sign & mantissa (exp is zero to get denormalized number)
res = (sign << FP16_EXPONENT_BIAS) | (mantissa >> (FP32_SIGNIFICAND - FP16_SIGNIFICAND));
}
} else if (exp == (FP32_EXPONENT_MAX - FP32_EXPONENT_BIAS + FP16_EXPONENT_BIAS)) {
if (mantissa == 0) {
// input float is infinity, return infinity half
res = (sign << FP16_EXPONENT_BIAS) | 0x7C00;
} else {
// input float is NaN, return half NaN
res = (sign << FP16_EXPONENT_BIAS) | 0x7C00 | (mantissa >> (FP32_SIGNIFICAND - FP16_SIGNIFICAND));
}
} else {
// exp > 0, normalized single, round to nearest
if ((mantissa & 0x00001000) > 0) {
mantissa = mantissa + 0x00002000;
if ((mantissa & 0x00800000) > 0) {
mantissa = 0;
exp = exp + 1;
}
}
if (exp > FP16_EXPONENT_MAX) {
// exponent overflow - return infinity half
res = (sign << FP16_EXPONENT_BIAS) | 0x7C00;
} else {
// combine sign, exp and mantissa into normalized half
res = (sign << FP16_EXPONENT_BIAS) | (exp << FP16_SIGNIFICAND) |
(mantissa >> (FP32_SIGNIFICAND - FP16_SIGNIFICAND));
}
}
}
return res;
}
std::string Remove(const std::string &from, const std::string &subStr, Mode mode) {
std::string result = from;
if (mode == PREFIX) {
if (from.substr(0, subStr.length()) == subStr) {
result = from.substr(subStr.size());
}
} else if (mode == SUFFIX) {
if (from.rfind(subStr) == from.size() - subStr.size()) {
result = from.substr(0, from.size() - subStr.size());
}
} else {
size_t index;
while ((index = result.find(subStr)) != std::string::npos) {
result = result.erase(index, subStr.size());
}
}

return result;
}

std::vector<std::string> StrSplit(const std::string &str, const std::string &pattern) {
std::string::size_type pos;
std::vector<std::string> result;
std::string tmpStr(str + pattern);
std::string::size_type size = tmpStr.size();

for (std::string::size_type i = 0; i < size; i++) {
pos = tmpStr.find(pattern, i);
if (pos < size) {
std::string s = tmpStr.substr(i, pos - i);
result.push_back(s);
i = pos + pattern.size() - 1;
}
}
return result;
}

std::vector<std::string> Tokenize(const std::string &src, const std::string &delimiters,
const Option<size_t> &maxTokenNum) {
if (maxTokenNum.IsSome() && maxTokenNum.Get() == 0) {
return {};
}

std::vector<std::string> tokens;
size_t offset = 0;

while (true) {
size_t nonDelimiter = src.find_first_not_of(delimiters, offset);
if (nonDelimiter == std::string::npos) {
break;
}
size_t delimiter = src.find_first_of(delimiters, nonDelimiter);
if (delimiter == std::string::npos || (maxTokenNum.IsSome() && tokens.size() == maxTokenNum.Get() - 1)) {
tokens.push_back(src.substr(nonDelimiter));
break;
}

tokens.push_back(src.substr(nonDelimiter, delimiter - nonDelimiter));
offset = delimiter;
}
return tokens;
}

void ShortToFloat32(const int16_t *srcdata, float *dstdata, size_t elementSize) {
MS_ASSERT(srcdata != nullptr);
MS_ASSERT(dstdata != nullptr);
for (size_t i = 0; i < elementSize; i++) {
dstdata[i] = ShortToFloat32(srcdata[i]);
}
}

void Float32ToShort(const float *srcdata, int16_t *dstdata, size_t elementSize) {
MS_ASSERT(srcdata != nullptr);
MS_ASSERT(dstdata != nullptr);
for (size_t i = 0; i < elementSize; i++) {
dstdata[i] = Float32ToShort(srcdata[i]);
}
}
} // namespace predict
} // namespace mindspore

+ 0
- 154
predict/common/utils.h

@@ -1,154 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef PREDICT_COMMON_UTILS_H_
#define PREDICT_COMMON_UTILS_H_

#include <stdint.h>
#include <ctime>
#include <cstdint>
#include <vector>
#include <string>
#include "common/mslog.h"
#include "common/option.h"
#include "include/errorcode.h"

namespace mindspore {
namespace predict {
const int USEC = 1000000;
const int MSEC = 1000;

uint64_t GetTimeUs();

int16_t Float32ToShort(float srcValue);

float ShortToFloat32(int16_t srcValue);

void ShortToFloat32(const int16_t *srcData, float *dstData, size_t elementSize);

void Float32ToShort(const float *srcData, int16_t *dstData, size_t elementSize);

template <typename T>
bool IsContain(const std::vector<T> &vec, T element) {
for (auto iter = vec.begin(); iter != vec.end(); iter++) {
if (*iter == element) {
return true;
}
}
return false;
}

const char WHITESPACE[] = "\t\n\v\f\r ";
const char STR_TRUE[] = "true";
const char STR_FALSE[] = "false";

template <typename T>
Option<std::string> ToString(T t) {
std::ostringstream out;
out << t;
if (!out.good()) {
return Option<std::string>(None());
}

return Option<std::string>(out.str());
}

template <>
inline Option<std::string> ToString(bool value) {
return value ? Option<std::string>(STR_TRUE) : Option<std::string>(STR_FALSE);
}

// get the file name from a given path
// for example: "/usr/bin", we will get "bin"
inline std::string GetFileName(const std::string &path) {
char delim = '/';

size_t i = path.rfind(delim, path.length());
if (i != std::string::npos) {
return (path.substr(i + 1, path.length() - i));
}

return "";
}

// trim the whitespace characters in a string
// see also: the WHITESPACE constant defined above
inline void Trim(std::string *input) {
if (input == nullptr) {
return;
}
if (input->empty()) {
return;
}

input->erase(0, input->find_first_not_of(WHITESPACE));
input->erase(input->find_last_not_of(WHITESPACE) + 1);
}

// judge whether a string starts with a given prefix
// for example: "hello world" starts with "hello"
inline bool StartsWithPrefix(const std::string &source, const std::string &prefix) {
if (source.length() < prefix.length()) {
return false;
}

return (source.compare(0, prefix.length(), prefix) == 0);
}

// split string
std::vector<std::string> StrSplit(const std::string &str, const std::string &pattern);

// tokenize string
std::vector<std::string> Tokenize(const std::string &src, const std::string &delimiters,
const Option<size_t> &maxTokenNum = Option<size_t>(None()));

enum Mode { PREFIX, SUFFIX, ANY };

// remove a given substring as a prefix, a suffix, or everywhere it occurs
std::string Remove(const std::string &from, const std::string &subStr, Mode mode = ANY);

template <typename T>
inline Option<T> GenericParseValue(const std::string &value) {
T ret;
std::istringstream input(value);
input >> ret;

if (input && input.eof()) {
return Option<T>(ret);
}

return Option<T>(None());
}

template <>
inline Option<std::string> GenericParseValue(const std::string &value) {
return Option<std::string>(value);
}

template <>
inline Option<bool> GenericParseValue(const std::string &value) {
if (value == "true") {
return Option<bool>(true);
} else if (value == "false") {
return Option<bool>(false);
}

return Option<bool>(None());
}
} // namespace predict
} // namespace mindspore

#endif // PREDICT_COMMON_UTILS_H_
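The parsing and small string utilities declared above lend themselves to a short, hedged usage sketch (same include-path assumptions as the previous one):

#include <cassert>
#include <string>
#include <vector>
#include "common/utils.h"

int main() {
  using namespace mindspore::predict;

  // GenericParseValue<T> only succeeds when the whole string parses.
  Option<int> ok = GenericParseValue<int>("42");
  Option<int> bad = GenericParseValue<int>("42abc");  // trailing junk -> None
  Option<bool> flag = GenericParseValue<bool>("true");
  assert(ok.IsSome() && ok.Get() == 42);
  assert(!bad.IsSome());
  assert(flag.IsSome() && flag.Get());

  // ToString<T> goes the other way; the bool specialization yields "true"/"false".
  assert(ToString(3).Get() == "3");
  assert(ToString(false).Get() == "false");

  // Path and whitespace helpers behave as their comments describe.
  assert(GetFileName("/usr/bin") == "bin");
  assert(StartsWithPrefix("hello world", "hello"));
  std::string padded = " \tpadded\n";
  Trim(&padded);
  assert(padded == "padded");

  // IsContain() is a linear membership test over a vector.
  std::vector<int> v = {1, 2, 3};
  assert(IsContain(v, 2) && !IsContain(v, 7));
  return 0;
}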

+ 0
- 56
predict/include/context.h

@@ -1,56 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef PREDICT_INCLUDE_CONTEXT_H_
#define PREDICT_INCLUDE_CONTEXT_H_

#include <memory>
#include "dlpack/dlpack.h"
#include "include/tensor.h"

#define MSPREDICT_API __attribute__((visibility("default")))

namespace mindspore {
namespace predict {
///\brief Resource management definition of MindSpore predict.
class MSPREDICT_API Context {
public:
///\brief Constructor of MindSpore predict context using default value for parameters.
///
///\return Instance of MindSpore predict context.
Context();

///\brief Custom constructor of MindSpore predict context using input values for parameters.
///
///\param[in] threadNum The number of threads used during the runtime.
///\param[in] allocator The memory allocator used during the runtime.
///\param[in] deviceCtx The device information used during the runtime.
///
///\return Instance of MindSpore predict context.
Context(int threadNum, std::shared_ptr<Allocator> allocator, DLContext deviceCtx);

///\brief Destructor of MindSpore predict context.
virtual ~Context();

public:
DLContext deviceCtx;
int threadNum = 1;
std::shared_ptr<Allocator> allocator;
};
} // namespace predict
} // namespace mindspore

#endif // PREDICT_INCLUDE_CONTEXT_H_

+ 0
- 52
predict/include/errorcode.h

@@ -1,52 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef PREDICT_INCLUDE_ERRORCODE_H_
#define PREDICT_INCLUDE_ERRORCODE_H_

namespace mindspore {
namespace predict {
using STATUS = int;

/* Success */
constexpr int RET_OK = 0; /**< No error occurs. */

/* Common error code, range: [-1, -100]*/
constexpr int RET_ERROR = -1; /**< Common error code. */
constexpr int RET_NULL_PTR = -2; /**< NULL pointer returned.*/
constexpr int RET_PARAM_INVALID = -3; /**< Invalid parameter.*/
constexpr int RET_NO_CHANGE = -4; /**< No change. */

/* Executor error code, range: [-101,-200] */
constexpr int RET_OUT_OF_TENSOR_RANGE = -101; /**< Failed to check range. */
constexpr int RET_INPUT_TENSOR_ERROR = -102; /**< Failed to check input tensor. */
constexpr int RET_REENTRANT_ERROR = -103; /**< An executor is already running. */

/* Graph error code, range: [-201,-300] */
constexpr int RET_GRAPH_FILE_ERR = -201; /**< Failed to verify graph file. */

/* Node error code, range: [-301,-400] */
constexpr int RET_NOT_FIND_OP = -301; /**< Failed to find OP. */
constexpr int RET_INVALID_OP_NAME = -302; /**< Invalid OP name. */
constexpr int RET_INVALID_OP_ATTR = -303; /**< Invalid OP attr. */
constexpr int RET_OP_EXECUTE_FAILURE = -304; /**< Failed to execute OP. */

/* Tensor error code, range: [-401,-500] */
constexpr int RET_FORMAT_ERR = -401; /**< Failed to check tensor format. */
} // namespace predict
} // namespace mindspore

#endif // PREDICT_INCLUDE_ERRORCODE_H_

+ 0
- 139
predict/include/session.h

@@ -1,139 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef PREDICT_INCLUDE_SESSION_H_
#define PREDICT_INCLUDE_SESSION_H_

#include <memory>
#include <string>
#include <vector>
#include <map>
#include <unordered_set>
#include "include/context.h"
#include "include/tensor.h"

#define MSPREDICT_API __attribute__((visibility("default")))

namespace mindspore {
namespace predict {
using NODE_ID = std::string;

///\brief Graph defined by MindSpore predict.
///
///\note
/// The caller does not need to care about detailed implementation of this class, so just list the class name here.
class Graph;

///\brief GraphExecution defined by MindSpore predict.
///
///\note
/// The caller does not need to care about detailed implementation of this class, so just list the class name here.
class GraphExecution;

///\brief MindSpore predict session.
///
/// This class represents session of MindSpore predict.
///
///\note
/// The caller needs to allocate and free memory of inputs and outputs.
/// Constructing a Session directly is not suggested; please use the CreateSession function to create a new session.
class MSPREDICT_API Session {
public:
///\brief Constructor of MindSpore predict session.
///
///\param[in] ctx The context of the session.
///
///\return Instance of MindSpore predict session.
explicit Session(const Context &ctx);

///\brief Destructor of MindSpore predict session.
~Session();

///\brief Init the session.
///
///\param[in] graphBuf The buffer of the graph, used to build the session.
///\param[in] size The size of the graph buffer.
///
///\return Return RET_OK if the initialization succeeds, otherwise return RET_ERROR.
int Init(const char *graphBuf, size_t size);

///\brief Get the input of session.
///
///\return Input node's input tensors if found, empty vector otherwise.
///
///\note
/// The caller needs to allocate and free memory of inputs.
std::vector<Tensor *> GetInput();

///\brief Run the session.
///
///\param[in] inputs The input of the session.
///
///\return Return RET_OK if the run succeeds, otherwise return RET_ERROR.
///\note
/// Currently input tensors' data format only support FORMAT_NCHW.
/// Currently input tensors' data type only support FLOAT.
int Run(const std::vector<Tensor *> &inputs);

///\brief Get the output of session.
///
///\param[in] nodeName Given output node name.
///
///\return Output node's output tensors if found, empty vector otherwise.
///
///\note
/// The caller needs to free memory of outputs.
std::vector<Tensor *> GetOutput(const std::string &nodeName);

///\brief Get the all output of session.
///
///\return Every output node's output tensors.
///
///\note
/// The caller needs to free memory of outputs.
std::map<std::string, std::vector<Tensor *>> GetAllOutput();

protected:
///\brief Init the executor.
///
///\return Return RET_OK if the initialization succeeds, otherwise return RET_ERROR.
int InitExecutor();

const Context &_ctx;
Graph *_graph = nullptr;
GraphExecution *_executor = nullptr;
bool reinitExecutor = true;
};

///\brief MindSpore predict neural network session create function
///
/// This function is used to create a MindSpore predict neural network session, which will be used to run the neural network.
///
///\param[in] graphBuf The buffer of the graph, used to build the session.
///\param[in] size The size of the graph buffer.
///\param[in] ctx The context of the session.
///
///\return Instance of MindSpore predict session.
///
///\note
/// The caller needs to allocate and free memory of graph buffer.
std::shared_ptr<Session> MSPREDICT_API CreateSession(const char *graphBuf, size_t size, const Context &ctx);
} // namespace predict
} // namespace mindspore

#endif // PREDICT_INCLUDE_SESSION_H_
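A hedged end-to-end sketch of driving the Session API documented above; graph loading, input filling, and error handling are placeholders, and only calls declared in this header (plus errorcode.h and tensor.h) are used.

#include <cstring>
#include <map>
#include <memory>
#include <string>
#include <vector>
#include "include/errorcode.h"
#include "include/session.h"

int main() {
  using namespace mindspore::predict;

  // The caller owns the serialized graph buffer (see the note on CreateSession);
  // reading it from disk is elided here.
  std::vector<char> graphBuf;

  Context ctx;  // default context: one thread, default allocator
  std::shared_ptr<Session> session = CreateSession(graphBuf.data(), graphBuf.size(), ctx);
  if (session == nullptr) {
    return RET_ERROR;
  }

  // Inputs are FORMAT_NCHW float tensors; the caller allocates and frees their memory.
  std::vector<Tensor *> inputs = session->GetInput();
  for (Tensor *input : inputs) {
    if (input->GetData() == nullptr) {
      input->MallocData();
    }
    if (input->GetData() != nullptr) {
      std::memset(input->GetData(), 0, input->GetDataSize());  // replace with real data
    }
  }

  if (session->Run(inputs) != RET_OK) {
    return RET_ERROR;
  }

  // Every output node's tensors; the caller frees them when done.
  std::map<std::string, std::vector<Tensor *>> outputs = session->GetAllOutput();
  return outputs.empty() ? RET_ERROR : RET_OK;
}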

+ 0
- 259
predict/include/tensor.h

@@ -1,259 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef PREDICT_INCLUDE_TENSOR_H_
#define PREDICT_INCLUDE_TENSOR_H_

#include <memory>
#include <vector>
#include "dlpack/dlpack.h"
#include "schema/inner/ms_generated.h"

#define MSPREDICT_API __attribute__((visibility("default")))

namespace mindspore {
namespace predict {
///\brief Allocator definition of MindSpore predict.
class Allocator;

///\brief Tensor definition of MindSpore predict.
class MSPREDICT_API Tensor {
public:
///\brief Constructor of MindSpore predict tensor.
///
///\param[in] tensor Define the parameters of the tensor.
///\param[in] copyData Malloc data for the tensor, and copy the original data from the
/// input tensor.
///
///\return Instance of MindSpore predict tensor.
Tensor(const Tensor &tensor, bool copyData = false);

///\brief Constructor of MindSpore predict tensor.
///
///\param[in] dt Data Type of the tensor, see introduction to 'enum DataType'
/// for supported type.
///\param[in] dims Dimension Values such as height and width, which defined
/// the shape of the tensor.
///\param[in] format Tensor format, see introduction to 'enum Format' for
/// supported format.
///\param[in] data Data of the tensor.
///
///\return Instance of MindSpore predict tensor.
///
///\note
/// Length of data should align with dt, format and dims, otherwise the
/// application might run into an unexpected error,
/// such as a segmentation fault.
/// For example, if dt is DT_FLOAT, format is FORMAT_NCHW and dims is [1,3,300,300],
/// then the minimum length of data should
/// be 1 * 3 * 300 * 300 * sizeof(float).
Tensor(DataType dt, const std::vector<int64_t> &dims, Format format, void *data);

///\brief Destructor of MindSpore predict tensor.
~Tensor();

///\brief Get MindSpore predict tensor.
///
///\param[in] tensordef Definition of the tensor.
///
///\return Address of MindSpore predict tensor.
static Tensor *CopyFromTensorDef(const TensorDef &tensordef);

///\brief Get dtype of MindSpore predict tensor.
///
///\return Dtype of MindSpore predict tensor.
DLDataType GetTensorDtype() const;

///\brief Get data of MindSpore predict tensor.
///
///\return Address of MindSpore predict tensor data.
void *GetData() const;

///\brief Set data of MindSpore predict tensor.
///
///\param[in] data Address for data of the MindSpore predict tensor instance.
///
///\note
/// Length of data should align with dt, format and dims, otherwise the
/// application might run into an unexpected error,
/// such as a segmentation fault.
/// For example, if dt is DT_FLOAT, format is FORMAT_NCHW and dims is [1,3,300,300],
/// then the minimum length of data should
/// be 1 * 3 * 300 * 300 * sizeof(float).
void SetData(void *data);

///\brief Get data type of MindSpore predict tensor.
///
///\return Data Type of the tensor.
DataType GetDataType() const;

///\brief Set data type of MindSpore predict tensor.
///
///\param[in] dt Data Type of the tensor, see introduction to 'enum DataType'
/// for supported type.
void SetDataType(DataType dt);

///\brief Get number of dimension of MindSpore predict tensor.
///
///\return Number of dimension of the MindSpore predict tensor.
int GetNDim() const;

///\brief Get dimension of MindSpore predict tensor.
///
///\return Dimension of the MindSpore predict tensor.
std::vector<int64_t> GetDims() const;

///\brief Set dimension of MindSpore predict tensor.
///
///\param[in] dims Vector that has values of dimension.
void SetDims(const std::vector<int64_t> &dims);

///\brief Get format of MindSpore predict tensor.
///
///\return Format of the MindSpore predict tensor.
Format GetFormat() const { return format; }

///\brief Set format of MindSpore predict tensor.
///
///\param[in] format Format of the tensor.
void SetFormat(Format format) { this->format = format; }

///\brief Get reference count of MindSpore predict tensor.
///
///\return Reference count of the MindSpore predict tensor.
int RefCount() { return refCount; }

///\brief Increase reference count of MindSpore predict tensor.
///
///\param[in] ref The increase of the reference count.
void AddRef(int ref) { refCount += ref; }

///\brief Decrease reference count of MindSpore predict tensor.
///
///\param[in] ref The decrease of the reference count.
void DefRef(int ref) { refCount -= ref; }

///\brief Get element size of MindSpore predict tensor.
///
///\return Element size of MindSpore predict tensor.
size_t GetElementSize() const;

///\brief Get data size of MindSpore predict tensor.
///
///\return Data size of MindSpore predict tensor.
size_t GetDataSize() const;

///\brief Get element size of MindSpore predict tensor in NC4HW4 format.
///
///\param[in] isNhwc Whether the current format is NHWC.
///
///\return Element size of MindSpore predict tensor in NC4HW4 format.
size_t GetNC4HW4ElementSize(bool isNhwc);

///\brief Get data size of MindSpore predict tensor in NC4HW4 format.
///
///\param[in] isNhwc Whether the current format is NHWC.
///
///\return Data size of MindSpore predict tensor in NC4HW4 format.
size_t GetNC4HW4DataSize(bool isNhwc);

///\brief Malloc data for the MindSpore predict tensor.
///
///\param[in] allocator The malloc source for data.
///\param[in] refCount The reference count of the data.
///
///\return Return RET_OK if the data is successfully allocated, otherwise return RET_ERROR.
int MallocData(std::shared_ptr<Allocator> allocator = nullptr, int refCount = 0);

///\brief Free the MindSpore predict tensor.
void FreeTensor();

///\brief Free the data of MindSpore predict tensor.
void ForceFreeData();

///\brief Free the data of MindSpore predict tensor.
void FreeData();

///\brief Compare the shape of MindSpore predict tensor with another tensor.
///
///\param[in] dst The tensor to compare against.
///
///\return The result of the comparison.
bool CompareShape(const Tensor &dst);

///\brief Compare shape of MindSpore predict tensor with another shape.
///
///\param[in] other The shape to compare against.
///
///\return The result of the comparison.
bool CompareShape(const std::vector<int64_t> &other);

///\brief Get instance of MindSpore predict tensor.
///
///\return Instance of MindSpore predict dlTensor.
DLTensor *GetDLTensor() { return &dlTensor; }

///\brief Get height of MindSpore predict tensor.
///
///\return Height of MindSpore predict tensor.
int64_t Height() const;

///\brief Get width of MindSpore predict tensor.
///
///\return Width of MindSpore predict tensor.
int64_t Width() const;

///\brief Get channel of MindSpore predict tensor.
///
///\return Channel of MindSpore predict tensor.
int64_t Channel() const;

///\brief Get batch of MindSpore predict tensor.
///
///\return Batch of MindSpore predict tensor.
int64_t Batch() const;

///\brief Get stride of MindSpore predict tensor.
///
///\param[in] index the index of stride.
///
///\return Stride of MindSpore predict tensor.
int64_t Stride(int index) const;

///\brief Set stride of MindSpore predict tensor by input.
///
///\param[in] index Index of stride
///\param[in] stride The stride to set
void SetStride(int index, int64_t stride);

///\brief Set stride of MindSpore predict tensor by dims.
void SetStride();
void SetScale(bool isScale = true);

private:
bool isScale = false;
int refCount = 0;
int isConst;
Format format;
DLTensor dlTensor;
std::shared_ptr<Allocator> allocator = nullptr;
std::vector<float> scale;
std::vector<int> zeroPoint;
};
} // namespace predict
} // namespace mindspore

#endif // PREDICT_INCLUDE_TENSOR_H_
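A hedged sketch of constructing a Tensor as the constructor note above describes (DT_FLOAT, FORMAT_NCHW, dims [1,3,300,300]); the size-consistency check and the ownership handling at the end are assumptions, since this header does not state who frees user-supplied data.

#include <cstddef>
#include <cstdlib>
#include <vector>
#include "include/tensor.h"

int main() {
  using namespace mindspore::predict;

  // Per the constructor note: the buffer must hold at least 1 * 3 * 300 * 300 floats.
  std::vector<int64_t> dims = {1, 3, 300, 300};
  size_t elementCount = 1 * 3 * 300 * 300;
  float *data = static_cast<float *>(std::malloc(elementCount * sizeof(float)));

  Tensor tensor(DT_FLOAT, dims, FORMAT_NCHW, data);

  // GetElementSize()/GetDataSize() are expected to agree with the manual computation.
  bool consistent = tensor.GetElementSize() == elementCount &&
                    tensor.GetDataSize() == elementCount * sizeof(float);
  (void)consistent;

  tensor.SetData(nullptr);  // assumption: detach before freeing the caller-owned buffer
  std::free(data);
  return 0;
}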

+ 0
- 1
predict/module/CMakeLists.txt

@@ -1 +0,0 @@
add_subdirectory(tvm_kernel)

+ 0
- 27
predict/module/tvm_kernel/.gitignore

@@ -1,27 +0,0 @@
# Created by .ignore support plugin
#

# filter python
*.pyc

# filter build
*.so
*.o

# filter coverage
coverage/

# filter report
*.xml

# filter tvm
3rdparty/

# filter build
build/
cmake-build-debug/
.idea/
TFLite_Detection_PostProcess_CI
app_run
output
tvm

+ 0
- 4
predict/module/tvm_kernel/.gitmodules

@@ -1,4 +0,0 @@
[submodule "3rdparty/incubator-tvm"]
path = 3rdparty/incubator-tvm
url = https://github.com/dmlc/tvm.git
branch = v0.5

+ 0
- 25
predict/module/tvm_kernel/CMakeLists.txt

@@ -1,25 +0,0 @@
cmake_minimum_required(VERSION 3.12.1)
project(autotensor LANGUAGES CXX)
set (MINDSPORE "${PROJECT_SOURCE_DIR}/../../..")
set (TVM_KERNEL_LITE "${PROJECT_SOURCE_DIR}/lite")
set (THIRDPARTY "${MINDSPORE}/third_party")
set (TVM_CLEAN_SOURCE "${THIRDPARTY}/incubator-tvm")
set (TVM_BUILD_SOURCE "${PROJECT_SOURCE_DIR}/incubator-tvm")
set (BUILD_DIR "${PROJECT_SOURCE_DIR}")
set (TVM_KERNEL_OUTPUT_DIR ${CMAKE_CURRENT_BINARY_DIR})
set (TVM_OUTPUT_DIR ${TVM_KERNEL_OUTPUT_DIR}/incubator-tvm)

set (LLVM_CONFIG $ENV{LLVM_PATH})
if (NOT LLVM_CONFIG)
message(FATAL_ERROR "please set LLVM_PATH in env")
endif()
set (CMAKE_BUILD_TYPE "Release")

include(${TVM_BUILD_SOURCE}/cmake/util/Util.cmake)
include(${TVM_BUILD_SOURCE}/cmake/util/FindLLVM.cmake)
if(EXISTS ${TVM_BUILD_SOURCE}/cmake/config.cmake)
include(${TVM_BUILD_SOURCE}/cmake/config.cmake)
endif()
add_subdirectory(${TVM_KERNEL_LITE})
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)


+ 0
- 140
predict/module/tvm_kernel/lite/CMakeLists.txt

@@ -1,140 +0,0 @@
cmake_minimum_required(VERSION 3.12)
set(CMAKE_CXX_STANDARD 14)

if(ENABLE_PREDICT_ARM64)
set(TARGS "arm64")
elseif(ENABLE_PREDICT_ARM32)
set(TARGS "arm32")
else()
set(TARGS "x86")
endif()
message("TARGET is set to ${TARGS}")

set(CMAKE_VERBOSE_MAKEFILE ON)
set(CMAKE_SKIP_RPATH TRUE)

if(MSVC)
message("not support MSVC")
else(MSVC)
include(CheckCXXCompilerFlag)
check_cxx_compiler_flag("-std=c++11" SUPPORT_CXX11)
if ("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
message("Build in Debug mode")
set(CMAKE_C_FLAGS "-O0 -g -Wall -Werror -fPIC [${CMAKE_C_FLAGS} -rdynamic")
set(CMAKE_CXX_FLAGS "-O0 -g -Wall -Werror -fPIC -std=c++11 ${CMAKE_CXX_FLAGS} -rdynamic")
else()
set(CMAKE_C_FLAGS "-D_FORTIFY_SOURCE=2 -O2 -fno-rtti -fvisibility=hidden -Wall -Werror -fPIC -fstack-protector-strong ${CMAKE_C_FLAGS}")
set(CMAKE_CXX_FLAGS "-D_FORTIFY_SOURCE=2 -O2 -fno-rtti -fvisibility=hidden -Wall -Werror -fPIC -fstack-protector-strong -std=c++11 ${CMAKE_CXX_FLAGS}")
set(CMAKE_EXE_LINKER_FLAGS "-Wl,-z,relro,-z,now -Wl,-z,noexecstack")
endif ()
if (CMAKE_CXX_COMPILER_ID MATCHES "GNU" AND
CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 7.0)
set(CMAKE_CXX_FLAGS "-Wall -Werror -faligned-new ${CMAKE_CXX_FLAGS}")
endif()
if (CODE_COVERAGE)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Werror -fprofile-arcs -ftest-coverage -O0")
endif()
endif(MSVC)


if("${TARGS}" STREQUAL "x86")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D__x86_64__ -fno-strict-aliasing")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__x86_64__ -fno-strict-aliasing")
endif()


set(PRJ_SRC_DIR "${PROJECT_SOURCE_DIR}")
set(PRJ_KLIB_DIR "${PROJECT_SOURCE_DIR}")
set(PRJ_LITE_DIR "${PROJECT_SOURCE_DIR}/lite")

# include directories
message("current PRJ DIR: ${PROJECT_SOURCE_DIR}")
message("current SUB_PRJ DIR: ${PRJ_SRC_DIR}")
message("current KLIB DIR: ${PRJ_KLIB_DIR}")
message("current PRJ_LITE_DIR: ${PRJ_LITE_DIR}")
message("CMAKE_CURRENT_BINARY_DIR: ${CMAKE_CURRENT_BINARY_DIR}")
set(DMLC_CORE "${TVM_BUILD_SOURCE}/3rdparty/dmlc-core")
set(DLPACK "${TVM_BUILD_SOURCE}/3rdparty/dlpack")
set(PREDICT "${PRJ_SRC_DIR}/../../")
set(SECUREC "${PRJ_SRC_DIR}/../../../third_party/securec")
message("include dir: ${DLPACK}/include")
include_directories(${DLPACK}/include)
include_directories(${DMLC_CORE}/include)
include_directories(${TVM_BUILD_SOURCE}/include)
include_directories(${TVM_BUILD_SOURCE}/src/pass)
include_directories(${PRJ_LITE_DIR})
include_directories(${PRJ_LITE_DIR}/include)
include_directories(${PRJ_LITE_DIR}/../../..)
include_directories(${PRJ_LITE_DIR}/../../../include)
include_directories(${PRJ_LITE_DIR}/../../../src/runtime)
include_directories(${PRJ_LITE_DIR}/../../../common)
include_directories(${SECUREC})
message("SECUREC: " "${SECUREC}/build/src")
include_directories(${PREDICT})
include_directories(${PREDICT}/src)
include_directories(${PRJ_SRC_DIR}/../../../third_party/flatbuffers/include)
include_directories(${PRJ_SRC_DIR}/../../../third_party)
# Source file lists
file(GLOB_RECURSE TVM_KERNEL_SRC
src/api/*.cc
src/tflite/TFLite_Detection_PostProcess.cc)

set (TVM_RUNTIME_FLG $ENV{TVM_RUNTIME_ON})
if ("${TVM_RUNTIME_FLG}" STREQUAL "true")
message("Using TVM runtime function")
file(GLOB TVM_RUNTIME_SRCS
${TVM_ROOT}/apps/howto_deploy/tvm_runtime_pack.cc)
else()
message("Using LITE runtime function")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DLITE_RUNTIME_ON -DTVM_RUNTIME_HEADER_ONLY -DLITE_THREAD_POOL_SHARED")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DLITE_RUNTIME_ON -DTVM_RUNTIME_HEADER_ONLY -DLITE_THREAD_POOL_SHARED")
file(GLOB_RECURSE TVM_RUNTIME_SRCS
${PREDICT}/src/runtime/*.cc)
endif()

if("${TARGS}" STREQUAL "arm32" OR "${TARGS}" STREQUAL "arm64")
set(CMAKE_SKIP_BUILD_RPATH TRUE)
set(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE)
set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
endif()

set(LIB_X86_PATH "${PRJ_KLIB_DIR}/build/lib_x86")
set(LIB_ARM64_PATH "${PRJ_KLIB_DIR}/build/lib_arm64")
set(LIB_ARM32_PATH "${PRJ_KLIB_DIR}/build/lib_arm32")
if("${TARGS}" STREQUAL "x86")
set(KLIBS_PATH "${LIB_X86_PATH}")
elseif("${TARGS}" STREQUAL "arm64")
set(KLIBS_PATH "${LIB_ARM64_PATH}")
elseif("${TARGS}" STREQUAL "arm32")
set(KLIBS_PATH "${LIB_ARM32_PATH}")
else()
message(ERROR " not suport ${TARGS}")
endif()

file(GLOB_RECURSE KERNEL_LIBS "${KLIBS_PATH}/*.o")
message("KERNEL_PATH= ${KLIBS_PATH}")

add_compile_options(-DTVM_CUDA_RUNTIM=0)
add_compile_options(-DTVM_METAL_RUNTIM=0)
add_compile_options(-DTVM_OPENCL_RUNTIM=0)

link_directories(${KLIBS_PATH})

add_library(tvm_runtime_pack STATIC ${TVM_RUNTIME_SRCS})
add_library(kernel_manager STATIC ${TVM_KERNEL_SRC})
add_library(tvm_kernel_static STATIC ${TVM_KERNEL_SRC} ${KERNEL_LIBS})
add_library(tvm_kernel SHARED ${TVM_KERNEL_SRC} ${KERNEL_LIBS})
set_target_properties(tvm_kernel PROPERTIES LINK_FLAGS "-Wl,-z,relro,-z,now -Wl,-z,noexecstack")

set(KERNEL_LD_LIB tvm_runtime_pack dl)

if("${TARGS}" STREQUAL "x86")
set(KERNEL_LD_LIB ${KERNEL_LD_LIB} pthread)
else()
set(ANDROID_ALLOW_UNDEFINED_SYMBOLS TRUE)
endif()

target_link_libraries(tvm_kernel ${KERNEL_LD_LIB} libsecurec.a)
target_link_libraries(tvm_kernel_static OBJECT tvm_runtime_pack libsecurec.a)

add_dependencies(tvm_kernel securec)

+ 0
- 94
predict/module/tvm_kernel/lite/include/lite/api/km_api.h

@@ -1,94 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef PREDICT_MODULE_TVM_KERNEL_LITE_INCLUDE_LITE_API_KM_API_H_
#define PREDICT_MODULE_TVM_KERNEL_LITE_INCLUDE_LITE_API_KM_API_H_

#include <dlpack/dlpack.h>
#include <functional>
#include <string>
#include <vector>
#include "schema/inner/ms_generated.h"
#include "schema/inner/op_generated.h"

#define PUBLIC __attribute__((visibility("default")))

/*!
* \brief Call tvm kernel.
* \param fid tvm kernel id.
* \param tensors tvm kernel arguments.
* \return 0 if SUCCESS.
*/
PUBLIC int CallKernel(const std::string &fid, const std::vector<DLTensor *> &tensors);

/*!
* \brief Get tvm kernel by id.
* \param fid tvm kernel id.
* \return std::function if SUCCESS else nullptr.
*/
PUBLIC std::function<int(const std::vector<DLTensor *> &)> GetKernel(const std::string &fid);

/*!
* \brief Get tvm kernel by OpDef.
* \param opdef defined by predict schema.
* \param tensors tvm kernel arguments.
* \param option kernel option (thread number and device).
* \return std::function if SUCCESS else nullptr.
*/
struct PUBLIC KernelOption {
int numThreads = 0;
std::string device;
};

PUBLIC std::function<int(const std::vector<DLTensor *> &)> GetKernel(const mindspore::predict::OpDef &opdef,
const std::vector<DLTensor *> &tensors,
const KernelOption &option);

/*!
* \brief Load the TVM kernel lib.
* \param mode 0 indicates a shared lib.
* \param fname shared lib path when mode equals 0.
*/
PUBLIC void InitKernelManager(int mode, const std::string &fname);

/*
* \brief Configure the ThreadPool according to mode.
* \param mode -1: bind mid-speed CPUs first, 1: bind higher-speed CPUs first, 0: no binding.
* \param nthreads Number of threads to be used; must not exceed the CPU count.
* \param execute_self Whether the current thread also does arithmetic work
*        (true: it does, false: it does not).
*/
PUBLIC void ConfigThreadPool(int mode = -1, int nthreads = 2, bool execute_self = true);

/*
* \brief Provide a simple API for mslite; mslite does not care about mode.
*/
inline void CfgThreadPool(int nthread) { ConfigThreadPool(-1, nthread, true); }

/*
* The callback function to do CPU binding for the master thread.
*/
PUBLIC void DoMasterThreadBind(bool bindflg);

PUBLIC void DoAllThreadBind(bool ifBind);

#undef PUBLIC

#endif // PREDICT_MODULE_TVM_KERNEL_LITE_INCLUDE_LITE_API_KM_API_H_
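A hedged sketch of driving the kernel-manager API above; the include path is inferred from the header guard, and the library path, kernel id, and tensor preparation are placeholders.

#include <string>
#include <vector>
#include <dlpack/dlpack.h>
#include "lite/api/km_api.h"

int main() {
  // Bind threads (mode -1: prefer mid-speed cores, two worker threads) and load
  // the kernel library in shared-object mode (mode 0).
  ConfigThreadPool(-1, 2, true);
  InitKernelManager(0, "/path/to/libtvm_kernel.so");  // placeholder path

  // The caller prepares DLTensor descriptors for the kernel's inputs and outputs;
  // filling them in is elided here.
  std::vector<DLTensor *> tensors;

  // Kernels are addressed by a string id; CallKernel() returns 0 on success.
  return CallKernel("placeholder_kernel_fid", tensors);
}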

+ 0
- 17
predict/module/tvm_kernel/lite/python/__init__.py

@@ -1,17 +0,0 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Neural network operators"""
# from . import arm_cpu
# from . import at_ops

+ 0
- 17
predict/module/tvm_kernel/lite/python/arm_cpu/__init__.py

@@ -1,17 +0,0 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Schedule for ARM CPU"""

from . import conv2d

+ 0
- 470
predict/module/tvm_kernel/lite/python/arm_cpu/conv2d.py

@@ -1,470 +0,0 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Conv2D schedule for ARM CPU"""
from __future__ import absolute_import as _abs

import functools

import tvm
from tvm import autotvm
import tvm.contrib.nnpack

from topi.generic import schedule_conv2d_nchw
from topi.util import traverse_inline, get_const_tuple
from topi.nn import pad, conv2d
from topi.nn.util import get_const_int, get_pad_tuple


@autotvm.register_topi_compute(conv2d, "arm_cpu", ["asm"])
def conv2d_arm_cpu(cfg, data, kernel, strides, padding, dilation, out_dtype):
"""TOPI compute callback for conv2d

Parameters
----------
cfg: ConfigEntity
The config for this template

data : tvm.Tensor
4-D with shape [batch, in_channel, in_height, in_width]

kernel : tvm.Tensor
4-D with shape [num_filter, in_channel, filter_height, filter_width] or
pre-packed 5-D with shape [num_filter_chunk, in_channel, filter_height,
filter_width, num_filter_block]

strides : list of two ints
[stride_height, stride_width]

padding : list of two ints
[pad_height, pad_width]

dilation : list of two ints
[dilation_height, dilation_width]

out_dtype: str
The output type. This is used for mixed precision.

Returns
-------
output : tvm.Tensor
4-D with shape [batch, out_channel, out_height, out_width]
"""
args = _gen_cfg(cfg, data, kernel, strides, padding, dilation, num_tile=2)
return _conv_spatial_pack_asm(
args, data, kernel, strides, padding, dilation, out_dtype
)


@autotvm.register_topi_schedule(schedule_conv2d_nchw, "arm_cpu", ["asm"])
def schedule_conv2d_nchw_arm_cpu(outs):
"""TOPI schedule callback for conv2d

Parameters
----------
outs: Array of Tensor
The computation graph description of conv2d
in the format of an array of tensors.

Returns
-------
s: Schedule
The computation schedule for conv2d.
"""
s = _conv_schedule_asm(outs)
return s


def _gen_cfg(cfg, data, kernel, strides, padding, dilation, num_tile):
"""_gen_cfg"""
if len(kernel.shape) == 4:
co_, _, kh_, kw_ = get_const_tuple(kernel.shape)
else: # kernel tensor is pre packed
co_, _, kh_, kw_, vc_ = get_const_tuple(kernel.shape)
co_ = co_ * vc_

if isinstance(dilation, int):
dilation_h = dilation_w = dilation
else:
dilation_h, dilation_w = dilation

n_, ci_, ih_, iw_ = get_const_tuple(data.shape)

dilated_kernel_h = (kh_ - 1) * dilation_h + 1
dilated_kernel_w = (kw_ - 1) * dilation_w + 1
pad_top, pad_left, pad_bottom, pad_right = get_pad_tuple(
padding, (dilated_kernel_h, dilated_kernel_w)
)
hstr, wstr = strides if isinstance(strides, (tuple, list)) else (strides, strides)
oh_ = (ih_ + pad_top + pad_bottom - dilated_kernel_h) // hstr + 1
ow_ = (iw_ + pad_left + pad_right - dilated_kernel_w) // wstr + 1

n, co, oh, ow = cfg.axis(n_), cfg.axis(co_), cfg.axis(oh_), cfg.axis(ow_)
ci, kh, kw = cfg.reduce_axis(ci_), cfg.reduce_axis(kh_), cfg.reduce_axis(kw_)

if num_tile == 2: # for arm cpu
candidate_vc = []
for iv in range(3, co_):
if co_ % iv == 0:
candidate_vc.append([co_ // iv, iv])
candidate_vc.append([1, co_])
co, vc = cfg.define_split(
"tile_co", co, num_outputs=2, policy="candidate", candidate=candidate_vc
)
oh, vh = cfg.define_split("tile_oh", oh, num_outputs=2)
ow, vw = cfg.define_split("tile_ow", ow, num_outputs=2)
elif num_tile == 3: # for mali gpu
co, _, vc = cfg.define_split("tile_co", co, num_outputs=3)
oh, _, vh = cfg.define_split("tile_oh", oh, num_outputs=3)
ow, _, vw = cfg.define_split("tile_ow", ow, num_outputs=3)
else:
raise RuntimeError("Invalid num_tile")

cfg.define_reorder(
"reorder_0",
[n, co, oh, ow, ci, kh, kw, vh, vw, vc],
policy="candidate",
candidate=[[n, co, oh, ow, ci, kh, kw, vh, vw, vc],],
)

vc_ = cfg["tile_co"].size[-1]
vh_ = cfg["tile_oh"].size[-1]
vw_ = cfg["tile_ow"].size[-1]
is_var = False
return (is_var, vh_, vw_, vc_)

def _conv_spatial_pack_asm(args, data, kernel, strides, padding,
dilation, out_dtype):
"""_conv_spatial_pack_asm"""
is_var, vh_, vw_, vc_ = args

# create workload according to raw arguments
out_dtype = out_dtype or data.dtype
n_, ci_, ih_, iw_ = data.shape if is_var else get_const_tuple(data.shape)

if isinstance(dilation, int):
dilation_h = dilation_w = dilation
else:
dilation_h, dilation_w = dilation

if len(kernel.shape) == 4:
pre_packed = False
co_, _, kh_, kw_ = kernel.shape if is_var else get_const_tuple(kernel.shape)
else: # kernel tensor is pre packed
pre_packed = True
co_, _, kh_, kw_, vc_ = kernel.shape if is_var else get_const_tuple(kernel.shape)
co_ = co_ * vc_

dilated_kernel_h = (kh_ - 1) * dilation_h + 1
dilated_kernel_w = (kw_ - 1) * dilation_w + 1
pad_top, pad_left, pad_bottom, pad_right = get_pad_tuple(
padding, (dilated_kernel_h, dilated_kernel_w)
)
hstr, wstr = strides if isinstance(strides, (tuple, list)) else (strides, strides)
oh_ = (ih_ + pad_top + pad_bottom - dilated_kernel_h) // hstr + 1
ow_ = (iw_ + pad_left + pad_right - dilated_kernel_w) // wstr + 1
data_pad = pad(data, [0, 0, pad_top, pad_left], [0, 0, pad_bottom, pad_right])

oh_div = oh_ // vh_
ow_div = ow_ // vw_
kvshape = (co_ // vc_, ci_, kh_, kw_, vc_)
ovshape = (n_, co_ // vc_, oh_div, ow_div, vh_, vw_, vc_)
oshape = (n_, co_, oh_div * vh_, ow_div * vw_)

if dilation_h != 1 or dilation_w != 1:
# undilate input data
dvshape = (n_, oh_ // vh_, ow_ // vw_, kh_, kw_, vh_, vw_, ci_)
data_vec = tvm.compute(
dvshape,
lambda n, h, w, kh, kw, vh, vw, ci: data_pad[n][ci][
(h * vh_ + vh) * hstr + kh * dilation_h
][(w * vw_ + vw) * wstr + kw * dilation_w],
name="data_vec_undilated",
)
else:
dvshape = (
n_,
oh_ // vh_,
ow_ // vw_,
(vh_ - 1) * hstr + kh_,
(vw_ - 1) * wstr + kw_,
ci_,
)
data_vec = tvm.compute(
dvshape,
lambda n, h, w, vh, vw, ci: data_pad[n][ci][h * vh_ * hstr + vh][
w * vw_ * wstr + vw
],
name="data_vec",
)

if pre_packed:
kernel_vec = kernel
else:
kernel_vec = tvm.compute(
kvshape,
lambda co, ci, kh, kw, vc: kernel[co * vc_ + vc][ci][kh][kw],
name="kernel_vec",
)

ci = tvm.reduce_axis((0, ci_), name="ci")
kh = tvm.reduce_axis((0, kh_), name="kh")
kw = tvm.reduce_axis((0, kw_), name="kw")

# asm begin----
type_map = {
"int8": "int32",
"uint8": "uint32",
"float32": "float32",
"float16": "float16",
}
acum_dtype = type_map[data.dtype]
attrs = {
"SH": hstr,
"SW": wstr,
"PH": pad_top,
"PW": pad_left,
"DILA_H": dilation_h,
"DILA_W": dilation_w,
"VH": vh_,
"VW": vw_,
"VC": vc_,
"ACUM_DTYPE": acum_dtype,
}
# asm end----

if dilation_h != 1 or dilation_w != 1:
conv = tvm.compute(
ovshape,
lambda n, co, h, w, vh, vw, vc: tvm.sum(
data_vec[n, h, w, kh, kw, vh, vw, ci].astype(out_dtype)
* kernel_vec[co, ci, kh, kw, vc].astype(out_dtype),
axis=[ci, kh, kw],
),
name="conv",
attrs=attrs,
)
else:
conv = tvm.compute(
ovshape,
lambda n, co, h, w, vh, vw, vc: tvm.sum(
data_vec[n, h, w, vh * hstr + kh, vw * wstr + kw, ci].astype(out_dtype)
* kernel_vec[co, ci, kh, kw, vc].astype(out_dtype),
axis=[ci, kh, kw],
),
name="conv",
attrs=attrs,
)

output = tvm.compute(
oshape,
lambda n, co, h, w: conv[n][co // vc_][h // vh_][w // vw_][h % vh_][w % vw_][
co % vc_
],
name="output_unpack",
tag="asm_conv2d_output",
)

return output


def intrin_conv(args):
"""intrin_conv"""
(
ci_,
vh_,
vw_,
vc_,
kh_,
kw_,
sh_,
sw_,
dila_h,
dila_w,
dtype,
acum_dtype,
opname,
core_id,
) = args
hstr, wstr = sh_, sw_
ci_ = tvm.var("ci_") if ci_ is None else ci_
kvshape = (ci_, kh_, kw_, vc_)
ovshape = (vh_, vw_, vc_)
if dila_h != 1 or dila_w != 1:
dvshape = (kh_, kw_, vh_, vw_, ci_)
else:
dvshape = ((vh_ - 1) * hstr + kh_, (vw_ - 1) * wstr + kw_, ci_)

data_vec = tvm.placeholder(dvshape, name="a", dtype=dtype)
kernel_vec = tvm.placeholder(kvshape, name="b", dtype=dtype)
ci = tvm.reduce_axis((0, ci_), name="ci")
kh = tvm.reduce_axis((0, kh_), name="kh")
kw = tvm.reduce_axis((0, kw_), name="kw")
if dila_h != 1 or dila_w != 1:
conv = tvm.compute(
ovshape,
lambda vh, vw, vc: tvm.sum(
data_vec[kh, kw, vh, vw, ci].astype(acum_dtype)
* kernel_vec[ci, kh, kw, vc].astype(acum_dtype),
axis=[ci, kh, kw],
),
name="conv",
)
else:
conv = tvm.compute(
ovshape,
lambda vh, vw, vc: tvm.sum(
data_vec[vh * hstr + kh, vw * wstr + kw, ci].astype(acum_dtype)
* kernel_vec[ci, kh, kw, vc].astype(acum_dtype),
axis=[ci, kh, kw],
),
name="conv",
)

stride_a = [
functools.reduce(lambda x, y: x * y, dvshape[i + 1: len(dvshape)])
for i in range(0, len(dvshape) - 1)
]
stride_a.append(1)
stride_b = [
functools.reduce(lambda x, y: x * y, kvshape[i + 1: len(kvshape)])
for i in range(0, len(kvshape) - 1)
]
stride_b.append(1)
stride_c = [
functools.reduce(lambda x, y: x * y, ovshape[i + 1: len(ovshape)])
for i in range(0, len(ovshape) - 1)
]
stride_c.append(1)

a_buffer = tvm.decl_buffer(
data_vec.shape, data_vec.dtype, name="A", offset_factor=1, strides=stride_a
)
b_buffer = tvm.decl_buffer(
kernel_vec.shape, kernel_vec.dtype, name="B", offset_factor=1, strides=stride_b
)
c_buffer = tvm.decl_buffer(
conv.shape, conv.dtype, name="C", offset_factor=1, strides=stride_c
)

def intrin_func(ins, outs):
aa, bb = ins
cc = outs[0]

def _body():
ib = tvm.ir_builder.create()
ib.emit(
tvm.call_extern(
"int32",
opname,
cc.access_ptr("w"),
aa.access_ptr("r"),
bb.access_ptr("r"),
ci_,
vh_,
vw_,
vc_,
kh_,
sh_,
core_id,
)
)
return ib.get()

return _body()

return tvm.decl_tensor_intrin(
conv.op, intrin_func, binds={data_vec: a_buffer, kernel_vec: b_buffer, conv: c_buffer}
)


def _schedule_asm(s, data_vec, kernel_vec, conv, output, last):
"""schedule implementation"""
n, co, oh, ow, vh, vw, vc = s[conv].op.axis

axis_extent = []
for i in (vh, vw, vc):
axis_extent.append(get_const_int(i.dom.extent))
reduce_extent = []
for i in s[conv].op.reduce_axis[1:]:
reduce_extent.append(get_const_int(i.dom.extent))
vh_, vw_, vc_ = axis_extent

# schedule fusion
n, co, h, w = s[last].op.axis
co, vc = s[last].split(co, vc_)
oh, vh = s[last].split(h, vh_)
ow, vw = s[last].split(w, vw_)
s[last].reorder(n, co, oh, ow, vh, vw, vc)
if last != output:
s[output].compute_inline()

s[conv].compute_at(s[last], ow)

# mark parallel
s[last].parallel(co)

if data_vec.op.name == "data_vec_undilated":
_, h, _, _, _, _, _, _ = s[data_vec].op.axis
else:
_, h, _, _, _, _ = s[data_vec].op.axis
s[data_vec].parallel(h)

if kernel_vec.op.name == "kernel_vec":
co, _, _, _, _ = s[kernel_vec].op.axis
if autotvm.GLOBAL_SCOPE.in_tuning:
# kernel packing will be pre-computed during compilation, so we skip
# this part to make tuning records correct
s[kernel_vec].pragma(co, "debug_skip_region")
else:
s[kernel_vec].parallel(co)
elif kernel_vec.op.name == "kernel_vec_conv2d_transpose": # for conv2d transpose
co, _, _, _, _ = s[kernel_vec].op.axis
s[kernel_vec].parallel(co)

return s


def _conv_schedule_asm(outs):
"""_conv_schedule_asm"""
s = tvm.create_schedule([x.op for x in outs])

def _callback(op):
if "asm_conv2d_output" in op.tag:
# schedule conv2d
output = op.output(0)
conv = op.input_tensors[0]

sidx = 0
if conv.op.input_tensors[0].name == "attr":
sidx = 1
data_vec = conv.op.input_tensors[sidx]
data_pad = data_vec.op.input_tensors[0]
s[data_pad].compute_inline()

kernel_vec = conv.op.input_tensors[sidx + 1]
if kernel_vec.op.name == "kernel_vec":
kernel = kernel_vec.op.input_tensors[0]
else:
kernel = kernel_vec
if (isinstance(kernel.op, tvm.tensor.ComputeOp) and "dilate" in kernel.op.tag):
s[kernel].compute_inline()

if conv.op.input_tensors[0].name == "attr":
_schedule_asm(s, data_vec, kernel_vec, conv, output, outs[0])
else:
_schedule_asm(s, data_vec, kernel_vec, conv, output, outs[0])

traverse_inline(s, outs[0].op, _callback)
return s

+ 0
- 477
predict/module/tvm_kernel/lite/python/arm_cpu/deconv.py

@@ -1,477 +0,0 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Conv2D_transpose of stride=2, kernel=2*2 schedule for ARM CPU"""
from __future__ import absolute_import as _abs

import functools

import tvm
from tvm import autotvm
import tvm.contrib.nnpack

from topi.generic import schedule_conv2d_nchw
from topi.util import traverse_inline, get_const_tuple
from topi.nn import conv2d


@autotvm.register_topi_compute(conv2d, "arm_cpu", ["deconv"])
def conv2d_arm_cpu_deconv(cfg, data, kernel, out_dtype):
"""TOPI compute callback for conv2d

Parameters
----------
cfg: ConfigEntity
The config for this template

data : tvm.Tensor
4-D with shape [batch, in_channel, in_height, in_width]

kernel : tvm.Tensor
4-D with shape [num_filter, in_channel, filter_height, filter_width] or
pre-packed 5-D with shape [num_filter_chunk, in_channel, filter_height,
filter_width, num_filter_block]

out_dtype: str
The output type. This is used for mixed precision.

Returns
-------
output : tvm.Tensor
4-D with shape [batch, out_channel, out_height, out_width]
"""
args = _gen_cfg_deconv(cfg, data, kernel, num_tile=2)
return _conv_spatial_pack_deconv(
args, data, kernel, out_dtype
)


@autotvm.register_topi_schedule(schedule_conv2d_nchw, "arm_cpu", ["deconv"])
def schedule_conv2d_nchw_arm_cpu_deconv(cfg, outs):
"""TOPI schedule callback for conv2d

Parameters
----------
cfg: ConfigEntity
The config for this template

outs: Array of Tensor
The computation graph description of conv2d
in the format of an array of tensors.

Returns
-------
s: Schedule
The computation schedule for conv2d.
"""
s = _conv_schedule_deconv(cfg, outs)
return s


def _gen_cfg_deconv(cfg, data, kernel, num_tile):
"""generation config from input args"""
if len(kernel.shape) == 4:
co_, _, _, _ = get_const_tuple(kernel.shape)
else: # kernel tensor is pre packed
co_, _, _, _, vc_ = get_const_tuple(kernel.shape)
co_ = co_ * vc_

if len(data.shape) == 4:
_, ci_, ih_, iw_ = get_const_tuple(data.shape)
c4 = 4
ci_ = ci_ // 4
else:
_, ci_, ih_, iw_, c4 = get_const_tuple(data.shape)

oh_ = ih_ * 2
ow_ = iw_ * 2

co, oh, ow = cfg.axis(co_), cfg.axis(oh_), cfg.axis(ow_)
ci, ki = cfg.reduce_axis(ci_), cfg.reduce_axis(c4)

if num_tile == 2: # for arm cpu
candidate_vc = [[co_ // c4, c4]]
co, vc = cfg.define_split(
"tile_co", co, num_outputs=2, policy="candidate", candidate=candidate_vc
)
candidate_vw = []
for iv in range(4, ow_ + 1): # [4, 6, 8, 12, 16, 24, 32, 40]:
if iv % 4 == 0 and (ow_ % iv == 0):
candidate_vw.append([ow_ // iv, iv])
ow, vw = cfg.define_split(
"tile_ow", ow, num_outputs=2, policy="candidate", candidate=candidate_vw
)
candidate_vh = [[1, 2]]
oh, vh = cfg.define_split(
"tile_oh", oh, num_outputs=2, policy="candidate", candidate=candidate_vh
)
elif num_tile == 3: # for mali gpu
co, _, vc = cfg.define_split("tile_co", co, num_outputs=3)
oh, _, vh = cfg.define_split("tile_oh", oh, num_outputs=3)
ow, _, vw = cfg.define_split("tile_ow", ow, num_outputs=3)
else:
raise RuntimeError("Invalid num_tile")

cfg.define_annotate("ann_reduce", [ci, ki], policy="try_unroll")
cfg.define_annotate("ann_spatial", [vh, vw, vc], policy="try_unroll_vec")

vc_ = cfg["tile_co"].size[-1]
vh_ = cfg["tile_oh"].size[-1]
vw_ = cfg["tile_ow"].size[-1]
is_var = False
return (is_var, vh_, vw_, vc_)


def _conv_spatial_pack_deconv(args, data, kernel, out_dtype):
"""conv2d_arm_cpu_deconv inner implement"""
is_var, vh_, vw_, vc_ = args
# create workload according to raw arguments
out_dtype = out_dtype or data.dtype
if len(data.shape) == 4:
n_, ci_, ih_, iw_ = data.shape if is_var else get_const_tuple(data.shape)
c4 = 4
ci_ = ci_ // c4
else:
n_, ci_, ih_, iw_, c4 = data.shape if is_var else get_const_tuple(data.shape)

if len(kernel.shape) == 4:
pre_packed = False
_, co_, kh_, kw_ = kernel.shape if is_var else get_const_tuple(kernel.shape)
else: # kernel tensor is pre packed
pre_packed = True
_, co_, kh_, kw_, vc_ = kernel.shape if is_var else get_const_tuple(kernel.shape)
co_ = co_ * c4

oh_ = ih_ * 2
ow_ = iw_ * 2
ow_div = ow_ // vw_
oh_div = oh_ // vh_
kvshape = (co_ // vc_, kh_, kw_, ci_, c4, c4)
ovshape = (n_, co_ // vc_, oh_div, ow_div, vh_, vw_, c4)

dvshape = (n_, ih_ // (vh_ // 2), iw_ // (vw_ // 2), vh_ // 2, ci_, vw_ // 2, c4)
if len(data.shape) == 4:
data_vec = tvm.compute(
dvshape,
lambda n, h, w, vh, ci, vw, ki: data[n][ci * c4 + ki][h * vh_ // 2 + vh][
w * vw_ // 2 + vw
],
name="data_vec",
)
else:
data_vec = tvm.compute(
dvshape,
lambda n, h, w, vh, ci, vw, ki: data[n][ci][h * vh_ // 2 + vh][
w * vw_ // 2 + vw
][ki],
name="data_vec",
)

if pre_packed:
kernel_vec = kernel
else:
kernel_vec = tvm.compute(
kvshape,
lambda co, kh, kw, ci, ki, vc: kernel[ci * c4 + ki][co * vc_ + vc][kh][kw],
name="kernel_vec",
)

ci = tvm.reduce_axis((0, ci_), name="ci")
ki = tvm.reduce_axis((0, c4), name="ki")

type_map = {
"int8": "int32",
"uint8": "uint32",
"float32": "float32",
"float16": "float16",
}
acum_dtype = type_map[data.dtype]
attrs = {
"SH": 2,
"SW": 2,
"PH": 0,
"PW": 0,
"DILA_H": 1,
"DILA_W": 1,
"VH": vh_,
"VW": vw_,
"VC": vc_,
"ACUM_DTYPE": acum_dtype,
}

conv = tvm.compute(
ovshape,
lambda n, co, h, w, vh, vw, vc: tvm.sum(
data_vec[n, h, w, vh // 2, ci, vw // 2, ki].astype(out_dtype)
* kernel_vec[co, (h * vh_ + vh) % 2, (w * vw_ + vw) % 2, ci, ki, vc].astype(
out_dtype
),
axis=[ci, ki],
),
name="conv",
attrs=attrs,
)
if len(data.shape) == 4:
osshape = (n_, co_, oh_, ow_div * vw_)
output = tvm.compute(
osshape,
lambda n, co, h, w: conv[n][co // c4][h][w // vw_][w % vw_][co % c4],
name="output_unpack",
tag="deconv_conv2d_output",
)
else:
osshape = (n_, co_ // c4, oh_, ow_div * vw_, c4)
output = tvm.compute(
osshape,
lambda n, co, h, w, vc: conv[n][co][h // vh_][w // vw_][h % vh_][w % vw_][vc],
name="output_unpack",
tag="deconv_conv2d_output",
)

return output


def intrin_deconv(args):
"""deconv inner implement"""
(
ci_,
vh_,
vw_,
vc_,
kh_,
kw_,
sh_,
sw_,
dila_h,
dila_w,
dtype,
acum_dtype,
opname,
core_id,
) = args
hstr, wstr = sh_, sw_
ci_ = tvm.var("ci_") if ci_ is None else ci_
kvshape = (ci_, kh_, kw_, vc_)
ovshape = (vh_, vw_, vc_)
if dila_h != 1 or dila_w != 1:
dvshape = (kh_, kw_, vh_, vw_, ci_)
else:
dvshape = ((vh_ - 1) * hstr + kh_, (vw_ - 1) * wstr + kw_, ci_)

data_vec = tvm.placeholder(dvshape, name="a", dtype=dtype)
kernel_vec = tvm.placeholder(kvshape, name="b", dtype=dtype)
ci = tvm.reduce_axis((0, ci_), name="ci")
kh = tvm.reduce_axis((0, kh_), name="kh")
kw = tvm.reduce_axis((0, kw_), name="kw")
if dila_h != 1 or dila_w != 1:
conv = tvm.compute(
ovshape,
lambda vh, vw, vc: tvm.sum(
data_vec[kh, kw, vh, vw, ci].astype(acum_dtype)
* kernel_vec[ci, kh, kw, vc].astype(acum_dtype),
axis=[ci, kh, kw],
),
name="conv",
)
else:
conv = tvm.compute(
ovshape,
lambda vh, vw, vc: tvm.sum(
data_vec[vh * hstr + kh, vw * wstr + kw, ci].astype(acum_dtype)
* kernel_vec[ci, kh, kw, vc].astype(acum_dtype),
axis=[ci, kh, kw],
),
name="conv",
)

stride_a = [
functools.reduce(lambda x, y: x * y, dvshape[i + 1: len(dvshape)])
for i in range(0, len(dvshape) - 1)
]
stride_a.append(1)
stride_b = [
functools.reduce(lambda x, y: x * y, kvshape[i + 1: len(kvshape)])
for i in range(0, len(kvshape) - 1)
]
stride_b.append(1)
stride_c = [
functools.reduce(lambda x, y: x * y, ovshape[i + 1: len(ovshape)])
for i in range(0, len(ovshape) - 1)
]
stride_c.append(1)

a_buffer = tvm.decl_buffer(
data_vec.shape, data_vec.dtype, name="A", offset_factor=1, strides=stride_a
)
b_buffer = tvm.decl_buffer(
kernel_vec.shape, kernel_vec.dtype, name="B", offset_factor=1, strides=stride_b
)
c_buffer = tvm.decl_buffer(
conv.shape, conv.dtype, name="C", offset_factor=1, strides=stride_c
)

def intrin_func(ins, outs):
aa, bb = ins
cc = outs[0]

def _body():
ib = tvm.ir_builder.create()
ib.emit(
tvm.call_extern(
"int32",
opname,
cc.access_ptr("w"),
aa.access_ptr("r"),
bb.access_ptr("r"),
ci_,
vh_,
vw_,
vc_,
kh_,
sh_,
core_id,
)
)
return ib.get()

return _body()

return tvm.decl_tensor_intrin(
conv.op, intrin_func, binds={data_vec: a_buffer, kernel_vec: b_buffer, conv: c_buffer}
)


def _schedule_deconv(cfg, s, data_vec, kernel_vec, conv, output, last):
"""schedule implementation"""
is_tune = bool(isinstance(cfg, (tvm.autotvm.ConfigEntity, tvm.autotvm.ConfigSpace)))
if is_tune:
vh_ = cfg["tile_oh"].size[-1]
vw_ = cfg["tile_ow"].size[-1]
vc_ = cfg["tile_co"].size[-1]
cfg = {
"ci_": tvm.var("ci_"),
"VH": vh_,
"VW": vw_,
"VC": vc_,
"tile_oh": vh_,
"tile_ow": vw_,
"tile_co": vc_,
"tile_ci": 4,
"ann_reduce": cfg["ann_reduce"].anns,
"ann_spatial": cfg["ann_spatial"].anns,
} # ,'reorder_0':cfg['reorder_0'].perm}
else:
pass
n, co, oh, ow, vh, vw, vc = s[conv].op.axis
ci, ki = s[conv].op.reduce_axis
s[conv].reorder(n, co, oh, ow, ci, vw, ki, vc)
if cfg["ann_reduce"][0] == "unroll":
s[conv].unroll(ci)
elif cfg["ann_reduce"][0] == "vec":
s[conv].vectorize(ci)
if cfg["ann_reduce"][1] == "unroll":
s[conv].unroll(ki)
elif cfg["ann_reduce"][1] == "vec":
s[conv].vectorize(ki)
if cfg["ann_spatial"][0] == "vec":
s[conv].vectorize(vh)
elif cfg["ann_spatial"][0] == "unroll":
s[conv].unroll(vh)
if cfg["ann_spatial"][1] == "vec":
s[conv].vectorize(vw)
elif cfg["ann_spatial"][1] == "unroll":
s[conv].unroll(vw)
if cfg["ann_spatial"][2] == "vec":
s[conv].vectorize(vc)
elif cfg["ann_spatial"][2] == "unroll":
s[conv].unroll(vc)

# schedule conv
attrs = conv.op.attrs
vh_, vw_, vc_ = (attrs["VH"].value, attrs["VW"].value, attrs["VC"].value)

# schedule fusion
if len(s[last].op.axis) == 4:
n, co, h, w = s[last].op.axis
co, vc = s[last].split(co, vc_)
ow, vw = s[last].split(w, vw_)
oh, vh = s[last].split(h, vh_)
s[last].reorder(n, co, oh, ow, vh, vw, vc)
else:
n, co, h, w, vc = s[last].op.axis
oh, vh = s[last].split(h, vh_)
ow, vw = s[last].split(w, vw_)
s[last].reorder(n, co, oh, ow, vh, vw, vc)
if last != output and isinstance(output.op, tvm.tensor.ComputeOp):
s[output].compute_inline()
if cfg["ann_spatial"][0] == "vec":
s[last].vectorize(vh)
elif cfg["ann_spatial"][0] == "unroll":
s[last].unroll(vh)
if cfg["ann_spatial"][1] == "vec":
s[last].vectorize(vw)
elif cfg["ann_spatial"][1] == "unroll":
s[last].unroll(vw)
if cfg["ann_spatial"][2] == "vec":
s[last].vectorize(vc)
elif cfg["ann_spatial"][2] == "unroll":
s[last].unroll(vc)

s[conv].compute_at(s[last], ow)

# mark parallel
s[last].parallel(co)

if data_vec.op.name == "data_vec_undilated":
_, h, _, _, _, _, _, _, _ = s[data_vec].op.axis
else:
_, h, _, _, _, _, _ = s[data_vec].op.axis
s[data_vec].parallel(h)

co, _, _, _, _, vc = s[kernel_vec].op.axis
s[kernel_vec].parallel(co)
if cfg["ann_spatial"][2] == "vec":
s[kernel_vec].vectorize(vc)
elif cfg["ann_spatial"][2] == "unroll":
s[kernel_vec].unroll(vc)
return s


def _conv_schedule_deconv(cfg, outs):
"""schedule_conv2d_nchw_arm_cpu_deconv inner implementation"""
s = tvm.create_schedule([x.op for x in outs])

def _callback(op):
if "deconv_conv2d_output" in op.tag:
# schedule conv2d
output = op.output(0)
conv = op.input_tensors[0]

sidx = 0
if conv.op.input_tensors[0].name == "attr":
sidx = 1
data_vec = conv.op.input_tensors[sidx]

kernel_vec = conv.op.input_tensors[sidx + 1]
if kernel_vec.op.name == "kernel_vec":
kernel = kernel_vec.op.input_tensors[0]
else:
kernel = kernel_vec
if (isinstance(kernel.op, tvm.tensor.ComputeOp) and "dilate" in kernel.op.tag):
s[kernel].compute_inline()

_schedule_deconv(cfg, s, data_vec, kernel_vec, conv, output, outs[0])

traverse_inline(s, outs[0].op, _callback)
return s

+ 0
- 289
predict/module/tvm_kernel/lite/python/arm_cpu/depthwise_conv2d.py

@@ -1,289 +0,0 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Depthwise convolution schedule for ARM CPU"""

import tvm
from tvm import autotvm

from topi.generic import schedule_depthwise_conv2d_nchw
from topi.nn import depthwise_conv2d_nchw, pad
from topi.util import traverse_inline, get_const_tuple
from topi.nn.util import get_pad_tuple

# register customized schedule for arm cpu.
@autotvm.register_topi_schedule(
schedule_depthwise_conv2d_nchw, ["arm_cpu", "cpu"], ["custom"]
)
def schedule_depthwise_conv2d_nchw_arm(cfg, outs):
"""Schedule depthwise conv2d

Parameters
----------
cfg: ConfigEntity
The configuration of this template
outs: Array of Tensor
The computation graph description of depthwise convolution2d
in the format of an array of tensors.

Returns
-------
s: Schedule
The computation schedule for depthwise_conv2d nchw.
"""
s = _depthwise_schedule_spatial_pack(cfg, outs)
return s


@autotvm.register_topi_compute(depthwise_conv2d_nchw, ["arm_cpu", "cpu"], ["custom"])
def depthwise_conv2d_arm_cpu(cfg, data, kernel, strides, padding, dilation, out_dtype):
"""TOPI compute callback for depthwise_conv2d nchw

Parameters
----------
cfg: ConfigEntity
The config for this template

data : tvm.Tensor
4-D with shape [batch, in_channel, in_height, in_width]

kernel : tvm.Tensor
4-D with shape [num_filter, multiplier, filter_height, filter_width] or
pre-packed 5-D with shape [num_filter_chunk, multiplier, filter_height,
filter_width, num_filter_block]

strides : list of two ints
[stride_height, stride_width]

padding : list of two ints
[pad_height, pad_width]

dilation : list of two ints
[dilation_height, dilation_width]

out_dtype: str
The output type. This is used for mixed precision.

Returns
-------
output : tvm.Tensor
4-D with shape [batch, out_channel, out_height, out_width]
"""

return _depthwise_spatial_pack(
cfg, data, kernel, strides, padding, dilation, out_dtype
)
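
# A small worked example (assumed values, not from the original file) of the
# output-size arithmetic used by the spatial-pack implementation below:
# the kernel is dilated first, then padding and stride give
#   out = (in + pad_before + pad_after - dilated_kernel) // stride + 1.
def _out_size_sketch():
    ih, iw = 32, 32
    ukh, ukw = 3, 3
    hstr, wstr = 1, 1
    dilation_h, dilation_w = 2, 2
    pad_top, pad_left, pad_down, pad_right = 2, 2, 2, 2

    dilated_kernel_h = (ukh - 1) * dilation_h + 1   # 5
    dilated_kernel_w = (ukw - 1) * dilation_w + 1   # 5
    u_oh = (ih + pad_top + pad_down - dilated_kernel_h) // hstr + 1    # 32
    u_ow = (iw + pad_left + pad_right - dilated_kernel_w) // wstr + 1  # 32
    return u_oh, u_ow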


def _depthwise_spatial_pack(args, data, kernel, strides, padding, dilation, out_dtype):
"""depthwise_conv2d_arm_cpu's inner implement"""
is_var, u_vh, u_vw, u_vc = args
out_dtype = out_dtype or data.dtype

u_n, u_c, ih, iw = data.shape if is_var else get_const_tuple(data.shape)

if isinstance(dilation, int):
dilation_h = dilation_w = dilation
else:
dilation_h, dilation_w = dilation

if len(kernel.shape) == 4:
pre_packed = False
u_c, um, ukh, ukw = kernel.shape if is_var else get_const_tuple(kernel.shape)
else: # kernel tensor is pre packed
pre_packed = True
u_c, um, ukh, ukw, u_vc = kernel.shape if is_var else get_const_tuple(kernel.shape)
u_c = u_c * u_vc

dilated_kernel_h = (ukh - 1) * dilation_h + 1
dilated_kernel_w = (ukw - 1) * dilation_w + 1

pad_top, pad_left, pad_down, pad_right = get_pad_tuple(
padding, (dilated_kernel_h, dilated_kernel_w)
)
hstr, wstr = strides if isinstance(strides, (tuple, list)) else (strides, strides)
u_oh = (ih + pad_top + pad_down - dilated_kernel_h) // hstr + 1
u_ow = (iw + pad_left + pad_right - dilated_kernel_w) // wstr + 1
# pack data
hpad = pad_top + pad_down
wpad = pad_left + pad_right
dopad = hpad != 0 or wpad != 0
if dopad:
data_pad = pad(
data,
(0, 0, pad_top, pad_left),
(0, 0, pad_down, pad_right),
name="data_pad",
)
else:
data_pad = data

oh_div = u_oh // u_vh
ow_div = u_ow // u_vw
kvshape = (u_c // u_vc, um, ukh, ukw, u_vc)
ovshape = (u_n, u_c * um // u_vc, oh_div, u_ow // u_vw, u_vh, u_vw, u_vc)
oshape = (u_n, u_c * um, oh_div * u_vh, ow_div * u_vw)

if dilation_h != 1 or dilation_w != 1:
# undilate input data
dvshape = (u_n, oh_div, ow_div, u_c, ukh, ukw, u_vh, u_vw)
data_vec = tvm.compute(
dvshape,
lambda n, h, w, c, kh, kw, vh, vw: data_pad[n][c][
(h * u_vh + vh) * hstr + kh * dilation_h
][(w * u_vw + vw) * wstr + kw * dilation_w],
name="data_vec_undilated",
)
else:
dvshape = (u_n, oh_div, ow_div, u_c, u_vh * hstr + ukh - 1, u_vw * wstr + ukw - 1)
data_vec = tvm.compute(
dvshape,
lambda n, h, w, c, vh, vw: data_pad[n][c][h * u_vh * hstr + vh][
w * u_vw * wstr + vw
],
name="data_vec",
)

if pre_packed:
kernel_vec = kernel
else:
kernel_vec = tvm.compute(
kvshape,
lambda co, m, kh, kw, vc: kernel[co * u_vc + vc][m][kh][kw],
name="kernel_vec",
)

kh = tvm.reduce_axis((0, ukh), name="kh")
kw = tvm.reduce_axis((0, ukw), name="kw")

if dilation_h != 1 or dilation_w != 1:
conv = tvm.compute(
ovshape,
lambda n, co, h, w, vh, vw, vc: tvm.sum(
data_vec[n, h, w, (co * u_vc + vc) // um, kh, kw, vh, vw].astype(out_dtype)
* kernel_vec[co // um, co % um, kh, kw, vc].astype(out_dtype),
axis=[kh, kw],
),
name="depthwise_conv",
)
else:
conv = tvm.compute(
ovshape,
lambda n, co, h, w, vh, vw, vc: tvm.sum(
data_vec[
n, h, w, (co * u_vc + vc) // um, vh * hstr + kh, vw * wstr + kw
].astype(out_dtype)
* kernel_vec[co // um, co % um, kh, kw, vc].astype(out_dtype),
axis=[kh, kw],
),
name="depthwise_conv",
)

output = tvm.compute(
oshape,
lambda n, co, h, w: conv[n][co // u_vc][h // u_vh][w // u_vw][h % u_vh][w % u_vw][
co % u_vc
],
name="output_unpack",
tag="spatial_depthwise_conv_nchw_output",
)
return output
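
# A hedged NumPy check (toy shapes, channel multiplier um == 1 assumed; not
# part of the original file) of the unpack indexing used by "output_unpack":
# output[n][co][h][w] = conv[n][co//VC][h//VH][w//VW][h%VH][w%VW][co%VC].
import numpy as np

def _unpack_sketch(n_=1, c_=8, h_=4, w_=4, vh_=2, vw_=2, vc_=4):
    packed = np.random.rand(n_, c_ // vc_, h_ // vh_, w_ // vw_, vh_, vw_, vc_)
    unpacked = np.empty((n_, c_, h_, w_))
    for n in range(n_):
        for co in range(c_):
            for h in range(h_):
                for w in range(w_):
                    unpacked[n, co, h, w] = packed[
                        n, co // vc_, h // vh_, w // vw_, h % vh_, w % vw_, co % vc_
                    ]
    # the same mapping expressed as transpose + reshape
    alt = packed.transpose(0, 1, 6, 2, 4, 3, 5).reshape(n_, c_, h_, w_)
    assert np.allclose(unpacked, alt)
    return unpacked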


def _schedule_spatial_pack(cfg, s, data_vec, kernel_vec, conv, output, last):
"""schedule implementation"""
u_vc = cfg["tile_co"].size[-1] if not isinstance(cfg, dict) else cfg["VC"]
u_vh = cfg["tile_oh"].size[-1] if not isinstance(cfg, dict) else cfg["VH"]
u_vw = cfg["tile_ow"].size[-1] if not isinstance(cfg, dict) else cfg["VW"]

n, co, oh, ow, vh, vw, vc = s[conv].op.axis
kh, kw = s[conv].op.reduce_axis

if data_vec.op.name == "data_vec_undilated":
_, _, dv_ow, _, _, _, _, _ = s[data_vec].op.axis
else:
_, _, dv_ow, _, _, _ = s[data_vec].op.axis

data_pad = data_vec.op.input_tensors[0]

if isinstance(data_pad.op, tvm.tensor.ComputeOp):
s[data_pad].vectorize(list(s[data_pad].op.axis)[-1])
s[data_pad].compute_at(s[data_vec], dv_ow)

s[data_vec].vectorize(list(s[data_vec].op.axis)[-1])
s[data_vec].compute_at(s[conv], ow)

# schedule conv
s[conv].reorder(n, co, oh, ow, kh, kw, vh, vw, vc)
s[conv].unroll(kh)
s[conv].unroll(vh)
s[conv].vectorize(vw)
s[conv].unroll(vc)
s[conv].parallel(co)

n, co, h, w = s[last].op.axis
co, vc = s[last].split(co, u_vc)
oh, vh = s[last].split(h, u_vh)
ow, vw = s[last].split(w, u_vw)
if last != output:
s[output].compute_inline()
s[last].vectorize(vw)
s[last].unroll(vc)
else:
s[last].vectorize(vw)
s[conv].compute_at(s[last], oh)

# mark parallel
s[last].parallel(co)

if data_vec.op.name == "data_vec_undilated":
_, h, _, _, _, _, _, _ = s[data_vec].op.axis
else:
_, h, _, _, _, _ = s[data_vec].op.axis
s[data_vec].parallel(h)

if kernel_vec.op.name == "kernel_vec":
co, _, _, _, _ = s[kernel_vec].op.axis
if autotvm.GLOBAL_SCOPE.in_tuning:
# kernel packing will be pre-computed during compilation, so we skip
# this part to make tuning records correct
s[kernel_vec].pragma(co, "debug_skip_region")
else:
s[kernel_vec].parallel(co)

return s


def _depthwise_schedule_spatial_pack(cfg, outs):
"""schedule_depthwise_conv2d_nchw_arm's inner implement"""
outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
s = tvm.create_schedule([x.op for x in outs])

def _callback(op):
if op.tag == "spatial_depthwise_conv_nchw_output":
output = op.output(0)
conv = op.input_tensors[0]
data_vec = conv.op.input_tensors[0]
kernel_vec = conv.op.input_tensors[1]
if kernel_vec.op.name == "kernel_vec":
kernel = kernel_vec.op.input_tensors[0]
else:
kernel = kernel_vec
if isinstance(kernel.op, tvm.tensor.ComputeOp) and "dilate" in kernel.op.tag:
s[kernel].compute_inline()

_schedule_spatial_pack(cfg, s, data_vec, kernel_vec, conv, output, outs[0])

traverse_inline(s, outs[0].op, _callback)
return s
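
# A hedged NumPy check (assumed toy shapes, not part of the original file) of
# the kernel packing used by _depthwise_spatial_pack above:
# kernel_vec[co, m, kh, kw, vc] = kernel[co * VC + vc][m][kh][kw].
import numpy as np

def _kernel_pack_sketch(c_=8, m_=1, kh_=3, kw_=3, vc_=4):
    kernel = np.random.rand(c_, m_, kh_, kw_)
    packed = np.empty((c_ // vc_, m_, kh_, kw_, vc_))
    for co in range(c_ // vc_):
        for vc in range(vc_):
            packed[co, :, :, :, vc] = kernel[co * vc_ + vc]
    # equivalent reshape/transpose form
    alt = kernel.reshape(c_ // vc_, vc_, m_, kh_, kw_).transpose(0, 2, 3, 4, 1)
    assert np.allclose(packed, alt)
    return packed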

+ 0
- 472
predict/module/tvm_kernel/lite/python/arm_cpu/matmul.py

@@ -1,472 +0,0 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Conv2D schedule for ARM CPU"""
from __future__ import absolute_import as _abs

import functools

import tvm
from tvm import autotvm
import tvm.contrib.nnpack

from topi.generic import schedule_conv2d_nchw
from topi.util import traverse_inline, get_const_tuple
from topi.nn import conv2d


@autotvm.register_topi_compute(conv2d, "arm_cpu", ["matmul"])
def matmul_arm_cpu(cfg, a_, b_, layout, out_dtype):
"""TOPI compute callback for

Parameters
----------
cfg: ConfigEntity
The config for this template

a_ : tvm.Tensor
2-D with shape [M, k_]

b_ : tvm.Tensor
2-D with shape [N, k_]

layout : str
The layout of a_; one of "NCHW", "NCH" or "NC"

out_dtype: str
The output type. This is used for mixed precision.

Returns
-------
output : tvm.Tensor
2-D, 3-D or 4-D tensor, depending on the input `layout`
"""
args = _gen_cfg(cfg, a_, b_)
return _matmul_spatial_pack_asm(args, a_, b_, layout, out_dtype)
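
# A hedged NumPy sketch (assumed shapes, not from the original file) of what
# the "NCHW" layout path computes: a_ of shape (batch, K, H, W) against b_ of
# shape (N, K), i.e. a 1x1-convolution-style matmul over the K axis:
#   out[n, c, h, w] = sum_k a_[n, k, h, w] * b_[c, k]
import numpy as np

def _matmul_nchw_sketch(batch=1, k_=3, h_=4, w_=4, n_=8):
    a = np.random.rand(batch, k_, h_, w_)
    b = np.random.rand(n_, k_)
    out = np.einsum("nkhw,ck->nchw", a, b)
    return out   # shape (batch, n_, h_, w_)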


@autotvm.register_topi_schedule(schedule_conv2d_nchw, "arm_cpu", ["matmul"])
def schedule_matmul_arm_cpu(cfg, outs):
"""TOPI schedule callback for conv2d

Parameters
----------
cfg: ConfigEntity
The config for this template

outs: Array of Tensor
The computation graph description of matmul
in the format of an array of tensors.

Returns
-------
s: Schedule
The computation schedule for matmul.
"""
s = _matmul_schedule_asm(cfg, outs)
return s


def _gen_cfg(cfg, a_, b_):
"""get best loginfo from cfg"""
if len(a_.shape) == 2:
w_, ci_ = get_const_tuple(a_.shape)
h_ = 1
elif len(a_.shape) == 3:
_, ci_, w_ = get_const_tuple(a_.shape)
h_ = 1
elif len(a_.shape) == 4:
_, ci_, h_, w_ = get_const_tuple(a_.shape)
else:
raise ValueError("not support shape: " + a_.shape)

co_, k_ = get_const_tuple(b_.shape)

oh, ow = cfg.axis(h_), cfg.axis(w_)
co = cfg.axis(co_)
k = cfg.reduce_axis(k_)

oh, vh = cfg.define_split("tile_oh", oh, num_outputs=2)
ow, vw = cfg.define_split("tile_ow", ow, num_outputs=2)
oc, vc = cfg.define_split("tile_co", co, num_outputs=2)

cfg.define_reorder(
"reorder_0",
[oc, oh, ow, k, vh, vw, vc],
policy="candidate",
candidate=[[oc, oh, ow, k, vh, vw, vc],],
)

vh_ = cfg["tile_oh"].size[-1]
vw_ = cfg["tile_ow"].size[-1]
vc_ = cfg["tile_co"].size[-1]
is_var = False
is_transpose = False
return (is_var, is_transpose, ci_, vh_, vw_, vc_)


def _matmul_spatial_pack_asm(args, a_, b_, layout, out_dtype):
"""matmul_spatial_pack_asm's inner interace"""
is_var, is_transpose, ci_, vh_, vw_, vc_ = args

# create workload according to raw arguments
out_dtype = out_dtype or a_.dtype
if layout == "NCHW":
batch, k_, h_, w_ = a_.shape if is_var else get_const_tuple(a_.shape)
n_, _ = b_.shape if is_var else get_const_tuple(b_.shape)
elif layout == "NCH":
batch, k_, h_ = a_.shape if is_var else get_const_tuple(a_.shape)
n_, _ = b_.shape if is_var else get_const_tuple(b_.shape)
w_ = 1
elif layout == "NC":
w_, k_ = a_.shape if is_var else get_const_tuple(a_.shape)
n_, _ = b_.shape if is_var else get_const_tuple(b_.shape)
h_ = 1
else:
raise ValueError("not support layout: " + layout)

ki = tvm.reduce_axis((0, k_), name="ki")
type_map = {
"int8": "int32",
"uint8": "uint32",
"float32": "float32",
"float16": "float16",
}
acum_dtype = type_map[a_.dtype]
attrs = {"ci_": ci_, "vh_": vh_, "vw_": vw_, "vc_": vc_, "ACUM_DTYPE": acum_dtype}

if layout == "NCHW":
h_div = h_ // vh_
w_div = w_ // vw_
n_div = n_ // vc_
avshape = (batch, h_div, w_div, vh_, vw_, k_)
bvshape = (n_div, k_, vc_)
ovshape = (batch, n_div, h_div, w_div, vh_, vw_, vc_)

a_vec = tvm.compute(
avshape,
lambda n, oh, ow, vh, vw, ci: a_[n][ci][oh * vh_ + vh][ow * vw_ + vw],
name="a_vec",
)
b_vec = tvm.compute(
bvshape, lambda oc, ci, vc: b_[oc * vc_ + vc][ci], name="b_vec"
)

ma = tvm.compute(
ovshape,
lambda n, oc, oh, ow, vh, vw, vc: tvm.sum(
a_vec[n, oh, ow, vh, vw, ki].astype(out_dtype)
* b_vec[oc, ki, vc].astype(out_dtype),
axis=[ki],
),
name="matmul",
attrs=attrs,
)

if is_transpose:
oshape = (batch, h_div * vh_, w_div * vw_, n_div * vc_)

output = tvm.compute(
oshape,
lambda n, h, w, c: ma[n][c // vc_][h // vh_][w // vw_][h % vh_][w % vw_][
c % vc_
],
name="output_unpack",
tag="asm_matmul_output",
)
else:
oshape = (batch, n_div * vc_, h_div * vh_, w_div * vw_)
output = tvm.compute(
oshape,
lambda n, c, h, w: ma[n][c // vc_][h // vh_][w // vw_][h % vh_][w % vw_][
c % vc_
],
name="output_unpack",
tag="asm_matmul_output",
)
elif layout == "NCH":
w_div = w_ // vw_
n_div = n_ // vc_
avshape = (batch, w_div, vw_, k_)
bvshape = (n_div, k_, vc_)
ovshape = (batch, n_div, w_div, vw_, vc_)
oshape = (batch, n_div * vc_, w_div * vw_)

a_vec = tvm.compute(
avshape, lambda b, om, vw, ci: a_[b][ci][om * vw_ + vw], name="a_vec"
)
b_vec = tvm.compute(
bvshape, lambda on, ci, vc: b_[on * vc_ + vc][ci], name="b_vec"
)

ma = tvm.compute(
ovshape,
lambda b, on, om, vm, vn: tvm.sum(
a_vec[b, om, vm, ki].astype(out_dtype)
* b_vec[on, ki, vn].astype(out_dtype),
axis=[ki],
),
name="matmul",
attrs=attrs,
)

output = tvm.compute(
oshape,
lambda b, n, m: ma[b][n // vc_][m // vw_][m % vw_][n % vc_],
name="output_unpack",
tag="asm_matmul_output",
)
elif layout == "NC":
w_div = w_ // vw_
n_div = n_ // vc_
avshape = (w_div, vw_, k_)
bvshape = (n_div, k_, vc_)
ovshape = (w_div, n_div, vw_, vc_)
oshape = (w_div * vw_, n_div * vc_)

a_vec = tvm.compute(
avshape, lambda om, vw, ci: a_[om * vw_ + vw][ci], name="a_vec"
)
b_vec = tvm.compute(
bvshape, lambda on, ci, vc: b_[on * vc_ + vc][ci], name="b_vec"
)

ma = tvm.compute(
ovshape,
lambda om, on, vm, vn: tvm.sum(
a_vec[om, vm, ki].astype(out_dtype)
* b_vec[on, ki, vn].astype(out_dtype),
axis=[ki],
),
name="matmul",
attrs=attrs,
)

output = tvm.compute(
oshape,
lambda m, n: ma[m // vw_][n // vc_][m % vw_][n % vc_],
name="output_unpack",
tag="asm_matmul_output",
)
else:
raise ValueError("not support layout: " + layout)

return output
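
# A hedged NumPy check (toy shapes, not part of the original file) of the "NC"
# layout path above: a_ is (M, K), b_ is (N, K); after packing rows of a_ by
# vw_ and rows of b_ by vc_, the unpacked result equals a_ @ b_.T.
import numpy as np

def _matmul_nc_sketch(m_=4, n_=8, k_=3, vw_=2, vc_=4):
    a = np.random.rand(m_, k_)
    b = np.random.rand(n_, k_)
    a_vec = a.reshape(m_ // vw_, vw_, k_)                      # a_vec[om, vw, k]
    b_vec = b.reshape(n_ // vc_, vc_, k_).transpose(0, 2, 1)   # b_vec[on, k, vc]
    ma = np.einsum("mwk,nkc->mnwc", a_vec, b_vec)              # reduce over k
    out = np.empty((m_, n_))
    for m in range(m_):
        for n in range(n_):
            out[m, n] = ma[m // vw_, n // vc_, m % vw_, n % vc_]
    assert np.allclose(out, a @ b.T)
    return out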


def intrin_conv(args):
"""intrin_conv is a conv inner interface"""
(
ndim,
ci_,
vh_,
vw_,
vc_,
_,
_,
_,
_,
_,
_,
_,
_,
dtype,
acum_dtype,
opname,
core_id,
) = args
ci_ = tvm.var("ci_") if ci_ is None else ci_
kvshape = (ci_, vc_)
if ndim == 2:
dvshape = (vw_, ci_)
ovshape = (vw_, vc_)

data_vec = tvm.placeholder(dvshape, name="a", dtype=dtype)
kernel_vec = tvm.placeholder(kvshape, name="b", dtype=dtype)
ci = tvm.reduce_axis((0, ci_), name="ci")
conv = tvm.compute(
ovshape,
lambda vw, vc: tvm.sum(
data_vec[vw, ci].astype(acum_dtype)
* kernel_vec[ci, vc].astype(acum_dtype),
axis=[ci],
),
name="conv",
)
else:
dvshape = (vh_, vw_, ci_)
ovshape = (vh_, vw_, vc_)

data_vec = tvm.placeholder(dvshape, name="a", dtype=dtype)
kernel_vec = tvm.placeholder(kvshape, name="b", dtype=dtype)
ci = tvm.reduce_axis((0, ci_), name="ci")
conv = tvm.compute(
ovshape,
lambda vh, vw, vc: tvm.sum(
data_vec[vh, vw, ci].astype(acum_dtype)
* kernel_vec[ci, vc].astype(acum_dtype),
axis=[ci],
),
name="conv",
)

stride_a = [
functools.reduce(lambda x, y: x * y, dvshape[i + 1: len(dvshape)])
for i in range(0, len(dvshape) - 1)
]
stride_a.append(1)
stride_b = [
functools.reduce(lambda x, y: x * y, kvshape[i + 1: len(kvshape)])
for i in range(0, len(kvshape) - 1)
]
stride_b.append(1)
stride_c = [
functools.reduce(lambda x, y: x * y, ovshape[i + 1: len(ovshape)])
for i in range(0, len(ovshape) - 1)
]
stride_c.append(1)

ab_ = tvm.decl_buffer(
data_vec.shape, data_vec.dtype, name="a_", offset_factor=1, strides=stride_a
)
bb_ = tvm.decl_buffer(
kernel_vec.shape, kernel_vec.dtype, name="b_", offset_factor=1, strides=stride_b
)
cb_ = tvm.decl_buffer(
conv.shape, conv.dtype, name="C", offset_factor=1, strides=stride_c
)

def intrin_func(ins, outs):
aa, bb = ins
cc = outs[0]

def _body():
b_ = tvm.ir_builder.create()
b_.emit(
tvm.call_extern(
"int32",
opname,
cc.access_ptr("w"),
aa.access_ptr("r"),
bb.access_ptr("r"),
ci_,
vh_,
vw_,
vc_,
core_id,
)
)
return b_.get()

return _body()

return tvm.decl_tensor_intrin(
conv.op, intrin_func, binds={data_vec: ab_, kernel_vec: bb_, conv: cb_}
)
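
# A small sketch (illustrative only, not part of the original file) of the
# row-major stride computation above: stride[i] is the product of all dims
# after i, with the innermost stride fixed to 1.
import functools

def _row_major_strides(shape):
    strides = [
        functools.reduce(lambda x, y: x * y, shape[i + 1:])
        for i in range(len(shape) - 1)
    ]
    strides.append(1)
    return strides   # e.g. _row_major_strides((4, 3, 2)) == [6, 2, 1]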


def _schedule_asm(cfg, s, a_vec, b_vec, mat, output, last):
"""schedule implementation"""
is_transpose = 0 if not isinstance(cfg, dict) else cfg["is_transpose"]
attrs = mat.op.attrs
vh_, vw_, vc_ = (attrs["vh_"].value, attrs["vw_"].value, attrs["vc_"].value)

# axis split and reorder
if len(a_vec.shape) == 3:
ow, oc = s[last].op.axis
oc, vc = s[last].split(oc, vc_)
ow, vw = s[last].split(ow, vw_)
s[last].reorder(ow, oc, vw, vc)
s[last].vectorize(vc)
oh = ow = oc
elif len(a_vec.shape) == 4:
n, oc, ow, vw, vc = s[last].op.axis
oc, vc = s[last].split(oc, vc_)
ow, vw = s[last].split(ow, vw_)
s[last].reorder(n, oc, ow, vw, vc)
elif len(a_vec.shape) == 6:
if is_transpose:
n, oh, ow, oc = s[last].op.axis
else:
n, oc, oh, ow = s[last].op.axis
oc, vc = s[last].split(oc, vc_)
oh, vh = s[last].split(oh, vh_)
ow, vw = s[last].split(ow, vw_)
s[last].reorder(n, oc, oh, ow, vh, vw, vc)
else:
raise ValueError("not support a_vec: " + str(len(a_vec.shape)))
if last != output and isinstance(output.op, tvm.tensor.ComputeOp):
s[output].compute_inline()

s[mat].compute_at(s[last], ow)
s[mat].vectorize(s[mat].op.axis[-1])

# mark parallel
s[last].parallel(oh)

if len(a_vec.shape) == 3:
om, _, _ = s[a_vec].op.axis
s[a_vec].compute_at(s[last], ow)
s[a_vec].parallel(om)
elif len(a_vec.shape) == 4:
_, om, _, _ = s[a_vec].op.axis
s[a_vec].compute_at(s[last], ow)
s[a_vec].parallel(om)
else:
_, oh, _, _, _, _ = s[a_vec].op.axis
s[a_vec].parallel(oh)
s[a_vec].vectorize(s[a_vec].op.axis[-1])
s[a_vec].compute_inline()

oc, _, _ = s[b_vec].op.axis
s[b_vec].parallel(oc)
s[b_vec].vectorize(s[b_vec].op.axis[-1])
s[b_vec].compute_inline()
return s


def _matmul_schedule_asm(cfg, outs):
"""schedule_conv2d_nchw schedule implementation"""
s = tvm.create_schedule([x.op for x in outs])

def _callback(op):
if "asm_matmul_output" in op.tag:
# schedule matmul
output = op.output(0)
mat = op.input_tensors[0]

sidx = 0
if mat.op.input_tensors[0].name == "attr":
sidx = 1
a_vec = mat.op.input_tensors[sidx]
b_vec = mat.op.input_tensors[sidx + 1]

def recurs_inline(a_):
if a_.op.input_tensors:
a1 = a_.op.input_tensors[0]
if a1.shape == a_.shape:
s[a1].compute_inline()
recurs_inline(a1)

def recurs_inline_(a_):
if isinstance(a_, tvm.tensor.ComputeOp):
if a_.op.input_tensors:
a1 = a_.op.input_tensors[0]
s[a1].compute_inline()
recurs_inline_(a1)

recurs_inline_(a_vec)
recurs_inline_(b_vec)

_schedule_asm(cfg, s, a_vec, b_vec, mat, output, outs[0])

traverse_inline(s, outs[0].op, _callback)
return s

+ 0
- 17
predict/module/tvm_kernel/lite/python/at_ops/__init__.py

@@ -1,17 +0,0 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Neural network operators"""
# from .at_lib import *
# from .at_gen import *

Some files were not shown because too many files changed in this diff
