| @@ -60,11 +60,6 @@ if(ENABLE_GPU) | |||||
| add_compile_definitions(ENABLE_GPU) | add_compile_definitions(ENABLE_GPU) | ||||
| endif () | endif () | ||||
| ## make flatuffer files | |||||
| include_directories("${CMAKE_BINARY_DIR}/predict/schema/inner") | |||||
| file(GLOB_RECURSE FLATBUFFER_IN RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "predict/schema/*.fbs") | |||||
| set(FLATBUFFER_OU "${CMAKE_BINARY_DIR}/predict/schema/inner") | |||||
| ms_build_flatbuffers("${FLATBUFFER_IN}" "${FLATBUFFER_IN}" flat_input "${FLATBUFFER_OU}") | |||||
| ## make protobuf files | ## make protobuf files | ||||
| file(COPY "${ms_onnx_INC}/onnx/onnx.proto" DESTINATION ${CMAKE_BINARY_DIR}/proto) | file(COPY "${ms_onnx_INC}/onnx/onnx.proto" DESTINATION ${CMAKE_BINARY_DIR}/proto) | ||||
| @@ -104,13 +99,9 @@ endif () | |||||
| if (ENABLE_D) | if (ENABLE_D) | ||||
| include_directories("${CMAKE_BINARY_DIR}/backend/kernel_compiler/aicpu") | include_directories("${CMAKE_BINARY_DIR}/backend/kernel_compiler/aicpu") | ||||
| include_directories("${CMAKE_BINARY_DIR}/predict/generator/ir") | |||||
| file(GLOB_RECURSE PROTO_IN RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "backend/kernel_compiler/aicpu/proto/*.proto") | file(GLOB_RECURSE PROTO_IN RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "backend/kernel_compiler/aicpu/proto/*.proto") | ||||
| ms_protobuf_generate(PROTOSRCS PROTOHDRS ${PROTO_IN}) | ms_protobuf_generate(PROTOSRCS PROTOHDRS ${PROTO_IN}) | ||||
| file(GLOB_RECURSE PROTO_INNER RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "predict/proto/*.proto") | |||||
| ms_protobuf_generate(PREDICT_PROTOSRCS PREDICT_PROTOHDRS ${PROTO_INNER}) | |||||
| file(GLOB_RECURSE PROTO_DUMP RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "runtime/device/ascend/dump/proto/*.proto") | file(GLOB_RECURSE PROTO_DUMP RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "runtime/device/ascend/dump/proto/*.proto") | ||||
| ms_protobuf_generate(DUMP_PROTOSRCS PROTOHDRS ${PROTO_DUMP}) | ms_protobuf_generate(DUMP_PROTOSRCS PROTOHDRS ${PROTO_DUMP}) | ||||
| @@ -139,7 +130,7 @@ set(SUB_COMP | |||||
| frontend/operator | frontend/operator | ||||
| pipeline/jit | pipeline/jit | ||||
| pipeline/pynative | pipeline/pynative | ||||
| common debug gvar predict pybind_api utils vm | |||||
| common debug gvar pybind_api utils vm | |||||
| ) | ) | ||||
| foreach (_comp ${SUB_COMP}) | foreach (_comp ${SUB_COMP}) | ||||
| @@ -147,7 +138,7 @@ foreach (_comp ${SUB_COMP}) | |||||
| string(REPLACE "/" "_" sub ${_comp}) | string(REPLACE "/" "_" sub ${_comp}) | ||||
| if (TARGET _mindspore_${sub}_obj) | if (TARGET _mindspore_${sub}_obj) | ||||
| list(APPEND SUB_OBJECTS_SRC $<TARGET_OBJECTS:_mindspore_${sub}_obj>) | list(APPEND SUB_OBJECTS_SRC $<TARGET_OBJECTS:_mindspore_${sub}_obj>) | ||||
| add_dependencies(_mindspore_${sub}_obj proto_input flat_input) | |||||
| add_dependencies(_mindspore_${sub}_obj proto_input ) | |||||
| endif () | endif () | ||||
| endforeach () | endforeach () | ||||
| add_subdirectory(${CMAKE_SOURCE_DIR}/mindspore/core/base base) | add_subdirectory(${CMAKE_SOURCE_DIR}/mindspore/core/base base) | ||||
| @@ -158,7 +149,7 @@ add_subdirectory(${CMAKE_SOURCE_DIR}/mindspore/core/utils util) | |||||
| list(APPEND SUB_OBJECTS_SRC $<TARGET_OBJECTS:_mindspore_core_utils_obj>) | list(APPEND SUB_OBJECTS_SRC $<TARGET_OBJECTS:_mindspore_core_utils_obj>) | ||||
| add_subdirectory(${CMAKE_SOURCE_DIR}/mindspore/core/ir ir) | add_subdirectory(${CMAKE_SOURCE_DIR}/mindspore/core/ir ir) | ||||
| list(APPEND SUB_OBJECTS_SRC $<TARGET_OBJECTS:_mindspore_ir_obj>) | list(APPEND SUB_OBJECTS_SRC $<TARGET_OBJECTS:_mindspore_ir_obj>) | ||||
| add_dependencies(_mindspore_core_utils_obj _mindspore_base_obj _mindspore_ir_obj _mindspore_abstract_obj proto_input flat_input) | |||||
| add_dependencies(_mindspore_core_utils_obj _mindspore_base_obj _mindspore_ir_obj _mindspore_abstract_obj proto_input ) | |||||
| set_property(SOURCE ${SUB_OBJECTS_SRC} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_ME) | set_property(SOURCE ${SUB_OBJECTS_SRC} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_ME) | ||||
| add_library(mindspore STATIC ${SUB_OBJECTS_SRC}) | add_library(mindspore STATIC ${SUB_OBJECTS_SRC}) | ||||
| @@ -34,7 +34,6 @@ | |||||
| #include "runtime/device/kernel_adjust.h" | #include "runtime/device/kernel_adjust.h" | ||||
| #include "runtime/device/ascend/ascend_stream_assign.h" | #include "runtime/device/ascend/ascend_stream_assign.h" | ||||
| #include "runtime/device/ascend/ascend_label_assign.h" | #include "runtime/device/ascend/ascend_label_assign.h" | ||||
| #include "predict/predict.h" | |||||
| #include "backend/session/anf_runtime_algorithm.h" | #include "backend/session/anf_runtime_algorithm.h" | ||||
| #include "ir/scalar.h" | #include "ir/scalar.h" | ||||
| #include "debug/anf_ir_dump.h" | #include "debug/anf_ir_dump.h" | ||||
| @@ -303,8 +302,6 @@ void AscendSession::CompileChildGraph(const KernelGraphPtr &child_graph) { | |||||
| save_graphs_path + "/" + "select_kernel_after" + "_graph_" + std::to_string(child_graph->graph_id()) + ".ir"; | save_graphs_path + "/" + "select_kernel_after" + "_graph_" + std::to_string(child_graph->graph_id()) + ".ir"; | ||||
| DumpIR(file_path, child_graph); | DumpIR(file_path, child_graph); | ||||
| } | } | ||||
| // convert kernel Graph to model | |||||
| predictmodel::StepConvertGraph(child_graph); | |||||
| // optimize graph | // optimize graph | ||||
| HardwareOptimize(child_graph); | HardwareOptimize(child_graph); | ||||
| // assign static memory of parameters | // assign static memory of parameters | ||||
| @@ -333,8 +330,6 @@ void AscendSession::RunGraph(const GraphId &graph_id, const std::vector<tensor:: | |||||
| InitPSParamAndOptim(kernel_graph, inputs); | InitPSParamAndOptim(kernel_graph, inputs); | ||||
| } | } | ||||
| #endif | #endif | ||||
| // convert inputs to model | |||||
| predictmodel::StepConvertWeight(inputs); | |||||
| { | { | ||||
| py::gil_scoped_release release; | py::gil_scoped_release release; | ||||
| // run task on device | // run task on device | ||||
| @@ -1036,8 +1031,6 @@ void AscendSession::HardwareOptimize(NotNull<KernelGraphPtr> graph, | |||||
| memo->insert(graph.get()); | memo->insert(graph.get()); | ||||
| MS_LOG(INFO) << "Start to do HardwareOptimize in graph: " << graph->graph_id(); | MS_LOG(INFO) << "Start to do HardwareOptimize in graph: " << graph->graph_id(); | ||||
| // convert kernel Graph to model | |||||
| predictmodel::StepConvertGraph(graph.get()); | |||||
| HardwareOptimize(graph.get()); | HardwareOptimize(graph.get()); | ||||
| for (auto &child_graph : graph->child_graph_order()) { | for (auto &child_graph : graph->child_graph_order()) { | ||||
| @@ -23,7 +23,6 @@ | |||||
| #include "common/utils.h" | #include "common/utils.h" | ||||
| #include "backend/session/anf_runtime_algorithm.h" | #include "backend/session/anf_runtime_algorithm.h" | ||||
| #include "runtime/device/kernel_runtime.h" | #include "runtime/device/kernel_runtime.h" | ||||
| #include "predict/predict.h" | |||||
| #include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" | #include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" | ||||
| #include "runtime/device/cpu/kernel_select_cpu.h" | #include "runtime/device/cpu/kernel_select_cpu.h" | ||||
| #include "backend/optimizer/common/optimizer.h" | #include "backend/optimizer/common/optimizer.h" | ||||
| @@ -79,7 +78,6 @@ GraphId CPUSession::CompileGraph(const AnfNodePtrList &lst, const AnfNodePtrList | |||||
| Optimize(graph); | Optimize(graph); | ||||
| } | } | ||||
| #endif | #endif | ||||
| predictmodel::StepConvertGraph(graph); | |||||
| MS_LOG(INFO) << "Build kernel"; | MS_LOG(INFO) << "Build kernel"; | ||||
| BuildKernel(graph.get()); | BuildKernel(graph.get()); | ||||
| MS_LOG(INFO) << "Assign kernel address"; | MS_LOG(INFO) << "Assign kernel address"; | ||||
| @@ -100,7 +98,6 @@ void CPUSession::RunGraph(const GraphId &graph_id, const std::vector<tensor::Ten | |||||
| std::vector<tensor::TensorPtr> need_sync_outputs; | std::vector<tensor::TensorPtr> need_sync_outputs; | ||||
| runtime_.BindInputOutput(kernel_graph.get(), inputs, outputs, &need_sync_outputs); | runtime_.BindInputOutput(kernel_graph.get(), inputs, outputs, &need_sync_outputs); | ||||
| MS_LOG(INFO) << "Run graph start"; | MS_LOG(INFO) << "Run graph start"; | ||||
| predictmodel::StepConvertWeight(inputs); | |||||
| auto execution_order = kernel_graph->execution_order(); | auto execution_order = kernel_graph->execution_order(); | ||||
| Reorder(&execution_order); | Reorder(&execution_order); | ||||
| @@ -31,7 +31,6 @@ | |||||
| #include "backend/optimizer/gpu/replace_momentum_cast_fusion.h" | #include "backend/optimizer/gpu/replace_momentum_cast_fusion.h" | ||||
| #include "backend/optimizer/gpu/replace_addn_fusion.h" | #include "backend/optimizer/gpu/replace_addn_fusion.h" | ||||
| #include "runtime/device/kernel_runtime_manager.h" | #include "runtime/device/kernel_runtime_manager.h" | ||||
| #include "predict/predict.h" | |||||
| #include "common/utils.h" | #include "common/utils.h" | ||||
| #include "common/trans.h" | #include "common/trans.h" | ||||
| #include "utils/context/ms_context.h" | #include "utils/context/ms_context.h" | ||||
| @@ -190,8 +189,6 @@ GraphId GPUSession::CompileGraph(const AnfNodePtrList &lst, const AnfNodePtrList | |||||
| // Assign parameter keys. | // Assign parameter keys. | ||||
| AssignParamKey(graph); | AssignParamKey(graph); | ||||
| #endif | #endif | ||||
| // Convert kernel Graph to model | |||||
| predictmodel::StepConvertGraph(graph); | |||||
| // Start gpu kernel runtime | // Start gpu kernel runtime | ||||
| StartKernelRT(); | StartKernelRT(); | ||||
| // Dump .pb graph before hardware optimization | // Dump .pb graph before hardware optimization | ||||
| @@ -245,8 +242,6 @@ void GPUSession::RunGraph(const GraphId &graph_id, const std::vector<tensor::Ten | |||||
| } | } | ||||
| #endif | #endif | ||||
| MS_EXCEPTION_IF_NULL(kernel_graph); | MS_EXCEPTION_IF_NULL(kernel_graph); | ||||
| // Convert inputs to model | |||||
| predictmodel::StepConvertWeight(inputs); | |||||
| { | { | ||||
| py::gil_scoped_release gil_release; | py::gil_scoped_release gil_release; | ||||
| // Run graph on GPU | // Run graph on GPU | ||||
| @@ -123,10 +123,6 @@ PYBIND11_MODULE(_c_expression, m) { | |||||
| "Set whether to enable reduce precision.") | "Set whether to enable reduce precision.") | ||||
| .def("get_save_graphs_path", &mindspore::MsContext::save_graphs_path, "Get save graphs path.") | .def("get_save_graphs_path", &mindspore::MsContext::save_graphs_path, "Get save graphs path.") | ||||
| .def("set_save_graphs_path", &mindspore::MsContext::set_save_graphs_path, "Set save graphs path.") | .def("set_save_graphs_path", &mindspore::MsContext::set_save_graphs_path, "Set save graphs path.") | ||||
| .def("get_save_ms_model_flag", &mindspore::MsContext::save_ms_model_flag, "Get whether to save ms model.") | |||||
| .def("set_save_ms_model_flag", &mindspore::MsContext::set_save_ms_model_flag, "Set whether to save ms model.") | |||||
| .def("get_save_ms_model_path", &mindspore::MsContext::save_ms_model_path, "Get path to save ms model.") | |||||
| .def("set_save_ms_model_path", &mindspore::MsContext::set_save_ms_model_path, "Set path to save ms model") | |||||
| .def("get_enable_dump", &mindspore::MsContext::enable_dump, "Get whether to enable dump.") | .def("get_enable_dump", &mindspore::MsContext::enable_dump, "Get whether to enable dump.") | ||||
| .def("set_enable_dump", &mindspore::MsContext::set_enable_dump, "Set whether to enable dump.") | .def("set_enable_dump", &mindspore::MsContext::set_enable_dump, "Set whether to enable dump.") | ||||
| .def("get_save_dump_path", &mindspore::MsContext::save_dump_path, "Get path to dump.") | .def("get_save_dump_path", &mindspore::MsContext::save_dump_path, "Get path to dump.") | ||||
| @@ -1,14 +0,0 @@ | |||||
| file(GLOB_RECURSE _PREDICT_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} | |||||
| "predict.cc" | |||||
| "generator/utils/ir_model_util.cc" | |||||
| "converter/*.cc" | |||||
| "converter/attr_utils/*.cc" | |||||
| "converter/lite_model/*.cc" | |||||
| "converter/lite_model/operations/*.cc" | |||||
| ) | |||||
| if (ENABLE_D) | |||||
| file(GLOB_RECURSE _D_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "generator/ir/*.cc") | |||||
| list(APPEND _PREDICT_SRC_LIST ${_D_SRC_LIST}) | |||||
| endif () | |||||
| add_library(_mindspore_predict_obj OBJECT ${_PREDICT_SRC_LIST}) | |||||
| @@ -1,229 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "predict/converter/attr_utils/convert_util.h" | |||||
| namespace mindspore { | |||||
| namespace predict { | |||||
| namespace utils { | |||||
| TypePtr GetTypePtr(const AnfNodePtr &anf_node) { | |||||
| MS_EXCEPTION_IF_NULL(anf_node); | |||||
| TypePtr type_ptr = anf_node->Type(); | |||||
| MS_EXCEPTION_IF_NULL(type_ptr); | |||||
| if (type_ptr->isa<TensorType>()) { | |||||
| auto tensor_ptr = type_ptr->cast<TensorTypePtr>(); | |||||
| MS_EXCEPTION_IF_NULL(tensor_ptr); | |||||
| TypePtr elem = tensor_ptr->element(); | |||||
| return elem; | |||||
| } else if (type_ptr->isa<Tuple>()) { | |||||
| auto tuple_ptr = type_ptr->cast<TuplePtr>(); | |||||
| MS_EXCEPTION_IF_NULL(tuple_ptr); | |||||
| auto tuple_i = (*tuple_ptr)[0]; | |||||
| MS_EXCEPTION_IF_NULL(tuple_i); | |||||
| if (tuple_i->isa<TensorType>()) { | |||||
| auto tensor_ptr = tuple_i->cast<TensorTypePtr>(); | |||||
| MS_EXCEPTION_IF_NULL(tensor_ptr); | |||||
| TypePtr elem = tensor_ptr->element(); | |||||
| MS_EXCEPTION_IF_NULL(elem); | |||||
| return elem; | |||||
| } else if (tuple_i->isa<Number>()) { | |||||
| return type_ptr; | |||||
| } else { | |||||
| MS_LOG(EXCEPTION) << "unsupported type: " << type_ptr->ToString(); | |||||
| } | |||||
| } else if (type_ptr->isa<Number>()) { | |||||
| return type_ptr; | |||||
| } | |||||
| std::string type_name = type_ptr->ToString(); | |||||
| MS_LOG(EXCEPTION) | |||||
| << "The output type of node should be a tensor type a number or a tuple of tensor type, but this is: " | |||||
| << type_name; | |||||
| } | |||||
| MsDataType GetMSDataType(TypeId ori_data_type) { | |||||
| MsDataType dst_data_type; | |||||
| switch (ori_data_type) { | |||||
| case kNumberTypeFloat16: | |||||
| dst_data_type = mindspore::predict::DataType_DT_FLOAT16; | |||||
| return dst_data_type; | |||||
| case kNumberTypeFloat32: | |||||
| dst_data_type = mindspore::predict::DataType_DT_FLOAT; | |||||
| return dst_data_type; | |||||
| case kNumberTypeInt8: | |||||
| dst_data_type = mindspore::predict::DataType_DT_INT8; | |||||
| return dst_data_type; | |||||
| case kNumberTypeInt32: | |||||
| dst_data_type = mindspore::predict::DataType_DT_INT32; | |||||
| return dst_data_type; | |||||
| case kNumberTypeUInt8: | |||||
| dst_data_type = mindspore::predict::DataType_DT_UINT8; | |||||
| return dst_data_type; | |||||
| case kNumberTypeUInt32: | |||||
| dst_data_type = mindspore::predict::DataType_DT_UINT32; | |||||
| return dst_data_type; | |||||
| case kTypeUnknown: | |||||
| dst_data_type = mindspore::predict::DataType_DT_UNDEFINED; | |||||
| return dst_data_type; | |||||
| default: | |||||
| MS_LOG(EXCEPTION) << "Ms don't support this DataType"; | |||||
| } | |||||
| } | |||||
| MsFormat GetMsFormat(const std::string &format_str) { | |||||
| if (format_str == kOpFormat_DEFAULT) { | |||||
| MsFormat ms_format = predict::Format_NCHW; | |||||
| return ms_format; | |||||
| } else { | |||||
| // all middle format default to NCHW | |||||
| return predict::Format_NCHW; | |||||
| } | |||||
| } | |||||
| TensorPtr GetParaAscendTensor(const AnfNodePtr &anf_node) { | |||||
| MS_EXCEPTION_IF_NULL(anf_node); | |||||
| if (!anf_node->isa<Parameter>()) { | |||||
| return nullptr; | |||||
| } | |||||
| auto device_type_id = AnfAlgo::GetOutputDeviceDataType(anf_node, 0); | |||||
| // device type_ptr | |||||
| auto device_type_ptr = GetTypePtr(anf_node); | |||||
| // device shape | |||||
| auto shape = AnfAlgo::GetOutputDeviceShape(anf_node, 0); | |||||
| std::vector<int> tensor_shape; | |||||
| (void)std::transform(shape.begin(), shape.end(), std::back_inserter(tensor_shape), SizeToInt); | |||||
| // device format | |||||
| auto format = AnfAlgo::GetOutputFormat(anf_node, 0); | |||||
| // device tensor | |||||
| TensorPtr device_tensor = std::make_shared<tensor::Tensor>(device_type_id, tensor_shape); | |||||
| // device info | |||||
| device_tensor->SetDeviceInfo(format, device_type_ptr); | |||||
| return device_tensor; | |||||
| } | |||||
| TensorPtr GetParaCpuTensor(const AnfNodePtr &anf_node) { | |||||
| MS_EXCEPTION_IF_NULL(anf_node); | |||||
| if (!(anf_node->isa<Parameter>())) { | |||||
| return nullptr; | |||||
| } else { | |||||
| auto ori_type_id = AnfAlgo::GetOutputInferDataType(anf_node, 0); | |||||
| auto ori_type_ptr = GetTypePtr(anf_node); | |||||
| auto ori_shape = AnfAlgo::GetOutputInferShape(anf_node, 0); | |||||
| std::vector<int> tensor_shape; | |||||
| (void)std::transform(ori_shape.begin(), ori_shape.end(), std::back_inserter(tensor_shape), SizeToInt); | |||||
| auto ori_format = AnfAlgo::GetOutputFormat(anf_node, 0); | |||||
| TensorPtr cpu_tensor = std::make_shared<tensor::Tensor>(ori_type_id, tensor_shape); | |||||
| cpu_tensor->SetDeviceInfo(ori_format, ori_type_ptr); | |||||
| return cpu_tensor; | |||||
| } | |||||
| } | |||||
| TensorPtr GetValueTensor(const ValueNodePtr &const_node) { | |||||
| MS_EXCEPTION_IF_NULL(const_node); | |||||
| auto value_ptr = const_node->value(); | |||||
| MS_EXCEPTION_IF_NULL(value_ptr); | |||||
| if (!value_ptr->isa<tensor::Tensor>()) { | |||||
| return nullptr; | |||||
| } | |||||
| TensorPtr tensor = value_ptr->cast<TensorPtr>(); | |||||
| MS_EXCEPTION_IF_NULL(tensor); | |||||
| auto data_type = tensor->Dtype(); | |||||
| MS_EXCEPTION_IF_NULL(data_type); | |||||
| auto type_id = data_type->type_id(); | |||||
| auto shape = tensor->shape(); | |||||
| TensorPtr tensor_constant = std::make_shared<tensor::Tensor>(type_id, shape); | |||||
| tensor_constant->SetDeviceInfo(tensor->device_info().format_, tensor->device_info().data_type_); | |||||
| return tensor_constant; | |||||
| } | |||||
| TensorPtr GetKernelCpuTensor(const CNodePtr &c_node_ptr, size_t inx) { | |||||
| if (c_node_ptr == nullptr || inx >= AnfAlgo::GetOutputTensorNum(c_node_ptr)) { | |||||
| MS_LOG(ERROR) << "GetKernelCpuTensor failed"; | |||||
| return nullptr; | |||||
| } | |||||
| auto ori_shape = AnfAlgo::GetOutputInferShape(c_node_ptr, inx); | |||||
| auto ori_type_id = AnfAlgo::GetOutputInferDataType(c_node_ptr, inx); | |||||
| std::vector<int> tensor_shape; | |||||
| (void)std::transform(ori_shape.begin(), ori_shape.end(), std::back_inserter(tensor_shape), SizeToInt); | |||||
| auto ori_output_type = GetTypePtr(c_node_ptr); | |||||
| TensorPtr device_tensor = std::make_shared<tensor::Tensor>(ori_type_id, tensor_shape); | |||||
| auto format = AnfAlgo::GetOutputFormat(c_node_ptr, inx); | |||||
| device_tensor->SetDeviceInfo(format, ori_output_type); | |||||
| return device_tensor; | |||||
| } | |||||
| TensorPtr GetKernelAscendTensor(const CNodePtr &c_node_ptr, size_t inx) { | |||||
| if (c_node_ptr == nullptr || inx >= AnfAlgo::GetOutputTensorNum(c_node_ptr)) { | |||||
| MS_LOG(ERROR) << "GetKernelAscendTensor failed"; | |||||
| return nullptr; | |||||
| } | |||||
| auto shape = AnfAlgo::GetOutputDeviceShape(c_node_ptr, inx); | |||||
| std::vector<int> tensor_shape; | |||||
| (void)std::transform(shape.begin(), shape.end(), std::back_inserter(tensor_shape), SizeToInt); | |||||
| auto format = AnfAlgo::GetOutputFormat(c_node_ptr, inx); | |||||
| auto type_id = AnfAlgo::GetOutputDeviceDataType(c_node_ptr, inx); | |||||
| auto output_type_ptr = GetTypePtr(c_node_ptr); | |||||
| TensorPtr device_tensor = std::make_shared<tensor::Tensor>(type_id, tensor_shape); | |||||
| device_tensor->SetDeviceInfo(format, output_type_ptr); | |||||
| return device_tensor; | |||||
| } | |||||
| TensorPtr GetOutputTensor(const AnfNodePtr &out_node, size_t inx) { | |||||
| MS_EXCEPTION_IF_NULL(out_node); | |||||
| auto shape = AnfAlgo::GetOutputInferShape(out_node, inx); | |||||
| std::vector<int> tensor_shape; | |||||
| (void)std::transform(shape.begin(), shape.end(), std::back_inserter(tensor_shape), SizeToInt); | |||||
| auto type_id = AnfAlgo::GetOutputInferDataType(out_node, inx); | |||||
| auto output_type_ptr = GetTypePtr(out_node); | |||||
| auto format = AnfAlgo::GetOutputFormat(out_node, inx); | |||||
| TensorPtr output_tensor = std::make_shared<tensor::Tensor>(type_id, tensor_shape); | |||||
| output_tensor->SetDeviceInfo(format, output_type_ptr); | |||||
| return output_tensor; | |||||
| } | |||||
| bool FindNodeInMap(const std::unordered_map<MsKernelKey, int> &node_map, const AnfNodePtr &node) { | |||||
| return std::any_of(node_map.begin(), node_map.end(), | |||||
| [node](const std::pair<MsKernelKey, int> &kernel_key) { return kernel_key.first == node.get(); }); | |||||
| } | |||||
| bool SaveDeviceModelUtil(const std::shared_ptr<GraphDefT> &new_ms_graph_ptr, const std::string &save_path_name, | |||||
| SubGraphDefT *sub_graph) { | |||||
| MS_EXCEPTION_IF_NULL(new_ms_graph_ptr); | |||||
| MS_EXCEPTION_IF_NULL(sub_graph); | |||||
| // save mindspore schema to file | |||||
| new_ms_graph_ptr->name = "default_graph"; | |||||
| std::unique_ptr<mindspore::predict::SubGraphDefT> sub_graph_ptr(sub_graph); | |||||
| new_ms_graph_ptr->subgraphs.emplace_back(std::move(sub_graph_ptr)); | |||||
| // get flatbuffer builder | |||||
| flatbuffers::FlatBufferBuilder builder(1024); | |||||
| auto offset = mindspore::predict::GraphDef::Pack(builder, new_ms_graph_ptr.get()); | |||||
| builder.Finish(offset); | |||||
| auto size = builder.GetSize(); | |||||
| if (size == 0) { | |||||
| MS_LOG(ERROR) << "builder has no size"; | |||||
| return false; | |||||
| } | |||||
| auto content = builder.GetBufferPointer(); | |||||
| std::ofstream output(save_path_name); | |||||
| if (!output.is_open()) { | |||||
| MS_LOG(EXCEPTION) << "mindspore.mindspoire output failed"; | |||||
| } | |||||
| (void)output.write((const char *)content, size); | |||||
| output.close(); | |||||
| return true; | |||||
| } | |||||
| } // namespace utils | |||||
| } // namespace predict | |||||
| } // namespace mindspore | |||||
| @@ -1,60 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_PREDICT_CONVERTER_ATTR_UTILS_CONVERT_UTIL_H_ | |||||
| #define MINDSPORE_CCSRC_PREDICT_CONVERTER_ATTR_UTILS_CONVERT_UTIL_H_ | |||||
| #include <vector> | |||||
| #include <utility> | |||||
| #include <algorithm> | |||||
| #include <memory> | |||||
| #include <unordered_map> | |||||
| #include <string> | |||||
| #include <fstream> | |||||
| #include "ir/tensor.h" | |||||
| #include "backend/session/anf_runtime_algorithm.h" | |||||
| #include "predict/schema/inner/ms_generated.h" | |||||
| using TensorPtr = mindspore::tensor::TensorPtr; | |||||
| using TensorPtrList = std::vector<mindspore::tensor::TensorPtr>; | |||||
| using AllOutputTensors = std::unordered_map<int, TensorPtrList>; | |||||
| using OpDefT = mindspore::predict::OpDefT; | |||||
| using GraphDefT = mindspore::predict::GraphDefT; | |||||
| using TensorDefT = mindspore::predict::TensorDefT; | |||||
| using SubGraphDefT = mindspore::predict::SubGraphDefT; | |||||
| using SubGraphPtr = std::unique_ptr<mindspore::predict::SubGraphDefT>; | |||||
| using MsDataType = mindspore::predict::DataType; | |||||
| using MsFormat = mindspore::predict::Format; | |||||
| using MsKernelKey = void *; | |||||
| namespace mindspore { | |||||
| namespace predict { | |||||
| namespace utils { | |||||
| TypePtr GetTypePtr(const AnfNodePtr &anf_node); | |||||
| MsDataType GetMSDataType(TypeId ori_data_type); | |||||
| MsFormat GetMsFormat(const std::string &format_str); | |||||
| TensorPtr GetParaAscendTensor(const AnfNodePtr &anf_node); | |||||
| TensorPtr GetParaCpuTensor(const AnfNodePtr &anf_node); | |||||
| TensorPtr GetValueTensor(const ValueNodePtr &const_node); | |||||
| TensorPtr GetKernelCpuTensor(const CNodePtr &c_node_ptr, size_t inx); | |||||
| TensorPtr GetKernelAscendTensor(const CNodePtr &c_node_ptr, size_t inx); | |||||
| TensorPtr GetOutputTensor(const AnfNodePtr &out_node, size_t inx); | |||||
| bool FindNodeInMap(const std::unordered_map<MsKernelKey, int> &Nodemap, const AnfNodePtr &node); | |||||
| bool SaveDeviceModelUtil(const std::shared_ptr<GraphDefT> &new_ms_graph_ptr, const std::string &save_path_name, | |||||
| SubGraphDefT *sub_graph_def_t); | |||||
| } // namespace utils | |||||
| } // namespace predict | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_PREDICT_CONVERTER_ATTR_UTILS_CONVERT_UTIL_H_ | |||||
| @@ -1,65 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_PREDICT_CONVERTER_CPU_ATTR_UTILS_OP_ATTR_TYPE_H_ | |||||
| #define MINDSPORE_CCSRC_PREDICT_CONVERTER_CPU_ATTR_UTILS_OP_ATTR_TYPE_H_ | |||||
| namespace mindspore { | |||||
| namespace predict { | |||||
| namespace convert { | |||||
| typedef enum CpuOpType { | |||||
| CPU_OP_PAD = 0, | |||||
| CPU_OP_MAXIMUM, | |||||
| CPU_OP_CONCAT, | |||||
| CPU_OP_SOFTMAX, | |||||
| CPU_OP_ACTIVATION, | |||||
| CPU_OP_CONV2D, | |||||
| CPU_OP_FUSEDBATCHNORM, | |||||
| CPU_OP_CAFFEBATCHNORM, | |||||
| CPU_OP_SQUEEZE, | |||||
| CPU_OP_BIASADD, | |||||
| CPU_OP_POOLING, | |||||
| CPU_OP_DEPTHWISECONV2D, | |||||
| CPU_OP_DEDEPTHWISECONV2D, | |||||
| CPU_OP_RESIZE, | |||||
| CPU_OP_DETECTIONPOSTPROCESS, | |||||
| CPU_OP_FULLCONNECTION, | |||||
| CPU_OP_MEAN, | |||||
| CPU_OP_DECONV2D, | |||||
| CPU_OP_SCALE, | |||||
| CPU_OP_ELTWISE, | |||||
| CPU_OP_ADD, | |||||
| CPU_OP_SLICE, | |||||
| CPU_OP_MUL, | |||||
| CPU_OP_EXP, | |||||
| CPU_OP_RESHAPE, | |||||
| CPU_OP_POWER, | |||||
| CPU_OP_ARGMAX, | |||||
| CPU_OP_ARGMAX_NETOUTPUT, | |||||
| CPU_OP_MATMUL, | |||||
| CPU_OP_CAFFEPRELU, | |||||
| CPU_OP_STRIDEDSLICE, | |||||
| CPU_OP_STACK, | |||||
| CPU_OP_RANGE, | |||||
| CPU_OP_EXPANDDIMS, | |||||
| CPU_OP_TILE, | |||||
| CPU_OP_CAST, | |||||
| CPU_OP_CAFFECROP, | |||||
| CPU_OP_PRESERVEED = 37 | |||||
| } CpuOpType_t; | |||||
| } // namespace convert | |||||
| } // namespace predict | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_PREDICT_CONVERTER_CPU_ATTR_UTILS_OP_ATTR_TYPE_H_ | |||||
| @@ -1,49 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "predict/converter/executor_tensor.h" | |||||
| namespace mindspore { | |||||
| namespace executor { | |||||
| int TensorCache::addExTensor(int tensor_key, const TensorPtr &tensor, int refCount, const std::vector<int> &host_shape, | |||||
| ExTensorType stable, bool inc) { | |||||
| MS_EXCEPTION_IF_NULL(tensor); | |||||
| TensorPtr tmp_tensor = tensor; | |||||
| ExTensorPtr ex_tensor_ptr = | |||||
| std::make_shared<ExTensor>(tensor_key, tmp_tensor, refCount, nodeIndex, host_shape, stable); | |||||
| int pre_index = ex_tensor_ptr->index_; | |||||
| if (inc) { | |||||
| nodeIndex++; | |||||
| } | |||||
| // no need to judge,just add to map directly | |||||
| tensors[tensor_key].push_back(ex_tensor_ptr); | |||||
| return pre_index; | |||||
| } | |||||
| std::vector<ExTensorPtr> TensorCache::findTensor(int key) { | |||||
| std::vector<ExTensorPtr> ex_tensors; | |||||
| auto iter = tensors.find(key); | |||||
| if (iter != tensors.end()) { | |||||
| return iter->second; | |||||
| } else { | |||||
| MS_LOG(INFO) << "can not find any tensorlist"; | |||||
| return ex_tensors; | |||||
| } | |||||
| } | |||||
| void TensorCache::deleteTensor(int key) { (void)tensors.erase(key); } | |||||
| } // namespace executor | |||||
| } // namespace mindspore | |||||
| @@ -1,70 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_PREDICT_CONVERTER_EXECUTOR_TENSOR_H_ | |||||
| #define MINDSPORE_CCSRC_PREDICT_CONVERTER_EXECUTOR_TENSOR_H_ | |||||
| #include <vector> | |||||
| #include <memory> | |||||
| #include <unordered_map> | |||||
| #include <utility> | |||||
| #include "ir/tensor.h" | |||||
| namespace mindspore { | |||||
| namespace executor { | |||||
| using TensorPtr = tensor::TensorPtr; | |||||
| static constexpr int MS_MAX_REFCOUNT = 999; | |||||
| enum ExTensorType { INPUTDATA, WEIGHTS, CONSTANT, KERNEL, OUTPUT }; | |||||
| class ExTensor { | |||||
| public: | |||||
| int key_; | |||||
| TensorPtr device_tensor_ptr_; | |||||
| int ref_count_; | |||||
| int index_; | |||||
| std::vector<int> host_shape_; | |||||
| ExTensorType stable_; | |||||
| ExTensor(int key, TensorPtr tensor_ptr, int ref_count, int index, std::vector<int> host_shape, | |||||
| ExTensorType ex_tensor_type) | |||||
| : key_(key), | |||||
| device_tensor_ptr_(std::move(tensor_ptr)), | |||||
| ref_count_(ref_count), | |||||
| index_(index), | |||||
| host_shape_(std::move(host_shape)), | |||||
| stable_(ex_tensor_type) {} | |||||
| ~ExTensor() { host_shape_.clear(); } | |||||
| }; | |||||
| using ExTensorPtr = std::shared_ptr<ExTensor>; | |||||
| class TensorCache { | |||||
| public: | |||||
| TensorCache() = default; | |||||
| ~TensorCache() { tensors.clear(); } | |||||
| int addExTensor(int tensor_key, const TensorPtr &tensor, int refCount, const std::vector<int> &host_shape, | |||||
| ExTensorType stable, bool inc = true); | |||||
| // just adjust for dynamic tensor | |||||
| std::vector<ExTensorPtr> findTensor(int key); | |||||
| void deleteTensor(int key); | |||||
| const std::unordered_map<int, std::vector<ExTensorPtr>> &GetCachedTensor() const { return tensors; } | |||||
| private: | |||||
| std::unordered_map<int, std::vector<ExTensorPtr>> tensors; | |||||
| int nodeIndex = 0; | |||||
| }; | |||||
| using TensorCachePtr = std::shared_ptr<TensorCache>; | |||||
| } // namespace executor | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_PREDICT_CONVERTER_EXECUTOR_TENSOR_H_ | |||||
| @@ -1,561 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "predict/converter/kernel2ms.h" | |||||
| #include <algorithm> | |||||
| #include "ir/anf.h" | |||||
| #include "predict/converter/lite_model/op_attr_packer.h" | |||||
| #include "mindspore/ccsrc/frontend/operator/ops.h" | |||||
| namespace mindspore { | |||||
| namespace executor { | |||||
| Kernel2Ms &Kernel2Ms::GetInstance() { | |||||
| static Kernel2Ms instance; | |||||
| return instance; | |||||
| } | |||||
| bool Kernel2Ms::SetMemResue() const { | |||||
| MS_LOG(INFO) << "MemResue start"; | |||||
| return true; | |||||
| } | |||||
| bool Kernel2Ms::SetAllTensors(const TensorCachePtr &tensor_cache, SubGraphDefT *ms_graph) { | |||||
| if (tensor_cache == nullptr || ms_graph == nullptr) { | |||||
| return false; | |||||
| } | |||||
| const std::unordered_map<int, std::vector<ExTensorPtr>> &cachedTensors = tensor_cache->GetCachedTensor(); | |||||
| size_t total_size = 0; | |||||
| if (cachedTensors.empty()) { | |||||
| return false; | |||||
| } | |||||
| for (auto &iter : cachedTensors) { | |||||
| auto ex_tensors = iter.second; | |||||
| total_size += ex_tensors.size(); | |||||
| } | |||||
| ms_graph->allTensors.resize(total_size); | |||||
| for (auto &iter : cachedTensors) { | |||||
| for (auto &ex_tensor : iter.second) { | |||||
| std::unique_ptr<TensorDefT> ms_tensor(new TensorDefT()); | |||||
| auto device_tensor_tmp = ex_tensor->device_tensor_ptr_; | |||||
| auto device_d_type = device_tensor_tmp->data_type(); | |||||
| ms_tensor->dataType = predict::utils::GetMSDataType(device_d_type); | |||||
| auto device_shape = device_tensor_tmp->shape(); | |||||
| ms_tensor->dims.clear(); | |||||
| if (device_shape.empty()) { | |||||
| ms_tensor->dims.push_back(1); | |||||
| } else { | |||||
| ms_tensor->dims.assign(device_shape.begin(), device_shape.end()); | |||||
| } | |||||
| std::string format_str = device_tensor_tmp->device_info().format_; | |||||
| ms_tensor->format = predict::utils::GetMsFormat(format_str); | |||||
| ms_tensor->offset = 0; | |||||
| auto stable = ex_tensor->stable_; | |||||
| if (stable == INPUTDATA || stable == CONSTANT || stable == WEIGHTS) { | |||||
| ms_tensor->refCount = MS_MAX_REFCOUNT; | |||||
| } else { | |||||
| ms_tensor->refCount = 0; | |||||
| } | |||||
| ms_graph->allTensors[IntToSize(ex_tensor->index_)] = std::move(ms_tensor); | |||||
| } | |||||
| } | |||||
| return true; | |||||
| } | |||||
| bool Kernel2Ms::SetGraphOutputIdx(const KernelGraphPtr &kernel_graph_ptr, const TensorCachePtr &tensor_cache, | |||||
| SubGraphDefT *ms_graph, AllOutputTensors *all_output_tensors) { | |||||
| MS_EXCEPTION_IF_NULL(tensor_cache); | |||||
| MS_EXCEPTION_IF_NULL(ms_graph); | |||||
| MS_EXCEPTION_IF_NULL(all_output_tensors); | |||||
| auto out_nodes = kernel_graph_ptr->outputs(); | |||||
| if (out_nodes.empty()) { | |||||
| return false; | |||||
| } | |||||
| // maybe need to judge out_nodes is real && output must be CNode | |||||
| for (size_t i = 0; i < out_nodes.size(); ++i) { | |||||
| std::vector<AnfNodePtr> real_inputs_link; | |||||
| std::vector<size_t> real_output_idx_link; | |||||
| GetRealInpoutsPtr(out_nodes[i], &real_inputs_link, &real_output_idx_link); | |||||
| if (real_inputs_link.empty()) { | |||||
| MS_LOG(INFO) << "this graph output node is vitural node, has no real input"; | |||||
| continue; | |||||
| } | |||||
| for (size_t k = 0; k < real_inputs_link.size(); ++k) { | |||||
| int key = node_indexs_[out_nodes[i].get()]; | |||||
| auto ex_tensor_list = tensor_cache->findTensor(key); | |||||
| if (ex_tensor_list.empty()) { | |||||
| MS_LOG(INFO) << "SetGraphOutputIdx do not add Extensor "; | |||||
| continue; | |||||
| } | |||||
| auto ex_tensor = ex_tensor_list[real_output_idx_link[k]]; | |||||
| ex_tensor_list.clear(); | |||||
| ms_graph->outputIndex.push_back(ex_tensor->index_); | |||||
| } | |||||
| } | |||||
| return true; | |||||
| } | |||||
| bool Kernel2Ms::SetOpOutputIdx(const CNodePtr &c_node_ptr, const TensorPtr &output_tensor, | |||||
| const TensorCachePtr &tensor_cache, int ref_count, size_t order_index, OpDefT *ms_node) { | |||||
| MS_EXCEPTION_IF_NULL(c_node_ptr); | |||||
| MS_EXCEPTION_IF_NULL(output_tensor); | |||||
| MS_EXCEPTION_IF_NULL(ms_node); | |||||
| MS_EXCEPTION_IF_NULL(tensor_cache); | |||||
| if (!predict::utils::FindNodeInMap(node_indexs_, c_node_ptr)) { | |||||
| MS_LOG(ERROR) << "can not find any pk_key in inited node_indexs map"; | |||||
| return false; | |||||
| } | |||||
| int tensor_key = node_indexs_[c_node_ptr.get()]; | |||||
| auto host_shape = AnfAlgo::GetOutputInferShape(c_node_ptr, order_index); | |||||
| std::vector<int> tensor_shape; | |||||
| (void)std::transform(host_shape.begin(), host_shape.end(), std::back_inserter(tensor_shape), SizeToInt); | |||||
| int outputIndex = tensor_cache->addExTensor(tensor_key, output_tensor, ref_count, tensor_shape, KERNEL); | |||||
| ms_node->outputIndex.push_back(outputIndex); | |||||
| return true; | |||||
| } | |||||
| void Kernel2Ms::GetRealInpoutsPtr(const AnfNodePtr &node, std::vector<AnfNodePtr> *real_inputs, | |||||
| std::vector<size_t> *real_output_idx) { | |||||
| MS_EXCEPTION_IF_NULL(real_inputs); | |||||
| MS_EXCEPTION_IF_NULL(real_output_idx); | |||||
| size_t default_idx = 0; | |||||
| if (node->isa<CNode>()) { | |||||
| auto c_node = node->cast<CNodePtr>(); | |||||
| MS_EXCEPTION_IF_NULL(c_node); | |||||
| std::string c_node_name = GetCNodeFuncName(c_node); | |||||
| if (c_node_name == prim::kPrimTupleGetItem->name()) { | |||||
| auto v_node = c_node->inputs()[kTupleGetItemIndex]->cast<ValueNodePtr>(); | |||||
| MS_EXCEPTION_IF_NULL(v_node); | |||||
| default_idx = IntToSize(GetValue<int>(v_node->value())); | |||||
| real_inputs->push_back(c_node->inputs()[1]); | |||||
| real_output_idx->push_back(default_idx); | |||||
| return; | |||||
| } else if (c_node_name == prim::kPrimDepend->name()) { | |||||
| GetRealInpoutsPtr(c_node->inputs()[1], real_inputs, real_output_idx); | |||||
| return; | |||||
| } else if (c_node_name == prim::kPrimMakeTuple->name()) { | |||||
| for (auto &in : c_node->inputs()) { | |||||
| GetRealInpoutsPtr(in, real_inputs, real_output_idx); | |||||
| } | |||||
| return; | |||||
| } else { | |||||
| real_inputs->push_back(node); | |||||
| real_output_idx->push_back(default_idx); | |||||
| } | |||||
| } else if (node->isa<Parameter>()) { | |||||
| real_inputs->push_back(node); | |||||
| real_output_idx->push_back(default_idx); | |||||
| } else if (node->isa<ValueNode>()) { | |||||
| real_inputs->push_back(node); | |||||
| real_output_idx->push_back(default_idx); | |||||
| } | |||||
| } | |||||
| bool Kernel2Ms::SetOpInputIdx(const CNodePtr &c_node_ptr, const TensorCachePtr &tensor_cache, OpDefT *ms_node) { | |||||
| MS_EXCEPTION_IF_NULL(c_node_ptr); | |||||
| MS_EXCEPTION_IF_NULL(tensor_cache); | |||||
| MS_EXCEPTION_IF_NULL(ms_node); | |||||
| for (size_t i = 1; i < c_node_ptr->inputs().size(); ++i) { | |||||
| std::vector<AnfNodePtr> real_inputs; | |||||
| std::vector<size_t> real_output_idx; | |||||
| GetRealInpoutsPtr(c_node_ptr->inputs()[i], &real_inputs, &real_output_idx); | |||||
| if (real_inputs.empty()) { | |||||
| MS_LOG(INFO) << "kernel has no inputs: " << c_node_ptr.get() << " input size[%lu]" << c_node_ptr->inputs().size(); | |||||
| continue; | |||||
| } | |||||
| for (size_t j = 0; j < real_inputs.size(); ++j) { | |||||
| int key = node_indexs_[real_inputs[j].get()]; | |||||
| std::vector<ExTensorPtr> ex_tensor_list = tensor_cache->findTensor(key); | |||||
| if (ex_tensor_list.empty()) { | |||||
| continue; | |||||
| } | |||||
| ExTensorPtr ex_tensor_ptr = ex_tensor_list[real_output_idx[j]]; | |||||
| ex_tensor_list.clear(); | |||||
| ms_node->inputIndex.push_back(ex_tensor_ptr->index_); | |||||
| } | |||||
| } | |||||
| return true; | |||||
| } | |||||
| void Kernel2Ms::TransformGraphIndx() { | |||||
| // transform index && anfnodeptr | |||||
| if (node_indexs_.empty()) { | |||||
| MS_LOG(EXCEPTION) << "node_indexs_ not ininted"; | |||||
| } | |||||
| for (auto &item : node_indexs_) { | |||||
| index_nodes_[item.second] = item.first; | |||||
| } | |||||
| } | |||||
| bool Kernel2Ms::InitGraphInputsIndx(const KernelGraphPtr &kernel_graph_ptr) { | |||||
| MS_EXCEPTION_IF_NULL(kernel_graph_ptr); | |||||
| auto input_nodes = kernel_graph_ptr->inputs(); | |||||
| if (input_nodes.empty()) { | |||||
| return false; | |||||
| } | |||||
| for (const auto &input_node : input_nodes) { | |||||
| if (input_node->isa<Parameter>()) { | |||||
| if (!predict::utils::FindNodeInMap(node_indexs_, input_node)) { | |||||
| // init every parameter node | |||||
| node_indexs_[input_node.get()] = graph_index_; | |||||
| graph_index_++; | |||||
| } | |||||
| } else { | |||||
| MS_LOG(INFO) << "This node is anfnode, no need to handle, continue. node info: " << input_node->ToString(); | |||||
| continue; | |||||
| } | |||||
| } | |||||
| MS_LOG(DEBUG) << "inputs GraphIndex: " << graph_index_; | |||||
| return true; | |||||
| } | |||||
| bool Kernel2Ms::InitGraphValueNodesIndx(const KernelGraphPtr &kernel_graph_ptr) { | |||||
| MS_EXCEPTION_IF_NULL(kernel_graph_ptr); | |||||
| if (kernel_graph_ptr->value_nodes().empty()) { | |||||
| return false; | |||||
| } | |||||
| for (auto &item : kernel_graph_ptr->value_nodes()) { | |||||
| if (item.first->isa<ValueNode>()) { | |||||
| auto value_node = item.first->cast<ValueNodePtr>(); | |||||
| MS_EXCEPTION_IF_NULL(value_node); | |||||
| if (value_node == nullptr) { | |||||
| MS_LOG(WARNING) << "value_node is nullptr"; | |||||
| return false; | |||||
| } | |||||
| if (value_node->value() == nullptr) { | |||||
| MS_LOG(ERROR) << "Constant value is null."; | |||||
| return false; | |||||
| } | |||||
| if (!value_node->value()->isa<tensor::Tensor>()) { | |||||
| continue; | |||||
| } | |||||
| if (!predict::utils::FindNodeInMap(node_indexs_, item.first)) { | |||||
| // init node | |||||
| auto node_ptr = item.first; | |||||
| node_indexs_[node_ptr.get()] = graph_index_; | |||||
| graph_index_++; | |||||
| } | |||||
| } | |||||
| } | |||||
| return true; | |||||
| } | |||||
| bool Kernel2Ms::InitGraphOpsIndx(const KernelGraphPtr &kernel_graph_ptr) { | |||||
| MS_EXCEPTION_IF_NULL(kernel_graph_ptr); | |||||
| auto kernels = kernel_graph_ptr->execution_order(); | |||||
| if (kernels.empty()) { | |||||
| MS_LOG(WARNING) << "this graph has no kernel"; | |||||
| return false; | |||||
| } | |||||
| for (size_t i = 0; i < kernels.size(); ++i) { | |||||
| // for each kernel's inputs foreach real_input | |||||
| if (kernels[i]->isa<CNode>()) { | |||||
| if (!predict::utils::FindNodeInMap(node_indexs_, kernels[i])) { | |||||
| // init node | |||||
| node_indexs_[kernels[i].get()] = graph_index_; | |||||
| graph_index_++; | |||||
| } | |||||
| } | |||||
| } | |||||
| return true; | |||||
| } | |||||
| bool Kernel2Ms::InitGraphOutputsIndx(const KernelGraphPtr &kernel_graph_ptr) { | |||||
| MS_EXCEPTION_IF_NULL(kernel_graph_ptr); | |||||
| // graph output && their inputs should link together | |||||
| auto out_nodes = kernel_graph_ptr->outputs(); | |||||
| if (out_nodes.empty()) { | |||||
| MS_LOG(ERROR) << "this graph has no outputs"; | |||||
| return false; | |||||
| } | |||||
| for (auto &item : out_nodes) { | |||||
| if (!predict::utils::FindNodeInMap(node_indexs_, item)) { | |||||
| node_indexs_[item.get()] = graph_index_; | |||||
| graph_index_++; | |||||
| } | |||||
| } | |||||
| return true; | |||||
| } | |||||
| bool Kernel2Ms::InitGraphIndx(const KernelGraphPtr &kernel_graph_ptr) { | |||||
| MS_EXCEPTION_IF_NULL(kernel_graph_ptr); | |||||
| // only parameter | |||||
| if (!InitGraphInputsIndx(kernel_graph_ptr)) { | |||||
| return false; | |||||
| } | |||||
| // init value node | |||||
| if (!InitGraphValueNodesIndx(kernel_graph_ptr)) { | |||||
| return false; | |||||
| } | |||||
| // init op | |||||
| if (!InitGraphOpsIndx(kernel_graph_ptr)) { | |||||
| return false; | |||||
| } | |||||
| // init Graphoutput attention: out_put nodes have inputs | |||||
| return InitGraphOutputsIndx(kernel_graph_ptr); | |||||
| } | |||||
| bool Kernel2Ms::SetGraphInputTensors(const KernelGraphPtr &kernel_graph_ptr, const TensorCachePtr &tensor_cache, | |||||
| SubGraphDefT *ms_graph) { | |||||
| MS_EXCEPTION_IF_NULL(tensor_cache); | |||||
| MS_EXCEPTION_IF_NULL(ms_graph); | |||||
| MS_EXCEPTION_IF_NULL(kernel_graph_ptr); | |||||
| if (convert_mode_ == kConvertUnused) { | |||||
| return false; | |||||
| } | |||||
| if (kernel_graph_ptr->inputs().empty()) { | |||||
| return false; | |||||
| } | |||||
| for (const auto &input_node : kernel_graph_ptr->inputs()) { | |||||
| if (input_node->isa<Parameter>()) { | |||||
| ParameterPtr pk_node = std::dynamic_pointer_cast<Parameter>(input_node); | |||||
| TensorPtr device_tensor; | |||||
| if (convert_mode_ == kConvertCpuMode) { | |||||
| device_tensor = predict::utils::GetParaCpuTensor(input_node); | |||||
| } else { | |||||
| device_tensor = predict::utils::GetParaAscendTensor(input_node); | |||||
| } | |||||
| if (device_tensor == nullptr) { | |||||
| return false; | |||||
| } | |||||
| ExTensorType node_type; | |||||
| if (AnfAlgo::IsParameterWeight(pk_node)) { | |||||
| node_type = WEIGHTS; | |||||
| } else { | |||||
| node_type = INPUTDATA; | |||||
| } | |||||
| if (!predict::utils::FindNodeInMap(node_indexs_, input_node)) { | |||||
| MS_LOG(WARNING) << "can not find any pk_key in inited node_indexs map"; | |||||
| return false; | |||||
| } | |||||
| auto pk_key = node_indexs_[input_node.get()]; | |||||
| all_output_tensors_[pk_key].push_back(device_tensor); | |||||
| int nodeRefCount = SizeToInt(AnfAlgo::GetOutputTensorNum(input_node)); | |||||
| int nodeInputIdx = | |||||
| tensor_cache->addExTensor(pk_key, device_tensor, nodeRefCount, device_tensor->shape(), node_type); | |||||
| if (!AnfAlgo::IsParameterWeight(pk_node)) { | |||||
| ms_graph->inputIndex.push_back(nodeInputIdx); | |||||
| all_input_idxs_.push_back(nodeInputIdx); | |||||
| } else { | |||||
| input_weight_idxs_.push_back(nodeInputIdx); | |||||
| all_input_idxs_.push_back(nodeInputIdx); | |||||
| } | |||||
| } | |||||
| } | |||||
| return true; | |||||
| } | |||||
| bool Kernel2Ms::SetGraphValueTensors(const KernelGraphPtr &kernel_graph_ptr, const TensorCachePtr &tensor_cache) { | |||||
| MS_EXCEPTION_IF_NULL(kernel_graph_ptr); | |||||
| MS_EXCEPTION_IF_NULL(tensor_cache); | |||||
| for (auto &item : kernel_graph_ptr->value_nodes()) { | |||||
| if (item.first->isa<ValueNode>()) { | |||||
| auto const_node = item.first->cast<ValueNodePtr>(); | |||||
| auto tensor_constant = predict::utils::GetValueTensor(const_node); | |||||
| if (tensor_constant == nullptr) { | |||||
| continue; | |||||
| } | |||||
| if (!predict::utils::FindNodeInMap(node_indexs_, item.first)) { | |||||
| MS_LOG(WARNING) << "can not find any pk_key in inited node_indexs map"; | |||||
| return false; | |||||
| } | |||||
| int constant_key = node_indexs_[(item.first).get()]; | |||||
| all_output_tensors_[constant_key].push_back(tensor_constant); | |||||
| auto shape = tensor_constant->shape(); | |||||
| (void)tensor_cache->addExTensor(constant_key, tensor_constant, 0, shape, CONSTANT); | |||||
| } | |||||
| } | |||||
| return true; | |||||
| } | |||||
| bool Kernel2Ms::SetGraphOpTensors(const KernelGraphPtr &kernel_graph_ptr, const TensorCachePtr &tensor_cache, | |||||
| SubGraphDefT *ms_graph) { | |||||
| MS_EXCEPTION_IF_NULL(kernel_graph_ptr); | |||||
| MS_EXCEPTION_IF_NULL(tensor_cache); | |||||
| MS_EXCEPTION_IF_NULL(ms_graph); | |||||
| auto kernels = kernel_graph_ptr->execution_order(); | |||||
| if (kernels.empty()) { | |||||
| MS_LOG(ERROR) << "this graph has no kernels"; | |||||
| return false; | |||||
| } | |||||
| for (auto &kernel : kernels) { | |||||
| if (!predict::utils::FindNodeInMap(node_indexs_, kernel)) { | |||||
| MS_LOG(ERROR) << "can not find any pk_key in inited node_indexs map"; | |||||
| return false; | |||||
| } | |||||
| auto kernel_key = node_indexs_[kernel.get()]; | |||||
| std::unique_ptr<OpDefT> ms_node(new OpDefT); | |||||
| ms_node->name = kernel->fullname_with_scope(); | |||||
| ms_node->fmkType = mindspore::predict::FmkType_CAFFE; | |||||
| auto c_name = AnfAlgo::GetCNodeName(kernel); | |||||
| auto fun = predict::convert::OpAttrFactory::GetInstance()->GetPackFun(c_name); | |||||
| if (fun == nullptr) { | |||||
| MS_LOG(WARNING) << "get node [" << kernel->fullname_with_scope() << "] attr failed."; | |||||
| } else if (!fun(kernel, ms_node.get())) { | |||||
| MS_LOG(ERROR) << "set node [" << kernel->fullname_with_scope() << "] attr failed."; | |||||
| return false; | |||||
| } | |||||
| auto output_size = AnfAlgo::GetOutputTensorNum(kernel); | |||||
| int nodeRefCount = SizeToInt(output_size); | |||||
| for (size_t j = 0; j < output_size; ++j) { | |||||
| TensorPtr device_tensor; | |||||
| if (convert_mode_ == kConvertCpuMode) { | |||||
| device_tensor = predict::utils::GetKernelCpuTensor(kernel, j); | |||||
| } else if (convert_mode_ == kConvertAscendMode) { | |||||
| device_tensor = predict::utils::GetKernelAscendTensor(kernel, j); | |||||
| } | |||||
| if (device_tensor == nullptr) { | |||||
| return false; | |||||
| } | |||||
| all_output_tensors_[kernel_key].push_back(device_tensor); | |||||
| if (!SetOpOutputIdx(kernel, device_tensor, tensor_cache, nodeRefCount, j, ms_node.get())) { | |||||
| return false; | |||||
| } | |||||
| } | |||||
| tmp_op_nodes_.emplace_back(ms_node.release()); | |||||
| } | |||||
| return true; | |||||
| } | |||||
| bool Kernel2Ms::KernelGraph2MsGraph(const KernelGraphPtr &kernel_graph_ptr) { | |||||
| MS_EXCEPTION_IF_NULL(kernel_graph_ptr); | |||||
| graph_index_ = 0; | |||||
| all_output_tensors_.clear(); | |||||
| node_indexs_.clear(); | |||||
| index_nodes_.clear(); | |||||
| std::unique_ptr<SubGraphDefT> sub_ms_graph(new SubGraphDefT()); | |||||
| if (!InitGraphIndx(kernel_graph_ptr)) { | |||||
| return false; | |||||
| } | |||||
| TransformGraphIndx(); | |||||
| tensor_cache_ptr_ = std::make_shared<TensorCache>(); | |||||
| // foreach node to init it's real output tensor | |||||
| if (!SetGraphInputTensors(kernel_graph_ptr, tensor_cache_ptr_, sub_ms_graph.get())) { | |||||
| return false; | |||||
| } | |||||
| // Get KernelGraph value node | |||||
| if (!SetGraphValueTensors(kernel_graph_ptr, tensor_cache_ptr_)) { | |||||
| return false; | |||||
| } | |||||
| // Get KernelGraph apply_kernel && add opNode | |||||
| if (!SetGraphOpTensors(kernel_graph_ptr, tensor_cache_ptr_, sub_ms_graph.get())) { | |||||
| return false; | |||||
| } | |||||
| // Get KernelGraph outputs | |||||
| if (!SetGraphOutputIdx(kernel_graph_ptr, tensor_cache_ptr_, sub_ms_graph.get(), &all_output_tensors_)) { | |||||
| return false; | |||||
| } | |||||
| auto kernels = kernel_graph_ptr->execution_order(); | |||||
| for (size_t i = 0; i < kernels.size(); ++i) { | |||||
| auto ms_node = tmp_op_nodes_[i]; | |||||
| if (!SetOpInputIdx(kernels[i], tensor_cache_ptr_, ms_node)) { | |||||
| return false; | |||||
| } | |||||
| std::unique_ptr<OpDefT> ms_node_tmp(ms_node); | |||||
| sub_ms_graph->nodes.emplace_back(std::move(ms_node_tmp)); | |||||
| } | |||||
| if (!SetAllTensors(tensor_cache_ptr_, sub_ms_graph.get())) { | |||||
| return false; | |||||
| } | |||||
| if (!SetMemResue()) { | |||||
| return false; | |||||
| } | |||||
| sub_ms_graph_ = std::move(sub_ms_graph); | |||||
| sub_ms_graph_->name = "default_sub_graph"; | |||||
| return true; | |||||
| } | |||||
| bool Kernel2Ms::CheckInputSizes(const std::vector<TensorPtr> &input_tensors, | |||||
| const std::vector<uint32_t> &all_input_idxs) { | |||||
| if (input_tensors.size() != all_input_idxs.size()) { | |||||
| MS_LOG(EXCEPTION) << "real input tensors size:" << input_tensors.size() | |||||
| << "not equal converted tesnors size:" << all_input_idxs.size() << "the graph has changed"; | |||||
| } | |||||
| for (auto in : all_input_idxs) { | |||||
| if (in < sub_ms_graph_->allTensors.size()) { | |||||
| auto real_tensor = input_tensors[in]; | |||||
| auto convert_dims = sub_ms_graph_->allTensors[in]->dims; | |||||
| auto real_dims = real_tensor->shape(); | |||||
| if (real_dims.size() != convert_dims.size()) { | |||||
| return false; | |||||
| } else { | |||||
| for (size_t i = 0; i < convert_dims.size(); ++i) { | |||||
| if (convert_dims[i] != real_dims[i]) { | |||||
| return false; | |||||
| } | |||||
| } | |||||
| } | |||||
| } else { | |||||
| MS_LOG(EXCEPTION) << "index: " << in << "in all_input_idxs is valid"; | |||||
| } | |||||
| } | |||||
| return true; | |||||
| } | |||||
| void Kernel2Ms::ReleaseContextRes() { | |||||
| tmp_op_nodes_.clear(); | |||||
| node_indexs_.clear(); | |||||
| index_nodes_.clear(); | |||||
| tensor_cache_ptr_ = nullptr; | |||||
| all_output_tensors_.clear(); | |||||
| } | |||||
| bool Kernel2Ms::KernelInput2MS(const std::vector<TensorPtr> &input_tensors) { | |||||
| const std::unordered_map<int, std::vector<ExTensorPtr>> &cache_tensors = tensor_cache_ptr_->GetCachedTensor(); | |||||
| if (cache_tensors.empty()) { | |||||
| return false; | |||||
| } | |||||
| auto all_weights_idxs = GetAllInputWeightIdxs(); | |||||
| auto all_input_idxs = GetAllInputIdxs(); | |||||
| auto real_input_size = input_tensors.size(); | |||||
| // check tensor size | |||||
| bool ret = CheckInputSizes(input_tensors, all_input_idxs); | |||||
| std::vector<uint32_t> match_to_rel_idxs; | |||||
| // indx order not matched,macth to it | |||||
| if (!ret) { | |||||
| for (auto idx : all_weights_idxs) { | |||||
| auto macth_idx = real_input_size - idx; | |||||
| match_to_rel_idxs.push_back(macth_idx); | |||||
| } | |||||
| } else { | |||||
| match_to_rel_idxs = all_weights_idxs; | |||||
| } | |||||
| if (match_to_rel_idxs.size() == all_weights_idxs.size()) { | |||||
| for (size_t j = 0; j < all_weights_idxs.size(); ++j) { | |||||
| auto cache_idx = all_weights_idxs[j]; | |||||
| auto match_idx = match_to_rel_idxs[j]; | |||||
| auto real_tensor = input_tensors[match_idx]; | |||||
| auto real_size = LongToSize(real_tensor->data().nbytes()); | |||||
| auto real_data = real_tensor->data_c(); | |||||
| MS_EXCEPTION_IF_NULL(real_data); | |||||
| if (sub_ms_graph_->allTensors[cache_idx] != nullptr) { | |||||
| sub_ms_graph_->allTensors[cache_idx]->data.resize(real_size); | |||||
| } | |||||
| if (memcpy_s(sub_ms_graph_->allTensors[cache_idx]->data.data(), real_size, real_data, real_size) != 0) { | |||||
| MS_LOG(ERROR) << "KernelInput2MS memcpy_s failed"; | |||||
| return false; | |||||
| } | |||||
| } | |||||
| } | |||||
| ReleaseContextRes(); | |||||
| return true; | |||||
| } | |||||
| bool Kernel2Ms::SaveDeviceModel(const std::shared_ptr<GraphDefT> &new_ms_graph_ptr, const std::string &save_path_name) { | |||||
| MS_EXCEPTION_IF_NULL(new_ms_graph_ptr); | |||||
| return predict::utils::SaveDeviceModelUtil(new_ms_graph_ptr, save_path_name, sub_ms_graph_.release()); | |||||
| } | |||||
| } // namespace executor | |||||
| } // namespace mindspore | |||||
| @@ -1,118 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_PREDICT_CONVERTER_KERNEL_TO_MS_H_ | |||||
| #define MINDSPORE_CCSRC_PREDICT_CONVERTER_KERNEL_TO_MS_H_ | |||||
| #include <string> | |||||
| #include <unordered_map> | |||||
| #include <memory> | |||||
| #include <vector> | |||||
| #include <utility> | |||||
| #include "backend/session/kernel_graph.h" | |||||
| #include "predict/converter/executor_tensor.h" | |||||
| #include "predict/schema/inner/ms_generated.h" | |||||
| #include "predict/converter/attr_utils/convert_util.h" | |||||
| static constexpr size_t kTupleGetItemIndex = 2; | |||||
| namespace mindspore { | |||||
| namespace executor { | |||||
| using KernelGraphPtr = std::shared_ptr<mindspore::session::KernelGraph>; | |||||
| enum ConvertMode { kConvertCpuMode, kConvertAscendMode, kConvertUnused }; | |||||
| enum TargetMode { kCPUTarget, kGPUTarget, kUnknowTarget }; | |||||
| class Kernel2Ms { | |||||
| public: | |||||
| static Kernel2Ms &GetInstance(); | |||||
| Kernel2Ms(const Kernel2Ms &) = delete; | |||||
| Kernel2Ms &operator=(const Kernel2Ms &) = delete; | |||||
| bool KernelGraph2MsGraph(const KernelGraphPtr &kernel_graph_ptr); | |||||
| bool KernelInput2MS(const std::vector<TensorPtr> &input_tensors); | |||||
| ConvertMode convert_mode() const { return convert_mode_; } | |||||
| void set_convert_mode(ConvertMode convert_mode) { convert_mode_ = convert_mode; } | |||||
| TargetMode device_target() const { return device_target_; } | |||||
| void set_device_target(TargetMode device_target) { device_target_ = device_target; } | |||||
| bool SaveDeviceModel(const std::shared_ptr<GraphDefT> &new_ms_graph_ptr, const std::string &save_path_name); | |||||
| private: | |||||
| Kernel2Ms() : graph_index_(0) {} | |||||
| void ReleaseContextRes(); | |||||
| ~Kernel2Ms() = default; | |||||
| bool SetAllTensors(const TensorCachePtr &tensor_cache, SubGraphDefT *sub_graph_def_t); | |||||
| bool SetOpInputIdx(const CNodePtr &c_node_ptr, const TensorCachePtr &tensor_cache, OpDefT *ms_node); | |||||
| bool SetOpOutputIdx(const CNodePtr &c_node_ptr, const TensorPtr &output_tensor, const TensorCachePtr &tensor_cache, | |||||
| int ref_count, size_t order_index, OpDefT *ms_node); | |||||
| bool SetGraphOutputIdx(const KernelGraphPtr &kernel_graph_ptr, const TensorCachePtr &tensor_cache, | |||||
| SubGraphDefT *sub_graph_def_t, AllOutputTensors *all_output_tensors); | |||||
| void TransformGraphIndx(); | |||||
| void GetRealInpoutsPtr(const AnfNodePtr &node, std::vector<AnfNodePtr> *real_inputs, | |||||
| std::vector<size_t> *real_output_idx); | |||||
| bool InitGraphIndx(const KernelGraphPtr &kernel_graph_ptr); | |||||
| bool InitGraphInputsIndx(const KernelGraphPtr &kernel_graph_ptr); | |||||
| bool InitGraphValueNodesIndx(const KernelGraphPtr &kernel_graph_ptr); | |||||
| bool InitGraphOpsIndx(const KernelGraphPtr &kernel_graph_ptr); | |||||
| bool InitGraphOutputsIndx(const KernelGraphPtr &kernel_graph_ptr); | |||||
| bool SetGraphInputTensors(const KernelGraphPtr &kernel_graph_ptr, const TensorCachePtr &tensor_cache, | |||||
| SubGraphDefT *sub_graph_def_t); | |||||
| bool SetGraphValueTensors(const KernelGraphPtr &kernel_graph_ptr, const TensorCachePtr &tensor_cache); | |||||
| bool SetGraphOpTensors(const KernelGraphPtr &kernel_graph_ptr, const TensorCachePtr &tensor_cache, | |||||
| SubGraphDefT *sub_graph_def_t); | |||||
| std::vector<uint32_t> GetAllInputWeightIdxs() const { return input_weight_idxs_; } | |||||
| std::vector<uint32_t> GetAllInputIdxs() const { return all_input_idxs_; } | |||||
| bool CheckInputSizes(const std::vector<TensorPtr> &input_tensors, const std::vector<uint32_t> &all_input_idxs); | |||||
| bool SetMemResue() const; | |||||
| SubGraphPtr sub_ms_graph_; | |||||
| AllOutputTensors all_output_tensors_; | |||||
| std::vector<OpDefT *> tmp_op_nodes_; | |||||
| std::unordered_map<MsKernelKey, int> node_indexs_; | |||||
| std::unordered_map<int, MsKernelKey> index_nodes_; | |||||
| int graph_index_ = 0; | |||||
| TensorCachePtr tensor_cache_ptr_ = nullptr; | |||||
| ConvertMode convert_mode_ = kConvertCpuMode; | |||||
| TargetMode device_target_ = kCPUTarget; | |||||
| std::vector<uint32_t> input_weight_idxs_; | |||||
| std::vector<uint32_t> all_input_idxs_; | |||||
| }; | |||||
| using Kernel2MsPtr = std::shared_ptr<Kernel2Ms>; | |||||
| } // namespace executor | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_PREDICT_CONVERTER_KERNEL_TO_MS_H_ | |||||
| @@ -1,110 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "predict/converter/lite_model/op_attr_packer.h" | |||||
| #include "./securec.h" | |||||
| namespace mindspore { | |||||
| namespace predict { | |||||
| namespace convert { | |||||
// Forward declarations of the per-op attribute packers registered in
// OpAttrFactory below.  Each packer converts the attributes of one CNode
// kind into the corresponding lite-model OpDefT payload and returns false
// on invalid input.
bool Conv2dPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op);
bool MatMulPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op);
bool BiasAddPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op);
bool ReshapePacker(const CNodePtr &c_node_ptr, OpDefT *ms_op);
bool ActivationPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op);
bool PoolingPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op);
bool FusedBatchNormPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op);
bool AddPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op);
bool CastPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op);
bool MeanPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op);
bool SoftmaxPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op);
bool ScalePacker(const CNodePtr &c_node_ptr, OpDefT *ms_op);
bool AddFoldPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op);
bool ArgMaxPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op);
bool BatchNormFoldPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op);
bool FakeQuantWithMinMaxPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op);
bool FakeQuantWithMinMaxPerChannelPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op);
bool MulPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op);
bool MulFoldPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op);
bool SqueezePacker(const CNodePtr &c_node_ptr, OpDefT *ms_op);
| OpAttrFactory::OpAttrFactory() { | |||||
| pack_funs_ = {{"Conv2D", Conv2dPacker}, | |||||
| {"MatMul", MatMulPacker}, | |||||
| {"BiasAdd", BiasAddPacker}, | |||||
| {"Reshape", ReshapePacker}, | |||||
| {"Activation", ActivationPacker}, | |||||
| {"ReLU", ActivationPacker}, | |||||
| {"ReLU6", ActivationPacker}, | |||||
| {"EReLU", ActivationPacker}, | |||||
| {"LeakyReLU", ActivationPacker}, | |||||
| {"Sigmoid", ActivationPacker}, | |||||
| {"Softsign", ActivationPacker}, | |||||
| {"Softplus", ActivationPacker}, | |||||
| {"Tanh", ActivationPacker}, | |||||
| {"HSwish", ActivationPacker}, | |||||
| {"HSigmoid", ActivationPacker}, | |||||
| {"MaxPool", PoolingPacker}, | |||||
| {"MaxPool2D", PoolingPacker}, | |||||
| {"MeanPool", PoolingPacker}, | |||||
| {"GlobalPool", PoolingPacker}, | |||||
| {"FusedBatchNorm", FusedBatchNormPacker}, | |||||
| {"FusedBatchNormGrad", FusedBatchNormPacker}, | |||||
| {"Cast", CastPacker}, | |||||
| {"TensorAdd", AddPacker}, | |||||
| {"SoftMax", SoftmaxPacker}, | |||||
| {"SimpleMean", MeanPacker}, | |||||
| {"ReduceMean", MeanPacker}, | |||||
| {"AddFold", AddFoldPacker}, | |||||
| {"ArgMax", ArgMaxPacker}, | |||||
| {"BatchNorm", BatchNormFoldPacker}, | |||||
| {"FakeQuantPerLayer", FakeQuantWithMinMaxPacker}, | |||||
| {"FakeQuantPerChannel", FakeQuantWithMinMaxPerChannelPacker}, | |||||
| {"Mul", MulPacker}, | |||||
| {"MulFold", MulFoldPacker}, | |||||
| {"Squeeze", SqueezePacker}}; | |||||
| } | |||||
| OpAttrPackFun OpAttrFactory::GetPackFun(const std::string &opType) { | |||||
| if (pack_funs_.find(opType) == pack_funs_.end()) { | |||||
| MS_LOG(WARNING) << "Op Attr pack fun [" << opType << "] not found."; | |||||
| return nullptr; | |||||
| } | |||||
| return pack_funs_[opType]; | |||||
| } | |||||
| mindspore::predict::Format GetAttrFormat(const std::string &format) { | |||||
| if (format == kOpFormat_NCHW) { | |||||
| return predict::Format::Format_NCHW; | |||||
| } else if (format == kOpFormat_NHWC) { | |||||
| return predict::Format::Format_NHWC; | |||||
| } else { | |||||
| return predict::Format::Format_NUM_OF_FORMAT; | |||||
| } | |||||
| } | |||||
| mindspore::predict::PadMode GetAttrPadMode(const std::string &pad_mode) { | |||||
| if (pad_mode == "same") { | |||||
| return mindspore::predict::PadMode::PadMode_SAME; | |||||
| } else if (pad_mode == "valid") { | |||||
| return mindspore::predict::PadMode::PadMode_VALID; | |||||
| } else { | |||||
| return mindspore::predict::PadMode::PadMode_NOTSET; | |||||
| } | |||||
| } | |||||
| } // namespace convert | |||||
| } // namespace predict | |||||
| } // namespace mindspore | |||||
| @@ -1,58 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_PREDICT_CONVERTER_OP_ATTR_PACKER_H_ | |||||
| #define MINDSPORE_CCSRC_PREDICT_CONVERTER_OP_ATTR_PACKER_H_ | |||||
| #include <utility> | |||||
| #include <string> | |||||
| #include <unordered_map> | |||||
| #include "backend/session/anf_runtime_algorithm.h" | |||||
| #include "predict/schema/inner/ms_generated.h" | |||||
// Dimension indices of a 4-D NCHW-laid-out shape, used by the packers when
// reading shape/kernel attribute vectors.
static constexpr size_t kNIndex = 0;
static constexpr size_t kCIndex = 1;
static constexpr size_t kHIndex = 2;
static constexpr size_t kWIndex = 3;
static constexpr size_t kNCHWSize = 4;  // expected rank of an NCHW shape
| namespace mindspore { | |||||
| namespace predict { | |||||
| namespace convert { | |||||
| using OpAttrPackFun = bool (*)(const CNodePtr &c_node_ptr, OpDefT *ms_op); | |||||
// Singleton registry mapping front-end op-type names to the function that
// packs that op's attributes into a lite-model OpDefT.
class OpAttrFactory {
 public:
  // Returns the process-wide factory instance (function-local static,
  // constructed on first use).
  static OpAttrFactory *GetInstance() {
    static OpAttrFactory instance;
    return &instance;
  }
  OpAttrFactory(const OpAttrFactory &) = delete;
  OpAttrFactory &operator=(const OpAttrFactory &) = delete;
  // Returns the packer registered for op_type, or nullptr if unknown.
  OpAttrPackFun GetPackFun(const std::string &op_type);
  ~OpAttrFactory() { pack_funs_.clear(); }
  OpAttrFactory();  // populates pack_funs_ with every supported op
 private:
  std::unordered_map<std::string, OpAttrPackFun> pack_funs_;  // op name -> packer
};
| mindspore::predict::Format GetAttrFormat(const std::string &format); | |||||
| mindspore::predict::PadMode GetAttrPadMode(const std::string &pad_mode); | |||||
| } // namespace convert | |||||
| } // namespace predict | |||||
| } // namespace mindspore | |||||
#endif  // MINDSPORE_CCSRC_PREDICT_CONVERTER_OP_ATTR_PACKER_H_
| @@ -1,59 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "predict/converter/lite_model/op_attr_packer.h" | |||||
| namespace mindspore { | |||||
| namespace predict { | |||||
| namespace convert { | |||||
| bool ActivationPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op) { | |||||
| if (c_node_ptr == nullptr || ms_op == nullptr) { | |||||
| return false; | |||||
| } | |||||
| std::unique_ptr<ActivationT> attr(new ActivationT()); | |||||
| MS_EXCEPTION_IF_NULL(attr); | |||||
| if (AnfAlgo::GetCNodeName(c_node_ptr) == "ReLU") { | |||||
| attr->type = predict::ActivationType::ActivationType_RELU; | |||||
| } else if (AnfAlgo::GetCNodeName(c_node_ptr) == "Sigmoid") { | |||||
| attr->type = predict::ActivationType::ActivationType_SIGMOID; | |||||
| } else if (AnfAlgo::GetCNodeName(c_node_ptr) == "ReLU6") { | |||||
| attr->type = predict::ActivationType::ActivationType_RELU6; | |||||
| } else if (AnfAlgo::GetCNodeName(c_node_ptr) == "ELU") { | |||||
| attr->type = predict::ActivationType::ActivationType_ELU; | |||||
| } else if (AnfAlgo::GetCNodeName(c_node_ptr) == "Leaky_ReLU") { | |||||
| attr->type = predict::ActivationType::ActivationType_LEAKY_RELU; | |||||
| } else if (AnfAlgo::GetCNodeName(c_node_ptr) == "ABS") { | |||||
| attr->type = predict::ActivationType::ActivationType_ABS; | |||||
| } else if (AnfAlgo::GetCNodeName(c_node_ptr) == "ReLU1") { | |||||
| attr->type = predict::ActivationType::ActivationType_RELU1; | |||||
| } else if (AnfAlgo::GetCNodeName(c_node_ptr) == "Softsign") { | |||||
| attr->type = predict::ActivationType::ActivationType_SOFTSIGN; | |||||
| } else if (AnfAlgo::GetCNodeName(c_node_ptr) == "Softplus") { | |||||
| attr->type = predict::ActivationType::ActivationType_SOFTPLUS; | |||||
| } else if (AnfAlgo::GetCNodeName(c_node_ptr) == "Tanh") { | |||||
| attr->type = predict::ActivationType::ActivationType_TANH; | |||||
| } else { | |||||
| attr->type = predict::ActivationType::ActivationType_UNKNOW; | |||||
| MS_LOG(WARNING) << "unknow Activation"; | |||||
| } | |||||
| ms_op->name = c_node_ptr->fullname_with_scope(); | |||||
| ms_op->attr.type = OpT_Activation; | |||||
| ms_op->attr.value = attr.release(); | |||||
| return true; | |||||
| } | |||||
| } // namespace convert | |||||
| } // namespace predict | |||||
| } // namespace mindspore | |||||
| @@ -1,35 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "predict/converter/lite_model/op_attr_packer.h" | |||||
| namespace mindspore { | |||||
| namespace predict { | |||||
| namespace convert { | |||||
| bool AddPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op) { | |||||
| if (c_node_ptr == nullptr || ms_op == nullptr) { | |||||
| return false; | |||||
| } | |||||
| std::unique_ptr<AddT> attr(new AddT()); | |||||
| MS_EXCEPTION_IF_NULL(attr); | |||||
| ms_op->name = c_node_ptr->fullname_with_scope(); | |||||
| ms_op->attr.type = OpT_Add; | |||||
| ms_op->attr.value = attr.release(); | |||||
| return true; | |||||
| } | |||||
| } // namespace convert | |||||
| } // namespace predict | |||||
| } // namespace mindspore | |||||
| @@ -1,34 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "predict/converter/lite_model/op_attr_packer.h" | |||||
| namespace mindspore { | |||||
| namespace predict { | |||||
| namespace convert { | |||||
| bool AddFoldPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op) { | |||||
| if (c_node_ptr == nullptr || ms_op == nullptr) { | |||||
| return false; | |||||
| } | |||||
| std::unique_ptr<AddFoldT> attr(new AddFoldT()); | |||||
| MS_EXCEPTION_IF_NULL(attr); | |||||
| ms_op->attr.type = OpT_AddFold; | |||||
| ms_op->attr.value = attr.release(); | |||||
| return true; | |||||
| } | |||||
| } // namespace convert | |||||
| } // namespace predict | |||||
| } // namespace mindspore | |||||
| @@ -1,34 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "predict/converter/lite_model/op_attr_packer.h" | |||||
| namespace mindspore { | |||||
| namespace predict { | |||||
| namespace convert { | |||||
| bool ArgMaxPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op) { | |||||
| if (c_node_ptr == nullptr || ms_op == nullptr) { | |||||
| return false; | |||||
| } | |||||
| std::unique_ptr<ArgMaxT> attr(new ArgMaxT()); | |||||
| MS_EXCEPTION_IF_NULL(attr); | |||||
| ms_op->attr.type = OpT_ArgMax; | |||||
| ms_op->attr.value = attr.release(); | |||||
| return true; | |||||
| } | |||||
| } // namespace convert | |||||
| } // namespace predict | |||||
| } // namespace mindspore | |||||
| @@ -1,34 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "predict/converter/lite_model/op_attr_packer.h" | |||||
| namespace mindspore { | |||||
| namespace predict { | |||||
| namespace convert { | |||||
| bool BatchNormFoldPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op) { | |||||
| if (c_node_ptr == nullptr || ms_op == nullptr) { | |||||
| return false; | |||||
| } | |||||
| std::unique_ptr<BatchNormFoldT> attr(new BatchNormFoldT()); | |||||
| MS_EXCEPTION_IF_NULL(attr); | |||||
| ms_op->attr.type = OpT_BatchNormFold; | |||||
| ms_op->attr.value = attr.release(); | |||||
| return true; | |||||
| } | |||||
| } // namespace convert | |||||
| } // namespace predict | |||||
| } // namespace mindspore | |||||
| @@ -1,37 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include <memory> | |||||
| #include "predict/converter/lite_model/op_attr_packer.h" | |||||
| namespace mindspore { | |||||
| namespace predict { | |||||
| namespace convert { | |||||
| bool BiasAddPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op) { | |||||
| if (c_node_ptr == nullptr || ms_op == nullptr) { | |||||
| return false; | |||||
| } | |||||
| std::unique_ptr<BiasAddT> attr(new BiasAddT()); | |||||
| MS_EXCEPTION_IF_NULL(attr); | |||||
| attr->axis = {1}; | |||||
| ms_op->name = c_node_ptr->fullname_with_scope(); | |||||
| ms_op->attr.type = OpT_BiasAdd; | |||||
| ms_op->attr.value = attr.release(); | |||||
| return true; | |||||
| } | |||||
| } // namespace convert | |||||
| } // namespace predict | |||||
| } // namespace mindspore | |||||
| @@ -1,37 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "predict/converter/lite_model/op_attr_packer.h" | |||||
| namespace mindspore { | |||||
| namespace predict { | |||||
| namespace convert { | |||||
| bool CastPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op) { | |||||
| if (c_node_ptr == nullptr || ms_op == nullptr) { | |||||
| return false; | |||||
| } | |||||
| std::unique_ptr<CastT> attr(new CastT()); | |||||
| MS_EXCEPTION_IF_NULL(attr); | |||||
| attr->srcT = 0; | |||||
| attr->dstT = 0; | |||||
| ms_op->name = c_node_ptr->fullname_with_scope(); | |||||
| ms_op->attr.type = OpT_Cast; | |||||
| ms_op->attr.value = attr.release(); | |||||
| return true; | |||||
| } | |||||
| } // namespace convert | |||||
| } // namespace predict | |||||
| } // namespace mindspore | |||||
| @@ -1,63 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "predict/converter/lite_model/op_attr_packer.h" | |||||
| namespace mindspore { | |||||
| namespace predict { | |||||
| namespace convert { | |||||
| bool Conv2dPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op) { | |||||
| if (c_node_ptr == nullptr || ms_op == nullptr) { | |||||
| return false; | |||||
| } | |||||
| int kernel_group_value = AnfAlgo::GetNodeAttr<int>(c_node_ptr, "group"); | |||||
| int kernel_channel_value = AnfAlgo::GetNodeAttr<int>(c_node_ptr, "out_channel"); | |||||
| std::vector<int> kernel_size_value = AnfAlgo::GetNodeAttr<std::vector<int>>(c_node_ptr, "kernel_size"); | |||||
| std::string kernel_pad_mode_value = AnfAlgo::GetNodeAttr<std::string>(c_node_ptr, "pad_mode"); | |||||
| int kernel_pad_value = AnfAlgo::GetNodeAttr<int>(c_node_ptr, "pad"); | |||||
| auto kernel_stride_value = AnfAlgo::GetNodeAttr<std::vector<int>>(c_node_ptr, "stride"); | |||||
| auto kernel_dilation_value = AnfAlgo::GetNodeAttr<std::vector<int>>(c_node_ptr, "dilation"); | |||||
| std::string kernel_data_format_value = AnfAlgo::GetNodeAttr<std::string>(c_node_ptr, "data_format"); | |||||
| std::unique_ptr<Conv2DT> attr(new Conv2DT()); | |||||
| MS_EXCEPTION_IF_NULL(attr); | |||||
| attr->format = GetAttrFormat(kernel_data_format_value); | |||||
| attr->group = kernel_group_value; | |||||
| auto in_shape = AnfAlgo::GetPrevNodeOutputInferShape(c_node_ptr, 1); | |||||
| if (in_shape.size() != kNCHWSize) { | |||||
| return false; | |||||
| } | |||||
| attr->channelIn = SizeToInt(in_shape[1]); | |||||
| attr->channelOut = kernel_channel_value; | |||||
| attr->kernelW = kernel_size_value[0]; | |||||
| attr->kernelH = kernel_size_value[1]; | |||||
| attr->strideW = kernel_stride_value[0]; | |||||
| attr->strideH = kernel_stride_value[1]; | |||||
| attr->padMode = GetAttrPadMode(kernel_pad_mode_value); | |||||
| attr->padUp = kernel_pad_value; | |||||
| attr->padDown = kernel_pad_value; | |||||
| attr->padLeft = kernel_pad_value; | |||||
| attr->padRight = kernel_pad_value; | |||||
| attr->dilateW = kernel_dilation_value[0]; | |||||
| attr->dilateH = kernel_dilation_value[1]; | |||||
| attr->hasBias = false; | |||||
| ms_op->name = c_node_ptr->fullname_with_scope(); | |||||
| ms_op->attr.type = OpT_Conv2D; | |||||
| ms_op->attr.value = attr.release(); | |||||
| return true; | |||||
| } | |||||
| } // namespace convert | |||||
| } // namespace predict | |||||
| } // namespace mindspore | |||||
| @@ -1,34 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "predict/converter/lite_model/op_attr_packer.h" | |||||
| namespace mindspore { | |||||
| namespace predict { | |||||
| namespace convert { | |||||
| bool FakeQuantWithMinMaxPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op) { | |||||
| if (c_node_ptr == nullptr || ms_op == nullptr) { | |||||
| return false; | |||||
| } | |||||
| std::unique_ptr<FakeQuantWithMinMaxT> attr(new FakeQuantWithMinMaxT()); | |||||
| MS_EXCEPTION_IF_NULL(attr); | |||||
| ms_op->attr.type = OpT_FakeQuantWithMinMax; | |||||
| ms_op->attr.value = attr.release(); | |||||
| return true; | |||||
| } | |||||
| } // namespace convert | |||||
| } // namespace predict | |||||
| } // namespace mindspore | |||||
| @@ -1,34 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "predict/converter/lite_model/op_attr_packer.h" | |||||
| namespace mindspore { | |||||
| namespace predict { | |||||
| namespace convert { | |||||
| bool FakeQuantWithMinMaxPerChannelPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op) { | |||||
| if (c_node_ptr == nullptr || ms_op == nullptr) { | |||||
| return false; | |||||
| } | |||||
| std::unique_ptr<FakeQuantWithMinMaxPerChannelT> attr(new FakeQuantWithMinMaxPerChannelT()); | |||||
| MS_EXCEPTION_IF_NULL(attr); | |||||
| ms_op->attr.type = OpT_FakeQuantWithMinMaxPerChannel; | |||||
| ms_op->attr.value = attr.release(); | |||||
| return true; | |||||
| } | |||||
| } // namespace convert | |||||
| } // namespace predict | |||||
| } // namespace mindspore | |||||
| @@ -1,37 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "predict/converter/lite_model/op_attr_packer.h" | |||||
| namespace mindspore { | |||||
| namespace predict { | |||||
| namespace convert { | |||||
| bool FusedBatchNormPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op) { | |||||
| if (c_node_ptr == nullptr || ms_op == nullptr) { | |||||
| return false; | |||||
| } | |||||
| std::unique_ptr<FusedBatchNormT> attr(new FusedBatchNormT()); | |||||
| MS_EXCEPTION_IF_NULL(attr); | |||||
| auto kernel_epsilon = AnfAlgo::GetNodeAttr<float>(c_node_ptr, "epsilon"); | |||||
| attr->epsilon = kernel_epsilon; | |||||
| ms_op->name = c_node_ptr->fullname_with_scope(); | |||||
| ms_op->attr.type = OpT_FusedBatchNorm; | |||||
| ms_op->attr.value = attr.release(); | |||||
| return true; | |||||
| } | |||||
| } // namespace convert | |||||
| } // namespace predict | |||||
| } // namespace mindspore | |||||
| @@ -1,39 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "predict/converter/lite_model/op_attr_packer.h" | |||||
| namespace mindspore { | |||||
| namespace predict { | |||||
| namespace convert { | |||||
| bool MatMulPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op) { | |||||
| if (c_node_ptr == nullptr || ms_op == nullptr) { | |||||
| return false; | |||||
| } | |||||
| bool kernel_transpore_a = AnfAlgo::GetNodeAttr<bool>(c_node_ptr, "transpose_a"); | |||||
| bool kernel_transpore_b = AnfAlgo::GetNodeAttr<bool>(c_node_ptr, "transpose_b"); | |||||
| std::unique_ptr<MatMulT> attr(new MatMulT()); | |||||
| MS_EXCEPTION_IF_NULL(attr); | |||||
| attr->transposeA = kernel_transpore_a; | |||||
| attr->transposeB = kernel_transpore_b; | |||||
| ms_op->name = c_node_ptr->fullname_with_scope(); | |||||
| ms_op->attr.type = OpT_MatMul; | |||||
| ms_op->attr.value = attr.release(); | |||||
| return true; | |||||
| } | |||||
| } // namespace convert | |||||
| } // namespace predict | |||||
| } // namespace mindspore | |||||
| @@ -1,37 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "predict/converter/lite_model/op_attr_packer.h" | |||||
| namespace mindspore { | |||||
| namespace predict { | |||||
| namespace convert { | |||||
| bool MeanPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op) { | |||||
| if (c_node_ptr == nullptr || ms_op == nullptr) { | |||||
| return false; | |||||
| } | |||||
| std::unique_ptr<MeanT> attr(new MeanT()); | |||||
| MS_EXCEPTION_IF_NULL(attr); | |||||
| attr->axis = {1}; | |||||
| attr->keepDims = false; | |||||
| ms_op->name = c_node_ptr->fullname_with_scope(); | |||||
| ms_op->attr.type = OpT_Mean; | |||||
| ms_op->attr.value = attr.release(); | |||||
| return true; | |||||
| } | |||||
| } // namespace convert | |||||
| } // namespace predict | |||||
| } // namespace mindspore | |||||
| @@ -1,34 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "predict/converter/lite_model/op_attr_packer.h" | |||||
| namespace mindspore { | |||||
| namespace predict { | |||||
| namespace convert { | |||||
| bool MulPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op) { | |||||
| if (c_node_ptr == nullptr || ms_op == nullptr) { | |||||
| return false; | |||||
| } | |||||
| std::unique_ptr<MulT> attr(new MulT()); | |||||
| MS_EXCEPTION_IF_NULL(attr); | |||||
| ms_op->attr.type = OpT_Mul; | |||||
| ms_op->attr.value = attr.release(); | |||||
| return true; | |||||
| } | |||||
| } // namespace convert | |||||
| } // namespace predict | |||||
| } // namespace mindspore | |||||
| @@ -1,35 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "predict/converter/lite_model/op_attr_packer.h" | |||||
| namespace mindspore { | |||||
| namespace predict { | |||||
| namespace convert { | |||||
| bool MulFoldPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op) { | |||||
| if (c_node_ptr == nullptr || ms_op == nullptr) { | |||||
| return false; | |||||
| } | |||||
| std::unique_ptr<MulFoldT> attr(new MulFoldT()); | |||||
| MS_EXCEPTION_IF_NULL(attr); | |||||
| ms_op->name = c_node_ptr->fullname_with_scope(); | |||||
| ms_op->attr.type = OpT_MulFold; | |||||
| ms_op->attr.value = attr.release(); | |||||
| return true; | |||||
| } | |||||
| } // namespace convert | |||||
| } // namespace predict | |||||
| } // namespace mindspore | |||||
| @@ -1,61 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "predict/converter/lite_model/op_attr_packer.h" | |||||
| namespace mindspore { | |||||
| namespace predict { | |||||
| namespace convert { | |||||
| bool PoolingPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op) { | |||||
| if (c_node_ptr == nullptr || ms_op == nullptr) { | |||||
| return false; | |||||
| } | |||||
| std::unique_ptr<PoolingT> attr(new PoolingT()); | |||||
| MS_EXCEPTION_IF_NULL(attr); | |||||
| std::string kernel_format_value = AnfAlgo::GetNodeAttr<std::string>(c_node_ptr, "data_format"); | |||||
| attr->format = GetAttrFormat(kernel_format_value); | |||||
| auto c_name = AnfAlgo::GetCNodeName(c_node_ptr); | |||||
| if (c_name == "MaxPool") { | |||||
| ms_op->name = c_node_ptr->fullname_with_scope(); | |||||
| attr->poolingMode = mindspore::predict::PoolMode::PoolMode_MAX_POOLING; | |||||
| } else if (c_name == "MeanPool") { | |||||
| ms_op->name = c_node_ptr->fullname_with_scope(); | |||||
| attr->poolingMode = mindspore::predict::PoolMode::PoolMode_MEAN_POOLING; | |||||
| } else if (c_name == "GlobalPool") { | |||||
| ms_op->name = c_node_ptr->fullname_with_scope(); | |||||
| } else { | |||||
| MS_LOG(ERROR) << "unknowed pooling type."; | |||||
| return false; | |||||
| } | |||||
| std::vector<int> kernel_ksize = AnfAlgo::GetNodeAttr<std::vector<int>>(c_node_ptr, "ksize"); | |||||
| attr->windowW = kernel_ksize[kHIndex]; | |||||
| attr->windowH = kernel_ksize[kWIndex]; | |||||
| std::vector<int> kernel_strides = AnfAlgo::GetNodeAttr<std::vector<int>>(c_node_ptr, "strides"); | |||||
| attr->strideW = kernel_strides[kHIndex]; | |||||
| attr->strideH = kernel_strides[kWIndex]; | |||||
| std::string kernel_pad_mode_value = AnfAlgo::GetNodeAttr<std::string>(c_node_ptr, "padding"); | |||||
| attr->padMode = GetAttrPadMode(kernel_pad_mode_value); | |||||
| attr->padUp = 0; | |||||
| attr->padDown = 0; | |||||
| attr->padLeft = 0; | |||||
| attr->padRight = 0; | |||||
| ms_op->attr.type = OpT_Pooling; | |||||
| ms_op->attr.value = attr.release(); | |||||
| return true; | |||||
| } | |||||
| } // namespace convert | |||||
| } // namespace predict | |||||
| } // namespace mindspore | |||||
| @@ -1,36 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "predict/converter/lite_model/op_attr_packer.h" | |||||
| namespace mindspore { | |||||
| namespace predict { | |||||
| namespace convert { | |||||
| bool ReshapePacker(const CNodePtr &c_node_ptr, OpDefT *ms_op) { | |||||
| if (c_node_ptr == nullptr || ms_op == nullptr) { | |||||
| return false; | |||||
| } | |||||
| std::unique_ptr<ReshapeT> attr(new ReshapeT()); | |||||
| MS_EXCEPTION_IF_NULL(attr); | |||||
| attr->format = predict::Format::Format_NCHW; | |||||
| ms_op->name = c_node_ptr->fullname_with_scope(); | |||||
| ms_op->attr.type = OpT_Reshape; | |||||
| ms_op->attr.value = attr.release(); | |||||
| return true; | |||||
| } | |||||
| } // namespace convert | |||||
| } // namespace predict | |||||
| } // namespace mindspore | |||||
| @@ -1,36 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "predict/converter/lite_model/op_attr_packer.h" | |||||
| namespace mindspore { | |||||
| namespace predict { | |||||
| namespace convert { | |||||
| bool ScalePacker(const CNodePtr &c_node_ptr, OpDefT *ms_op) { | |||||
| if (c_node_ptr == nullptr || ms_op == nullptr) { | |||||
| return false; | |||||
| } | |||||
| std::unique_ptr<ScaleT> attr(new ScaleT()); | |||||
| MS_EXCEPTION_IF_NULL(attr); | |||||
| attr->format = predict::Format::Format_NCHW; | |||||
| ms_op->name = c_node_ptr->fullname_with_scope(); | |||||
| ms_op->attr.type = OpT_Scale; | |||||
| ms_op->attr.value = attr.release(); | |||||
| return true; | |||||
| } | |||||
| } // namespace convert | |||||
| } // namespace predict | |||||
| } // namespace mindspore | |||||
| @@ -1,36 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "predict/converter/lite_model/op_attr_packer.h" | |||||
| namespace mindspore { | |||||
| namespace predict { | |||||
| namespace convert { | |||||
| bool SoftmaxPacker(const CNodePtr &c_node_ptr, OpDefT *ms_op) { | |||||
| if (c_node_ptr == nullptr || ms_op == nullptr) { | |||||
| return false; | |||||
| } | |||||
| std::unique_ptr<SoftMaxT> attr(new SoftMaxT()); | |||||
| MS_EXCEPTION_IF_NULL(attr); | |||||
| attr->axis = {1}; | |||||
| ms_op->name = c_node_ptr->fullname_with_scope(); | |||||
| ms_op->attr.type = OpT_SoftMax; | |||||
| ms_op->attr.value = attr.release(); | |||||
| return true; | |||||
| } | |||||
| } // namespace convert | |||||
| } // namespace predict | |||||
| } // namespace mindspore | |||||
| @@ -1,38 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "predict/converter/lite_model/op_attr_packer.h" | |||||
| namespace mindspore { | |||||
| namespace predict { | |||||
| namespace convert { | |||||
| bool SqueezePacker(const CNodePtr &c_node_ptr, OpDefT *ms_op) { | |||||
| if (c_node_ptr == nullptr || ms_op == nullptr) { | |||||
| return false; | |||||
| } | |||||
| std::unique_ptr<SqueezeT> attr(new SqueezeT()); | |||||
| MS_EXCEPTION_IF_NULL(attr); | |||||
| std::vector<int> kernel_axis_value = AnfAlgo::GetNodeAttr<std::vector<int>>(c_node_ptr, "axis"); | |||||
| attr->axis = kernel_axis_value; | |||||
| ms_op->attr.type = OpT_Squeeze; | |||||
| ms_op->attr.value = attr.release(); | |||||
| return true; | |||||
| } | |||||
| } // namespace convert | |||||
| } // namespace predict | |||||
| } // namespace mindspore | |||||
| @@ -1,31 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "predict/generator/ir/ir_model.h" | |||||
| #include <utility> | |||||
| #include <algorithm> | |||||
| #include "utils/log_adapter.h" | |||||
| namespace mindspore { | |||||
| namespace generator { | |||||
| IRModel::~IRModel() { ir_tasks_.clear(); } | |||||
| void IRModel::SetIrTaskInfos(const std::vector<IRtaskInfoPtr> &ir_tasks) { | |||||
| (void)std::copy(ir_tasks.begin(), ir_tasks.end(), std::back_inserter(ir_tasks_)); | |||||
| } | |||||
| } // namespace generator | |||||
| } // namespace mindspore | |||||
| @@ -1,37 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_EXECUTOR_GENERATOR_IR_IR_MODEL_H_ | |||||
| #define MINDSPORE_CCSRC_EXECUTOR_GENERATOR_IR_IR_MODEL_H_ | |||||
| #include <string> | |||||
| #include <vector> | |||||
| #include <memory> | |||||
| #include "predict/generator/ir/ir_task_info.h" | |||||
| namespace mindspore { | |||||
| namespace generator { | |||||
| class IRModel { | |||||
| public: | |||||
| void SetIrTaskInfos(const std::vector<IRtaskInfoPtr> &ir_tasks); | |||||
| IRModel() = default; | |||||
| ~IRModel(); | |||||
| private: | |||||
| std::vector<IRtaskInfoPtr> ir_tasks_; | |||||
| }; | |||||
| using IrModelPtr = std::shared_ptr<IRModel>; | |||||
| } // namespace generator | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_EXECUTOR_GENERATOR_IR_IR_MODEL_H_ | |||||
| @@ -1,244 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "predict/generator/ir/ir_task_info.h" | |||||
| #include "utils/log_adapter.h" | |||||
| namespace mindspore { | |||||
| namespace generator { | |||||
| bool CceIRTaskInfo::SerializeIRToProto() { | |||||
| auto cce_task_def_ptr = std::unique_ptr<ge::model_runner::CceTaskDef>(); | |||||
| auto kernel_context_ptr = std::unique_ptr<ge::model_runner::KernelContext>(); | |||||
| MS_EXCEPTION_IF_NULL(cce_task_def_ptr); | |||||
| MS_EXCEPTION_IF_NULL(kernel_context_ptr); | |||||
| kernel_context_ptr->set_kernel_type(k_ctx_.kernel_type); | |||||
| kernel_context_ptr->set_op_id(k_ctx_.op_id); | |||||
| kernel_context_ptr->set_kernel_func_id(k_ctx_.kernel_func_id); | |||||
| kernel_context_ptr->set_op_index(k_ctx_.op_index); | |||||
| kernel_context_ptr->set_is_flowtable(k_ctx_.is_flowtable); | |||||
| kernel_context_ptr->set_args_count(k_ctx_.args_count); | |||||
| for (unsigned int i : k_ctx_.origin_op_index) { | |||||
| kernel_context_ptr->add_origin_op_index(i); | |||||
| } | |||||
| void *tmp_args_offset = static_cast<void *>((k_ctx_.args_offset).data()); | |||||
| if (tmp_args_offset == nullptr) { | |||||
| MS_LOG(WARNING) << "tmp_args_offset have no data"; | |||||
| return false; | |||||
| } | |||||
| kernel_context_ptr->set_args_offset(tmp_args_offset, k_ctx_.args_offset.size()); | |||||
| cce_task_def_ptr->set_allocated_kernel_context(std::move(kernel_context_ptr).get()); | |||||
| cce_task_def_ptr->set_stub_func(stub_func_); | |||||
| cce_task_def_ptr->set_block_dim(block_dim_); | |||||
| cce_task_def_ptr->set_args_size(args_size_); | |||||
| void *tmp_sm_desc = static_cast<void *>(sm_desc_.data()); | |||||
| if (tmp_sm_desc == nullptr) { | |||||
| MS_LOG(WARNING) << "tmp_sm_desc have no data"; | |||||
| return false; | |||||
| } | |||||
| cce_task_def_ptr->set_sm_desc(tmp_sm_desc, sm_desc_.size()); | |||||
| void *tmp_flow_table = static_cast<void *>(flow_table_.data()); | |||||
| if (tmp_flow_table == nullptr) { | |||||
| MS_LOG(WARNING) << "tmp_flow_table have no data"; | |||||
| return false; | |||||
| } | |||||
| cce_task_def_ptr->set_flow_table(tmp_flow_table, flow_table_.size()); | |||||
| return true; | |||||
| } | |||||
| CceIRTaskInfo::~CceIRTaskInfo() { | |||||
| args_.clear(); | |||||
| sm_desc_.clear(); | |||||
| flow_table_.clear(); | |||||
| } | |||||
| bool TbeIRTaskInfo::SerializeIRToProto() { | |||||
| auto tbe_task_def_ptr = std::unique_ptr<ge::model_runner::TbeTaskDef>(); | |||||
| MS_EXCEPTION_IF_NULL(tbe_task_def_ptr); | |||||
| tbe_task_def_ptr->set_stub_func(stub_func_); | |||||
| tbe_task_def_ptr->set_block_dim(block_dim_); | |||||
| tbe_task_def_ptr->set_args_size(args_size_); | |||||
| void *tmp_args = static_cast<void *>(args_.data()); | |||||
| if (tmp_args == nullptr) { | |||||
| MS_LOG(WARNING) << "tmp_args have no data"; | |||||
| return false; | |||||
| } | |||||
| tbe_task_def_ptr->set_args(tmp_args, args_.size()); | |||||
| void *tmp_sm_desc = static_cast<void *>(sm_desc_.data()); | |||||
| if (tmp_sm_desc == nullptr) { | |||||
| MS_LOG(WARNING) << "tmp_sm_desc have no data"; | |||||
| return false; | |||||
| } | |||||
| tbe_task_def_ptr->set_sm_desc(tmp_sm_desc, sm_desc_.size()); | |||||
| void *tmp_meta_data = static_cast<void *>(meta_data_.data()); | |||||
| if (tmp_meta_data == nullptr) { | |||||
| MS_LOG(WARNING) << "tmp_meta_data have no data"; | |||||
| return false; | |||||
| } | |||||
| tbe_task_def_ptr->set_meta_data(tmp_meta_data, meta_data_.size()); | |||||
| for (auto &in : input_data_addrs_) { | |||||
| tbe_task_def_ptr->add_input_addrs(in); | |||||
| } | |||||
| for (auto &ou : output_data_addrs_) { | |||||
| tbe_task_def_ptr->add_output_addrs(ou); | |||||
| } | |||||
| for (auto &wk : workspace_addrs_) { | |||||
| tbe_task_def_ptr->add_workspace_addrs(wk); | |||||
| } | |||||
| return true; | |||||
| } | |||||
| TbeIRTaskInfo::~TbeIRTaskInfo() { | |||||
| args_.clear(); | |||||
| sm_desc_.clear(); | |||||
| meta_data_.clear(); | |||||
| input_data_addrs_.clear(); | |||||
| output_data_addrs_.clear(); | |||||
| workspace_addrs_.clear(); | |||||
| } | |||||
| bool AicpuIRTaskInfo::SerializeIRToProto() { | |||||
| auto aicpu_task_def_ptr = std::unique_ptr<ge::model_runner::AicpuTaskDef>(); | |||||
| MS_EXCEPTION_IF_NULL(aicpu_task_def_ptr); | |||||
| aicpu_task_def_ptr->set_op_type(op_type_); | |||||
| aicpu_task_def_ptr->set_flag(flag_); | |||||
| for (auto &shape : input_data_shapes_) { | |||||
| auto in_shape_ptr = aicpu_task_def_ptr->add_input_shapes(); | |||||
| for (auto &in_sh : shape) { | |||||
| in_shape_ptr->add_shape(static_cast<uint32_t>(in_sh)); | |||||
| } | |||||
| } | |||||
| for (auto &shape : output_data_shapes_) { | |||||
| auto ou_shape_ptr = aicpu_task_def_ptr->add_output_shapes(); | |||||
| for (auto &ou_sh : shape) { | |||||
| ou_shape_ptr->add_shape(static_cast<uint32_t>(ou_sh)); | |||||
| } | |||||
| } | |||||
| for (auto &in_type : input_data_types_) { | |||||
| aicpu_task_def_ptr->add_input_types(in_type); | |||||
| } | |||||
| for (auto &ou_type : output_data_types_) { | |||||
| aicpu_task_def_ptr->add_output_types(ou_type); | |||||
| } | |||||
| for (auto &in_addr : input_data_addrs_) { | |||||
| aicpu_task_def_ptr->add_input_addrs(in_addr); | |||||
| } | |||||
| for (auto &ou_addr : output_data_addrs_) { | |||||
| aicpu_task_def_ptr->add_output_addrs(ou_addr); | |||||
| } | |||||
| void *tmp_node_def = static_cast<void *>(node_def_.data()); | |||||
| if (tmp_node_def == nullptr) { | |||||
| MS_LOG(WARNING) << "tmp_node_def have no data"; | |||||
| return false; | |||||
| } | |||||
| aicpu_task_def_ptr->set_node_def(tmp_node_def, node_def_.size()); | |||||
| void *tmp_func_def = static_cast<void *>(func_def_.data()); | |||||
| if (tmp_func_def == nullptr) { | |||||
| MS_LOG(WARNING) << "tmp_func_def have no data"; | |||||
| return false; | |||||
| } | |||||
| aicpu_task_def_ptr->set_func_def(tmp_func_def, func_def_.size()); | |||||
| return true; | |||||
| } | |||||
| AicpuIRTaskInfo::~AicpuIRTaskInfo() { | |||||
| input_data_types_.clear(); | |||||
| input_data_shapes_.clear(); | |||||
| input_data_addrs_.clear(); | |||||
| output_data_types_.clear(); | |||||
| output_data_shapes_.clear(); | |||||
| output_data_addrs_.clear(); | |||||
| node_def_.clear(); | |||||
| func_def_.clear(); | |||||
| } | |||||
| bool LabelIRTaskInfo::SerializeIRToProto() { | |||||
| auto label_task_def_ptr = std::unique_ptr<ge::model_runner::LabelTaskDef>(); | |||||
| MS_EXCEPTION_IF_NULL(label_task_def_ptr); | |||||
| label_task_def_ptr->set_label_id(label_id_); | |||||
| return true; | |||||
| } | |||||
| bool EventIRTaskInfo::SerializeIRToProto() { | |||||
| auto event_task_def_ptr = std::unique_ptr<ge::model_runner::EventTaskDef>(); | |||||
| MS_EXCEPTION_IF_NULL(event_task_def_ptr); | |||||
| event_task_def_ptr->set_event_id(event_id_); | |||||
| return true; | |||||
| } | |||||
| bool HcclIRTaskInfo::SerializeIRToProto() { | |||||
| auto hccl_task_def_ptr = std::unique_ptr<ge::model_runner::HcclTaskDef>(); | |||||
| MS_EXCEPTION_IF_NULL(hccl_task_def_ptr); | |||||
| hccl_task_def_ptr->set_hccl_type(hccl_type_); | |||||
| hccl_task_def_ptr->set_input_addr(input_data_addr_); | |||||
| hccl_task_def_ptr->set_output_addr(output_data_addr_); | |||||
| auto tmp_wk = static_cast<void *>(workspace_.data()); | |||||
| hccl_task_def_ptr->set_workspace(tmp_wk, workspace_.size()); | |||||
| hccl_task_def_ptr->set_workspace_num(workspace_num_); | |||||
| auto tmp_pri_def = static_cast<void *>(private_def_.data()); | |||||
| hccl_task_def_ptr->set_private_def(tmp_pri_def, private_def_.size()); | |||||
| hccl_task_def_ptr->set_ops_kernel_store(ops_kernel_store_); | |||||
| hccl_task_def_ptr->set_count(count_); | |||||
| hccl_task_def_ptr->set_root_id(root_id_); | |||||
| hccl_task_def_ptr->set_op_type(op_type_); | |||||
| hccl_task_def_ptr->set_data_type(data_type_); | |||||
| return true; | |||||
| } | |||||
| HcclIRTaskInfo::~HcclIRTaskInfo() { | |||||
| workspace_.clear(); | |||||
| private_def_.clear(); | |||||
| } | |||||
| bool ProfilerIRTaskInfo::SerializeIRToProto() { | |||||
| auto profiler_task_def_ptr = std::unique_ptr<ge::model_runner::ProfilerTaskDef>(); | |||||
| MS_EXCEPTION_IF_NULL(profiler_task_def_ptr); | |||||
| profiler_task_def_ptr->set_log_id(log_id_); | |||||
| profiler_task_def_ptr->set_flat(flat_); | |||||
| profiler_task_def_ptr->set_notify(notify_); | |||||
| return true; | |||||
| } | |||||
| bool MemcpyAsyncIRTaskInfo::SerializeIRToProto() { | |||||
| auto mem_task_def_ptr = std::unique_ptr<ge::model_runner::MemcpyAsyncTaskDef>(); | |||||
| MS_EXCEPTION_IF_NULL(mem_task_def_ptr); | |||||
| mem_task_def_ptr->set_dst(dst_); | |||||
| mem_task_def_ptr->set_dst_max(dst_max_); | |||||
| mem_task_def_ptr->set_src(src_); | |||||
| mem_task_def_ptr->set_count(count_); | |||||
| mem_task_def_ptr->set_kind(kind_); | |||||
| return true; | |||||
| } | |||||
| bool StreamSwitchIRTaskInfo::SerializeIRToProto() { | |||||
| auto stream_switch_task_def_ptr = std::unique_ptr<ge::model_runner::StreamSwitchTaskDef>(); | |||||
| MS_EXCEPTION_IF_NULL(stream_switch_task_def_ptr); | |||||
| stream_switch_task_def_ptr->set_true_stream_id(true_stream_id_); | |||||
| stream_switch_task_def_ptr->set_input_addr(input_addr_); | |||||
| stream_switch_task_def_ptr->set_value_addr(value_addr_); | |||||
| stream_switch_task_def_ptr->set_cond(cond_); | |||||
| stream_switch_task_def_ptr->set_data_type(data_type_); | |||||
| return true; | |||||
| } | |||||
| bool StreamActiveIRTaskInfo::SerializeIRToProto() { | |||||
| auto stream_active_task_def_ptr = std::unique_ptr<ge::model_runner::StreamActiveTaskDef>(); | |||||
| MS_EXCEPTION_IF_NULL(stream_active_task_def_ptr); | |||||
| stream_active_task_def_ptr->set_active_stream_id(active_stream_id_); | |||||
| return true; | |||||
| } | |||||
| } // namespace generator | |||||
| } // namespace mindspore | |||||
| @@ -1,295 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_EXECUTOR_GENERATOR_IR_IR_TASK_H_ | |||||
| #define MINDSPORE_CCSRC_EXECUTOR_GENERATOR_IR_IR_TASK_H_ | |||||
| #include <cstdint> | |||||
| #include <utility> | |||||
| #include <memory> | |||||
| #include <string> | |||||
| #include <vector> | |||||
| #include "proto/ge_runtime_taskinfo.pb.h" | |||||
| namespace mindspore { | |||||
| namespace generator { | |||||
| using TaskType = ::ge::model_runner::TaskDef_TaskType; | |||||
| enum TaskTmpType { | |||||
| CCE_TMP_DEF = 0, | |||||
| TBE_TMP_DEF = 1, | |||||
| AICPU_TMP_DEF = 2, | |||||
| LABEL_TMP_DEF = 3, | |||||
| EVENT_TMP_DEF = 4, | |||||
| HCCL_TMP_DEF = 5, | |||||
| PROFILER_TRACE_TMP_DEF = 6, | |||||
| MEMCPY_ASYNC_TMP_DEF = 7, | |||||
| STREAM_SWITCH_TMP_DEF = 8, | |||||
| STREAM_ACTIVE_TMP_DEF = 9 | |||||
| }; | |||||
| struct KernelContext { | |||||
| uint32_t kernel_type = 0; | |||||
| uint32_t op_id = 0; | |||||
| uint32_t kernel_func_id = 0; | |||||
| uint32_t op_index = 0; | |||||
| bool is_flowtable = false; | |||||
| std::vector<uint8_t> args_offset; | |||||
| uint32_t args_count = 0; | |||||
| std::vector<uint32_t> origin_op_index; | |||||
| }; | |||||
| class IRtaskInfo { | |||||
| public: | |||||
| virtual ~IRtaskInfo() = default; | |||||
| virtual bool SerializeIRToProto() = 0; | |||||
| protected: | |||||
| IRtaskInfo(TaskType task_type, TaskTmpType task_tmp_type, uint64_t stream_id) | |||||
| : task_type_(task_type), task_tmp_type_(task_tmp_type), stream_id_(stream_id) {} | |||||
| public: | |||||
| uint64_t GetStreamId() const { return stream_id_; } | |||||
| TaskType GetTaskType() const { return task_type_; } | |||||
| TaskTmpType GetTaskTmpType() const { return task_tmp_type_; } | |||||
| private: | |||||
| TaskType task_type_; | |||||
| TaskTmpType task_tmp_type_; | |||||
| uint64_t stream_id_ = 0; | |||||
| }; | |||||
| using IRtaskInfoPtr = std::shared_ptr<IRtaskInfo>; | |||||
| class CceIRTaskInfo : public IRtaskInfo { | |||||
| public: | |||||
| CceIRTaskInfo(TaskType task_type, uint64_t stream_id, KernelContext k_ctx, std::string stub_func, uint32_t block_dim, | |||||
| std::vector<uint8_t> args, uint32_t args_size, std::vector<uint8_t> sm_desc, | |||||
| std::vector<uint8_t> flow_table) | |||||
| : IRtaskInfo(task_type, CCE_TMP_DEF, stream_id), | |||||
| k_ctx_(std::move(k_ctx)), | |||||
| stub_func_(std::move(stub_func)), | |||||
| block_dim_(block_dim), | |||||
| args_(std::move(args)), | |||||
| args_size_(args_size), | |||||
| sm_desc_(std::move(sm_desc)), | |||||
| flow_table_(std::move(flow_table)) {} | |||||
| ~CceIRTaskInfo() override; | |||||
| bool SerializeIRToProto() override; | |||||
| private: | |||||
| KernelContext k_ctx_; | |||||
| std::string stub_func_; | |||||
| uint32_t block_dim_ = 0; | |||||
| std::vector<uint8_t> args_; | |||||
| // uintptr_t args_addr_; | |||||
| uint32_t args_size_ = 0; | |||||
| std::vector<uint8_t> sm_desc_; | |||||
| std::vector<uint8_t> flow_table_; | |||||
| }; | |||||
| class TbeIRTaskInfo : public IRtaskInfo { | |||||
| public: | |||||
| TbeIRTaskInfo(TaskType task_type, uint64_t stream_id, std::string stub_func, uint32_t block_dim, | |||||
| std::vector<uint8_t> args, uint32_t args_size, std::vector<uint8_t> sm_desc, | |||||
| std::vector<uint8_t> meta_data, std::vector<uintptr_t> input_data_addrs, | |||||
| std::vector<uintptr_t> output_data_addrs, std::vector<uintptr_t> workspace_addrs) | |||||
| : IRtaskInfo(task_type, TBE_TMP_DEF, stream_id), | |||||
| stub_func_(std::move(stub_func)), | |||||
| block_dim_(block_dim), | |||||
| args_(std::move(args)), | |||||
| args_size_(args_size), | |||||
| sm_desc_(std::move(sm_desc)), | |||||
| meta_data_(std::move(meta_data)), | |||||
| input_data_addrs_(std::move(input_data_addrs)), | |||||
| output_data_addrs_(std::move(output_data_addrs)), | |||||
| workspace_addrs_(std::move(workspace_addrs)) {} | |||||
| ~TbeIRTaskInfo() override; | |||||
| bool SerializeIRToProto() override; | |||||
| private: | |||||
| std::string stub_func_; | |||||
| uint32_t block_dim_ = 0; | |||||
| std::vector<uint8_t> args_; | |||||
| uint32_t args_size_ = 0; | |||||
| std::vector<uint8_t> sm_desc_; | |||||
| // uintptr_t binary_; | |||||
| // uint32_t binary_size_; | |||||
| std::vector<uint8_t> meta_data_; | |||||
| std::vector<uintptr_t> input_data_addrs_; | |||||
| std::vector<uintptr_t> output_data_addrs_; | |||||
| std::vector<uintptr_t> workspace_addrs_; | |||||
| // std::vector<uint8_t> flow_table_; | |||||
| }; | |||||
| class AicpuIRTaskInfo : public IRtaskInfo { | |||||
| public: | |||||
| AicpuIRTaskInfo(TaskType task_type, uint64_t stream_id, std::string op_type, uint32_t flag, | |||||
| std::vector<uint32_t> input_data_types, std::vector<std::vector<size_t>> input_data_shapes, | |||||
| std::vector<uintptr_t> input_data_addrs, std::vector<uint32_t> output_data_types, | |||||
| std::vector<std::vector<size_t>> output_data_shapes, std::vector<uintptr_t> output_data_addrs, | |||||
| std::vector<uint8_t> node_def, std::vector<uint8_t> func_def) | |||||
| : IRtaskInfo(task_type, AICPU_TMP_DEF, stream_id), | |||||
| op_type_(std::move(op_type)), | |||||
| flag_(flag), | |||||
| input_data_types_(std::move(input_data_types)), | |||||
| input_data_shapes_(std::move(input_data_shapes)), | |||||
| input_data_addrs_(std::move(input_data_addrs)), | |||||
| output_data_types_(std::move(output_data_types)), | |||||
| output_data_shapes_(std::move(output_data_shapes)), | |||||
| output_data_addrs_(std::move(output_data_addrs)), | |||||
| node_def_(std::move(node_def)), | |||||
| func_def_(std::move(func_def)) {} | |||||
| ~AicpuIRTaskInfo() override; | |||||
| bool SerializeIRToProto() override; | |||||
| private: | |||||
| std::string op_type_; | |||||
| uint32_t flag_ = 0; | |||||
| std::vector<uint32_t> input_data_types_; | |||||
| std::vector<std::vector<size_t>> input_data_shapes_; | |||||
| std::vector<uintptr_t> input_data_addrs_; | |||||
| std::vector<uint32_t> output_data_types_; | |||||
| std::vector<std::vector<size_t>> output_data_shapes_; | |||||
| std::vector<uintptr_t> output_data_addrs_; | |||||
| std::vector<uint8_t> node_def_; | |||||
| std::vector<uint8_t> func_def_; | |||||
| }; | |||||
| class LabelIRTaskInfo : public IRtaskInfo { | |||||
| public: | |||||
| LabelIRTaskInfo(TaskType task_type, uint64_t stream_id, uint32_t label_id) | |||||
| : IRtaskInfo(task_type, LABEL_TMP_DEF, stream_id), label_id_(label_id) {} | |||||
| ~LabelIRTaskInfo() override {} | |||||
| bool SerializeIRToProto() override; | |||||
| private: | |||||
| uint32_t label_id_ = 0; | |||||
| }; | |||||
| class EventIRTaskInfo : public IRtaskInfo { | |||||
| public: | |||||
| EventIRTaskInfo(TaskType task_type, uint64_t stream_id, uint32_t event_id) | |||||
| : IRtaskInfo(task_type, EVENT_TMP_DEF, stream_id), event_id_(event_id) {} | |||||
| ~EventIRTaskInfo() override {} | |||||
| bool SerializeIRToProto() override; | |||||
| private: | |||||
| uint32_t event_id_ = 0; | |||||
| }; | |||||
| class HcclIRTaskInfo : public IRtaskInfo { | |||||
| public: | |||||
| HcclIRTaskInfo(TaskType task_type, uint64_t stream_id, std::string hccl_type, uintptr_t input_data_addr, | |||||
| uintptr_t output_data_addr, std::vector<uint8_t> workspace, int64_t workspace_num, | |||||
| std::vector<uint8_t> private_def, uintptr_t ops_kernel_store, int32_t count, int64_t root_id, | |||||
| int64_t op_type, int64_t data_type) | |||||
| : IRtaskInfo(task_type, HCCL_TMP_DEF, stream_id), | |||||
| hccl_type_(std::move(hccl_type)), | |||||
| input_data_addr_(input_data_addr), | |||||
| output_data_addr_(output_data_addr), | |||||
| workspace_(std::move(workspace)), | |||||
| workspace_num_(workspace_num), | |||||
| private_def_(std::move(private_def)), | |||||
| ops_kernel_store_(ops_kernel_store), | |||||
| count_(count), | |||||
| root_id_(root_id), | |||||
| op_type_(op_type), | |||||
| data_type_(data_type) {} | |||||
| ~HcclIRTaskInfo() override; | |||||
| bool SerializeIRToProto() override; | |||||
| private: | |||||
| std::string hccl_type_; | |||||
| uintptr_t input_data_addr_ = 0; | |||||
| uintptr_t output_data_addr_ = 0; | |||||
| std::vector<uint8_t> workspace_; | |||||
| int64_t workspace_num_ = 0; | |||||
| std::vector<uint8_t> private_def_; | |||||
| uintptr_t ops_kernel_store_ = 0; | |||||
| int32_t count_ = 0; | |||||
| int64_t root_id_ = 0; | |||||
| int64_t op_type_ = 0; | |||||
| int64_t data_type_ = 0; | |||||
| }; | |||||
| class ProfilerIRTaskInfo : public IRtaskInfo { | |||||
| public: | |||||
| ProfilerIRTaskInfo(TaskType task_type, uint64_t stream_id, uint64_t log_id, bool notify, uint32_t flat) | |||||
| : IRtaskInfo(task_type, PROFILER_TRACE_TMP_DEF, stream_id), log_id_(log_id), notify_(notify), flat_(flat) {} | |||||
| ~ProfilerIRTaskInfo() override {} | |||||
| bool SerializeIRToProto() override; | |||||
| private: | |||||
| uint64_t log_id_ = 0; | |||||
| bool notify_ = false; | |||||
| uint32_t flat_ = 0; | |||||
| }; | |||||
| class MemcpyAsyncIRTaskInfo : public IRtaskInfo { | |||||
| public: | |||||
| MemcpyAsyncIRTaskInfo(TaskType task_type, uint32_t stream_id, uint64_t dst, uint64_t dst_max, uint64_t src, | |||||
| uint64_t count, int64_t kind) | |||||
| : IRtaskInfo(task_type, MEMCPY_ASYNC_TMP_DEF, stream_id), | |||||
| dst_(dst), | |||||
| dst_max_(dst_max), | |||||
| src_(src), | |||||
| count_(count), | |||||
| kind_(kind) {} | |||||
| ~MemcpyAsyncIRTaskInfo() override {} | |||||
| bool SerializeIRToProto() override; | |||||
| private: | |||||
| uint64_t dst_ = 0; | |||||
| uint64_t dst_max_ = 0; | |||||
| uint64_t src_ = 0; | |||||
| uint64_t count_ = 0; | |||||
| uint32_t kind_ = 0; | |||||
| }; | |||||
| class StreamSwitchIRTaskInfo : public IRtaskInfo { | |||||
| public: | |||||
| StreamSwitchIRTaskInfo(TaskType task_type, uint64_t stream_id, uint32_t true_stream_id, uintptr_t input_addr, | |||||
| uintptr_t value_addr, uint32_t cond, int64_t data_type) | |||||
| : IRtaskInfo(task_type, STREAM_SWITCH_TMP_DEF, stream_id), | |||||
| true_stream_id_(true_stream_id), | |||||
| input_addr_(input_addr), | |||||
| value_addr_(value_addr), | |||||
| cond_(cond), | |||||
| data_type_(data_type) {} | |||||
| ~StreamSwitchIRTaskInfo() override {} | |||||
| bool SerializeIRToProto() override; | |||||
| private: | |||||
| uint32_t true_stream_id_ = 0; | |||||
| uintptr_t input_addr_ = 0; | |||||
| uintptr_t value_addr_ = 0; | |||||
| uint32_t cond_ = 0; | |||||
| int64_t data_type_ = 0; | |||||
| }; | |||||
| class StreamActiveIRTaskInfo : public IRtaskInfo { | |||||
| public: | |||||
| StreamActiveIRTaskInfo(TaskType task_type, uint64_t stream_id, uint32_t active_stream_id) | |||||
| : IRtaskInfo(task_type, STREAM_ACTIVE_TMP_DEF, stream_id), active_stream_id_(active_stream_id) {} | |||||
| ~StreamActiveIRTaskInfo() override {} | |||||
| bool SerializeIRToProto() override; | |||||
| private: | |||||
| uint32_t active_stream_id_ = 0; | |||||
| }; | |||||
| }; // namespace generator | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_EXECUTOR_GENERATOR_IR_IR_TASK_H_ | |||||
| @@ -1,43 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "predict/generator/utils/ir_model_util.h" | |||||
| namespace mindspore { | |||||
| namespace generator { | |||||
| IRModelUtil &IRModelUtil::GetInstance() { | |||||
| static IRModelUtil instance; | |||||
| return instance; | |||||
| } | |||||
| void IRModelUtil::Init() { | |||||
| MS_LOG(INFO) << "IRModel init success"; | |||||
| version_ = "defaultVersion"; | |||||
| stream_num_ = 0; | |||||
| event_num_ = 0; | |||||
| batch_num_ = 0; | |||||
| memory_size_ = 0; | |||||
| weight_size_ = 0; | |||||
| var_size_ = 0; | |||||
| logic_mem_base_ = 0; | |||||
| logic_var_base_ = 0; | |||||
| logic_var_base_ = 0; | |||||
| priority_ = 0; | |||||
| is_enable_save_model_ = false; | |||||
| min_static_offset_ = 0; | |||||
| max_dynamic_offset_ = 0; | |||||
| } | |||||
| } // namespace generator | |||||
| } // namespace mindspore | |||||
| @@ -1,92 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_PREDICT_GENERATOR_IR_IR_MODEL_UTIL_H_ | |||||
| #define MINDSPORE_CCSRC_PREDICT_GENERATOR_IR_IR_MODEL_UTIL_H_ | |||||
| #include <string> | |||||
| #include <vector> | |||||
| #include <memory> | |||||
| #include <utility> | |||||
| #include <algorithm> | |||||
| #include "utils/log_adapter.h" | |||||
| namespace mindspore { | |||||
| namespace generator { | |||||
| class IRModelUtil { | |||||
| public: | |||||
| static IRModelUtil &GetInstance(); | |||||
| IRModelUtil(const IRModelUtil &) = delete; | |||||
| IRModelUtil &operator=(const IRModelUtil &) = delete; | |||||
| void Init(); | |||||
| void set_version(const std::string &version) { version_ = version; } | |||||
| void set_stream_num(uint32_t stream_num) { stream_num_ = stream_num; } | |||||
| void set_event_num(uint32_t event_num) { event_num_ = event_num; } | |||||
| void set_batch_num(uint32_t batch_num) { batch_num_ = batch_num; } | |||||
| void set_memory_size(uint32_t memory_size) { memory_size_ = memory_size; } | |||||
| void set_weight_size(uint32_t weight_size) { weight_size_ = weight_size; } | |||||
| void set_var_size(uint32_t var_size) { var_size_ = var_size; } | |||||
| void set_logic_mem_base(uint32_t logic_mem_base) { logic_mem_base_ = logic_mem_base; } | |||||
| void set_logic_weight_base(uint32_t logic_weight_base) { logic_weight_base_ = logic_weight_base; } | |||||
| void set_logic_var_base(uint32_t logic_var_base) { logic_var_base_ = logic_var_base; } | |||||
| void set_priority(uint32_t priority) { priority_ = priority; } | |||||
| void set_is_enable_save_model(bool is_enable_save_model) { is_enable_save_model_ = is_enable_save_model; } | |||||
| void set_min_static_offset(uint64_t min_static_offset) { min_static_offset_ = min_static_offset; } | |||||
| void set_max_dynamic_offset(uint64_t max_dynamic_offset) { max_dynamic_offset_ = max_dynamic_offset; } | |||||
| void set_max_mem_size(uint64_t max_mem_size) { max_mem_size_ = max_mem_size; } | |||||
| void set_irmodel_mem_base(uint8_t irmodel_mem_base) { irmodel_mem_base_ = irmodel_mem_base; } | |||||
| std::string version() const { return version_; } | |||||
| uint32_t stream_num() const { return stream_num_; } | |||||
| uint32_t event_num() const { return event_num_; } | |||||
| uint32_t batch_num() const { return batch_num_; } | |||||
| uint64_t memory_size() const { return memory_size_; } | |||||
| uint64_t weight_size() const { return weight_size_; } | |||||
| uint64_t var_size() const { return var_size_; } | |||||
| uint64_t logic_mem_base() const { return logic_mem_base_; } | |||||
| uint64_t logic_weight_base() const { return logic_weight_base_; } | |||||
| uint64_t logic_var_base() const { return logic_var_base_; } | |||||
| uint32_t priority() const { return priority_; } | |||||
| bool is_enable_save_model() const { return is_enable_save_model_; } | |||||
| uint64_t min_static_offset() const { return min_static_offset_; } | |||||
| uint64_t max_dynamic_offset() const { return max_dynamic_offset_; } | |||||
| uint64_t max_mem_size() const { return max_mem_size_; } | |||||
| uint8_t irmodel_mem_base() const { return irmodel_mem_base_; } | |||||
| private: | |||||
| IRModelUtil() = default; | |||||
| ~IRModelUtil() = default; | |||||
| std::string version_; | |||||
| uint32_t stream_num_ = 0; | |||||
| uint32_t event_num_ = 0; | |||||
| uint32_t batch_num_ = 0; | |||||
| uint64_t memory_size_ = 0; | |||||
| uint64_t weight_size_ = 0; | |||||
| uint64_t var_size_ = 0; | |||||
| uint64_t logic_mem_base_ = 0; | |||||
| uint64_t logic_weight_base_ = 0; | |||||
| uint64_t logic_var_base_ = 0; | |||||
| uint32_t priority_ = 0; | |||||
| bool is_enable_save_model_ = false; | |||||
| uint64_t min_static_offset_ = 0; | |||||
| uint64_t max_dynamic_offset_ = 0; | |||||
| uint64_t max_mem_size_ = 0; | |||||
| uint8_t irmodel_mem_base_ = 0; | |||||
| }; | |||||
| } // namespace generator | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_PREDICT_GENERATOR_IR_IR_MODEL_UTIL_H_ | |||||
| @@ -1,69 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "predict/predict.h" | |||||
| #include <memory> | |||||
| #include <vector> | |||||
| #include <string> | |||||
| namespace mindspore { | |||||
| namespace predictmodel { | |||||
| void StepConvertGraph(const KernelGraphPtr &kernel_graph_ptr) { | |||||
| MS_LOG(INFO) << "start convert_graph step"; | |||||
| // get kernel_graph. this graph can be origin or device, depends on which steps to persistence | |||||
| MS_EXCEPTION_IF_NULL(kernel_graph_ptr); | |||||
| bool save_ms_model = MsContext::GetInstance()->save_ms_model_flag(); | |||||
| if (save_ms_model) { | |||||
| if (kernel_graph_ptr->inputs().empty()) { | |||||
| return; | |||||
| } | |||||
| // set convert_mode: convert cpu info or convert Davnici | |||||
| executor::Kernel2Ms::GetInstance().set_convert_mode(executor::kConvertCpuMode); | |||||
| // convert kernel_graph to sub_ms_graph | |||||
| bool ret = executor::Kernel2Ms::GetInstance().KernelGraph2MsGraph(kernel_graph_ptr); | |||||
| if (!ret) { | |||||
| MS_LOG(WARNING) << "convert to mindsporeGraph failed"; | |||||
| } else { | |||||
| MS_LOG(INFO) << "convert to Graph success"; | |||||
| } | |||||
| } | |||||
| } | |||||
| void StepConvertWeight(const std::vector<tensor::TensorPtr> &inputs) { | |||||
| MS_LOG(INFO) << "start convert_input step"; | |||||
| // get all inputs tensor | |||||
| bool save_ms_model = MsContext::GetInstance()->save_ms_model_flag(); | |||||
| std::string save_path = MsContext::GetInstance()->save_ms_model_path(); | |||||
| if (save_ms_model) { | |||||
| if (inputs.empty()) { | |||||
| return; | |||||
| } | |||||
| MS_LOG(INFO) << "save ms model is true to path " << save_path; | |||||
| if (!executor::Kernel2Ms::GetInstance().KernelInput2MS(inputs)) { | |||||
| MS_LOG(WARNING) << "convert mindspore kernel input failed"; | |||||
| } | |||||
| auto new_ms_graph_ptr = std::make_shared<mindspore::predict::GraphDefT>(); | |||||
| bool ret = executor::Kernel2Ms::GetInstance().SaveDeviceModel(new_ms_graph_ptr, save_path); | |||||
| if (!ret) { | |||||
| MS_LOG(WARNING) << "convert to mindsporeGraph failed"; | |||||
| } else { | |||||
| MS_LOG(INFO) << "save ms model success"; | |||||
| } | |||||
| } | |||||
| } | |||||
| } // namespace predictmodel | |||||
| } // namespace mindspore | |||||
| @@ -1,32 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_PREDICT_H_ | |||||
| #define MINDSPORE_CCSRC_PREDICT_H_ | |||||
| #include <memory> | |||||
| #include <vector> | |||||
| #include "backend/session/session_basic.h" | |||||
| #include "predict/converter/kernel2ms.h" | |||||
| namespace mindspore { | |||||
| namespace predictmodel { | |||||
| using KernelGraphPtr = std::shared_ptr<mindspore::session::KernelGraph>; | |||||
| void StepConvertGraph(const KernelGraphPtr &kernel_graph_ptr); | |||||
| void StepConvertWeight(const std::vector<tensor::TensorPtr> &inputs); | |||||
| } // namespace predictmodel | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_PREDICT_H_ | |||||
| @@ -1,42 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| syntax = "proto3"; | |||||
| import public "Graph_ir.proto"; | |||||
| import public "ge_runtime_taskinfo.proto"; | |||||
| package ge.model_runner; | |||||
| option cc_enable_arenas = true; | |||||
| message ModelTaskDef { | |||||
| string version = 1; | |||||
| repeated TaskDef task = 10; | |||||
| uint32 stream_num = 11; | |||||
| uint32 event_num = 12; | |||||
| uint32 batch_num_ = 13; | |||||
| uint64 memory_size = 14; | |||||
| uint64 weight_size = 15; | |||||
| uint64 var_size_ = 16; | |||||
| uint64 logic_mem_base_ = 17; | |||||
| uint64 logic_weight_base_ = 18; | |||||
| uint64 logic_var_base_ = 19; | |||||
| uint32 priority_ = 20; | |||||
| } | |||||
| @@ -1,125 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| syntax = "proto3"; | |||||
| package mindspore; | |||||
| // Data type definition | |||||
| enum DataType { | |||||
| DT_UNDEFINED = 0; | |||||
| // Basic types. | |||||
| DT_BOOL = 1; // bool | |||||
| DT_INT8 = 2; // int8_t | |||||
| DT_INT16 = 3; // int16_t | |||||
| DT_INT32 = 4; // int32_t | |||||
| DT_INT64 = 5; // int64_t | |||||
| DT_UINT8 = 6; // uint8_t | |||||
| DT_UINT16 = 7; // uint16_t | |||||
| DT_UINT32 = 8; // uint32_t | |||||
| DT_UINT64 = 9; // uint64_t | |||||
| DT_FLOAT16 = 10; // float 16 | |||||
| DT_FLOAT32 = 11; // float 32 | |||||
| DT_FLOAT64 = 12; // float 64 | |||||
| DT_STRING = 13; // string | |||||
| DT_TENSOR = 14; // tensor | |||||
| DT_GRAPH = 15; // graph | |||||
| // list type | |||||
| DT_BOOLS = 16; // list of bool | |||||
| DT_INTS8 = 17; // list of int8_t | |||||
| DT_INTS16 = 18; // list of int16_t | |||||
| DT_INTS32 = 19; // list of int32_t | |||||
| DT_INTS64 = 20; // list of int64_t | |||||
| DT_UINTS8 = 21; // list of uint8_t | |||||
| DT_UINTS16 = 22; // list of uint16_t | |||||
| DT_UINTS32 = 23; // list of uint32_t | |||||
| DT_UINTS64 = 24; // list of uint64_t | |||||
| DT_FLOATS16 = 25; // list of float16 | |||||
| DT_FLOATS32 = 26; // list of float32 | |||||
| DT_FLOATS64 = 27; // list of float64 | |||||
| DT_STRINGS = 28; // list of string | |||||
| DT_TENSORS = 29; // list of tensor | |||||
| DT_GRAPHS = 30; // list of graph | |||||
| DT_TUPLE = 31; // tuple | |||||
| DT_LIST = 32; // list | |||||
| DT_DICT = 33; // dictionary | |||||
| // other types | |||||
| DT_NONE = 34; // None | |||||
| DT_SYM_INST = 35; // Symbolic Key Instance | |||||
| // type related type | |||||
| DT_BASE_INT = 36; // type generic int | |||||
| DT_BASE_UINT = 37; // type generate unsigned int | |||||
| DT_BASE_FLOAT = 38; // type generate float | |||||
| DT_TYPE = 39; // type type | |||||
| DT_ANYTHING = 40; // type anything | |||||
| }; | |||||
| enum MSConst { | |||||
| DEFAULT_REFCOUNT = 0; | |||||
| WEIGHT_REFCOUNT = 999; | |||||
| }; | |||||
| message TensorDef { | |||||
| DataType data_type = 1; | |||||
| repeated int64 dims = 2; | |||||
| string format = 3; | |||||
| string layout = 4; | |||||
| uint32 refCount = 5; | |||||
| uint64 offset = 6; | |||||
| uint64 size = 7; | |||||
| uint64 weight_size = 8; | |||||
| bytes data = 9; | |||||
| } | |||||
| message OpDef { | |||||
| string name = 1; | |||||
| string type = 2; | |||||
| string fwk_type = 3; | |||||
| string opAttr = 4; | |||||
| repeated int64 input_index = 5; | |||||
| repeated int64 output_index = 6; | |||||
| } | |||||
| message GraphDef { | |||||
| string name = 1; | |||||
| repeated int64 input_index = 2; | |||||
| repeated int64 output_index = 3; | |||||
| uint64 mempool_size = 4; | |||||
| repeated OpDef opdefs = 5; | |||||
| repeated TensorDef alltensors = 6; | |||||
| } | |||||
| @@ -1,155 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| syntax = "proto3"; | |||||
| package ge.model_runner; | |||||
| option cc_enable_arenas = true; | |||||
| message TaskDef { | |||||
| enum TaskType { | |||||
| CCE = 0; | |||||
| TBE = 1; | |||||
| AICPU = 2; | |||||
| LABEL_SET = 3; | |||||
| LABEL_SWITCH = 4; | |||||
| LABEL_GOTO = 5; | |||||
| EVENT_RECORD = 6; | |||||
| EVENT_WAIT = 7; | |||||
| FUSION_START = 8; | |||||
| FUSION_END = 9; | |||||
| HCCL = 10; | |||||
| PROFILER_TRACE = 11; | |||||
| MEMCPY_ASYNC = 12; | |||||
| STREAM_SWITCH = 13; | |||||
| STREAM_ACTIVE = 14; | |||||
| // insert new task type here | |||||
| REVSERVED = 23; | |||||
| }; | |||||
| TaskType task_type = 1; | |||||
| uint64 stream_id = 2; | |||||
| oneof subclass { | |||||
| CceTaskDef cce_task_def = 3; | |||||
| TbeTaskDef tbe_task_def = 4; | |||||
| AicpuTaskDef aicpu_task_def = 5; | |||||
| LabelTaskDef label_task_def = 6; | |||||
| EventTaskDef event_task_def = 7; | |||||
| HcclTaskDef hccl_task_def = 8; | |||||
| ProfilerTaskDef profiler_task_def = 9; | |||||
| MemcpyAsyncTaskDef memcpy_async_task_def = 10; | |||||
| StreamSwitchTaskDef stream_switch_task_def = 11; | |||||
| StreamActiveTaskDef stream_active_task_def = 12; | |||||
| } | |||||
| } | |||||
| message CceTaskDef { | |||||
| KernelContext kernel_context = 1; | |||||
| string stub_func = 2; | |||||
| uint32 block_dim = 3; | |||||
| bytes args = 4; | |||||
| uint32 args_size = 5; | |||||
| bytes sm_desc = 6; | |||||
| bytes flow_table = 7; | |||||
| } | |||||
| message TbeTaskDef { | |||||
| string stub_func = 1; | |||||
| uint32 block_dim = 2; | |||||
| bytes args = 3; | |||||
| uint32 args_size = 4; | |||||
| bytes sm_desc = 5; | |||||
| bytes meta_data = 8; | |||||
| repeated uint64 input_addrs = 9; | |||||
| repeated uint64 output_addrs = 10; | |||||
| repeated uint64 workspace_addrs = 11; | |||||
| } | |||||
| message AicpuTaskDef { | |||||
| string op_type = 1; | |||||
| uint32 flag = 2; | |||||
| repeated uint32 input_types = 3; | |||||
| repeated Shape input_shapes = 4; | |||||
| repeated uint64 input_addrs = 5; | |||||
| repeated uint32 output_types = 6; | |||||
| repeated Shape output_shapes = 7; | |||||
| repeated uint64 output_addrs = 8; | |||||
| bytes node_def = 9; | |||||
| bytes func_def = 10; | |||||
| } | |||||
| message Shape { | |||||
| repeated uint32 shape = 1; | |||||
| } | |||||
| message LabelTaskDef { | |||||
| uint32 label_id = 1; | |||||
| } | |||||
| message EventTaskDef { | |||||
| uint32 event_id = 1; | |||||
| } | |||||
| message HcclTaskDef { | |||||
| string hccl_type = 1; | |||||
| uint64 input_addr = 2; | |||||
| uint64 output_addr = 3; | |||||
| bytes workspace = 4; | |||||
| int64 workspace_num = 5; | |||||
| bytes private_def = 6; | |||||
| uint64 ops_kernel_store = 7; | |||||
| int32 count = 8; | |||||
| int64 root_id = 9; | |||||
| int64 op_type = 10; | |||||
| int64 data_type = 11; | |||||
| } | |||||
| message ProfilerTaskDef { | |||||
| uint64 log_id = 1; | |||||
| bool notify = 2; | |||||
| uint32 flat = 3; | |||||
| } | |||||
| message MemcpyAsyncTaskDef { | |||||
| uint64 dst = 1; | |||||
| uint64 dst_max = 2; | |||||
| uint64 src = 3; | |||||
| uint64 count = 4; | |||||
| uint32 kind = 5; | |||||
| } | |||||
| message StreamSwitchTaskDef { | |||||
| uint32 true_stream_id = 1; | |||||
| uint64 input_addr = 2; | |||||
| uint64 value_addr = 3; | |||||
| int64 cond = 4; | |||||
| int64 data_type = 5; | |||||
| } | |||||
| message StreamActiveTaskDef { | |||||
| uint32 active_stream_id = 1; | |||||
| } | |||||
| message KernelContext { | |||||
| uint32 kernel_type = 1; | |||||
| uint32 op_id = 2; | |||||
| uint32 kernel_func_id = 3; | |||||
| uint32 op_index = 4; | |||||
| bool is_flowtable = 5; | |||||
| bytes args_offset = 6; | |||||
| uint32 args_count = 7; | |||||
| repeated uint32 origin_op_index = 8; | |||||
| } | |||||
| @@ -1,17 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| this is a dictory for predict including saving model &&& saving taskinfos. | |||||
| @@ -1 +0,0 @@ | |||||
| this is a dictory for predict to gen fbs headers | |||||
| @@ -1,212 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| include "op.fbs"; | |||||
| namespace mindspore.predict; | |||||
| enum MSCONST: int { | |||||
| WEIGHT_REFCOUNT = 999 | |||||
| } | |||||
| table QuantParam { | |||||
| scale: double; | |||||
| zeroPoint: int; | |||||
| min: double = 0; | |||||
| max: double = 0; | |||||
| narrowRange: bool = true; | |||||
| numBits: int = 8; | |||||
| } | |||||
| table QuantParamArray { | |||||
| param: [QuantParam]; //pre-channel | |||||
| } | |||||
| table TensorDef { | |||||
| // data type | |||||
| dataType: DataType; | |||||
| // shape | |||||
| dims: [int]; | |||||
| format: Format; | |||||
| refCount: int; | |||||
| offset: int; | |||||
| data: [ubyte]; | |||||
| } | |||||
| union OpT { | |||||
| Concat, | |||||
| SoftMax, | |||||
| Activation, | |||||
| Conv2D, | |||||
| FusedBatchNorm, | |||||
| CaffeBatchNorm, | |||||
| BiasAdd, | |||||
| Pooling, | |||||
| DepthwiseConv2D, | |||||
| DeDepthwiseConv2D, | |||||
| Resize, | |||||
| DetectionPostProcess, | |||||
| FullConnection, | |||||
| Mean, | |||||
| DeConv2D, | |||||
| Scale, | |||||
| Reshape, | |||||
| Eltwise, | |||||
| NetOutput, | |||||
| Add, | |||||
| Sub, | |||||
| MatMul, | |||||
| StridedSlice, | |||||
| Power, | |||||
| Slice, | |||||
| Stack, | |||||
| Mul, | |||||
| RealDiv, | |||||
| Pad, | |||||
| Maximum, | |||||
| Minimum, | |||||
| CaffePReLU, | |||||
| LeakyReLU, | |||||
| ArgMax, | |||||
| ArgMin, | |||||
| Exp, | |||||
| CaffeCrop, | |||||
| Range, | |||||
| Rsqrt, | |||||
| ExpandDims, | |||||
| Tile, | |||||
| Cast, | |||||
| Shape, | |||||
| Nchw2Nhwc, | |||||
| Nhwc2Nchw, | |||||
| QuantDTypeCast, | |||||
| Split, | |||||
| Permute, | |||||
| FakeQuantWithMinMaxVars, | |||||
| Equal, | |||||
| Less, | |||||
| Greater, | |||||
| Min, | |||||
| Floor, | |||||
| Abs, | |||||
| Neg, | |||||
| Cos, | |||||
| Sin, | |||||
| Sqrt, | |||||
| Square, | |||||
| Constant, | |||||
| Log, | |||||
| Tan, | |||||
| Atan, | |||||
| Asin, | |||||
| Clip, | |||||
| Transpose, | |||||
| Squeeze, | |||||
| Unsqueeze, | |||||
| Upsample, | |||||
| Dropout, | |||||
| Broadcast, | |||||
| Lrn, | |||||
| Prelu, | |||||
| ZerosLike, | |||||
| TopK, | |||||
| SpaceToDepth, | |||||
| SpaceToBatch, | |||||
| SparseToDense, | |||||
| ReverseSequence, | |||||
| Rank, | |||||
| Gather, | |||||
| GatherNd, | |||||
| Fill, | |||||
| Elu, | |||||
| DepthToSpace, | |||||
| BatchToSpace, | |||||
| AddN, | |||||
| Ceil, | |||||
| EmbeddingLookup, | |||||
| EmbeddingLookupSparse, | |||||
| FloorDiv, | |||||
| FloorMod, | |||||
| L2Norm, | |||||
| LocalResponseNormalization, | |||||
| MatrixDiag, | |||||
| Reduce, | |||||
| Reverse, | |||||
| Round, | |||||
| Select, | |||||
| Scatter, | |||||
| Unique, | |||||
| Unstack, | |||||
| LogicalAnd, | |||||
| LogicalOr, | |||||
| LogicalXor, | |||||
| LogicalNot, | |||||
| OnnxInt8Quantize, | |||||
| OnnxInt8Dequantize, | |||||
| FakeQuantWithMinMax, | |||||
| FakeQuantWithMinMaxPerChannel, | |||||
| BatchNormFold, | |||||
| MulFold, | |||||
| AddFold, | |||||
| SquaredDifference | |||||
| } | |||||
| enum QuantType: int { | |||||
| QUANT_NONE, | |||||
| AwareTrainning, | |||||
| WeightQuant, | |||||
| PostTraining | |||||
| } | |||||
| enum FmkType: int { | |||||
| TF, | |||||
| CAFFE, | |||||
| ONNX, | |||||
| MS, | |||||
| TFLITE | |||||
| } | |||||
| table OpDef { | |||||
| name: string; | |||||
| fmkType: FmkType; | |||||
| attr: OpT; | |||||
| inputIndex: [uint]; | |||||
| outputIndex: [uint]; | |||||
| quantType: QuantType = QUANT_NONE; | |||||
| quantParam: [QuantParamArray]; | |||||
| } | |||||
| table SubGraphDef { | |||||
| name: string; | |||||
| inputIndex: [uint]; | |||||
| outputIndex: [uint]; | |||||
| mempoolSize: uint; | |||||
| nodes: [OpDef]; | |||||
| allTensors: [TensorDef]; // weight + input + output | |||||
| } | |||||
| table MempoolCfg { | |||||
| size: uint; | |||||
| shiftFactor: uint; | |||||
| } | |||||
| table GraphDef { | |||||
| name: string; | |||||
| mempoolCfg: MempoolCfg; | |||||
| subgraphs: [SubGraphDef]; | |||||
| } | |||||
| root_type GraphDef; | |||||
| @@ -1,699 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| namespace mindspore.predict; | |||||
| enum ResizeMethod: byte { | |||||
| UNKNOW = -1, | |||||
| BILINEAR = 0, | |||||
| NEAREST_NEIGHBOR = 1 | |||||
| } | |||||
| enum DataType : int { | |||||
| DT_FLOAT = 0, | |||||
| DT_FLOAT16 = 1, | |||||
| DT_INT8 = 2, | |||||
| DT_INT32 = 3, | |||||
| DT_UINT8 = 4, | |||||
| DT_INT16 = 5, | |||||
| DT_UINT32 = 8, | |||||
| DT_INT64 = 9, | |||||
| DT_UINT16 = 10, | |||||
| DT_UNDEFINED = 16 | |||||
| } | |||||
| enum Format : int { | |||||
| NCHW = 0, | |||||
| NHWC, | |||||
| HWKC, | |||||
| HWCK, | |||||
| KCHW, | |||||
| CKHW, | |||||
| KHWC, | |||||
| CHWK, | |||||
| NC4HW4 = 100, | |||||
| NUM_OF_FORMAT | |||||
| } | |||||
| enum ActivationType : byte { | |||||
| NO_ACTIVATION = 0, | |||||
| RELU = 1, | |||||
| SIGMOID = 2, | |||||
| RELU6 = 3, | |||||
| ELU = 4, | |||||
| LEAKY_RELU = 5, | |||||
| ABS = 6, | |||||
| RELU1 = 7, | |||||
| SOFTSIGN = 8, | |||||
| SOFTPLUS = 9, | |||||
| TANH = 10, | |||||
| SELU = 11, | |||||
| HSWISH = 12, | |||||
| HSIGMOID = 13, | |||||
| THRESHOLDRELU = 14, | |||||
| LINEAR = 15, | |||||
| UNKNOW = 16 | |||||
| } | |||||
| enum ReduceType : byte { | |||||
| REDUCE_MAX = 0, | |||||
| REDUCE_MEAN = 1, | |||||
| REDUCE_ALL = 2, | |||||
| REDUCE_ANY = 3, | |||||
| REDUCE_LOG_SUM_EXP = 4, | |||||
| REDUCE_PROD = 5, | |||||
| REDUCE_SUM = 6, | |||||
| UNKNOW = 7 | |||||
| } | |||||
| enum PoolMode : byte { | |||||
| MAX_POOLING = 0, | |||||
| MEAN_POOLING = 1, | |||||
| } | |||||
| enum EltwiseMode : byte { | |||||
| PROD = 0, | |||||
| SUM = 1, | |||||
| MAXIMUM = 2, | |||||
| UNKNOW = 3 | |||||
| } | |||||
| enum PadMode : byte { | |||||
| NOTSET = 0, | |||||
| SAME = 1, | |||||
| VALID = 2, | |||||
| CAFFE = 4 | |||||
| } | |||||
| enum RoundMode : byte { | |||||
| FLOOR = 0, | |||||
| CEIL = 1 | |||||
| } | |||||
| enum PaddingMode : byte { | |||||
| CONSTANT = 0, | |||||
| REFLECT = 1, | |||||
| SYMMETRIC = 2, | |||||
| MODE_RESERVED = 3 | |||||
| } | |||||
| table Pad { | |||||
| paddingmode: PaddingMode; | |||||
| paddings: [int]; | |||||
| } | |||||
| table Maximum { | |||||
| } | |||||
| table Minimum { | |||||
| } | |||||
| table Concat { | |||||
| axis: int; | |||||
| n: int; | |||||
| } | |||||
| table SoftMax { | |||||
| axis: [int]; | |||||
| } | |||||
| table Activation { | |||||
| type: ActivationType = 0; | |||||
| } | |||||
| table Conv2D { | |||||
| format: Format = 0; | |||||
| group: int; | |||||
| channelIn: int; | |||||
| channelOut: int; | |||||
| kernelW: int; | |||||
| kernelH: int; | |||||
| strideW: int; | |||||
| strideH: int; | |||||
| padMode: PadMode; | |||||
| padUp: int; | |||||
| padDown: int; | |||||
| padLeft: int; | |||||
| padRight: int; | |||||
| dilateW: int; | |||||
| dilateH: int; | |||||
| hasBias: bool = false; | |||||
| activationType: ActivationType = 0; | |||||
| } | |||||
| table FusedBatchNorm { | |||||
| epsilon: float = 0.00001; // eg. epsilon=0.001 | |||||
| momentum: float = 0.9; | |||||
| spatial: int = 1; | |||||
| } | |||||
| table CaffeBatchNorm { | |||||
| epsilon: float; // eg. epsilon=0.001 | |||||
| } | |||||
| table Shape { | |||||
| } | |||||
| table Nchw2Nhwc { | |||||
| } | |||||
| table Nhwc2Nchw { | |||||
| } | |||||
| table FakeQuantWithMinMaxVars { | |||||
| narrowRange: bool; | |||||
| numBits: int; | |||||
| } | |||||
| table BiasAdd { | |||||
| axis: [int]; | |||||
| } | |||||
| table Pooling { | |||||
| format: Format = 0; | |||||
| poolingMode: PoolMode; | |||||
| global: bool = false; | |||||
| windowW: int; | |||||
| windowH: int; | |||||
| strideW: int; | |||||
| strideH: int; | |||||
| padMode: PadMode; | |||||
| padUp: int; | |||||
| padDown: int; | |||||
| padLeft: int; | |||||
| padRight: int; | |||||
| roundMode: RoundMode; | |||||
| } | |||||
| table DepthwiseConv2D { | |||||
| format: Format = 0; | |||||
| channelIn: int; | |||||
| channelMultiplier: int; | |||||
| kernelW: int; | |||||
| kernelH: int; | |||||
| strideW: int; | |||||
| strideH: int; | |||||
| padMode: PadMode; | |||||
| padUp: int; | |||||
| padDown: int; | |||||
| padLeft: int; | |||||
| padRight: int; | |||||
| dilateW: int; | |||||
| dilateH: int; | |||||
| hasBias: bool = false; | |||||
| activationType: ActivationType = 0; | |||||
| } | |||||
| table DeDepthwiseConv2D { | |||||
| format: Format = 0; | |||||
| channelIn: int; | |||||
| channelMultiplier: int; | |||||
| kernelW: int; | |||||
| kernelH: int; | |||||
| strideW: int; | |||||
| strideH: int; | |||||
| padMode: PadMode; | |||||
| padUp: int; | |||||
| padDown: int; | |||||
| padLeft: int; | |||||
| padRight: int; | |||||
| dilateW: int; | |||||
| dilateH: int; | |||||
| hasBias: bool = false; | |||||
| activationType: ActivationType = 0; | |||||
| } | |||||
| table Resize { | |||||
| format: Format = 0; | |||||
| method: ResizeMethod; | |||||
| newHeight: long; | |||||
| newWidth: long; | |||||
| alignCorners: bool = false; | |||||
| preserveAspectRatio: bool = false; | |||||
| } | |||||
| table DetectionPostProcess { | |||||
| format: Format = 0; | |||||
| inputSize: int; | |||||
| hScale: float; | |||||
| wScale: float; | |||||
| xScale: float; | |||||
| yScale: float; | |||||
| NmsIouThreshold: float; | |||||
| NmsScoreThreshold: float; | |||||
| MaxDetections: long; | |||||
| DetectionsPreClass: long; | |||||
| MaxClassesPreDetection: long; | |||||
| NumClasses: long; | |||||
| UseRegularNms: bool; | |||||
| } | |||||
| table FullConnection { | |||||
| hasBias: bool; | |||||
| axis: int; | |||||
| } | |||||
| // Mean(input_tensor, axis, keep_dims) | |||||
| table Mean { | |||||
| axis: [int]; | |||||
| keepDims: bool = false; | |||||
| } | |||||
| table DeConv2D { | |||||
| format: Format = 0; | |||||
| group: int; | |||||
| channelIn: int; | |||||
| channelOut: int; | |||||
| kernelW: int; | |||||
| kernelH: int; | |||||
| strideW: int; | |||||
| strideH: int; | |||||
| padMode: PadMode; | |||||
| padUp: int; | |||||
| padDown: int; | |||||
| padLeft: int; | |||||
| padRight: int; | |||||
| dilateW: int; | |||||
| dilateH: int; | |||||
| hasBias: bool = false; | |||||
| activationType: ActivationType = 0; | |||||
| } | |||||
| table Scale { | |||||
| format: Format = 0; | |||||
| } | |||||
| table Eltwise { | |||||
| mode: EltwiseMode; | |||||
| } | |||||
| table Add { | |||||
| } | |||||
| table Sub { | |||||
| } | |||||
| table Mul { | |||||
| } | |||||
| table RealDiv { | |||||
| } | |||||
| table Rsqrt { | |||||
| } | |||||
| table Equal { | |||||
| } | |||||
| table Less { | |||||
| } | |||||
| table Greater { | |||||
| } | |||||
| table Min { | |||||
| } | |||||
| table Slice { | |||||
| format: Format = 0; | |||||
| begin: [int]; | |||||
| size: [int]; | |||||
| } | |||||
| table Floor { | |||||
| } | |||||
| table Abs { | |||||
| } | |||||
| table Neg { | |||||
| } | |||||
| table Exp { | |||||
| } | |||||
| table Cos { | |||||
| } | |||||
| table Sin { | |||||
| } | |||||
| table Sqrt { | |||||
| } | |||||
| table Square { | |||||
| } | |||||
| table Ceil { | |||||
| } | |||||
| table Log { | |||||
| } | |||||
| table Tan { | |||||
| } | |||||
| table Atan { | |||||
| } | |||||
| table Asin { | |||||
| } | |||||
| table Reshape { | |||||
| format: Format = 0; | |||||
| shape: [long]; | |||||
| } | |||||
| table Power { | |||||
| power: float; | |||||
| scale: float; | |||||
| shift: float; | |||||
| } | |||||
| table ArgMax { | |||||
| axis: int; | |||||
| outMaxValue: bool; | |||||
| topK: int = 1; | |||||
| keepDims: bool; | |||||
| axisType: int; | |||||
| } | |||||
| table ArgMin { | |||||
| axis: int; | |||||
| outMaxValue: bool; | |||||
| topK: int = 1; | |||||
| keepDims: bool; | |||||
| axisType: int; | |||||
| } | |||||
| table NetOutput { | |||||
| } | |||||
| table MatMul { | |||||
| transposeA : bool = false; | |||||
| transposeB : bool = false; | |||||
| } | |||||
| table CaffePReLU { | |||||
| channelShared : bool = false; | |||||
| } | |||||
| table LeakyReLU { | |||||
| negativeSlope: float; | |||||
| } | |||||
| table StridedSlice { | |||||
| beginMask: int; | |||||
| endMask: int; | |||||
| ellipsisMask: int; | |||||
| newAxisMask: int; | |||||
| shrinkAxisMask: int; | |||||
| begin: [int]; | |||||
| end: [int]; | |||||
| stride: [int]; | |||||
| isScale: [int]; | |||||
| } | |||||
| table Stack { | |||||
| axis: int; | |||||
| n: int; | |||||
| isScale: [int]; | |||||
| } | |||||
| table Range { | |||||
| dType: DataType; | |||||
| start: int; | |||||
| limit: int; | |||||
| delta: int; | |||||
| } | |||||
| table ExpandDims { | |||||
| dim: int; | |||||
| } | |||||
| table Tile { | |||||
| multiples: [int]; | |||||
| } | |||||
| table Cast { | |||||
| srcT: int; | |||||
| dstT: int; | |||||
| } | |||||
| table QuantDTypeCast { | |||||
| srcT: DataType; | |||||
| dstT: DataType; | |||||
| } | |||||
| table Split { | |||||
| numberSplit: int; | |||||
| sizeSplits: [int]; | |||||
| splitDim: int; | |||||
| } | |||||
| table CaffeCrop { | |||||
| axis : long; | |||||
| offsets : [long]; | |||||
| } | |||||
| table Permute { | |||||
| order: [long]; | |||||
| } | |||||
| table Clip { | |||||
| max: float; | |||||
| min: float; | |||||
| } | |||||
| table Constant { | |||||
| } | |||||
| table Elu { | |||||
| alpha: float = 1.0; | |||||
| } | |||||
| table Broadcast { | |||||
| } | |||||
| table Lrn { | |||||
| alpha: float = 0.0001; | |||||
| beta: float = 0.75; | |||||
| bias: float = 1.0; | |||||
| size: int; | |||||
| } | |||||
| enum ReduceMode : byte { | |||||
| ReduceMean = 0, | |||||
| ReduceMax = 1, | |||||
| ReduceMin = 2, | |||||
| ReduceProd = 3, | |||||
| ReduceSum = 4, | |||||
| ReduceSumSquare = 5 | |||||
| } | |||||
| table Reduce { | |||||
| axes: [int]; | |||||
| keepDims: int; | |||||
| mode: ReduceMode; | |||||
| } | |||||
| table Prelu { | |||||
| slope: [float]; | |||||
| } | |||||
| table Transpose { | |||||
| perm: [int]; | |||||
| conjugate: bool = false; | |||||
| } | |||||
| table Squeeze { | |||||
| axis: [int]; | |||||
| } | |||||
| table Unsqueeze { | |||||
| axis: [int]; | |||||
| } | |||||
| table Upsample { | |||||
| mode: string; | |||||
| scales: [float]; | |||||
| } | |||||
| table Dropout { | |||||
| ratio : float = 0.5; | |||||
| } | |||||
| table LocalResponseNormalization { | |||||
| depth_radius: int; | |||||
| bias: float; | |||||
| alpha: float; | |||||
| beta: float; | |||||
| } | |||||
| table ZerosLike { | |||||
| } | |||||
| table TopK { | |||||
| k : int; | |||||
| sorted : bool = true; | |||||
| } | |||||
| table SpaceToDepth { | |||||
| blockSize : int; | |||||
| format: Format = 0; | |||||
| } | |||||
| table SpaceToBatch { | |||||
| blockShape : [int]; | |||||
| paddings : [int]; | |||||
| } | |||||
| table SparseToDense { | |||||
| validateIndices: bool; | |||||
| } | |||||
| table ReverseSequence { | |||||
| seqAxis: int; | |||||
| batchAxis: int; | |||||
| } | |||||
| table Rank { | |||||
| } | |||||
| table Gather { | |||||
| axis: int; | |||||
| batchDims: int; | |||||
| } | |||||
| table GatherNd { | |||||
| batchDims: int; | |||||
| } | |||||
| table Fill { | |||||
| dims: [int]; | |||||
| } | |||||
| table DepthToSpace { | |||||
| blockSize: int; | |||||
| format: Format = 0; | |||||
| } | |||||
| table BatchToSpace { | |||||
| blockShape: [int]; | |||||
| crops: [int]; | |||||
| } | |||||
| table AddN { | |||||
| N: int; | |||||
| } | |||||
| table EmbeddingLookup { | |||||
| ids: [int]; | |||||
| maxNorm: float; | |||||
| } | |||||
| table EmbeddingLookupSparse { | |||||
| spIds: [int]; | |||||
| spWeights: [float]; | |||||
| //combiner: Combiner=0; | |||||
| maxNortm: float; | |||||
| } | |||||
| table FloorDiv { | |||||
| } | |||||
| table FloorMod { | |||||
| } | |||||
| table L2Norm { | |||||
| axis: [int]; | |||||
| epsilon: float; | |||||
| } | |||||
| table LogicalAnd { | |||||
| } | |||||
| table LogicalOr { | |||||
| } | |||||
| table LogicalXor { | |||||
| } | |||||
| table LogicalNot { | |||||
| } | |||||
| table MatrixDiag { | |||||
| k: int; | |||||
| numRows: int; | |||||
| numCols: int; | |||||
| paddingValue: float; | |||||
| } | |||||
| table Select { | |||||
| } | |||||
| table TfReduce { | |||||
| type: ReduceType = 7; | |||||
| } | |||||
| table Reverse { | |||||
| axis: [int]; | |||||
| } | |||||
| table Round { | |||||
| } | |||||
| table Scatter { | |||||
| } | |||||
| table Unique { | |||||
| } | |||||
| table Unstack { | |||||
| num: int; | |||||
| axis: int; | |||||
| } | |||||
| table OnnxInt8Quantize { | |||||
| } | |||||
| table OnnxInt8Dequantize { | |||||
| } | |||||
| table FakeQuantWithMinMax { | |||||
| } | |||||
| table FakeQuantWithMinMaxPerChannel { | |||||
| } | |||||
| table BatchNormFold { | |||||
| } | |||||
| table MulFold { | |||||
| } | |||||
| table AddFold { | |||||
| } | |||||
| table SquaredDifference { | |||||
| } | |||||
| @@ -24,7 +24,6 @@ | |||||
| #include "common/utils.h" | #include "common/utils.h" | ||||
| #include "backend/session/anf_runtime_algorithm.h" | #include "backend/session/anf_runtime_algorithm.h" | ||||
| #include "runtime/device/kernel_adjust.h" | #include "runtime/device/kernel_adjust.h" | ||||
| #include "predict/generator/utils/ir_model_util.h" | |||||
| #include "backend/optimizer/common/helper.h" | #include "backend/optimizer/common/helper.h" | ||||
| #include "utils/utils.h" | #include "utils/utils.h" | ||||
| @@ -53,13 +52,6 @@ void AscendStreamAssign::AssignStream(const NotNull<KernelGraphPtr> &graph_ptr) | |||||
| GetStreamRelations(); | GetStreamRelations(); | ||||
| PrintStreamGroups(); | PrintStreamGroups(); | ||||
| FindEventRelations(graph_ptr); | FindEventRelations(graph_ptr); | ||||
| // Get info for D Model | |||||
| AscendResourceMng &resource_manager = AscendResourceMng::GetInstance(); | |||||
| generator::IRModelUtil::GetInstance().set_event_num(resource_manager.get_cur_event_num()); | |||||
| generator::IRModelUtil::GetInstance().set_stream_num(resource_manager.get_cur_stream_num()); | |||||
| // Init to 1,temporarily | |||||
| generator::IRModelUtil::GetInstance().set_batch_num(1); | |||||
| } | } | ||||
| } | } | ||||
| @@ -24,7 +24,6 @@ | |||||
| #include "runtime/device/device_address.h" | #include "runtime/device/device_address.h" | ||||
| #include "ir/tensor.h" | #include "ir/tensor.h" | ||||
| #include "utils/convert_utils.h" | #include "utils/convert_utils.h" | ||||
| #include "predict/generator/utils/ir_model_util.h" | |||||
| #ifdef ENABLE_DUMP_E2E | #ifdef ENABLE_DUMP_E2E | ||||
| #include "debug/e2e_dump.h" | #include "debug/e2e_dump.h" | ||||
| #endif | #endif | ||||
| @@ -50,8 +50,6 @@ std::map<std::string, MsBackendPolicy> MsContext::policy_map_ = {{"ge", kMsBacke | |||||
| MsContext::MsContext(const std::string &policy, const std::string &target) { | MsContext::MsContext(const std::string &policy, const std::string &target) { | ||||
| save_graphs_flag_ = false; | save_graphs_flag_ = false; | ||||
| save_graphs_path_ = "."; | save_graphs_path_ = "."; | ||||
| save_ms_model_flag_ = false; | |||||
| save_ms_model_path_ = "./model.ms"; | |||||
| enable_dump_ = false; | enable_dump_ = false; | ||||
| save_dump_path_ = "."; | save_dump_path_ = "."; | ||||
| tsd_ref_ = 0; | tsd_ref_ = 0; | ||||
| @@ -102,12 +102,6 @@ class MsContext { | |||||
| void set_enable_mem_reuse(bool enable_mem_reuse) { enable_mem_reuse_ = enable_mem_reuse; } | void set_enable_mem_reuse(bool enable_mem_reuse) { enable_mem_reuse_ = enable_mem_reuse; } | ||||
| bool enable_mem_reuse() const { return enable_mem_reuse_; } | bool enable_mem_reuse() const { return enable_mem_reuse_; } | ||||
| bool save_ms_model_flag() const { return save_ms_model_flag_; } | |||||
| void set_save_ms_model_flag(bool save_ms_model_flag) { save_ms_model_flag_ = save_ms_model_flag; } | |||||
| std::string save_ms_model_path() const { return save_ms_model_path_; } | |||||
| void set_save_ms_model_path(const std::string &save_ms_model_path) { save_ms_model_path_ = save_ms_model_path; } | |||||
| void set_enable_gpu_summary(bool enable_gpu_summary) { enable_gpu_summary_ = enable_gpu_summary; } | void set_enable_gpu_summary(bool enable_gpu_summary) { enable_gpu_summary_ = enable_gpu_summary; } | ||||
| bool enable_gpu_summary() const { return enable_gpu_summary_; } | bool enable_gpu_summary() const { return enable_gpu_summary_; } | ||||
| @@ -190,8 +184,6 @@ class MsContext { | |||||
| bool enable_reduce_precision_; | bool enable_reduce_precision_; | ||||
| bool enable_loop_sink_; | bool enable_loop_sink_; | ||||
| bool enable_mem_reuse_; | bool enable_mem_reuse_; | ||||
| std::string save_ms_model_path_; | |||||
| bool save_ms_model_flag_; | |||||
| bool enable_gpu_summary_; | bool enable_gpu_summary_; | ||||
| bool enable_dump_; | bool enable_dump_; | ||||
| std::string save_dump_path_; | std::string save_dump_path_; | ||||
| @@ -234,22 +234,6 @@ class _Context: | |||||
| if not success: | if not success: | ||||
| raise RuntimeError("Device id set failed!!!") | raise RuntimeError("Device id set failed!!!") | ||||
| @property | |||||
| def save_ms_model(self): | |||||
| return self._context_handle.get_save_ms_model_flag() | |||||
| @save_ms_model.setter | |||||
| def save_ms_model(self, save_ms_model_flag): | |||||
| self._context_handle.set_save_ms_model_flag(save_ms_model_flag) | |||||
| @property | |||||
| def save_ms_model_path(self): | |||||
| return self._context_handle.get_save_ms_model_path() | |||||
| @save_ms_model_path.setter | |||||
| def save_ms_model_path(self, save_ms_model_path): | |||||
| self._context_handle.set_save_ms_model_path(save_ms_model_path) | |||||
| @property | @property | ||||
| def enable_auto_mixed_precision(self): | def enable_auto_mixed_precision(self): | ||||
| return self._context_handle.get_auto_mixed_precision_flag() | return self._context_handle.get_auto_mixed_precision_flag() | ||||
| @@ -541,7 +525,7 @@ def reset_auto_parallel_context(): | |||||
| @args_type_check(mode=int, precompile_only=bool, device_target=str, device_id=int, save_graphs=bool, | @args_type_check(mode=int, precompile_only=bool, device_target=str, device_id=int, save_graphs=bool, | ||||
| save_graphs_path=str, save_ms_model=bool, save_ms_model_path=str, enable_dump=bool, | |||||
| save_graphs_path=str, enable_dump=bool, | |||||
| save_dump_path=str, enable_reduce_precision=bool, variable_memory_max_size=str, | save_dump_path=str, enable_reduce_precision=bool, variable_memory_max_size=str, | ||||
| enable_profiling=bool, profiling_options=str, enable_auto_mixed_precision=bool, | enable_profiling=bool, profiling_options=str, enable_auto_mixed_precision=bool, | ||||
| enable_graph_kernel=bool, check_bprop=bool, max_device_memory=str, print_file_path=str, | enable_graph_kernel=bool, check_bprop=bool, max_device_memory=str, print_file_path=str, | ||||
| @@ -569,8 +553,6 @@ def set_context(**kwargs): | |||||
| device_id (int): Id of target device, the value must be in [0, device_num_per_host-1], | device_id (int): Id of target device, the value must be in [0, device_num_per_host-1], | ||||
| while device_num_per_host should be no more than 4096. Default: 0. | while device_num_per_host should be no more than 4096. Default: 0. | ||||
| save_graphs (bool): Whether to save graphs. Default: False. | save_graphs (bool): Whether to save graphs. Default: False. | ||||
| save_ms_model (bool): Whether to save lite model converted by graph. Default: False. | |||||
| save_ms_model_path (str): Path to save converted lite model. Default: "." | |||||
| save_graphs_path (str): Path to save graphs. Default: "." | save_graphs_path (str): Path to save graphs. Default: "." | ||||
| enable_auto_mixed_precision (bool): Whether to enable auto mixed precision. Default: True. | enable_auto_mixed_precision (bool): Whether to enable auto mixed precision. Default: True. | ||||
| enable_graph_kernel (bool): Whether to enable composition of basic primitives. These primitives would be | enable_graph_kernel (bool): Whether to enable composition of basic primitives. These primitives would be | ||||
| @@ -615,7 +597,6 @@ def set_context(**kwargs): | |||||
| >>> context.set_context(device_id=0) | >>> context.set_context(device_id=0) | ||||
| >>> context.set_context(save_graphs=True, save_graphs_path="./model.ms") | >>> context.set_context(save_graphs=True, save_graphs_path="./model.ms") | ||||
| >>> context.set_context(enable_reduce_precision=True) | >>> context.set_context(enable_reduce_precision=True) | ||||
| >>> context.set_context(save_ms_model=True, save_ms_model_path=".") | |||||
| >>> context.set_context(enable_dump=True, save_dump_path=".") | >>> context.set_context(enable_dump=True, save_dump_path=".") | ||||
| >>> context.set_context(reserve_class_name_in_scope=True) | >>> context.set_context(reserve_class_name_in_scope=True) | ||||
| >>> context.set_context(variable_memory_max_size="6GB") | >>> context.set_context(variable_memory_max_size="6GB") | ||||
| @@ -20,7 +20,6 @@ from threading import Thread, Lock | |||||
| import numpy as np | import numpy as np | ||||
| import mindspore.nn as nn | import mindspore.nn as nn | ||||
| import mindspore.context as context | |||||
| from mindspore import log as logger | from mindspore import log as logger | ||||
| from mindspore.train.checkpoint_pb2 import Checkpoint | from mindspore.train.checkpoint_pb2 import Checkpoint | ||||
| from mindspore.train.print_pb2 import Print | from mindspore.train.print_pb2 import Print | ||||
| @@ -457,18 +456,17 @@ def export(net, *inputs, file_name, file_format='GEIR'): | |||||
| net (Cell): MindSpore network. | net (Cell): MindSpore network. | ||||
| inputs (Tensor): Inputs of the `net`. | inputs (Tensor): Inputs of the `net`. | ||||
| file_name (str): File name of model to export. | file_name (str): File name of model to export. | ||||
| file_format (str): MindSpore currently supports 'GEIR', 'ONNX', 'LITE' and 'BINARY' format for exported model. | |||||
| file_format (str): MindSpore currently supports 'GEIR', 'ONNX' and 'BINARY' format for exported model. | |||||
| - GEIR: Graph Engine Intermediate Representation. An intermediate representation format of | - GEIR: Graph Engine Intermediate Representation. An intermediate representation format of | ||||
| Ascend model. | Ascend model. | ||||
| - ONNX: Open Neural Network eXchange. An open format built to represent machine learning models. | - ONNX: Open Neural Network eXchange. An open format built to represent machine learning models. | ||||
| - LITE: Huawei model format for mobile. A lite model only for the MindSpore Lite | |||||
| - BINARY: Binary format for model. An intermediate representation format for models. | - BINARY: Binary format for model. An intermediate representation format for models. | ||||
| """ | """ | ||||
| logger.info("exporting model file:%s format:%s.", file_name, file_format) | logger.info("exporting model file:%s format:%s.", file_name, file_format) | ||||
| check_input_data(*inputs, data_class=Tensor) | check_input_data(*inputs, data_class=Tensor) | ||||
| supported_formats = ['GEIR', 'ONNX', 'LITE', 'BINARY'] | |||||
| supported_formats = ['GEIR', 'ONNX', 'BINARY'] | |||||
| if file_format not in supported_formats: | if file_format not in supported_formats: | ||||
| raise ValueError(f'Illegal file format {file_format}, it must be one of {supported_formats}') | raise ValueError(f'Illegal file format {file_format}, it must be one of {supported_formats}') | ||||
| # switch network mode to infer when it is training | # switch network mode to infer when it is training | ||||
| @@ -497,9 +495,6 @@ def export(net, *inputs, file_name, file_format='GEIR'): | |||||
| with open(file_name, 'wb') as f: | with open(file_name, 'wb') as f: | ||||
| os.chmod(file_name, stat.S_IWUSR | stat.S_IRUSR) | os.chmod(file_name, stat.S_IWUSR | stat.S_IRUSR) | ||||
| f.write(onnx_stream) | f.write(onnx_stream) | ||||
| elif file_format == 'LITE': # file_format is 'LITE' | |||||
| context.set_context(save_ms_model=True, save_ms_model_path=file_name) | |||||
| net(*inputs) | |||||
| # restore network training mode | # restore network training mode | ||||
| if is_training: | if is_training: | ||||
| net.set_train(mode=True) | net.set_train(mode=True) | ||||
| @@ -1,14 +0,0 @@ | |||||
| # git ignore file for predict | |||||
| #flatbuf generated file | |||||
| schema/*_generated.h | |||||
| schema/inner/*_generated.h | |||||
| module/tvm_module/lite/include/*_generated.h | |||||
| #tvm fbs files | |||||
| module/tvm_module/lite/tune/convert/*.fbs | |||||
| #doTest dir | |||||
| test/doTest/ | |||||
| @@ -1,79 +0,0 @@ | |||||
| cmake_minimum_required(VERSION 3.12.1) | |||||
| project (mindspore-predict) | |||||
| set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g") | |||||
| set(CMAKE_BUILD_TYPE "Release") | |||||
| set(CMAKE_CXX_STANDARD 11) | |||||
| set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fvisibility=hidden") | |||||
| set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility=hidden") | |||||
| set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -s") | |||||
| option(ENABLE_ASAN "Enable Google Sanitizer to find memory bugs" OFF) | |||||
| option(ENABLE_PREDICT_ARM64 "predict arm64" OFF) | |||||
| option(ENABLE_PREDICT_ARM32 "predict arm32" OFF) | |||||
| set(PREDICT_DIR ${CMAKE_CURRENT_SOURCE_DIR}) | |||||
| set(PREDICT_BUILD_DIR ${CMAKE_CURRENT_SOURCE_DIR}/build) | |||||
| set(3RD_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../third_party) | |||||
| set(DOTEST_DIR ${PREDICT_BUILD_DIR}/test/doTest) | |||||
| include_directories(${3RD_DIR}) | |||||
| include_directories(${3RD_DIR}/flatbuffers/include/) | |||||
| include_directories(${3RD_DIR}/protobuf/build/include/) | |||||
| include_directories(${3RD_DIR}/googletest/googletest/include/) | |||||
| include_directories(${3RD_DIR}/googletest/googlemock/include/) | |||||
| include_directories(${CMAKE_CURRENT_SOURCE_DIR}/module/tvm_kernel/lite/include/) | |||||
| include_directories(${PREDICT_DIR}/module/tvm_kernel/incubator-tvm/3rdparty/dlpack/include) | |||||
| include_directories(common) | |||||
| if(ENABLE_PREDICT_ARM64 OR ENABLE_PREDICT_ARM32) | |||||
| message("*********************predict compile arm*********************") | |||||
| set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DMS_USE_ARM=1") | |||||
| set(ANDROID_NDK $ENV{ANDROID_NDK}) | |||||
| if(ANDROID_NDK) | |||||
| add_subdirectory(${3RD_DIR}/googletest ${CMAKE_BINARY_DIR}/googletest) | |||||
| link_directories(${PREDICT_BUILD_DIR}/googletest/googlemock/gtest) | |||||
| add_subdirectory(${3RD_DIR}/securec ${CMAKE_BINARY_DIR}/securec) | |||||
| link_directories(${PREDICT_BUILD_DIR}/securec/src) | |||||
| else() | |||||
| message(FATAL_ERROR "please set ANDROID_NDK in environment variable for example: export ANDROID_NDK=/root/usr/android-ndk-r16b/") | |||||
| endif() | |||||
| include_directories(${ANDROID_SYSROOT}/usr/include/) | |||||
| if(${ANDROID_ABI} STREQUAL "armeabi-v7a") | |||||
| include_directories(${ANDROID_SYSROOT}/usr/include/arm-linux-androideabi) | |||||
| elseif(${ANDROID_ABI} STREQUAL "arm64-v8a") | |||||
| include_directories(${ANDROID_SYSROOT}/usr/include/aarch64-linux-android) | |||||
| else() | |||||
| include_directories(${ANDROID_SYSROOT}/usr/include/arm-linux-androideabi) | |||||
| endif() | |||||
| else() | |||||
| # include libsecurec.a x86 | |||||
| message("*********************predict compile x86*********************") | |||||
| if(EXISTS "${PREDICT_DIR}/../build/mindspore/securec/src/libsecurec.a") | |||||
| link_directories(${PREDICT_DIR}/../build/mindspore/securec/src) | |||||
| else() | |||||
| include(${PREDICT_DIR}/../cmake/dependency_securec.cmake) | |||||
| link_directories(${PREDICT_BUILD_DIR}/securec/src) | |||||
| endif() | |||||
| # include libgtest.so x86 | |||||
| if(EXISTS "${PREDICT_DIR}/../build/googletest/googlemock/gtest/libgtest.so") | |||||
| link_directories(${PREDICT_DIR}/../build/googletest/googlemock/gtest) | |||||
| else() | |||||
| include(${PREDICT_DIR}/../cmake/dependency_gtest.cmake) | |||||
| link_directories(${PREDICT_BUILD_DIR}/googletest/googlemock/gtest) | |||||
| endif() | |||||
| endif() | |||||
| if (CODE_COVERAGE) | |||||
| set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fprofile-arcs -ftest-coverage -O0") | |||||
| endif() | |||||
| add_subdirectory(common) | |||||
| add_subdirectory(src) | |||||
| add_subdirectory(benchmark) | |||||
| add_subdirectory(test) | |||||
| add_subdirectory(module) | |||||
| @@ -1,38 +0,0 @@ | |||||
| cmake_minimum_required(VERSION 3.12) | |||||
| project(benchmark) | |||||
| set(CMAKE_CXX_STANDARD 14) | |||||
| set(CMAKE_BUILD_TYPE "Debug") | |||||
| #include 3rd | |||||
| include_directories(${3RD_DIR}/protobuf/build/include) | |||||
| include_directories(${3RD_DIR}/securec/include) | |||||
| include_directories(${3RD_DIR}/flatbuffers/include) | |||||
| include_directories(${3RD_DIR}/googletest/googletest/include) | |||||
| include_directories(${3RD_DIR}/googletest/googlemock/include) | |||||
| include_directories(${PREDICT_DIR}/module/tvm_kernel/incubator-tvm/3rdparty/dlpack/include) | |||||
| include_directories(${3RD_DIR}/flatbuffers/include) | |||||
| include_directories(${3RD_DIR}/securec/include) | |||||
| #include ms | |||||
| include_directories(.) | |||||
| include_directories(${PREDICT_DIR}) | |||||
| set(COMMON_SRC ${PREDICT_DIR}/common/flag_parser.cc | |||||
| ${PREDICT_DIR}/common/file_utils.cc | |||||
| ${PREDICT_DIR}/common/func_utils.cc | |||||
| ${PREDICT_DIR}/common/mslog.cc | |||||
| ${PREDICT_DIR}/common/utils.cc) | |||||
| link_directories(${CMAKE_CURRENT_SOURCE_DIR}/../output/lib/) | |||||
| add_executable(benchmark main.cc benchmark.cc ${COMMON_SRC}) | |||||
| target_link_libraries(benchmark mspredict libsecurec.a) | |||||
| add_dependencies(benchmark tvm_kernel) | |||||
| add_dependencies(benchmark securec) | |||||
| add_custom_command(TARGET benchmark POST_BUILD | |||||
| COMMAND mkdir -pv ${DOTEST_DIR} | |||||
| COMMAND cp ${PREDICT_BUILD_DIR}/benchmark/benchmark ${DOTEST_DIR}) | |||||
| @@ -1,451 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "benchmark/benchmark.h" | |||||
| #include <random> | |||||
| #include <limits> | |||||
| #include <algorithm> | |||||
| #include <utility> | |||||
| #include <memory> | |||||
| #include "include/session.h" | |||||
| namespace mindspore { | |||||
| namespace predict { | |||||
| STATUS Benchmark::GenerateRandomData(size_t size, void *data) { | |||||
| MS_ASSERT(data != nullptr); | |||||
| char *castedData = static_cast<char *>(data); | |||||
| for (size_t i = 0; i < size; i++) { | |||||
| castedData[i] = static_cast<char>(i); | |||||
| } | |||||
| return RET_OK; | |||||
| } | |||||
| STATUS Benchmark::GenerateInputData() { | |||||
| for (Tensor *tensor : msInputs) { | |||||
| MS_ASSERT(tensor != nullptr); | |||||
| auto ret = tensor->MallocData(); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOGE("MallocData for inTensor failed %d", ret); | |||||
| return ret; | |||||
| } | |||||
| MS_ASSERT(tensor->GetData() != nullptr); | |||||
| auto tensorByteSize = tensor->GetDataSize(); | |||||
| auto status = GenerateRandomData(tensorByteSize, tensor->GetData()); | |||||
| if (status != RET_OK) { | |||||
| MS_LOGE("GenerateRandomData for inTensor failed %d", status); | |||||
| return status; | |||||
| } | |||||
| } | |||||
| return RET_OK; | |||||
| } | |||||
| STATUS Benchmark::LoadInput() { | |||||
| size_t size = 0; | |||||
| char *graphBuf = ReadFile(_flags->modelPath.c_str(), &size); | |||||
| if (graphBuf == nullptr) { | |||||
| MS_LOGE("Load graph failed, path %s", _flags->modelPath.c_str()); | |||||
| return RET_ERROR; | |||||
| } | |||||
| this->msInputs = session->GetInput(); | |||||
| if (_flags->inDataPath.empty()) { | |||||
| auto status = GenerateInputData(); | |||||
| if (status != RET_OK) { | |||||
| delete graphBuf; | |||||
| MS_LOGE("Generate input data error %d", status); | |||||
| return status; | |||||
| } | |||||
| } else { | |||||
| auto status = ReadInputFile(); | |||||
| if (status != RET_OK) { | |||||
| delete graphBuf; | |||||
| MS_LOGE("ReadInputFile error, %d", status); | |||||
| return status; | |||||
| } | |||||
| } | |||||
| delete graphBuf; | |||||
| return RET_OK; | |||||
| } | |||||
| STATUS Benchmark::ReadInputFile() { | |||||
| MS_ASSERT(msInputs.size() <= 1); | |||||
| if (msInputs.empty()) { | |||||
| return RET_OK; | |||||
| } | |||||
| Tensor *inTensor = msInputs.at(0); | |||||
| MS_ASSERT(inTensor != nullptr); | |||||
| size_t size; | |||||
| char *binBuf = ReadFile(_flags->inDataPath.c_str(), &size); | |||||
| if (binBuf == nullptr) { | |||||
| return RET_ERROR; | |||||
| } | |||||
| auto tensorDataSize = inTensor->GetDataSize(); | |||||
| if (size != tensorDataSize) { | |||||
| MS_LOGE("Input binary file size error, required: %zu, in fact: %zu", tensorDataSize, size); | |||||
| delete binBuf; | |||||
| return RET_ERROR; | |||||
| } | |||||
| inTensor->SetData(binBuf); | |||||
| binBuf = nullptr; | |||||
| return RET_OK; | |||||
| } | |||||
| // calibData is FP32 | |||||
| STATUS Benchmark::ReadCalibData() { | |||||
| const char *calibDataPath = _flags->calibDataPath.c_str(); | |||||
| // read calib data | |||||
| std::ifstream inFile(calibDataPath); | |||||
| if (!inFile.good()) { | |||||
| MS_LOGE("file: %s is not exist", calibDataPath); | |||||
| return RET_PARAM_INVALID; | |||||
| } | |||||
| if (!inFile.is_open()) { | |||||
| MS_LOGE("file: %s open failed", calibDataPath); | |||||
| inFile.close(); | |||||
| return RET_PARAM_INVALID; | |||||
| } | |||||
| std::string line; | |||||
| MS_LOGI("Start reading calibData file"); | |||||
| std::string tensorName; | |||||
| while (!inFile.eof()) { | |||||
| getline(inFile, line); | |||||
| std::stringstream stringLine1(line); | |||||
| size_t dim = 0; | |||||
| stringLine1 >> tensorName >> dim; | |||||
| std::vector<size_t> dims; | |||||
| size_t shapeSize = 1; | |||||
| for (size_t i = 0; i < dim; i++) { | |||||
| size_t tmpDim; | |||||
| stringLine1 >> tmpDim; | |||||
| dims.push_back(tmpDim); | |||||
| shapeSize *= tmpDim; | |||||
| } | |||||
| getline(inFile, line); | |||||
| std::stringstream stringLine2(line); | |||||
| std::vector<float> tensorData; | |||||
| for (size_t i = 0; i < shapeSize; i++) { | |||||
| float tmpData; | |||||
| stringLine2 >> tmpData; | |||||
| tensorData.push_back(tmpData); | |||||
| } | |||||
| std::unique_ptr<CheckTensor> checkTensor(new CheckTensor(dims, tensorData)); | |||||
| this->calibData.insert(std::make_pair(tensorName, checkTensor.release())); | |||||
| } | |||||
| inFile.close(); | |||||
| MS_LOGI("Finish reading calibData file"); | |||||
| return RET_OK; | |||||
| } | |||||
| // tensorData need to be converter first | |||||
| float Benchmark::CompareData(const std::string &nodeName, std::vector<int64_t> msShape, float *msTensorData) { | |||||
| auto iter = this->calibData.find(nodeName); | |||||
| if (iter != this->calibData.end()) { | |||||
| std::vector<size_t> castedMSShape; | |||||
| size_t shapeSize = 1; | |||||
| for (int64_t dim : msShape) { | |||||
| castedMSShape.push_back(size_t(dim)); | |||||
| shapeSize *= dim; | |||||
| } | |||||
| CheckTensor *calibTensor = iter->second; | |||||
| if (calibTensor->shape != castedMSShape) { | |||||
| std::ostringstream oss; | |||||
| oss << "Shape of mslite output("; | |||||
| for (auto dim : castedMSShape) { | |||||
| oss << dim << ","; | |||||
| } | |||||
| oss << ") and shape source model output("; | |||||
| for (auto dim : calibTensor->shape) { | |||||
| oss << dim << ","; | |||||
| } | |||||
| oss << ") are different"; | |||||
| MS_LOGE("%s", oss.str().c_str()); | |||||
| return -1; | |||||
| } | |||||
| float meanBias = 0; | |||||
| std::ostringstream outputData; | |||||
| outputData << "Data of node " << nodeName << " : "; | |||||
| for (size_t j = 0; j < shapeSize; j++) { | |||||
| if (j < printNum) { | |||||
| outputData << msTensorData[j] << " "; | |||||
| } | |||||
| if (fabs(calibTensor->data.at(j)) > minFloatThr) { | |||||
| double bias = fabs(msTensorData[j] - calibTensor->data.at(j)) / fabs(calibTensor->data.at(j)); | |||||
| meanBias += bias; | |||||
| } | |||||
| } | |||||
| meanBias /= shapeSize; | |||||
| MS_LOGI("%s", outputData.str().c_str()); | |||||
| if (meanBias <= minFloatThr) { | |||||
| MS_LOGI("Mean bias of node %s : 0%%", nodeName.c_str()); | |||||
| } else { | |||||
| MS_LOGI("Mean bias of node %s : %f%%", nodeName.c_str(), meanBias * percentage); | |||||
| } | |||||
| return meanBias; | |||||
| } else { | |||||
| MS_LOGI("%s is not in Source Model output", nodeName.c_str()); | |||||
| return -1; | |||||
| } | |||||
| } | |||||
| STATUS Benchmark::CompareOutput(const std::map<NODE_ID, std::vector<Tensor *>> &msOutputs) { | |||||
| float totalBias = 0; | |||||
| int totalSize = 0; | |||||
| bool hasError = false; | |||||
| for (const auto &msOutput : msOutputs) { | |||||
| std::string nodeName = msOutput.first; | |||||
| auto tensors = msOutput.second; | |||||
| for (auto tensor : tensors) { | |||||
| MS_ASSERT(tensor->GetData() != nullptr); | |||||
| float bias = CompareData(nodeName, tensor->GetDims(), static_cast<float *>(tensor->GetData())); | |||||
| if (bias >= 0) { | |||||
| totalBias += bias; | |||||
| totalSize++; | |||||
| } else { | |||||
| hasError = true; | |||||
| break; | |||||
| } | |||||
| } | |||||
| } | |||||
| if (!hasError) { | |||||
| float meanBias; | |||||
| if (totalSize != 0) { | |||||
| meanBias = totalBias / totalSize * percentage; | |||||
| } else { | |||||
| meanBias = 0; | |||||
| } | |||||
| MS_LOGI("Mean bias all node : %f%%", meanBias); | |||||
| if (meanBias > 1) { | |||||
| MS_LOGE("Mean bias of all nodes is too big: %f%%", meanBias); | |||||
| return RET_ERROR; | |||||
| } else { | |||||
| return RET_OK; | |||||
| } | |||||
| } else { | |||||
| MS_LOGE("Error in CompareData"); | |||||
| return RET_ERROR; | |||||
| } | |||||
| } | |||||
| STATUS Benchmark::MarkPerformance() { | |||||
| MS_LOGI("Running warm up loops..."); | |||||
| for (int i = 0; i < _flags->warmUpLoopCount; i++) { | |||||
| auto status = session->Run(msInputs); | |||||
| if (status != RET_OK) { | |||||
| MS_LOGE("Inference error %d", status); | |||||
| return status; | |||||
| } | |||||
| } | |||||
| MS_LOGI("Running benchmark loops..."); | |||||
| uint64_t timeMin = maxTimeThr; | |||||
| uint64_t timeMax = 0; | |||||
| uint64_t timeAvg = 0; | |||||
| for (int i = 0; i < _flags->loopCount; i++) { | |||||
| uint64_t start = GetTimeUs(); | |||||
| auto status = session->Run(msInputs); | |||||
| if (status != RET_OK) { | |||||
| MS_LOGE("Inference error %d", status); | |||||
| return status; | |||||
| } | |||||
| uint64_t end = GetTimeUs(); | |||||
| uint64_t time = end - start; | |||||
| timeMin = std::min(timeMin, time); | |||||
| timeMax = std::max(timeMax, time); | |||||
| timeAvg += time; | |||||
| msOutputs = session->GetAllOutput(); | |||||
| if (cleanData) { | |||||
| for (auto &msOutput : msOutputs) { | |||||
| for (auto &outputTensor : msOutput.second) { | |||||
| delete outputTensor; | |||||
| } | |||||
| } | |||||
| msOutputs.clear(); | |||||
| } | |||||
| } | |||||
| if (_flags->loopCount > 0) { | |||||
| timeAvg /= _flags->loopCount; | |||||
| MS_LOGI("MinRunTime = %f ms, MaxRuntime = %f ms, AvgRunTime = %f ms", timeMin / US2MS, timeMax / US2MS, | |||||
| timeAvg / US2MS); | |||||
| } | |||||
| return RET_OK; | |||||
| } | |||||
| STATUS Benchmark::MarkAccuracy() { | |||||
| MS_LOGI("MarkAccuracy"); | |||||
| auto status = session->Run(msInputs); | |||||
| if (status != RET_OK) { | |||||
| MS_LOGE("Inference error %d", status); | |||||
| return status; | |||||
| } | |||||
| msOutputs = session->GetAllOutput(); | |||||
| ReadCalibData(); | |||||
| status = CompareOutput(msOutputs); | |||||
| if (cleanData) { | |||||
| for (auto &msOutput : msOutputs) { | |||||
| for (auto &outputTensor : msOutput.second) { | |||||
| delete outputTensor; | |||||
| } | |||||
| } | |||||
| msOutputs.clear(); | |||||
| } | |||||
| return status; | |||||
| } | |||||
| STATUS Benchmark::CleanData() { | |||||
| if (cleanData) { | |||||
| for (auto &msInput : msInputs) { | |||||
| delete msInput; | |||||
| } | |||||
| msInputs.clear(); | |||||
| for (auto &data : calibData) { | |||||
| data.second->shape.clear(); | |||||
| data.second->data.clear(); | |||||
| delete data.second; | |||||
| } | |||||
| calibData.clear(); | |||||
| } | |||||
| return RET_OK; | |||||
| } | |||||
| STATUS Benchmark::RunBenchmark() { | |||||
| // Load graph | |||||
| std::string comment = modelName; | |||||
| MS_LOGI("start reading model file"); | |||||
| size_t size = 0; | |||||
| char *graphBuf = ReadFile(_flags->modelPath.c_str(), &size); | |||||
| if (graphBuf == nullptr) { | |||||
| MS_LOGE("Load graph failed while running %s", comment.c_str()); | |||||
| return RET_ERROR; | |||||
| } | |||||
| uint64_t startPrepareTime = GetTimeUs(); | |||||
| session = CreateSession(graphBuf, size, ctx); | |||||
| if (session == nullptr) { | |||||
| delete graphBuf; | |||||
| MS_LOGE("new session failed while running %s", comment.c_str()); | |||||
| return RET_ERROR; | |||||
| } | |||||
| uint64_t endPrepareTime = GetTimeUs(); | |||||
| MS_LOGI("PrepareTime = %f ms, ", (endPrepareTime - startPrepareTime) / US2MS); | |||||
| // Load input | |||||
| MS_LOGI("start generate input data"); | |||||
| auto status = LoadInput(); | |||||
| if (status != RET_OK) { | |||||
| delete graphBuf; | |||||
| MS_LOGE("Generate input data error"); | |||||
| return status; | |||||
| } | |||||
| if (!_flags->calibDataPath.empty()) { | |||||
| status = MarkAccuracy(); | |||||
| if (status != RET_OK) { | |||||
| delete graphBuf; | |||||
| MS_LOGE("Run MarkAccuracy error: %d", status); | |||||
| return status; | |||||
| } | |||||
| } else { | |||||
| status = MarkPerformance(); | |||||
| if (status != RET_OK) { | |||||
| delete graphBuf; | |||||
| MS_LOGE("Run MarkPerformance error: %d", status); | |||||
| return status; | |||||
| } | |||||
| } | |||||
| CleanData(); | |||||
| delete graphBuf; | |||||
| return RET_OK; | |||||
| } | |||||
| STATUS Benchmark::Init() { | |||||
| if (this->_flags == nullptr) { | |||||
| return RET_ERROR; | |||||
| } | |||||
| MS_LOGI("ModelPath = %s", this->_flags->modelPath.c_str()); | |||||
| MS_LOGI("InDataPath = %s", this->_flags->inDataPath.c_str()); | |||||
| MS_LOGI("TensorDataType = %s", this->_flags->tensorDataTypeIn.c_str()); | |||||
| MS_LOGI("LoopCount = %d", this->_flags->loopCount); | |||||
| MS_LOGI("WarmUpLoopCount = %d", this->_flags->warmUpLoopCount); | |||||
| MS_LOGI("NumThreads = %d", this->_flags->numThreads); | |||||
| MS_LOGI("calibDataPath = %s", this->_flags->calibDataPath.c_str()); | |||||
| this->_flags->inDataType = this->_flags->inDataTypeIn == "img" ? kImage : kBinary; | |||||
| if (this->_flags->tensorDataTypeIn == "float") { | |||||
| this->_flags->tensorDataType = DataType_DT_FLOAT; | |||||
| } | |||||
| if (_flags->modelPath.empty()) { | |||||
| MS_LOGE("modelPath is required"); | |||||
| return RET_ERROR; | |||||
| } | |||||
| modelName = _flags->modelPath.substr(_flags->modelPath.find_last_of("/") + 1); | |||||
| return RET_OK; | |||||
| } | |||||
| int RunBenchmark(int argc, const char **argv) { | |||||
| BenchmarkFlags flags; | |||||
| Option<std::string> err = flags.ParseFlags(argc, argv); | |||||
| if (err.IsSome()) { | |||||
| std::cerr << err.Get() << std::endl; | |||||
| std::cerr << flags.Usage() << std::endl; | |||||
| return -1; | |||||
| } | |||||
| if (flags.help) { | |||||
| std::cerr << flags.Usage() << std::endl; | |||||
| return 0; | |||||
| } | |||||
| Benchmark mBenchmark(&flags); | |||||
| auto status = mBenchmark.Init(); | |||||
| if (status != RET_OK) { | |||||
| MS_LOGE("Benchmark init Error : %d", status); | |||||
| return 1; | |||||
| } | |||||
| status = mBenchmark.RunBenchmark(); | |||||
| if (status != RET_OK) { | |||||
| MS_LOGE("Run Benchmark Error : %d", status); | |||||
| return 1; | |||||
| } | |||||
| MS_LOGI("end of benchmark"); | |||||
| return 0; | |||||
| } | |||||
| } // namespace predict | |||||
| } // namespace mindspore | |||||
| @@ -1,142 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef PREDICT_BENCHMARK_BENCHMARK_H_ | |||||
| #define PREDICT_BENCHMARK_BENCHMARK_H_ | |||||
| #include <getopt.h> | |||||
| #include <signal.h> | |||||
| #include <fstream> | |||||
| #include <iostream> | |||||
| #include <map> | |||||
| #include <string> | |||||
| #include <vector> | |||||
| #include <memory> | |||||
| #include <unordered_map> | |||||
| #include "common/flag_parser.h" | |||||
| #include "common/file_utils.h" | |||||
| #include "common/func_utils.h" | |||||
| #include "common/mslog.h" | |||||
| #include "common/utils.h" | |||||
| #include "include/errorcode.h" | |||||
| #include "include/session.h" | |||||
| #include "include/tensor.h" | |||||
| #include "schema/inner/ms_generated.h" | |||||
| #include "src/graph.h" | |||||
| #include "src/graph_execution.h" | |||||
| #include "src/op.h" | |||||
| namespace mindspore { | |||||
| namespace predict { | |||||
// How a raw input file is interpreted when feeding the benchmark.
enum InDataType { kImage = 0, kBinary = 1 };

// Golden tensor loaded from the calibration file: a shape plus its
// flattened FP32 payload.
struct CheckTensor {
  CheckTensor(const std::vector<size_t> &shape, const std::vector<float> &data)
      : shape(shape), data(data) {}

  std::vector<size_t> shape;
  std::vector<float> data;
};
| class BenchmarkFlags : public virtual FlagParser { | |||||
| public: | |||||
| BenchmarkFlags() { | |||||
| // common | |||||
| AddFlag(&BenchmarkFlags::modelPath, "modelPath", "Input model path", ""); | |||||
| AddFlag(&BenchmarkFlags::tensorDataTypeIn, "tensorDataType", "Data type of input Tensor. float", "float"); | |||||
| AddFlag(&BenchmarkFlags::inDataPath, "inDataPath", "Input data path, if not set, use random input", ""); | |||||
| // MarkPerformance | |||||
| AddFlag(&BenchmarkFlags::loopCount, "loopCount", "Run loop count", 10); | |||||
| AddFlag(&BenchmarkFlags::numThreads, "numThreads", "Run threads number", 2); | |||||
| AddFlag(&BenchmarkFlags::warmUpLoopCount, "warmUpLoopCount", "Run warm up loop", 3); | |||||
| // MarkAccuracy | |||||
| AddFlag(&BenchmarkFlags::calibDataPath, "calibDataPath", "Calibration data file path", ""); | |||||
| } | |||||
| ~BenchmarkFlags() override = default; | |||||
| public: | |||||
| // common | |||||
| std::string modelPath; | |||||
| std::string inDataPath; | |||||
| InDataType inDataType; | |||||
| std::string inDataTypeIn; | |||||
| DataType tensorDataType; | |||||
| std::string tensorDataTypeIn; | |||||
| // MarkPerformance | |||||
| int loopCount; | |||||
| int numThreads; | |||||
| int warmUpLoopCount; | |||||
| // MarkAccuracy | |||||
| std::string calibDataPath; | |||||
| }; | |||||
| class Benchmark { | |||||
| public: | |||||
| explicit Benchmark(BenchmarkFlags *flags) : _flags(flags) {} | |||||
| virtual ~Benchmark() = default; | |||||
| STATUS Init(); | |||||
| STATUS RunBenchmark(); | |||||
| private: | |||||
| // call GenerateInputData or ReadInputFile to init inputTensors | |||||
| STATUS LoadInput(); | |||||
| // call GenerateRandomData to fill inputTensors | |||||
| STATUS GenerateInputData(); | |||||
| STATUS GenerateRandomData(size_t size, void *data); | |||||
| STATUS ReadInputFile(); | |||||
| STATUS ReadCalibData(); | |||||
| STATUS CleanData(); | |||||
| STATUS CompareOutput(const std::map<NODE_ID, std::vector<Tensor *>> &msOutputs); | |||||
| float CompareData(const std::string &nodeName, std::vector<int64_t> msShape, float *msTensorData); | |||||
| STATUS MarkPerformance(); | |||||
| STATUS MarkAccuracy(); | |||||
| private: | |||||
| BenchmarkFlags *_flags; | |||||
| std::shared_ptr<Session> session; | |||||
| Context ctx; | |||||
| std::vector<Tensor *> msInputs; | |||||
| std::map<std::string, std::vector<Tensor *>> msOutputs; | |||||
| std::unordered_map<std::string, CheckTensor *> calibData; | |||||
| std::string modelName = ""; | |||||
| bool cleanData = true; | |||||
| const float US2MS = 1000.0f; | |||||
| const float percentage = 100.0f; | |||||
| const int printNum = 50; | |||||
| const float minFloatThr = 0.0000001f; | |||||
| const uint64_t maxTimeThr = 1000000; | |||||
| }; | |||||
| int RunBenchmark(int argc, const char **argv); | |||||
| } // namespace predict | |||||
| } // namespace mindspore | |||||
| #endif // PREDICT_BENCHMARK_BENCHMARK_H_ | |||||
| @@ -1,24 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include <random> | |||||
| #include <limits> | |||||
| #include "benchmark/benchmark.h" | |||||
| int main(int argc, const char **argv) { | |||||
| signal(SIGSEGV, mindspore::predict::CoreDumpTraceFunc); | |||||
| return mindspore::predict::RunBenchmark(argc, argv); | |||||
| } | |||||
| @@ -1,17 +0,0 @@ | |||||
# Object library bundling the utility sources shared across the predict
# module (logging, file/flag helpers, storage, misc utils).
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../include)
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../)
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../../third_party)

# Objects end up inside shared libraries, so they must be position independent.
add_compile_options(-fPIC)

add_library(common_mid OBJECT
    ${CMAKE_CURRENT_SOURCE_DIR}/common.h
    ${CMAKE_CURRENT_SOURCE_DIR}/graph_util.cc
    ${CMAKE_CURRENT_SOURCE_DIR}/file_utils.cc
    ${CMAKE_CURRENT_SOURCE_DIR}/flag_parser.cc
    ${CMAKE_CURRENT_SOURCE_DIR}/func_utils.cc
    ${CMAKE_CURRENT_SOURCE_DIR}/module_registry.cc
    ${CMAKE_CURRENT_SOURCE_DIR}/mslog.cc
    ${CMAKE_CURRENT_SOURCE_DIR}/storage.cc
    ${CMAKE_CURRENT_SOURCE_DIR}/utils.cc)
| @@ -1,57 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef PREDICT_COMMON_COMMON_H_ | |||||
| #define PREDICT_COMMON_COMMON_H_ | |||||
| #include <string> | |||||
| #include "schema/inner/ms_generated.h" | |||||
| namespace mindspore { | |||||
| namespace predict { | |||||
| enum NCHW_SHAPE { NCHW_N = 0, NCHW_C = 1, NCHW_H = 2, NCHW_W = 3 }; | |||||
| enum NHWC_SHAPE { NHWC_N = 0, NHWC_H = 1, NHWC_W = 2, NHWC_C = 3 }; | |||||
| enum HWCK_SHAPE { HWCK_H = 0, HWCK_W = 1, HWCK_C = 2, HWCK_K = 3 }; | |||||
| enum KCHW_SHAPE { KCHW_K = 0, KCHW_C = 1, KCHW_H = 2, KCHW_W = 3 }; | |||||
| enum CHW_SHAPE { CHW_C = 0, CHW_H = 1, CHW_W = 2 }; | |||||
| enum HWC_SHAPE { HWC_H = 0, HWC_W = 1, HWC_C = 2 }; | |||||
| static constexpr int TENSOR_MAX_REFCOUNT = 999; | |||||
| static const char *DELIM_COLON = ":"; | |||||
| static const char *DELIM_COMMA = ","; | |||||
| static const char *DELIM_SLASH = "/"; | |||||
| static const char *DELIM_DOUBLE_BACKSLASH = "\\"; | |||||
| // quantization relative | |||||
| static const char QUANTIZED_UINT8[] = "QUANTIZED_UINT8"; | |||||
| static const char QUANTIZED_INT8[] = "QUANTIZED_INT8"; | |||||
| static const char QUANTIZED_INT16[] = "QUANTIZED_INT16"; | |||||
| static const char QUANTIZED_UINT16[] = "QUANTIZED_UINT16"; | |||||
| static const char QUANTIZED_FLOAT16[] = "FLOAT16"; | |||||
| static const char QUANTIZED_FLOAT32[] = "FLOAT32"; | |||||
| static const char QUANTIZATION_TYPE_DYNAMIC[] = "DYNAMIC"; | |||||
| static const char QUANTIZATION_TYPE_STATIC[] = "STATIC"; | |||||
| static const char CALIB_NORM[] = "NORM"; | |||||
| // dims | |||||
| static const int32_t DIM_DEFAULT_SIZE = 4; | |||||
| static const Format DEFAULT_FORMAT = Format_NCHW; | |||||
| } // namespace predict | |||||
| } // namespace mindspore | |||||
| #endif // PREDICT_COMMON_COMMON_H_ | |||||
| @@ -1,79 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "common/file_utils.h" | |||||
| #include <climits> | |||||
| namespace mindspore { | |||||
| namespace predict { | |||||
| char *ReadFile(const char *file, size_t *size) { | |||||
| if (file == nullptr) { | |||||
| MS_LOGE("file is nullptr"); | |||||
| return nullptr; | |||||
| } | |||||
| MS_ASSERT(size != nullptr); | |||||
| std::ifstream ifs(RealPath(file)); | |||||
| if (!ifs.good()) { | |||||
| MS_LOGE("file: %s is not exist", file); | |||||
| return nullptr; | |||||
| } | |||||
| if (!ifs.is_open()) { | |||||
| MS_LOGE("file: %s open failed", file); | |||||
| return nullptr; | |||||
| } | |||||
| ifs.seekg(0, std::ios::end); | |||||
| *size = ifs.tellg(); | |||||
| std::unique_ptr<char> buf(new (std::nothrow) char[*size]); | |||||
| if (buf == nullptr) { | |||||
| MS_LOGE("malloc buf failed, file:%s", file); | |||||
| ifs.close(); | |||||
| return nullptr; | |||||
| } | |||||
| ifs.seekg(0, std::ios::beg); | |||||
| ifs.read(buf.get(), *size); | |||||
| ifs.close(); | |||||
| return buf.release(); | |||||
| } | |||||
| std::string RealPath(const char *path) { | |||||
| if (path == nullptr) { | |||||
| MS_LOGE("path is nullptr"); | |||||
| return ""; | |||||
| } | |||||
| if ((strlen(path)) >= PATH_MAX) { | |||||
| MS_LOGE("path is too long"); | |||||
| return ""; | |||||
| } | |||||
| std::shared_ptr<char> resolvedPath(new (std::nothrow) char[PATH_MAX]{0}); | |||||
| if (resolvedPath == nullptr) { | |||||
| MS_LOGE("new resolvedPath failed"); | |||||
| return ""; | |||||
| } | |||||
| auto ret = realpath(path, resolvedPath.get()); | |||||
| if (ret == nullptr) { | |||||
| MS_LOGE("realpath failed"); | |||||
| return ""; | |||||
| } | |||||
| return resolvedPath.get(); | |||||
| } | |||||
| } // namespace predict | |||||
| } // namespace mindspore | |||||
| @@ -1,39 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef PREDICT_COMMON_FILE_UTILS_H_ | |||||
| #define PREDICT_COMMON_FILE_UTILS_H_ | |||||
| #include <stdio.h> | |||||
| #include <stdlib.h> | |||||
| #include <time.h> | |||||
| #include <string> | |||||
| #include <iostream> | |||||
| #include <memory> | |||||
| #include <fstream> | |||||
| #include "common/utils.h" | |||||
| #include "common/mslog.h" | |||||
| #include "include/tensor.h" | |||||
| namespace mindspore { | |||||
| namespace predict { | |||||
| char *ReadFile(const char *file, size_t *size); | |||||
| std::string RealPath(const char *path); | |||||
| } // namespace predict | |||||
| } // namespace mindspore | |||||
| #endif // PREDICT_COMMON_FILE_UTILS_H_ | |||||
| @@ -1,179 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "common/flag_parser.h" | |||||
| namespace mindspore { | |||||
| namespace predict { | |||||
| // parse flags read from command line | |||||
| Option<std::string> FlagParser::ParseFlags(int argc, const char *const *argv, bool supportUnknown, | |||||
| bool supportDuplicate) { | |||||
| MS_ASSERT(argv != nullptr); | |||||
| const int FLAG_PREFIX_LEN = 2; | |||||
| // Get binary name | |||||
| binName = GetFileName(argv[0]); | |||||
| std::multimap<std::string, Option<std::string>> keyValues; | |||||
| for (int i = 1; i < argc; i++) { | |||||
| std::string tmp = argv[i]; | |||||
| Trim(&tmp); | |||||
| const std::string flagItem(tmp); | |||||
| if (flagItem == "--") { | |||||
| break; | |||||
| } | |||||
| if (flagItem.find("--") == std::string::npos) { | |||||
| continue; | |||||
| } | |||||
| std::string key; | |||||
| Option<std::string> value = Option<std::string>(None()); | |||||
| size_t pos = flagItem.find_first_of("="); | |||||
| if (pos == std::string::npos && flagItem.find("--no-") != std::string::npos) { | |||||
| key = flagItem.substr(FLAG_PREFIX_LEN); | |||||
| } else if (pos == std::string::npos) { | |||||
| key = flagItem.substr(FLAG_PREFIX_LEN); | |||||
| } else { | |||||
| key = flagItem.substr(FLAG_PREFIX_LEN, pos - FLAG_PREFIX_LEN); | |||||
| value = Option<std::string>(flagItem.substr(pos + 1)); | |||||
| } | |||||
| keyValues.insert(std::pair<std::string, Option<std::string>>(key, value)); | |||||
| } | |||||
| Option<std::string> ret = Option<std::string>(InnerParseFlags(&keyValues)); | |||||
| if (ret.IsSome()) { | |||||
| return Option<std::string>(ret.Get()); | |||||
| } | |||||
| return Option<std::string>(None()); | |||||
| } | |||||
| bool FlagParser::GetRealFlagName(const std::string &oriFlagName, std::string *flagName) { | |||||
| MS_ASSERT(flagName != nullptr); | |||||
| const int BOOL_TYPE_FLAG_PREFIX_LEN = 3; | |||||
| bool opaque = false; | |||||
| if (StartsWithPrefix(oriFlagName, "no-")) { | |||||
| *flagName = oriFlagName.substr(BOOL_TYPE_FLAG_PREFIX_LEN); | |||||
| opaque = true; | |||||
| } else { | |||||
| *flagName = oriFlagName; | |||||
| } | |||||
| return opaque; | |||||
| } | |||||
| // Inner parse function | |||||
| Option<std::string> FlagParser::InnerParseFlags(std::multimap<std::string, Option<std::string>> *keyValues) { | |||||
| MS_ASSERT(keyValues != nullptr); | |||||
| for (auto it = keyValues->begin(); it != keyValues->end(); ++it) { | |||||
| std::string flagName; | |||||
| bool opaque = GetRealFlagName((*it).first, &flagName); | |||||
| Option<std::string> flagValue = (*it).second; | |||||
| auto item = flags.find(flagName); | |||||
| if (item == flags.end()) { | |||||
| return Option<std::string>(std::string(flagName + " is not a valid flag")); | |||||
| } | |||||
| FlagInfo *flag = &(item->second); | |||||
| if (flag == nullptr) { | |||||
| return Option<std::string>("Failed: flag is nullptr"); | |||||
| } | |||||
| if (flag->isParsed) { | |||||
| return Option<std::string>("Failed: already parsed flag: " + flagName); | |||||
| } | |||||
| std::string tmpValue; | |||||
| if (!flag->isBoolean) { | |||||
| if (opaque) { | |||||
| return Option<std::string>(flagName + " is not a boolean type"); | |||||
| } | |||||
| if (flagValue.IsNone()) { | |||||
| return Option<std::string>("No value provided for non-boolean type: " + flagName); | |||||
| } | |||||
| tmpValue = flagValue.Get(); | |||||
| } else { | |||||
| if (flagValue.IsNone() || flagValue.Get().empty()) { | |||||
| tmpValue = !opaque ? "true" : "false"; | |||||
| } else if (!opaque) { | |||||
| tmpValue = flagValue.Get(); | |||||
| } else { | |||||
| return Option<std::string>(std::string("Boolean flag can not have non-empty value")); | |||||
| } | |||||
| } | |||||
| // begin to parse value | |||||
| Option<Nothing> ret = flag->parse(this, tmpValue); | |||||
| if (ret.IsNone()) { | |||||
| return Option<std::string>("Failed to parse value for: " + flag->flagName); | |||||
| } | |||||
| flag->isParsed = true; | |||||
| } | |||||
| // to check flags not given in command line but added as in constructor | |||||
| for (auto &flag : flags) { | |||||
| if (flag.second.isRequired && !flag.second.isParsed) { | |||||
| return Option<std::string>("Error, value of '" + flag.first + "' not provided"); | |||||
| } | |||||
| } | |||||
| return Option<std::string>(None()); | |||||
| } | |||||
| void Replaceall(std::string *str, const std::string &oldValue, const std::string &newValue) { | |||||
| if (str == nullptr) { | |||||
| MS_LOGE("Input str is nullptr"); | |||||
| return; | |||||
| } | |||||
| while (true) { | |||||
| std::string::size_type pos(0); | |||||
| if ((pos = str->find(oldValue)) != std::string::npos) { | |||||
| str->replace(pos, oldValue.length(), newValue); | |||||
| } else { | |||||
| break; | |||||
| } | |||||
| } | |||||
| } | |||||
| std::string FlagParser::Usage(const Option<std::string> &usgMsg) const { | |||||
| // first line, brief of the usage | |||||
| std::string usageString = usgMsg.IsSome() ? usgMsg.Get() + "\n" : ""; | |||||
| // usage of bin name | |||||
| usageString += usageMsg.IsNone() ? "usage: " + binName + " [options]\n" : usageMsg.Get() + "\n"; | |||||
| // help line of help message, usageLine:message of parametors | |||||
| std::string helpLine = ""; | |||||
| std::string usageLine = ""; | |||||
| uint32_t i = 0; | |||||
| for (auto flag = flags.begin(); flag != flags.end(); flag++) { | |||||
| std::string flagName = flag->second.flagName; | |||||
| std::string helpInfo = flag->second.helpInfo; | |||||
| // parameter line | |||||
| std::string thisLine = flag->second.isBoolean ? " --[no-]" + flagName : " --" + flagName + "=VALUE"; | |||||
| if (++i < flags.size()) { | |||||
| // add paramter help message of each line | |||||
| thisLine += " " + helpInfo; | |||||
| Replaceall(&helpInfo, "\n\r", "\n"); | |||||
| usageLine += thisLine + "\n"; | |||||
| } else { | |||||
| // brief help message | |||||
| helpLine = thisLine + " " + helpInfo + "\n"; | |||||
| } | |||||
| } | |||||
| // total usage is brief of usage+ brief of bin + help message + brief of | |||||
| // paramters | |||||
| return usageString + helpLine + usageLine; | |||||
| } | |||||
| } // namespace predict | |||||
| } // namespace mindspore | |||||
| @@ -1,291 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef PREDICT_COMMON_FLAG_PARSER_H_ | |||||
| #define PREDICT_COMMON_FLAG_PARSER_H_ | |||||
| #include <functional> | |||||
| #include <map> | |||||
| #include <utility> | |||||
| #include <string> | |||||
| #include "common/utils.h" | |||||
| #include "common/option.h" | |||||
| namespace mindspore { | |||||
| namespace predict { | |||||
| struct FlagInfo; | |||||
| struct Nothing {}; | |||||
| class FlagParser { | |||||
| public: | |||||
| FlagParser() { AddFlag(&FlagParser::help, "help", "print usage message", false); } | |||||
| virtual ~FlagParser() = default; | |||||
| // only support read flags from command line | |||||
| virtual Option<std::string> ParseFlags(int argc, const char *const *argv, bool supportUnknown = false, | |||||
| bool supportDuplicate = false); | |||||
| std::string Usage(const Option<std::string> &usgMsg = Option<std::string>(None())) const; | |||||
| template <typename Flags, typename T1, typename T2> | |||||
| void AddFlag(T1 *t1, const std::string &flagName, const std::string &helpInfo, const T2 &t2); | |||||
| template <typename Flags, typename T1, typename T2> | |||||
| void AddFlag(T1 Flags::*t1, const std::string &flagName, const std::string &helpInfo, const T2 &t2); | |||||
| template <typename Flags, typename T> | |||||
| void AddFlag(T Flags::*t, const std::string &flagName, const std::string &helpInfo); | |||||
| // Option-type fields | |||||
| template <typename Flags, typename T> | |||||
| void AddFlag(Option<T> Flags::*t, const std::string &flagName, const std::string &helpInfo); | |||||
| bool help; | |||||
| protected: | |||||
| std::string binName; | |||||
| Option<std::string> usageMsg; | |||||
| private: | |||||
| struct FlagInfo { | |||||
| std::string flagName; | |||||
| bool isRequired; | |||||
| bool isBoolean; | |||||
| std::string helpInfo; | |||||
| bool isParsed; | |||||
| std::function<Option<Nothing>(FlagParser *, const std::string &)> parse; | |||||
| }; | |||||
| inline void AddFlag(const FlagInfo &flag); | |||||
| // construct a temporary flag | |||||
| template <typename Flags, typename T> | |||||
| void ConstructFlag(Option<T> Flags::*t, const std::string &flagName, const std::string &helpInfo, FlagInfo *flag); | |||||
| // construct a temporary flag | |||||
| template <typename Flags, typename T1> | |||||
| void ConstructFlag(T1 Flags::*t1, const std::string &flagName, const std::string &helpInfo, FlagInfo *flag); | |||||
| Option<std::string> InnerParseFlags(std::multimap<std::string, Option<std::string>> *values); | |||||
| bool GetRealFlagName(const std::string &oriFlagName, std::string *flagName); | |||||
| std::map<std::string, FlagInfo> flags; | |||||
| }; | |||||
| // convert to std::string | |||||
| template <typename Flags, typename T> | |||||
| Option<std::string> ConvertToString(T Flags::*t, const FlagParser &baseFlag) { | |||||
| const Flags *flag = dynamic_cast<Flags *>(&baseFlag); | |||||
| if (flag != nullptr) { | |||||
| return std::to_string(flag->*t); | |||||
| } | |||||
| return Option<std::string>(None()); | |||||
| } | |||||
| // construct for a Option-type flag | |||||
| template <typename Flags, typename T> | |||||
| void FlagParser::ConstructFlag(Option<T> Flags::*t1, const std::string &flagName, const std::string &helpInfo, | |||||
| FlagInfo *flag) { | |||||
| if (flag == nullptr) { | |||||
| MS_LOGE("FlagInfo is nullptr"); | |||||
| return; | |||||
| } | |||||
| flag->flagName = flagName; | |||||
| flag->helpInfo = helpInfo; | |||||
| flag->isBoolean = typeid(T) == typeid(bool); | |||||
| flag->isParsed = false; | |||||
| } | |||||
| // construct a temporary flag | |||||
| template <typename Flags, typename T> | |||||
| void FlagParser::ConstructFlag(T Flags::*t1, const std::string &flagName, const std::string &helpInfo, FlagInfo *flag) { | |||||
| if (flag == nullptr) { | |||||
| MS_LOGE("FlagInfo is nullptr"); | |||||
| return; | |||||
| } | |||||
| if (t1 == nullptr) { | |||||
| MS_LOGE("t1 is nullptr"); | |||||
| return; | |||||
| } | |||||
| flag->flagName = flagName; | |||||
| flag->helpInfo = helpInfo; | |||||
| flag->isBoolean = typeid(T) == typeid(bool); | |||||
| flag->isParsed = false; | |||||
| } | |||||
| inline void FlagParser::AddFlag(const FlagInfo &flagItem) { flags[flagItem.flagName] = flagItem; } | |||||
| template <typename Flags, typename T> | |||||
| void FlagParser::AddFlag(T Flags::*t, const std::string &flagName, const std::string &helpInfo) { | |||||
| if (t == nullptr) { | |||||
| MS_LOGE("t1 is nullptr"); | |||||
| return; | |||||
| } | |||||
| Flags *flag = dynamic_cast<Flags *>(this); | |||||
| if (flag == nullptr) { | |||||
| MS_LOGI("dynamic_cast failed"); | |||||
| return; | |||||
| } | |||||
| FlagInfo flagItem; | |||||
| // flagItem is as a output parameter | |||||
| ConstructFlag(t, flagName, helpInfo, &flagItem); | |||||
| flagItem.parse = [t](FlagParser *base, const std::string &value) -> Option<Nothing> { | |||||
| Flags *flag = dynamic_cast<Flags *>(base); | |||||
| if (base != nullptr) { | |||||
| Option<T> ret = Option<T>(GenericParseValue<T>(value)); | |||||
| if (ret.IsNone()) { | |||||
| return Option<Nothing>(None()); | |||||
| } else { | |||||
| flag->*t = ret.Get(); | |||||
| } | |||||
| } | |||||
| return Option<Nothing>(Nothing()); | |||||
| }; | |||||
| flagItem.isRequired = true; | |||||
| flagItem.helpInfo += | |||||
| !helpInfo.empty() && helpInfo.find_last_of("\n\r") != helpInfo.size() - 1 ? " (default: " : "(default: "; | |||||
| flagItem.helpInfo += ")"; | |||||
| // add this flag to a std::map | |||||
| AddFlag(flagItem); | |||||
| } | |||||
| template <typename Flags, typename T1, typename T2> | |||||
| void FlagParser::AddFlag(T1 *t1, const std::string &flagName, const std::string &helpInfo, const T2 &t2) { | |||||
| if (t1 == nullptr) { | |||||
| MS_LOGE("t1 is nullptr"); | |||||
| return; | |||||
| } | |||||
| FlagInfo flagItem; | |||||
| // flagItem is as a output parameter | |||||
| ConstructFlag(t1, flagName, helpInfo, flagItem); | |||||
| flagItem.parse = [t1](FlagParser *base, const std::string &value) -> Option<Nothing> { | |||||
| if (base != nullptr) { | |||||
| Option<T1> ret = Option<T1>(GenericParseValue<T1>(value)); | |||||
| if (ret.IsNone()) { | |||||
| return Option<T1>(None()); | |||||
| } else { | |||||
| *t1 = ret.Get(); | |||||
| } | |||||
| } | |||||
| return Option<Nothing>(Nothing()); | |||||
| }; | |||||
| flagItem.isRequired = false; | |||||
| *t1 = t2; | |||||
| flagItem.helpInfo += | |||||
| !helpInfo.empty() && helpInfo.find_last_of("\n\r") != helpInfo.size() - 1 ? " (default: " : "(default: "; | |||||
| flagItem.helpInfo += ToString(t2).Get(); | |||||
| flagItem.helpInfo += ")"; | |||||
| // add this flag to a std::map | |||||
| AddFlag(flagItem); | |||||
| } | |||||
| template <typename Flags, typename T1, typename T2> | |||||
| void FlagParser::AddFlag(T1 Flags::*t1, const std::string &flagName, const std::string &helpInfo, const T2 &t2) { | |||||
| if (t1 == nullptr) { | |||||
| MS_LOGE("t1 is nullptr"); | |||||
| return; | |||||
| } | |||||
| Flags *flag = dynamic_cast<Flags *>(this); | |||||
| if (flag == nullptr) { | |||||
| MS_LOGI("dynamic_cast failed"); | |||||
| return; | |||||
| } | |||||
| FlagInfo flagItem; | |||||
| // flagItem is as a output parameter | |||||
| ConstructFlag(t1, flagName, helpInfo, &flagItem); | |||||
| flagItem.parse = [t1](FlagParser *base, const std::string &value) -> Option<Nothing> { | |||||
| Flags *flag = dynamic_cast<Flags *>(base); | |||||
| if (base != nullptr) { | |||||
| Option<T1> ret = Option<T1>(GenericParseValue<T1>(value)); | |||||
| if (ret.IsNone()) { | |||||
| return Option<Nothing>(None()); | |||||
| } else { | |||||
| flag->*t1 = ret.Get(); | |||||
| } | |||||
| } | |||||
| return Option<Nothing>(Nothing()); | |||||
| }; | |||||
| flagItem.isRequired = false; | |||||
| flag->*t1 = t2; | |||||
| flagItem.helpInfo += | |||||
| !helpInfo.empty() && helpInfo.find_last_of("\n\r") != helpInfo.size() - 1 ? " (default: " : "(default: "; | |||||
| flagItem.helpInfo += ToString(t2).Get(); | |||||
| flagItem.helpInfo += ")"; | |||||
| // add this flag to a std::map | |||||
| AddFlag(flagItem); | |||||
| } | |||||
| // option-type add flag | |||||
| template <typename Flags, typename T> | |||||
| void FlagParser::AddFlag(Option<T> Flags::*t, const std::string &flagName, const std::string &helpInfo) { | |||||
| if (t == nullptr) { | |||||
| MS_LOGE("t is nullptr"); | |||||
| return; | |||||
| } | |||||
| Flags *flag = dynamic_cast<Flags *>(this); | |||||
| if (flag == nullptr) { | |||||
| MS_LOGE("dynamic_cast failed"); | |||||
| return; | |||||
| } | |||||
| FlagInfo flagItem; | |||||
| // flagItem is as a output parameter | |||||
| ConstructFlag(t, flagName, helpInfo, &flagItem); | |||||
| flagItem.isRequired = false; | |||||
| flagItem.parse = [t](FlagParser *base, const std::string &value) -> Option<Nothing> { | |||||
| Flags *flag = dynamic_cast<Flags *>(base); | |||||
| if (base != nullptr) { | |||||
| Option<T> ret = Option<std::string>(GenericParseValue<T>(value)); | |||||
| if (ret.IsNone()) { | |||||
| return Option<Nothing>(None()); | |||||
| } else { | |||||
| flag->*t = Option<T>(Some(ret.Get())); | |||||
| } | |||||
| } | |||||
| return Option<Nothing>(Nothing()); | |||||
| }; | |||||
| // add this flag to a std::map | |||||
| AddFlag(flagItem); | |||||
| } | |||||
| } // namespace predict | |||||
| } // namespace mindspore | |||||
| #endif // PREDICT_COMMON_FLAG_PARSER_H_ | |||||
| @@ -1,77 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "common/func_utils.h" | |||||
| namespace mindspore { | |||||
| namespace predict { | |||||
| #if MS_USE_ARM | |||||
| _Unwind_Reason_Code PrintTraceArm(_Unwind_Context *ctx, void *d) { | |||||
| MS_ASSERT(ctx != nullptr); | |||||
| MS_ASSERT(d != nullptr); | |||||
| Dl_info info; | |||||
| int *depth = static_cast<int *>(d); | |||||
| auto ipAddr = static_cast<int64_t>(_Unwind_GetIP(ctx)); | |||||
| if (dladdr(reinterpret_cast<void *>(ipAddr), &info)) { | |||||
| const char *symbol = ""; | |||||
| const char *dlfile = ""; | |||||
| if (info.dli_sname) { | |||||
| symbol = info.dli_sname; | |||||
| } | |||||
| if (info.dli_fname) { | |||||
| dlfile = info.dli_fname; | |||||
| } | |||||
| MS_PRINT_ERROR("#%d: (%08lx) %s %s ", *depth, ipAddr, dlfile, symbol); | |||||
| } | |||||
| (*depth)++; | |||||
| return _URC_NO_REASON; | |||||
| } | |||||
| #endif | |||||
| void CoreDumpTraceFunc(int iSignum) { | |||||
| MS_PRINT_ERROR("----- start get backtrace info -----"); | |||||
| #if MS_USE_ARM | |||||
| int depth = 0; | |||||
| _Unwind_Backtrace(&PrintTraceArm, &depth); | |||||
| #else | |||||
| const auto maxDeep = 32; | |||||
| const auto maxStringLen = 100; | |||||
| void *apBuffer[maxStringLen]; | |||||
| char **ppStrings; | |||||
| auto iStackDepth = backtrace(apBuffer, maxDeep); | |||||
| if (0 > iStackDepth) { | |||||
| KillProcess("Get backtrace depth failed"); | |||||
| return; | |||||
| } | |||||
| MS_PRINT_ERROR("Current stack depth is %d", iStackDepth); | |||||
| ppStrings = backtrace_symbols(apBuffer, iStackDepth); | |||||
| if (nullptr == ppStrings) { | |||||
| KillProcess("Get backtrace_symbols failed"); | |||||
| return; | |||||
| } | |||||
| for (int iLoop = 0; iLoop < iStackDepth; iLoop++) { | |||||
| MS_PRINT_ERROR("%s \n", ppStrings[iLoop]); | |||||
| } | |||||
| #endif | |||||
| MS_PRINT_ERROR("----- finish get backtrace info -----"); | |||||
| KillProcess("Exit after core dump"); | |||||
| return; // try exit 1 | |||||
| } | |||||
| } // namespace predict | |||||
| } // namespace mindspore | |||||
| @@ -1,35 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef PREDICT_COMMON_FUNC_UTILS_H_ | |||||
| #define PREDICT_COMMON_FUNC_UTILS_H_ | |||||
| #if MS_USE_ARM | |||||
| #include <dlfcn.h> | |||||
| #include <unwind.h> | |||||
| #else | |||||
| #include <execinfo.h> | |||||
| #endif | |||||
| #include "include/errorcode.h" | |||||
| #include "common/mslog.h" | |||||
| namespace mindspore { | |||||
| namespace predict { | |||||
| void CoreDumpTraceFunc(int iSignum); | |||||
| } // namespace predict | |||||
| } // namespace mindspore | |||||
| #endif // PREDICT_COMMON_FUNC_UTILS_H_ | |||||
| @@ -1,167 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "common/graph_util.h" | |||||
| #include <fstream> | |||||
| #include <sstream> | |||||
| #include "common/mslog.h" | |||||
| #include "include/errorcode.h" | |||||
| namespace mindspore { | |||||
| namespace predict { | |||||
| OpGraph *OpGraph::Build(const SubGraphDef &subGraphDef) { | |||||
| auto graph = std::unique_ptr<OpGraph>(new OpGraph()); | |||||
| if (graph == nullptr) { | |||||
| MS_LOGE("malloc opgraph failed"); | |||||
| return nullptr; | |||||
| } | |||||
| auto nodeDefs = subGraphDef.nodes(); | |||||
| if (nodeDefs == nullptr) { | |||||
| MS_LOGE("nodeDefs from subGraphDef is nullptr"); | |||||
| return nullptr; | |||||
| } | |||||
| uint32_t opCount = nodeDefs->size(); | |||||
| for (uint32_t i = 0; i < opCount; i++) { | |||||
| auto nodeDef = nodeDefs->GetAs<NodeDef>(i); | |||||
| MS_ASSERT(nodeDef != nullptr); | |||||
| auto ret = graph->AddEdge(*nodeDef, *nodeDefs); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOGE("%s add edge failed. ret:%d", nodeDef->opDef()->name()->c_str(), ret); | |||||
| return nullptr; | |||||
| } | |||||
| } | |||||
| return graph.release(); | |||||
| } | |||||
| int OpGraph::AddEdge(const NodeDef &srcNodeDef, const flatbuffers::Vector<flatbuffers::Offset<NodeDef>> &nodeDefs) { | |||||
| MS_ASSERT(srcNodeDef.opDef() != nullptr); | |||||
| MS_ASSERT(srcNodeDef.opDef()->name() != nullptr); | |||||
| NODE_ID srcId = std::string(srcNodeDef.opDef()->name()->c_str()); | |||||
| uint32_t opCount = nodeDefs.size(); | |||||
| MS_ASSERT(srcNodeDef.opDef()->outputIndex() != nullptr); | |||||
| for (auto index : *(srcNodeDef.opDef()->outputIndex())) { | |||||
| for (uint32_t i = 0; i < opCount; i++) { | |||||
| auto dstNodeDef = nodeDefs.GetAs<NodeDef>(i); | |||||
| bool find = false; | |||||
| MS_ASSERT(dstNodeDef != nullptr); | |||||
| MS_ASSERT(dstNodeDef->opDef() != nullptr); | |||||
| auto inputIndex = dstNodeDef->opDef()->inputIndex(); | |||||
| MS_ASSERT(inputIndex != nullptr); | |||||
| if (std::any_of(inputIndex->begin(), inputIndex->end(), [&index](int i) { return i == index; })) { | |||||
| find = true; | |||||
| } | |||||
| if (!find) { | |||||
| continue; | |||||
| } | |||||
| MS_ASSERT(dstNodeDef->opDef()->name() != nullptr); | |||||
| NODE_ID dstId = std::string(dstNodeDef->opDef()->name()->c_str()); | |||||
| auto ret = AddEdge(srcId, dstId); | |||||
| if (ret != RET_OK) { | |||||
| return ret; | |||||
| } | |||||
| } | |||||
| } | |||||
| return RET_OK; | |||||
| } | |||||
| int OpGraph::AddEdge(const NODE_ID &srcId, const NODE_ID &dstId) { | |||||
| auto srcNode = AddNode(srcId); | |||||
| if (srcNode == nullptr) { | |||||
| MS_LOGE("add srcNode failed"); | |||||
| return RET_ERROR; | |||||
| } | |||||
| srcNode->AddOutEdge(dstId); | |||||
| auto dstNode = AddNode(dstId); | |||||
| if (dstNode == nullptr) { | |||||
| MS_LOGE("add dstNode failed"); | |||||
| return RET_ERROR; | |||||
| } | |||||
| dstNode->AddInEdge(srcId); | |||||
| return RET_OK; | |||||
| } | |||||
| OpNode *OpGraph::GetNode(const NODE_ID &nodeId) { | |||||
| auto node = nodes.find(nodeId); | |||||
| if (node == nodes.end()) { | |||||
| return nullptr; | |||||
| } | |||||
| return node->second; | |||||
| } | |||||
| OpNode *OpGraph::AddNode(const NODE_ID &nodeId) { | |||||
| auto node = GetNode(nodeId); | |||||
| if (node != nullptr) { | |||||
| return node; | |||||
| } | |||||
| node = new (std::nothrow) OpNode(nodeId); | |||||
| if (node == nullptr) { | |||||
| MS_LOGE("new node failed"); | |||||
| return nullptr; | |||||
| } | |||||
| nodes[nodeId] = node; | |||||
| return node; | |||||
| } | |||||
| std::unordered_set<NODE_ID> OpGraph::GetInputNode() { | |||||
| std::unordered_set<NODE_ID> inputNodes; | |||||
| for (const auto &iter : nodes) { | |||||
| auto node = iter.second; | |||||
| MS_ASSERT(node != nullptr); | |||||
| if (node->GetAllInEdge().empty()) { | |||||
| inputNodes.insert(node->ID()); | |||||
| } | |||||
| } | |||||
| return inputNodes; | |||||
| } | |||||
| std::unordered_set<NODE_ID> OpGraph::GetOutputNode() { | |||||
| std::unordered_set<NODE_ID> outputNodes; | |||||
| for (const auto &iter : nodes) { | |||||
| auto node = iter.second; | |||||
| MS_ASSERT(node != nullptr); | |||||
| if (node->GetAllOutEdge().empty()) { | |||||
| outputNodes.insert(node->ID()); | |||||
| } | |||||
| } | |||||
| return outputNodes; | |||||
| } | |||||
| OpGraph::~OpGraph() { | |||||
| for (auto iter : nodes) { | |||||
| if (iter.second != nullptr) { | |||||
| delete iter.second; | |||||
| } | |||||
| } | |||||
| nodes.clear(); | |||||
| } | |||||
| NODE_ID OpNode::ID() { return id; } | |||||
| void OpNode::AddInEdge(const NODE_ID &nodeId) { inEdges.insert(nodeId); } | |||||
| void OpNode::AddOutEdge(const NODE_ID &nodeId) { outEdges.insert(nodeId); } | |||||
| std::unordered_set<NODE_ID> OpNode::GetAllInEdge() { return inEdges; } | |||||
| std::unordered_set<NODE_ID> OpNode::GetAllOutEdge() { return outEdges; } | |||||
| } // namespace predict | |||||
| } // namespace mindspore | |||||
| @@ -1,71 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef PREDICT_COMMON_GRAPH_UTIL_H_ | |||||
| #define PREDICT_COMMON_GRAPH_UTIL_H_ | |||||
| #include <string> | |||||
| #include <unordered_map> | |||||
| #include <unordered_set> | |||||
| #include <utility> | |||||
| #include <vector> | |||||
| #include <memory> | |||||
| #include "common/utils.h" | |||||
| #include "schema/inner/ms_generated.h" | |||||
| namespace mindspore { | |||||
| namespace predict { | |||||
| using NODE_ID = std::string; | |||||
| class OpNode { | |||||
| public: | |||||
| explicit OpNode(NODE_ID nodeId) : id(std::move(nodeId)) {} | |||||
| NODE_ID ID(); | |||||
| void AddInEdge(const NODE_ID &nodeId); | |||||
| void AddOutEdge(const NODE_ID &nodeId); | |||||
| std::unordered_set<NODE_ID> GetAllInEdge(); | |||||
| std::unordered_set<NODE_ID> GetAllOutEdge(); | |||||
| protected: | |||||
| NODE_ID id; | |||||
| std::unordered_set<NODE_ID> inEdges; | |||||
| std::unordered_set<NODE_ID> outEdges; | |||||
| }; | |||||
| class OpGraph { | |||||
| public: | |||||
| OpGraph() = default; | |||||
| ~OpGraph(); | |||||
| static OpGraph *Build(const SubGraphDef &subGraphDef); | |||||
| OpNode *GetNode(const NODE_ID &nodeId); | |||||
| OpNode *AddNode(const NODE_ID &nodeId); | |||||
| std::unordered_set<NODE_ID> GetInputNode(); | |||||
| std::unordered_set<NODE_ID> GetOutputNode(); | |||||
| private: | |||||
| int AddEdge(const NODE_ID &srcId, const NODE_ID &dstId); | |||||
| int AddEdge(const NodeDef &srcNodeDef, const flatbuffers::Vector<flatbuffers::Offset<NodeDef>> &nodeDefs); | |||||
| protected: | |||||
| std::unordered_map<NODE_ID, OpNode *> nodes; | |||||
| }; | |||||
| } // namespace predict | |||||
| } // namespace mindspore | |||||
| #endif // PREDICT_COMMON_GRAPH_UTIL_H_ | |||||
| @@ -1,26 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "common/module_registry.h" | |||||
| namespace mindspore { | |||||
| namespace predict { | |||||
| ModuleRegistry *GetRegistryInstance() { | |||||
| static ModuleRegistry registry; | |||||
| return ®istry; | |||||
| } | |||||
| } // namespace predict | |||||
| } // namespace mindspore | |||||
| @@ -1,97 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef PREDICT_COMMON_MODULE_REGISTRY_H_ | |||||
| #define PREDICT_COMMON_MODULE_REGISTRY_H_ | |||||
| #include <memory> | |||||
| #include <string> | |||||
| #include <unordered_map> | |||||
| #include "common/mslog.h" | |||||
| #define MSPREDICT_API __attribute__((visibility("default"))) | |||||
| namespace mindspore { | |||||
| namespace predict { | |||||
| class ModuleBase { | |||||
| public: | |||||
| virtual ~ModuleBase() = default; | |||||
| }; | |||||
| template <typename T> | |||||
| class Module; | |||||
| class ModuleRegistry { | |||||
| public: | |||||
| ModuleRegistry() = default; | |||||
| virtual ~ModuleRegistry() = default; | |||||
| template <class T> | |||||
| bool Register(const std::string &name, const T &t) { | |||||
| modules[name] = &t; | |||||
| return true; | |||||
| } | |||||
| template <class T> | |||||
| std::shared_ptr<T> Create(const std::string &name) { | |||||
| auto it = modules.find(name); | |||||
| if (it == modules.end()) { | |||||
| return nullptr; | |||||
| } | |||||
| auto *module = (Module<T> *)it->second; | |||||
| if (module == nullptr) { | |||||
| return nullptr; | |||||
| } else { | |||||
| return module->Create(); | |||||
| } | |||||
| } | |||||
| template <class T> | |||||
| T *GetInstance(const std::string &name) { | |||||
| auto it = modules.find(name); | |||||
| if (it == modules.end()) { | |||||
| return nullptr; | |||||
| } | |||||
| auto *module = (Module<T> *)it->second; | |||||
| if (module == nullptr) { | |||||
| return nullptr; | |||||
| } else { | |||||
| return module->GetInstance(); | |||||
| } | |||||
| } | |||||
| protected: | |||||
| std::unordered_map<std::string, const ModuleBase *> modules; | |||||
| }; | |||||
| ModuleRegistry *GetRegistryInstance() MSPREDICT_API; | |||||
| template <class T> | |||||
| class ModuleRegistrar { | |||||
| public: | |||||
| ModuleRegistrar(const std::string &name, const T &module) { | |||||
| auto registryInstance = GetRegistryInstance(); | |||||
| if (registryInstance == nullptr) { | |||||
| MS_LOGW("registryInstance is nullptr."); | |||||
| } else { | |||||
| registryInstance->Register(name, module); | |||||
| } | |||||
| } | |||||
| }; | |||||
| } // namespace predict | |||||
| } // namespace mindspore | |||||
| #endif // PREDICT_COMMON_MODULE_REGISTRY_H_ | |||||
| @@ -1,47 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "common/mslog.h" | |||||
| #include <iostream> | |||||
| #include <cstdlib> | |||||
| #include <climits> | |||||
| #include <string> | |||||
| #include "include/errorcode.h" | |||||
| namespace mindspore { | |||||
| namespace predict { | |||||
| std::string GetEnv(const std::string &envvar) { | |||||
| const char *value = std::getenv(envvar.c_str()); | |||||
| if (value == nullptr) { | |||||
| return std::string(); | |||||
| } | |||||
| return std::string(value); | |||||
| } | |||||
| bool IsPrint(int level) { | |||||
| auto envString = GetEnv("MSLOG"); | |||||
| static int env = static_cast<int>(std::strtol(!envString.empty() ? envString.c_str() : "3", nullptr, 0)); | |||||
| if (env == INT_MIN || env == INT_MAX) { | |||||
| env = WARN; | |||||
| // enable the SP for binscope checking | |||||
| std::string errorStr = "env exceeded the value that type int is able to represent"; | |||||
| MS_LOGE("%s", errorStr.c_str()); | |||||
| } | |||||
| return level >= env; | |||||
| } | |||||
| } // namespace predict | |||||
| } // namespace mindspore | |||||
| @@ -1,230 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef PREDICT_COMMON_MSLOG_H_ | |||||
| #define PREDICT_COMMON_MSLOG_H_ | |||||
| #include <syslog.h> | |||||
| #include <unistd.h> | |||||
| #include <csignal> | |||||
| #include <iostream> | |||||
| #include <sstream> | |||||
| #include <string> | |||||
| #if defined(__ANDROID__) || defined(ANDROID) | |||||
| #include <android/log.h> | |||||
| #endif | |||||
| namespace mindspore { | |||||
| namespace predict { | |||||
| constexpr const char *TAG = "MS_PREDICT"; | |||||
| constexpr int DEBUG = 1; | |||||
| constexpr int INFO = 2; | |||||
| constexpr int WARN = 3; | |||||
| constexpr int ERROR = 4; | |||||
| #define MSPREDICT_API __attribute__((visibility("default"))) | |||||
| bool MSPREDICT_API IsPrint(int level); | |||||
| #if !defined(__ANDROID__) && !defined(ANDROID) | |||||
| #if LOG_TO_FILE | |||||
| #define MS_LOGD(fmt, args...) \ | |||||
| { \ | |||||
| if (mindspore::predict::IsPrint(mindspore::predict::DEBUG)) { \ | |||||
| syslog(LOG_DEBUG, "%s|%d|%s[%d]|: " #fmt, mindspore::predict::TAG, \getpid(), __func__, __LINE__, ##args); \ | |||||
| } \ | |||||
| } | |||||
| #define MS_LOGI(fmt, args...) \ | |||||
| { \ | |||||
| if (mindspore::predict::IsPrint(mindspore::predict::INFO)) { \ | |||||
| syslog(LOG_INFO, "%s|%d|%s[%d]|: " #fmt, mindspore::predict::TAG, \getpid(), __func__, __LINE__, ##args); \ | |||||
| } \ | |||||
| } | |||||
| #define MS_LOGW(fmt, args...) \ | |||||
| { \ | |||||
| if (mindspore::predict::IsPrint(mindspore::predict::WARN)) { \ | |||||
| syslog(LOG_WARNING, "%s|%d|%s[%d]|: " #fmt, mindspore::predict::TAG, \getpid(), __func__, __LINE__, ##args); \ | |||||
| } \ | |||||
| } | |||||
| #define MS_LOGE(fmt, args...) \ | |||||
| { \ | |||||
| if (mindspore::predict::IsPrint(mindspore::predict::ERROR)) { \ | |||||
| syslog(LOG_ERR, "%s|%d|%s[%d]|: " #fmt, mindspore::predict::TAG, getpid(), __func__, __LINE__, ##args); \ | |||||
| } \ | |||||
| } | |||||
| #else | |||||
| #define MS_LOGD(fmt, args...) \ | |||||
| { \ | |||||
| if (mindspore::predict::IsPrint(mindspore::predict::DEBUG)) { \ | |||||
| printf("[DEBUG] %s|%d|%s|%s[%d]|: " #fmt "\r\n", mindspore::predict::TAG, getpid(), __FILE__, __func__, \ | |||||
| __LINE__, ##args); \ | |||||
| } \ | |||||
| } | |||||
| #define MS_LOGI(fmt, args...) \ | |||||
| { \ | |||||
| if (mindspore::predict::IsPrint(mindspore::predict::INFO)) { \ | |||||
| printf("[INFO] %s|%d|%s|%s[%d]|: " #fmt "\r\n", mindspore::predict::TAG, getpid(), __FILE__, __func__, \ | |||||
| __LINE__, ##args); \ | |||||
| } \ | |||||
| } | |||||
| #define MS_LOGW(fmt, args...) \ | |||||
| { \ | |||||
| if (mindspore::predict::IsPrint(mindspore::predict::WARN)) { \ | |||||
| printf("[WARN] %s|%d|%s|%s[%d]|: " #fmt "\r\n", mindspore::predict::TAG, getpid(), __FILE__, __func__, \ | |||||
| __LINE__, ##args); \ | |||||
| } \ | |||||
| } | |||||
| #define MS_LOGE(fmt, args...) \ | |||||
| { \ | |||||
| if (mindspore::predict::IsPrint(mindspore::predict::ERROR)) { \ | |||||
| printf("[ERROR] %s|%d|%s|%s[%d]|: " #fmt "\r\n", mindspore::predict::TAG, getpid(), __FILE__, __func__, \ | |||||
| __LINE__, ##args); \ | |||||
| } \ | |||||
| } | |||||
| #endif | |||||
| #else | |||||
| #define MS_LOGD(fmt, args...) \ | |||||
| { \ | |||||
| if (mindspore::predict::IsPrint(mindspore::predict::DEBUG)) \ | |||||
| __android_log_print(ANDROID_LOG_DEBUG, mindspore::predict::TAG, "|%d|%s[%d]|: " fmt, getpid(), __func__, \ | |||||
| __LINE__, ##args); \ | |||||
| } | |||||
| #define MS_LOGI(fmt, args...) \ | |||||
| { \ | |||||
| if (mindspore::predict::IsPrint(mindspore::predict::INFO)) \ | |||||
| __android_log_print(ANDROID_LOG_INFO, mindspore::predict::TAG, "|%d|%s[%d]|: " fmt, getpid(), __func__, \ | |||||
| __LINE__, ##args); \ | |||||
| } | |||||
| #define MS_LOGW(fmt, args...) \ | |||||
| { \ | |||||
| if (mindspore::predict::IsPrint(mindspore::predict::WARN)) \ | |||||
| __android_log_print(ANDROID_LOG_WARN, mindspore::predict::TAG, "|%d|%s[%d]|: " fmt, getpid(), __func__, \ | |||||
| __LINE__, ##args); \ | |||||
| } | |||||
| #define MS_LOGE(fmt, args...) \ | |||||
| { \ | |||||
| if (mindspore::predict::IsPrint(mindspore::predict::ERROR)) \ | |||||
| __android_log_print(ANDROID_LOG_ERROR, mindspore::predict::TAG, "|%d|%s[%d]|: " fmt, getpid(), __func__, \ | |||||
| __LINE__, ##args); \ | |||||
| } | |||||
| #endif | |||||
| #define MS_LOG(severity) std::cout << std::endl | |||||
| #define MS_DLOG(verboselevel) std::cout << std::endl | |||||
| // Kill the process for safe exiting. | |||||
| inline void KillProcess(const std::string &ret) { | |||||
| MS_LOG(ERROR) << "mindspore Exit Tip:" << ret; | |||||
| if (raise(SIGKILL) != 0) { | |||||
| MS_LOGE("Send SIGKILL to kill process failed"); | |||||
| } | |||||
| } | |||||
| } // namespace predict | |||||
| } // namespace mindspore | |||||
| #define MS_ASSERT(expression) \ | |||||
| do { \ | |||||
| if (!(expression)) { \ | |||||
| std::stringstream ss; \ | |||||
| ss << "Assertion failed: " << #expression << ", file: " << __FILE__ << ", line: " << __LINE__; \ | |||||
| mindspore::predict::KillProcess(ss.str()); \ | |||||
| } \ | |||||
| } while (0) | |||||
| #define MS_EXIT(ret) \ | |||||
| do { \ | |||||
| std::stringstream ss; \ | |||||
| ss << (ret) << " ( file: " << __FILE__ << ", line: " << __LINE__ << " )."; \ | |||||
| mindspore::predict::KillProcess(ss.str()); \ | |||||
| } while (0) | |||||
| #define MS_PRINT_ERROR(fmt, args...) \ | |||||
| printf(#fmt "\n", ##args); \ | |||||
| MS_LOGE(fmt, ##args); | |||||
| #define MS_PRINT_INFO(fmt, args...) \ | |||||
| printf(fmt "\n", ##args); \ | |||||
| MS_LOGI(fmt, ##args); | |||||
| constexpr int LOG_CHECK_EVERY_FIRSTNUM = 10; | |||||
| constexpr int LOG_CHECK_EVERY_NUM1 = 10; | |||||
| constexpr int LOG_CHECK_EVERY_NUM2 = 100; | |||||
| constexpr int LOG_CHECK_EVERY_NUM3 = 1000; | |||||
| constexpr int LOG_CHECK_EVERY_NUM4 = 10000; | |||||
| #define LOG_CHECK_ID_CONCAT(word1, word2) word1##word2 | |||||
| #define LOG_CHECK_ID LOG_CHECK_ID_CONCAT(__FUNCTION__, __LINE__) | |||||
| #define LOG_CHECK_FIRST_N \ | |||||
| [](uint32_t firstNum) { \ | |||||
| static uint32_t LOG_CHECK_ID = 0; \ | |||||
| ++LOG_CHECK_ID; \ | |||||
| return (LOG_CHECK_ID <= firstNum); \ | |||||
| } | |||||
| #define LOG_CHECK_EVERY_N1 \ | |||||
| [](uint32_t firstNum, uint32_t num) { \ | |||||
| static uint32_t LOG_CHECK_ID = 0; \ | |||||
| ++LOG_CHECK_ID; \ | |||||
| return ((LOG_CHECK_ID <= firstNum) || (LOG_CHECK_ID % num == 0)); \ | |||||
| } | |||||
| #define LOG_CHECK_EVERY_N2 \ | |||||
| [](uint32_t firstNum, uint32_t num1, uint32_t num2) { \ | |||||
| static uint32_t LOG_CHECK_ID = 0; \ | |||||
| ++LOG_CHECK_ID; \ | |||||
| return ((LOG_CHECK_ID <= firstNum) || (LOG_CHECK_ID < num2 && LOG_CHECK_ID % num1 == 0) || \ | |||||
| (LOG_CHECK_ID % num2 == 0)); \ | |||||
| } | |||||
| #define LOG_CHECK_EVERY_N3 \ | |||||
| [](uint32_t firstNum, uint32_t num1, uint32_t num2, uint32_t num3) { \ | |||||
| static uint32_t LOG_CHECK_ID = 0; \ | |||||
| ++LOG_CHECK_ID; \ | |||||
| return ((LOG_CHECK_ID <= firstNum) || (LOG_CHECK_ID < num2 && LOG_CHECK_ID % num1 == 0) || \ | |||||
| (LOG_CHECK_ID < num3 && LOG_CHECK_ID % num2 == 0) || (LOG_CHECK_ID % num3 == 0)); \ | |||||
| } | |||||
| #define LOG_CHECK_EVERY_N4 \ | |||||
| [](uint32_t firstNum, uint32_t num1, uint32_t num2, uint32_t num3, uint32_t num4) { \ | |||||
| static uint32_t LOG_CHECK_ID = 0; \ | |||||
| ++LOG_CHECK_ID; \ | |||||
| return ((LOG_CHECK_ID <= firstNum) || (LOG_CHECK_ID < num2 && LOG_CHECK_ID % num1 == 0) || \ | |||||
| (LOG_CHECK_ID < num3 && LOG_CHECK_ID % num2 == 0) || (LOG_CHECK_ID < num4 && LOG_CHECK_ID % num3 == 0) || \ | |||||
| (LOG_CHECK_ID % num4 == 0)); \ | |||||
| } | |||||
| #define LOG_CHECK_EVERY_N \ | |||||
| []() { \ | |||||
| static uint32_t LOG_CHECK_ID = 0; \ | |||||
| ++LOG_CHECK_ID; \ | |||||
| return ((LOG_CHECK_ID <= LOG_CHECK_EVERY_FIRSTNUM) || \ | |||||
| (LOG_CHECK_ID < LOG_CHECK_EVERY_NUM2 && LOG_CHECK_ID % LOG_CHECK_EVERY_NUM1 == 0) || \ | |||||
| (LOG_CHECK_ID < LOG_CHECK_EVERY_NUM3 && LOG_CHECK_ID % LOG_CHECK_EVERY_NUM2 == 0) || \ | |||||
| (LOG_CHECK_ID < LOG_CHECK_EVERY_NUM4 && LOG_CHECK_ID % LOG_CHECK_EVERY_NUM3 == 0) || \ | |||||
| (LOG_CHECK_ID % LOG_CHECK_EVERY_NUM4 == 0)); \ | |||||
| } | |||||
| #endif // PREDICT_COMMON_MSLOG_H_ | |||||
| @@ -1,44 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef PREDICT_COMMON_OP_UTILS_H_ | |||||
| #define PREDICT_COMMON_OP_UTILS_H_ | |||||
| #include <functional> | |||||
| #include <string> | |||||
| #include "schema/inner/ms_generated.h" | |||||
| namespace mindspore { | |||||
| namespace predict { | |||||
| inline OpT GetOpType(const OpDef &opDef) { return opDef.attr_type(); } | |||||
| inline OpT GetOpType(const NodeDef &nodeDef) { return GetOpType(*(nodeDef.opDef())); } | |||||
| inline std::string GetOpTypeName(const NodeDef &nodeDef) { return EnumNameOpT(GetOpType(nodeDef)); } | |||||
| inline std::string GetOpTypeName(const OpDef &opDef) { return EnumNameOpT(GetOpType(opDef)); } | |||||
| inline OpT GetOpType(const OpDefT &opDefT) { return opDefT.attr.type; } | |||||
| inline OpT GetOpType(const NodeDefT &nodeDefT) { return GetOpType(*(nodeDefT.opDef.get())); } | |||||
| inline std::string GetOpTypeName(const NodeDefT &nodeDefT) { return EnumNameOpT(GetOpType(nodeDefT)); } | |||||
| inline std::string GetOpTypeName(const OpDefT &opDefT) { return EnumNameOpT(GetOpType(opDefT)); } | |||||
| } // namespace predict | |||||
| } // namespace mindspore | |||||
| #endif // PREDICT_COMMON_OP_UTILS_H_ | |||||
| @@ -1,119 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef PREDICT_COMMON_OPTION_H_ | |||||
| #define PREDICT_COMMON_OPTION_H_ | |||||
| #include <type_traits> | |||||
| #include <utility> | |||||
| #include "common/mslog.h" | |||||
| namespace mindspore { | |||||
| namespace predict { | |||||
| template <typename T> | |||||
| struct InnerSome { | |||||
| explicit InnerSome(const T &t) : _t(std::move(t)) {} | |||||
| T _t; | |||||
| }; | |||||
| template <typename T> | |||||
| InnerSome<typename std::decay<T>::type> Some(T &&t) { | |||||
| return InnerSome<typename std::decay<T>::type>(std::forward<T>(t)); | |||||
| } | |||||
| struct None {}; | |||||
| template <typename T> | |||||
| class Option { | |||||
| public: | |||||
| Option() : state(NONE) {} | |||||
| explicit Option(const T &t) : data(t), state(SOME) {} | |||||
| explicit Option(T &&t) : data(std::move(t)), state(SOME) {} | |||||
| explicit Option(const InnerSome<T> &some) : data(some._t), state(SOME) {} | |||||
| explicit Option(const None &none) : state(NONE) {} | |||||
| Option(const Option<T> &that) : state(that.state) { | |||||
| if (that.IsSome()) { | |||||
| new (&data) T(that.data); | |||||
| } | |||||
| } | |||||
| virtual ~Option() = default; | |||||
| bool IsNone() const { return state == NONE; } | |||||
| bool IsSome() const { return state == SOME; } | |||||
| const T &Get() const & { | |||||
| MS_ASSERT(IsSome()); | |||||
| return data; | |||||
| } | |||||
| T &Get() & { | |||||
| MS_ASSERT(IsSome()); | |||||
| return data; | |||||
| } | |||||
| T &&Get() && { | |||||
| MS_ASSERT(IsSome()); | |||||
| return std::move(data); | |||||
| } | |||||
| const T &&Get() const && { | |||||
| MS_ASSERT(IsSome()); | |||||
| return std::move(data); | |||||
| } | |||||
| // oprerator override | |||||
| Option<T> &operator=(const Option<T> &that) { | |||||
| if (&that != this) { | |||||
| if (IsSome()) { | |||||
| data.~T(); | |||||
| } | |||||
| state = that.state; | |||||
| if (that.IsSome()) { | |||||
| new (&data) T(that.data); | |||||
| } | |||||
| } | |||||
| return *this; | |||||
| } | |||||
| bool operator==(const Option<T> &that) const { | |||||
| return (IsNone() && that.IsNone()) || (IsSome() && that.IsSome() && data == that.data); | |||||
| } | |||||
| bool operator!=(const Option<T> &that) const { return !(*this == that); } | |||||
| bool operator==(const T &that) const { return IsSome() && data == that; } | |||||
| bool operator!=(const T &that) const { return !(*this == that); } | |||||
| private: | |||||
| enum State { NONE = 0, SOME = 1 }; | |||||
| T data; | |||||
| State state; | |||||
| }; | |||||
| } // namespace predict | |||||
| } // namespace mindspore | |||||
| #endif // PREDICT_COMMON_OPTION_H_ | |||||
| @@ -1,50 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "common/storage.h" | |||||
| #include "flatbuffers/flatbuffers.h" | |||||
| #include "common/mslog.h" | |||||
| #include "common/file_utils.h" | |||||
| namespace mindspore { | |||||
| namespace predict { | |||||
| int Storage::Save(const GraphDefT &graph, const std::string &outputPath) { | |||||
| flatbuffers::FlatBufferBuilder builder(flatSize); | |||||
| auto offset = GraphDef::Pack(builder, &graph); | |||||
| builder.Finish(offset); | |||||
| int size = builder.GetSize(); | |||||
| auto content = builder.GetBufferPointer(); | |||||
| if (content == nullptr) { | |||||
| MS_LOGE("GetBufferPointer nullptr"); | |||||
| return RET_ERROR; | |||||
| } | |||||
| std::string realPath = RealPath(outputPath.c_str()); | |||||
| if (realPath.empty()) { | |||||
| MS_LOGE("Output file path '%s' is not valid", outputPath.c_str()); | |||||
| return RET_ERROR; | |||||
| } | |||||
| std::ofstream output(realPath, std::ofstream::binary); | |||||
| if (!output.is_open()) { | |||||
| MS_LOGE("ofstream open failed"); | |||||
| return RET_ERROR; | |||||
| } | |||||
| output.write((const char *)content, size); | |||||
| output.close(); | |||||
| return RET_OK; | |||||
| } | |||||
| } // namespace predict | |||||
| } // namespace mindspore | |||||
| @@ -1,36 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef PREDICT_COMMON_STORAGE_H_ | |||||
| #define PREDICT_COMMON_STORAGE_H_ | |||||
| #include <fstream> | |||||
| #include <string> | |||||
| #include "include/errorcode.h" | |||||
| #include "flatbuffers/flatbuffers.h" | |||||
| #include "schema/inner/ms_generated.h" | |||||
| namespace mindspore { | |||||
| namespace predict { | |||||
| class Storage { | |||||
| public: | |||||
| int Save(const GraphDefT &graph, const std::string &outputPath); | |||||
| const int flatSize = 1024; | |||||
| }; | |||||
| } // namespace predict | |||||
| } // namespace mindspore | |||||
| #endif // PREDICT_COMMON_STORAGE_H_ | |||||
| @@ -1,228 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "common/utils.h" | |||||
| namespace mindspore { | |||||
| namespace predict { | |||||
| uint64_t GetTimeUs() { | |||||
| struct timespec ts = {0, 0}; | |||||
| if (clock_gettime(CLOCK_MONOTONIC, &ts) != 0) { | |||||
| return 0; | |||||
| } | |||||
| // USECS_IN_SEC *NSECS_IN_USEC; | |||||
| auto retval = static_cast<uint64_t>((ts.tv_sec * USEC) + (ts.tv_nsec / MSEC)); | |||||
| return retval; | |||||
| } | |||||
| static const unsigned int FP32_BIT_SIZE = 32; | |||||
| static const unsigned int FP32_EXPONENT_BIAS = 127; | |||||
| static const unsigned int FP32_SIGNIFICAND = 23; | |||||
| static const unsigned int FP32_EXPONENT_MAX = 255; | |||||
| static const unsigned int FP16_BIT_SIZE = 16; | |||||
| static const unsigned int FP16_EXPONENT_BIAS = 15; | |||||
| static const unsigned int FP16_SIGNIFICAND = 10; | |||||
| static const int FP16_EXPONENT_MAX = 30; | |||||
| static const int FP16_EXPONENT_MIN = -10; | |||||
| float ShortToFloat32(int16_t srcValue) { | |||||
| uint16_t expHalf16 = srcValue & 0x7C00; | |||||
| int exp1 = static_cast<int>(expHalf16); | |||||
| uint16_t mantissa16 = srcValue & 0x03FF; | |||||
| int mantissa1 = static_cast<int>(mantissa16); | |||||
| int sign = static_cast<int>(srcValue & 0x8000); | |||||
| sign = sign << FP16_BIT_SIZE; | |||||
| // nan or inf | |||||
| if (expHalf16 == 0x7C00) { | |||||
| // nan | |||||
| if (mantissa16 > 0) { | |||||
| int res = (0x7FC00000 | sign); | |||||
| int *iRes = &res; | |||||
| MS_ASSERT(iRes != nullptr); | |||||
| auto fres = static_cast<float>(*iRes); | |||||
| return fres; | |||||
| } | |||||
| // inf | |||||
| int res = (0x7F800000 | sign); | |||||
| int *iRes = &res; | |||||
| MS_ASSERT(iRes != nullptr); | |||||
| auto fres = static_cast<float>(*iRes); | |||||
| return fres; | |||||
| } | |||||
| if (expHalf16 != 0) { | |||||
| exp1 += ((FP32_EXPONENT_BIAS - FP16_EXPONENT_BIAS) << FP16_SIGNIFICAND); // exponents converted to float32 bias | |||||
| int res = (exp1 | mantissa1); | |||||
| res = res << (FP32_SIGNIFICAND - FP16_SIGNIFICAND); | |||||
| res = (res | sign); | |||||
| int *iRes = &res; | |||||
| auto fres = static_cast<float>(*iRes); | |||||
| return fres; | |||||
| } | |||||
| int xmm1 = exp1 > (1 << FP16_SIGNIFICAND) ? exp1 : (1 << FP16_SIGNIFICAND); | |||||
| xmm1 = (xmm1 << (FP32_SIGNIFICAND - FP16_SIGNIFICAND)); | |||||
| xmm1 += ((FP32_EXPONENT_BIAS - FP16_EXPONENT_BIAS - FP16_SIGNIFICAND) | |||||
| << FP32_SIGNIFICAND); // add the bias difference to xmm1 | |||||
| xmm1 = xmm1 | sign; // Combine with the sign mask | |||||
| auto res = static_cast<float>(mantissa1); // Convert mantissa to float | |||||
| res *= static_cast<float>(xmm1); | |||||
| return res; | |||||
| } | |||||
| int16_t Float32ToShort(float srcValue) { | |||||
| auto srcValueBit = static_cast<unsigned int>(srcValue); | |||||
| int sign = srcValueBit >> (FP32_BIT_SIZE - 1); | |||||
| int mantissa = srcValueBit & 0x007FFFFF; | |||||
| // exponent | |||||
| int exp = ((srcValueBit & 0x7F800000) >> FP32_SIGNIFICAND) + FP16_EXPONENT_BIAS - FP32_EXPONENT_BIAS; | |||||
| int16_t res; | |||||
| if (exp > 0 && exp < FP16_EXPONENT_MAX) { | |||||
| // use rte rounding mode, round the significand, combine sign, exponent and significand into a short. | |||||
| res = (sign << (FP16_BIT_SIZE - 1)) | (exp << FP16_SIGNIFICAND) | | |||||
| ((mantissa + 0x00001000) >> (FP32_SIGNIFICAND - FP16_SIGNIFICAND)); | |||||
| } else if (srcValueBit == 0) { | |||||
| res = 0; | |||||
| } else { | |||||
| if (exp <= 0) { | |||||
| if (exp < FP16_EXPONENT_MIN) { | |||||
| // value is less than min half float point | |||||
| res = 0; | |||||
| } else { | |||||
| // normalized single, magnitude is less than min normal half float point. | |||||
| mantissa = (mantissa | 0x00800000) >> (1 - exp); | |||||
| // round to nearest | |||||
| if ((mantissa & 0x00001000) > 0) { | |||||
| mantissa = mantissa + 0x00002000; | |||||
| } | |||||
| // combine sign & mantissa (exp is zero to get denormalized number) | |||||
| res = (sign << FP16_EXPONENT_BIAS) | (mantissa >> (FP32_SIGNIFICAND - FP16_SIGNIFICAND)); | |||||
| } | |||||
| } else if (exp == (FP32_EXPONENT_MAX - FP32_EXPONENT_BIAS + FP16_EXPONENT_BIAS)) { | |||||
| if (mantissa == 0) { | |||||
| // input float is infinity, return infinity half | |||||
| res = (sign << FP16_EXPONENT_BIAS) | 0x7C00; | |||||
| } else { | |||||
| // input float is NaN, return half NaN | |||||
| res = (sign << FP16_EXPONENT_BIAS) | 0x7C00 | (mantissa >> (FP32_SIGNIFICAND - FP16_SIGNIFICAND)); | |||||
| } | |||||
| } else { | |||||
| // exp > 0, normalized single, round to nearest | |||||
| if ((mantissa & 0x00001000) > 0) { | |||||
| mantissa = mantissa + 0x00002000; | |||||
| if ((mantissa & 0x00800000) > 0) { | |||||
| mantissa = 0; | |||||
| exp = exp + 1; | |||||
| } | |||||
| } | |||||
| if (exp > FP16_EXPONENT_MAX) { | |||||
| // exponent overflow - return infinity half | |||||
| res = (sign << FP16_EXPONENT_BIAS) | 0x7C00; | |||||
| } else { | |||||
| // combine sign, exp and mantissa into normalized half | |||||
| res = (sign << FP16_EXPONENT_BIAS) | (exp << FP16_SIGNIFICAND) | | |||||
| (mantissa >> (FP32_SIGNIFICAND - FP16_SIGNIFICAND)); | |||||
| } | |||||
| } | |||||
| } | |||||
| return res; | |||||
| } | |||||
| std::string Remove(const std::string &from, const std::string &subStr, Mode mode) { | |||||
| std::string result = from; | |||||
| if (mode == PREFIX) { | |||||
| if (from.substr(0, subStr.length()) == subStr) { | |||||
| result = from.substr(subStr.size()); | |||||
| } | |||||
| } else if (mode == SUFFIX) { | |||||
| if (from.rfind(subStr) == from.size() - subStr.size()) { | |||||
| result = from.substr(0, from.size() - subStr.size()); | |||||
| } | |||||
| } else { | |||||
| size_t index; | |||||
| while ((index = result.find(subStr)) != std::string::npos) { | |||||
| result = result.erase(index, subStr.size()); | |||||
| } | |||||
| } | |||||
| return result; | |||||
| } | |||||
| std::vector<std::string> StrSplit(const std::string &str, const std::string &pattern) { | |||||
| std::string::size_type pos; | |||||
| std::vector<std::string> result; | |||||
| std::string tmpStr(str + pattern); | |||||
| std::string::size_type size = tmpStr.size(); | |||||
| for (std::string::size_type i = 0; i < size; i++) { | |||||
| pos = tmpStr.find(pattern, i); | |||||
| if (pos < size) { | |||||
| std::string s = tmpStr.substr(i, pos - i); | |||||
| result.push_back(s); | |||||
| i = pos + pattern.size() - 1; | |||||
| } | |||||
| } | |||||
| return result; | |||||
| } | |||||
| std::vector<std::string> Tokenize(const std::string &src, const std::string &delimiters, | |||||
| const Option<size_t> &maxTokenNum) { | |||||
| if (maxTokenNum.IsSome() && maxTokenNum.Get() == 0) { | |||||
| return {}; | |||||
| } | |||||
| std::vector<std::string> tokens; | |||||
| size_t offset = 0; | |||||
| while (true) { | |||||
| size_t nonDelimiter = src.find_first_not_of(delimiters, offset); | |||||
| if (nonDelimiter == std::string::npos) { | |||||
| break; | |||||
| } | |||||
| size_t delimiter = src.find_first_of(delimiters, nonDelimiter); | |||||
| if (delimiter == std::string::npos || (maxTokenNum.IsSome() && tokens.size() == maxTokenNum.Get() - 1)) { | |||||
| tokens.push_back(src.substr(nonDelimiter)); | |||||
| break; | |||||
| } | |||||
| tokens.push_back(src.substr(nonDelimiter, delimiter - nonDelimiter)); | |||||
| offset = delimiter; | |||||
| } | |||||
| return tokens; | |||||
| } | |||||
| void ShortToFloat32(const int16_t *srcdata, float *dstdata, size_t elementSize) { | |||||
| MS_ASSERT(srcdata != nullptr); | |||||
| MS_ASSERT(dstdata != nullptr); | |||||
| for (size_t i = 0; i < elementSize; i++) { | |||||
| dstdata[i] = ShortToFloat32(srcdata[i]); | |||||
| } | |||||
| } | |||||
| void Float32ToShort(const float *srcdata, int16_t *dstdata, size_t elementSize) { | |||||
| MS_ASSERT(srcdata != nullptr); | |||||
| MS_ASSERT(dstdata != nullptr); | |||||
| for (size_t i = 0; i < elementSize; i++) { | |||||
| dstdata[i] = Float32ToShort(srcdata[i]); | |||||
| } | |||||
| } | |||||
| } // namespace predict | |||||
| } // namespace mindspore | |||||
| @@ -1,154 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef PREDICT_COMMON_UTILS_H_ | |||||
| #define PREDICT_COMMON_UTILS_H_ | |||||
| #include <stdint.h> | |||||
| #include <ctime> | |||||
| #include <cstdint> | |||||
| #include <vector> | |||||
| #include <string> | |||||
| #include "common/mslog.h" | |||||
| #include "common/option.h" | |||||
| #include "include/errorcode.h" | |||||
| namespace mindspore { | |||||
| namespace predict { | |||||
| const int USEC = 1000000; | |||||
| const int MSEC = 1000; | |||||
| uint64_t GetTimeUs(); | |||||
| int16_t Float32ToShort(float srcValue); | |||||
| float ShortToFloat32(int16_t srcValue); | |||||
| void ShortToFloat32(const int16_t *srcData, float *dstData, size_t elementSize); | |||||
| void Float32ToShort(const float *srcData, int16_t *dstData, size_t elementSize); | |||||
| template <typename T> | |||||
| bool IsContain(const std::vector<T> &vec, T element) { | |||||
| for (auto iter = vec.begin(); iter != vec.end(); iter++) { | |||||
| if (*iter == element) { | |||||
| return true; | |||||
| } | |||||
| } | |||||
| return false; | |||||
| } | |||||
| const char WHITESPACE[] = "\t\n\v\f\r "; | |||||
| const char STR_TRUE[] = "true"; | |||||
| const char STR_FALSE[] = "false"; | |||||
| template <typename T> | |||||
| Option<std::string> ToString(T t) { | |||||
| std::ostringstream out; | |||||
| out << t; | |||||
| if (!out.good()) { | |||||
| return Option<std::string>(None()); | |||||
| } | |||||
| return Option<std::string>(out.str()); | |||||
| } | |||||
| template <> | |||||
| inline Option<std::string> ToString(bool value) { | |||||
| return value ? Option<std::string>(STR_TRUE) : Option<std::string>(STR_FALSE); | |||||
| } | |||||
| // get the file name from a given path | |||||
| // for example: "/usr/bin", we will get "bin" | |||||
| inline std::string GetFileName(const std::string &path) { | |||||
| char delim = '/'; | |||||
| size_t i = path.rfind(delim, path.length()); | |||||
| if (i != std::string::npos) { | |||||
| return (path.substr(i + 1, path.length() - i)); | |||||
| } | |||||
| return ""; | |||||
| } | |||||
| // trim the white space character in a string | |||||
| // see also: macro WHITESPACE defined above | |||||
| inline void Trim(std::string *input) { | |||||
| if (input == nullptr) { | |||||
| return; | |||||
| } | |||||
| if (input->empty()) { | |||||
| return; | |||||
| } | |||||
| input->erase(0, input->find_first_not_of(WHITESPACE)); | |||||
| input->erase(input->find_last_not_of(WHITESPACE) + 1); | |||||
| } | |||||
| // to judge whether a string is starting with prefix | |||||
| // for example: "hello world" is starting with "hello" | |||||
| inline bool StartsWithPrefix(const std::string &source, const std::string &prefix) { | |||||
| if (source.length() < prefix.length()) { | |||||
| return false; | |||||
| } | |||||
| return (source.compare(0, prefix.length(), prefix) == 0); | |||||
| } | |||||
| // split string | |||||
| std::vector<std::string> StrSplit(const std::string &str, const std::string &pattern); | |||||
| // tokenize string | |||||
| std::vector<std::string> Tokenize(const std::string &src, const std::string &delimiters, | |||||
| const Option<size_t> &maxTokenNum = Option<size_t>(None())); | |||||
| enum Mode { PREFIX, SUFFIX, ANY }; | |||||
| // remove redundant character | |||||
| std::string Remove(const std::string &from, const std::string &subStr, Mode mode = ANY); | |||||
| template <typename T> | |||||
| inline Option<T> GenericParseValue(const std::string &value) { | |||||
| T ret; | |||||
| std::istringstream input(value); | |||||
| input >> ret; | |||||
| if (input && input.eof()) { | |||||
| return Option<T>(ret); | |||||
| } | |||||
| return Option<T>(None()); | |||||
| } | |||||
| template <> | |||||
| inline Option<std::string> GenericParseValue(const std::string &value) { | |||||
| return Option<std::string>(value); | |||||
| } | |||||
| template <> | |||||
| inline Option<bool> GenericParseValue(const std::string &value) { | |||||
| if (value == "true") { | |||||
| return Option<bool>(true); | |||||
| } else if (value == "false") { | |||||
| return Option<bool>(false); | |||||
| } | |||||
| return Option<bool>(None()); | |||||
| } | |||||
| } // namespace predict | |||||
| } // namespace mindspore | |||||
| #endif // PREDICT_COMMON_UTILS_H_ | |||||
| @@ -1,56 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef PREDICT_INCLUDE_CONTEXT_H_ | |||||
| #define PREDICT_INCLUDE_CONTEXT_H_ | |||||
| #include <memory> | |||||
| #include "dlpack/dlpack.h" | |||||
| #include "include/tensor.h" | |||||
| #define MSPREDICT_API __attribute__((visibility("default"))) | |||||
| namespace mindspore { | |||||
| namespace predict { | |||||
| ///\brief Resource management definition of MindSpore predict. | |||||
| class MSPREDICT_API Context { | |||||
| public: | |||||
| ///\brief Constructor of MindSpore predict context using default value for parameters. | |||||
| /// | |||||
| ///\return Instance of MindSpore predict context. | |||||
| Context(); | |||||
| ///\brief Custum constructor of MindSpore predict context using input value for parameters. | |||||
| /// | |||||
| ///\param[in] threadNum The number of thread during the runtime. | |||||
| ///\param[in] allocator The memory management during the runtime | |||||
| ///\param[in] deviceCtx The device information during the runtime. | |||||
| /// | |||||
| ///\return Instance of MindSpore predict context. | |||||
| Context(int threadNum, std::shared_ptr<Allocator> allocator, DLContext deviceCtx); | |||||
| ///\brief Destructor of MindSpore predict context. | |||||
| virtual ~Context(); | |||||
| public: | |||||
| DLContext deviceCtx; | |||||
| int threadNum = 1; | |||||
| std::shared_ptr<Allocator> allocator; | |||||
| }; | |||||
| } // namespace predict | |||||
| } // namespace mindspore | |||||
| #endif // PREDICT_INCLUDE_CONTEXT_H_ | |||||
| @@ -1,52 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef PREDICT_INCLUDE_ERRORCODE_H_ | |||||
| #define PREDICT_INCLUDE_ERRORCODE_H_ | |||||
| namespace mindspore { | |||||
| namespace predict { | |||||
| using STATUS = int; | |||||
| /* Success */ | |||||
| constexpr int RET_OK = 0; /**< No error occurs. */ | |||||
| /* Common error code, range: [-1, -100]*/ | |||||
| constexpr int RET_ERROR = -1; /**< Common error code. */ | |||||
| constexpr int RET_NULL_PTR = -2; /**< NULL pointer returned.*/ | |||||
| constexpr int RET_PARAM_INVALID = -3; /**< Invalid parameter.*/ | |||||
| constexpr int RET_NO_CHANGE = -4; /**< No change. */ | |||||
| /* Executor error code, range: [-101,-200] */ | |||||
| constexpr int RET_OUT_OF_TENSOR_RANGE = -101; /**< Failed to checking range. */ | |||||
| constexpr int RET_INPUT_TENSOR_ERROR = -102; /**< Failed to checking input tensor. */ | |||||
| constexpr int RET_REENTRANT_ERROR = -103; /**< Exist executor running. */ | |||||
| /* Graph error code, range: [-201,-300] */ | |||||
| constexpr int RET_GRAPH_FILE_ERR = -201; /**< Failed to verify graph file. */ | |||||
| /* Node error code, range: [-301,-400] */ | |||||
| constexpr int RET_NOT_FIND_OP = -301; /**< Failed to find OP. */ | |||||
| constexpr int RET_INVALID_OP_NAME = -302; /**< Invalid OP name. */ | |||||
| constexpr int RET_INVALID_OP_ATTR = -303; /**< Invalid OP attr. */ | |||||
| constexpr int RET_OP_EXECUTE_FAILURE = -304; /**< Failed to execution OP. */ | |||||
| /* Tensor error code, range: [-401,-500] */ | |||||
| constexpr int RET_FORMAT_ERR = -401; /**< Failed to checking tensor format. */ | |||||
| } // namespace predict | |||||
| } // namespace mindspore | |||||
| #endif // PREDICT_INCLUDE_ERRORCODE_H_ | |||||
| @@ -1,139 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef PREDICT_INCLUDE_SESSION_H_ | |||||
| #define PREDICT_INCLUDE_SESSION_H_ | |||||
| #include <memory> | |||||
| #include <string> | |||||
| #include <vector> | |||||
| #include <map> | |||||
| #include <unordered_set> | |||||
| #include "include/context.h" | |||||
| #include "include/tensor.h" | |||||
| #define MSPREDICT_API __attribute__((visibility("default"))) | |||||
| namespace mindspore { | |||||
| namespace predict { | |||||
| using NODE_ID = std::string; | |||||
| ///\brief Graph defined by MindSpore predict. | |||||
| /// | |||||
| ///\note | |||||
| /// The caller does not need to care about detailed implementation of this class, so just list the class name here. | |||||
| class Graph; | |||||
| ///\brief GraphExecution defined by MindSpore predict. | |||||
| /// | |||||
| ///\note | |||||
| /// The caller does not need to care about detailed implementation of this class, so just list the class name here. | |||||
| class GraphExecution; | |||||
| ///\brief MindSpore predict session. | |||||
| /// | |||||
| /// This class represents session of MindSpore predict. | |||||
| /// | |||||
| ///\note | |||||
| /// The caller needs to allocate and free memory of inputs and outputs. | |||||
| /// New Session is not suggested, please use CreateSession function to create new session class. | |||||
| class MSPREDICT_API Session { | |||||
| public: | |||||
| ///\brief Constructor of MindSpore predict session. | |||||
| /// | |||||
| ///\param[in] ctx The context of the session. | |||||
| /// | |||||
| ///\return Instance of MindSpore predict session. | |||||
| explicit Session(const Context &ctx); | |||||
| ///\brief Destructor of MindSpore predict session. | |||||
| ~Session(); | |||||
| ///\brief Init the session. | |||||
| /// | |||||
| ///\param[in] ctx The context of the session. | |||||
| ///\param[in] size The size of the session. | |||||
| ///\param[in] graphBuf The buffer of the graph, used for build session. | |||||
| /// | |||||
| ///\return Return RET_OK if the initialization is success, otherwhise return RET_ERROR. | |||||
| int Init(const char *graphBuf, size_t size); | |||||
| ///\brief Get the input of session. | |||||
| /// | |||||
| ///\return Input node's input tensors if found, empty vector otherwise. | |||||
| /// | |||||
| ///\note | |||||
| /// The caller needs to allocate and free memory of inputs. | |||||
| std::vector<Tensor *> GetInput(); | |||||
| ///\brief Run the session. | |||||
| /// | |||||
| ///\param[in] inputs The input of the session. | |||||
| /// | |||||
| ///\return Return RET_OK if run success, otherwhise return RET_ERROR. | |||||
| ///\note | |||||
| /// Currently input tensors' data format only support FORMAT_NCHW. | |||||
| /// Currently input tensors' data type only support FLOAT. | |||||
| int Run(const std::vector<Tensor *> &inputs); | |||||
| ///\brief Get the output of session. | |||||
| /// | |||||
| ///\param[in] nodeName Given output node name. | |||||
| /// | |||||
| ///\return Output node's output tensors if found, empty vector otherwise. | |||||
| /// | |||||
| ///\note | |||||
| /// The caller needs to free memory of outputs. | |||||
| std::vector<Tensor *> GetOutput(const std::string &nodeName); | |||||
| ///\brief Get the all output of session. | |||||
| /// | |||||
| ///\return Every output node's output tensors. | |||||
| /// | |||||
| ///\note | |||||
| /// The caller needs to free memory of outputs. | |||||
| std::map<std::string, std::vector<Tensor *>> GetAllOutput(); | |||||
| protected: | |||||
| ///\brief Init the executor. | |||||
| /// | |||||
| ///\return Return RET_OK if the initialization is success, otherwhise return RET_ERROR. | |||||
| int InitExecutor(); | |||||
| const Context &_ctx; | |||||
| Graph *_graph = nullptr; | |||||
| GraphExecution *_executor = nullptr; | |||||
| bool reinitExecutor = true; | |||||
| }; | |||||
| ///\brief MindSpore predict neural network session create function | |||||
| /// | |||||
| /// This function used to create MindSpore predict neural network session, which will be used to run the neural network. | |||||
| /// | |||||
| ///\param[in] sessionName The name of the session. | |||||
| ///\param[in] graphBuf The buffer of the graph, used for build session. | |||||
| ///\param[in] size The size of the session. | |||||
| ///\param[in] ctx The context of the session. | |||||
| /// | |||||
| ///\return Instance of MindSpore predict session. | |||||
| /// | |||||
| ///\note | |||||
| /// The caller needs to allocate and free memory of graph buffer. | |||||
| std::shared_ptr<Session> MSPREDICT_API CreateSession(const char *graphBuf, size_t size, const Context &ctx); | |||||
| } // namespace predict | |||||
| } // namespace mindspore | |||||
| #endif // PREDICT_INCLUDE_SESSION_H_ | |||||
| @@ -1,259 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef PREDICT_INCLUDE_TENSOR_H_ | |||||
| #define PREDICT_INCLUDE_TENSOR_H_ | |||||
| #include <memory> | |||||
| #include <vector> | |||||
| #include "dlpack/dlpack.h" | |||||
| #include "schema/inner/ms_generated.h" | |||||
| #define MSPREDICT_API __attribute__((visibility("default"))) | |||||
| namespace mindspore { | |||||
| namespace predict { | |||||
| ///\brief Allocator definition of MindSpore predict. | |||||
| class Allocator; | |||||
| ///\brief Tensor definition of MindSpore predict. | |||||
| class MSPREDICT_API Tensor { | |||||
| public: | |||||
| ///\brief Constructor of MindSpore predict tensor. | |||||
| /// | |||||
| ///\param[in] tensor Define the parameters of the tensor. | |||||
| ///\param[in] copyData Malloc data for the tensor, and copy origin data from | |||||
| /// input tensor. | |||||
| /// | |||||
| ///\return Instance of MindSpore predict tensor. | |||||
| Tensor(const Tensor &tensor, bool copyData = false); | |||||
| ///\brief Constructor of MindSpore predict tensor. | |||||
| /// | |||||
| ///\param[in] dt Data Type of the tensor, see introduction to 'enum DataType' | |||||
| /// for supported type. | |||||
| ///\param[in] dims Dimension Values such as height and width, which defined | |||||
| /// the shape of the tensor. | |||||
| ///\param[in] format Tensor format, see introduction to 'enum Format' for | |||||
| /// supported format. | |||||
| ///\param[in] data Data of the tensor. | |||||
| /// | |||||
| ///\return Instance of MindSpore predict tensor. | |||||
| /// | |||||
| ///\note | |||||
| /// Length of data should align with dt, format and dims, otherwise the | |||||
| /// application might run into unexpected error, | |||||
| /// such as segment fault. | |||||
| /// For example, dt is DT_FLOAT, format is FORMAT_NCHW, dims is [1,3,300,300], | |||||
| /// then minimum length of data should | |||||
| /// be 1 * 3 * 300 * 300 * sizeof(float). | |||||
| Tensor(DataType dt, const std::vector<int64_t> &dims, Format format, void *data); | |||||
| ///\brief Destructor of MindSpore predict tensor. | |||||
| ~Tensor(); | |||||
| ///\brief Get MindSpore predict tensor. | |||||
| /// | |||||
| ///\param[in] Definition of the tensor. | |||||
| /// | |||||
| ///\return Address of MindSpore predict tensor. | |||||
| static Tensor *CopyFromTensorDef(const TensorDef &tensordef); | |||||
| ///\brief Get dtype of MindSpore predict tensor. | |||||
| /// | |||||
| ///\return Dtype of MindSpore predict tensor. | |||||
| DLDataType GetTensorDtype() const; | |||||
| ///\brief Get data of MindSpore predict tensor. | |||||
| /// | |||||
| ///\return Address of MindSpore predict tensor data. | |||||
| void *GetData() const; | |||||
| ///\brief Set data of MindSpore predict tensor. | |||||
| /// | |||||
| ///\param[in] data Address for data of the MindSpore predict tensor instance. | |||||
| /// | |||||
| ///\note | |||||
| /// Length of data should align with dt, format and dims, otherwise the | |||||
| /// application might run into unexpected error, | |||||
| /// such as segment fault. | |||||
| /// For example, dt is DT_FLOAT, format is FORMAT_NCHW, dims is [1,3,300,300], | |||||
| /// then minimum length of data should | |||||
| /// be 1 * 3 * 300 * 300 * sizeof(float). | |||||
| void SetData(void *data); | |||||
| ///\brief Get data type of MindSpore predict tensor. | |||||
| /// | |||||
| ///\return Data Type of the tensor. | |||||
| DataType GetDataType() const; | |||||
| ///\brief Set data type of MindSpore predict tensor. | |||||
| /// | |||||
| ///\param[in] dt Data Type of the tensor, see introduction to 'enum DataType' | |||||
| /// for supported type. | |||||
| void SetDataType(DataType dt); | |||||
| ///\brief Get number of dimension of MindSpore predict tensor. | |||||
| /// | |||||
| ///\return Number of dimension of the MindSpore predict tensor. | |||||
| int GetNDim() const; | |||||
| ///\brief Get dimension of MindSpore predict tensor. | |||||
| /// | |||||
| ///\return Dimension of the MindSpore predict tensor. | |||||
| std::vector<int64_t> GetDims() const; | |||||
| ///\brief Set dimension of MindSpore predict tensor. | |||||
| /// | |||||
| ///\param[in] dims Vector that has values of dimension. | |||||
| void SetDims(const std::vector<int64_t> &dims); | |||||
| ///\brief Get format of MindSpore predict tensor. | |||||
| /// | |||||
| ///\return Format of the MindSpore predict tensor. | |||||
| Format GetFormat() const { return format; } | |||||
| ///\brief Set format of MindSpore predict tensor. | |||||
| /// | |||||
| ///\param[in] format Format of the tensor. | |||||
| void SetFormat(Format format) { this->format = format; } | |||||
| ///\brief Get reference count of MindSpore predict tensor. | |||||
| /// | |||||
| ///\return Reference count of the MindSpore predict tensor. | |||||
| int RefCount() { return refCount; } | |||||
| ///\brief Increase reference count of MindSpore predict tensor. | |||||
| /// | |||||
| ///\param[in] ref The increase of the reference count. | |||||
| void AddRef(int ref) { refCount += ref; } | |||||
| ///\brief Decrease reference count of MindSpore predict tensor. | |||||
| /// | |||||
| ///\param[in] ref The decrease of the reference count. | |||||
| void DefRef(int ref) { refCount -= ref; } | |||||
| ///\brief Get element size of MindSpore predict tensor. | |||||
| /// | |||||
| ///\return Element size of MindSpore predict tensor. | |||||
| size_t GetElementSize() const; | |||||
| ///\brief Get data size of MindSpore predict tensor. | |||||
| /// | |||||
| ///\return Data size of MindSpore predict tensor. | |||||
| size_t GetDataSize() const; | |||||
| ///\brief Get element size of MindSpore predict tensor in NC4HW4 format. | |||||
| /// | |||||
| ///\param[in] isNhwc Whether the current format is NHWC. | |||||
| /// | |||||
| ///\return Element size of MindSpore predict tensor in NC4HW4 format. | |||||
| size_t GetNC4HW4ElementSize(bool isNhwc); | |||||
| ///\brief Get data size of MindSpore predict tensor in NC4HW4 format. | |||||
| /// | |||||
| ///\param[in] isNhwc Whether the current format is NHWC. | |||||
| /// | |||||
| ///\return Data size of MindSpore predict tensor in NC4HW4 format. | |||||
| size_t GetNC4HW4DataSize(bool isNhwc); | |||||
| ///\brief Malloc data for the MindSpore predict tensor. | |||||
| /// | |||||
| ///\param[in] allocator The malloc source for data. | |||||
| ///\param[in] refCount The reference count of the data. | |||||
| /// | |||||
| ///\return Return RET_OK if the data is successfully allocated, otherwhise return RET_ERROR. | |||||
| int MallocData(std::shared_ptr<Allocator> allocator = nullptr, int refCount = 0); | |||||
| ///\brief Free the MindSpore predict tensor. | |||||
| void FreeTensor(); | |||||
| ///\brief Free the data of MindSpore predict tensor. | |||||
| void ForceFreeData(); | |||||
| ///\brief Free the data of MindSpore predict tensor. | |||||
| void FreeData(); | |||||
| ///\brief Compare data size of MindSpore predict tensor in NC4HW4 format. | |||||
| /// | |||||
| ///\param[in] dst The compare tensor. | |||||
| /// | |||||
| ///\return The result of fuction. | |||||
| bool CompareShape(const Tensor &dst); | |||||
| ///\brief Compare shape of MindSpore predict tensor with another shape. | |||||
| /// | |||||
| ///\param[in] other The compare shape information. | |||||
| /// | |||||
| ///\return The result of function. | |||||
| bool CompareShape(const std::vector<int64_t> &other); | |||||
| ///\brief Get instance of MindSpore predict tensor. | |||||
| /// | |||||
| ///\return Instance of MindSpore predict dlTensor. | |||||
| DLTensor *GetDLTensor() { return &dlTensor; } | |||||
| ///\brief Get height of MindSpore predict tensor. | |||||
| /// | |||||
| ///\return Height of MindSpore predict tensor. | |||||
| int64_t Height() const; | |||||
| ///\brief Get width of MindSpore predict tensor. | |||||
| /// | |||||
| ///\return Width of MindSpore predict tensor. | |||||
| int64_t Width() const; | |||||
| ///\brief Get channel of MindSpore predict tensor. | |||||
| /// | |||||
| ///\return Channel of MindSpore predict tensor. | |||||
| int64_t Channel() const; | |||||
| ///\brief Get batch of MindSpore predict tensor. | |||||
| /// | |||||
| ///\return Batch of MindSpore predict tensor. | |||||
| int64_t Batch() const; | |||||
| ///\brief Get stride of MindSpore predict tensor. | |||||
| /// | |||||
| ///\param[in] index the index of stride. | |||||
| /// | |||||
| ///\return Stride of MindSpore predict tensor. | |||||
| int64_t Stride(int index) const; | |||||
| ///\brief Set stride of MindSpore predict tensor by input. | |||||
| /// | |||||
| ///\param[in] index Index of stride | |||||
| ///\param[in] stride The stride to set | |||||
| void SetStride(int index, int64_t stride); | |||||
| ///\brief Set stride of MindSpore predict tensor by dims. | |||||
| void SetStride(); | |||||
| void SetScale(bool isScale = true); | |||||
| private: | |||||
| bool isScale = false; | |||||
| int refCount = 0; | |||||
| int isConst; | |||||
| Format format; | |||||
| DLTensor dlTensor; | |||||
| std::shared_ptr<Allocator> allocator = nullptr; | |||||
| std::vector<float> scale; | |||||
| std::vector<int> zeroPoint; | |||||
| }; | |||||
| } // namespace predict | |||||
| } // namespace mindspore | |||||
| #endif // PREDICT_INCLUDE_TENSOR_H_ | |||||
| @@ -1 +0,0 @@ | |||||
| add_subdirectory(tvm_kernel) | |||||
| @@ -1,27 +0,0 @@ | |||||
| # Created by .ignore support plugin | |||||
| # | |||||
| # filter python | |||||
| *.pyc | |||||
| # filter build | |||||
| *.so | |||||
| *.o | |||||
| # filter coverage | |||||
| coverage/ | |||||
| # filter report | |||||
| *.xml | |||||
| # filter tvm | |||||
| 3rdparty/ | |||||
| # filter build | |||||
| build/ | |||||
| cmake-build-debug/ | |||||
| .idea/ | |||||
| TFLite_Detection_PostProcess_CI | |||||
| app_run | |||||
| output | |||||
| tvm | |||||
| @@ -1,4 +0,0 @@ | |||||
| [submodule "3rdparty/incubator-tvm"] | |||||
| path = 3rdparty/incubator-tvm | |||||
| url = https://github.com/dmlc/tvm.git | |||||
| branch = v0.5 | |||||
| @@ -1,25 +0,0 @@ | |||||
| cmake_minimum_required(VERSION 3.12.1) | |||||
| project(autotensor LANGUAGES CXX) | |||||
| set (MINDSPORE "${PROJECT_SOURCE_DIR}/../../..") | |||||
| set (TVM_KERNEL_LITE "${PROJECT_SOURCE_DIR}/lite") | |||||
| set (THIRDPARTY "${MINDSPORE}/third_party") | |||||
| set (TVM_CLEAN_SOURCE "${THIRDPARTY}/incubator-tvm") | |||||
| set (TVM_BUILD_SOURCE "${PROJECT_SOURCE_DIR}/incubator-tvm") | |||||
| set (BUILD_DIR "${PROJECT_SOURCE_DIR}") | |||||
| set (TVM_KERNEL_OUTPUT_DIR ${CMAKE_CURRENT_BINARY_DIR}) | |||||
| set (TVM_OUTPUT_DIR ${TVM_KERNEL_OUTPUT_DIR}/incubator-tvm) | |||||
| set (LLVM_CONFIG $ENV{LLVM_PATH}) | |||||
| if (NOT LLVM_CONFIG) | |||||
| message(FATAL_ERROR "please set LLVM_PATH in env") | |||||
| endif() | |||||
| set (CMAKE_BUILD_TYPE "Release") | |||||
| include(${TVM_BUILD_SOURCE}/cmake/util/Util.cmake) | |||||
| include(${TVM_BUILD_SOURCE}/cmake/util/FindLLVM.cmake) | |||||
| if(EXISTS ${TVM_BUILD_SOURCE}/cmake/config.cmake) | |||||
| include(${TVM_BUILD_SOURCE}/cmake/config.cmake) | |||||
| endif() | |||||
| add_subdirectory(${TVM_KERNEL_LITE}) | |||||
| set(CMAKE_EXPORT_COMPILE_COMMANDS ON) | |||||
| @@ -1,140 +0,0 @@ | |||||
| cmake_minimum_required(VERSION 3.12) | |||||
| set(CMAKE_CXX_STANDARD 14) | |||||
| if(ENABLE_PREDICT_ARM64) | |||||
| set(TARGS "arm64") | |||||
| elseif(ENABLE_PREDICT_ARM32) | |||||
| set(TARGS "arm32") | |||||
| else() | |||||
| set(TARGS "x86") | |||||
| endif() | |||||
| message("TARGET is set to ${TARGS}") | |||||
| set(CMAKE_VERBOSE_MAKEFILE ON) | |||||
| set(CMAKE_SKIP_RPATH TRUE) | |||||
| if(MSVC) | |||||
| message("not support MSVC") | |||||
| else(MSVC) | |||||
| include(CheckCXXCompilerFlag) | |||||
| check_cxx_compiler_flag("-std=c++11" SUPPORT_CXX11) | |||||
| if ("${CMAKE_BUILD_TYPE}" STREQUAL "Debug") | |||||
| message("Build in Debug mode") | |||||
| set(CMAKE_C_FLAGS "-O0 -g -Wall -Werror -fPIC [${CMAKE_C_FLAGS} -rdynamic") | |||||
| set(CMAKE_CXX_FLAGS "-O0 -g -Wall -Werror -fPIC -std=c++11 ${CMAKE_CXX_FLAGS} -rdynamic") | |||||
| else() | |||||
| set(CMAKE_C_FLAGS "-D_FORTIFY_SOURCE=2 -O2 -fno-rtti -fvisibility=hidden -Wall -Werror -fPIC -fstack-protector-strong ${CMAKE_C_FLAGS}") | |||||
| set(CMAKE_CXX_FLAGS "-D_FORTIFY_SOURCE=2 -O2 -fno-rtti -fvisibility=hidden -Wall -Werror -fPIC -fstack-protector-strong -std=c++11 ${CMAKE_CXX_FLAGS}") | |||||
| set(CMAKE_EXE_LINKER_FLAGS "-Wl,-z,relro,-z,now -Wl,-z,noexecstack") | |||||
| endif () | |||||
| if (CMAKE_CXX_COMPILER_ID MATCHES "GNU" AND | |||||
| CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 7.0) | |||||
| set(CMAKE_CXX_FLAGS "-Wall -Werror -faligned-new ${CMAKE_CXX_FLAGS}") | |||||
| endif() | |||||
| if (CODE_COVERAGE) | |||||
| set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Werror -fprofile-arcs -ftest-coverage -O0") | |||||
| endif() | |||||
| endif(MSVC) | |||||
| if("${TARGS}" STREQUAL "x86") | |||||
| set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D__x86_64__ -fno-strict-aliasing") | |||||
| set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__x86_64__ -fno-strict-aliasing") | |||||
| endif() | |||||
| set(PRJ_SRC_DIR "${PROJECT_SOURCE_DIR}") | |||||
| set(PRJ_KLIB_DIR "${PROJECT_SOURCE_DIR}") | |||||
| set(PRJ_LITE_DIR "${PROJECT_SOURCE_DIR}/lite") | |||||
| # include directories | |||||
| message("current PRJ DIR: ${PROJECT_SOURCE_DIR}") | |||||
| message("current SUB_PRJ DIR: ${PRJ_SRC_DIR}") | |||||
| message("current KLIB DIR: ${PRJ_KLIB_DIR}") | |||||
| message("current PRJ_LITE_DIR: ${PRJ_LITE_DIR}") | |||||
| message("CMAKE_CURRENT_BINARY_DIR: ${CMAKE_CURRENT_BINARY_DIR}") | |||||
| set(DMLC_CORE "${TVM_BUILD_SOURCE}/3rdparty/dmlc-core") | |||||
| set(DLPACK "${TVM_BUILD_SOURCE}/3rdparty/dlpack") | |||||
| set(PREDICT "${PRJ_SRC_DIR}/../../") | |||||
| set(SECUREC "${PRJ_SRC_DIR}/../../../third_party/securec") | |||||
| message("include dir: ${DLPACK}/include") | |||||
| include_directories(${DLPACK}/include) | |||||
| include_directories(${DMLC_CORE}/include) | |||||
| include_directories(${TVM_BUILD_SOURCE}/include) | |||||
| include_directories(${TVM_BUILD_SOURCE}/src/pass) | |||||
| include_directories(${PRJ_LITE_DIR}) | |||||
| include_directories(${PRJ_LITE_DIR}/include) | |||||
| include_directories(${PRJ_LITE_DIR}/../../..) | |||||
| include_directories(${PRJ_LITE_DIR}/../../../include) | |||||
| include_directories(${PRJ_LITE_DIR}/../../../src/runtime) | |||||
| include_directories(${PRJ_LITE_DIR}/../../../common) | |||||
| include_directories(${SECUREC}) | |||||
| message("SECUREC: " "${SECUREC}/build/src") | |||||
| include_directories(${PREDICT}) | |||||
| include_directories(${PREDICT}/src) | |||||
| include_directories(${PRJ_SRC_DIR}/../../../third_party/flatbuffers/include) | |||||
| include_directories(${PRJ_SRC_DIR}/../../../third_party) | |||||
| # Source file lists | |||||
| file(GLOB_RECURSE TVM_KERNEL_SRC | |||||
| src/api/*.cc | |||||
| src/tflite/TFLite_Detection_PostProcess.cc) | |||||
| set (TVM_RUNTIME_FLG $ENV{TVM_RUNTIME_ON}) | |||||
| if ("${TVM_RUNTIME_FLG}" STREQUAL "true") | |||||
| message("Using TVM runtime function") | |||||
| file(GLOB TVM_RUNTIME_SRCS | |||||
| ${TVM_ROOT}/apps/howto_deploy/tvm_runtime_pack.cc) | |||||
| else() | |||||
| message("Using LITE runtime function") | |||||
| set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DLITE_RUNTIME_ON -DTVM_RUNTIME_HEADER_ONLY -DLITE_THREAD_POOL_SHARED") | |||||
| set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DLITE_RUNTIME_ON -DTVM_RUNTIME_HEADER_ONLY -DLITE_THREAD_POOL_SHARED") | |||||
| file(GLOB_RECURSE TVM_RUNTIME_SRCS | |||||
| ${PREDICT}/src/runtime/*.cc) | |||||
| endif() | |||||
| if("${TARGS}" STREQUAL "arm32" OR "${TARGS}" STREQUAL "arm64") | |||||
| set(CMAKE_SKIP_BUILD_RPATH TRUE) | |||||
| set(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE) | |||||
| set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE) | |||||
| endif() | |||||
| set(LIB_X86_PATH "${PRJ_KLIB_DIR}/build/lib_x86") | |||||
| set(LIB_ARM64_PATH "${PRJ_KLIB_DIR}/build/lib_arm64") | |||||
| set(LIB_ARM32_PATH "${PRJ_KLIB_DIR}/build/lib_arm32") | |||||
| if("${TARGS}" STREQUAL "x86") | |||||
| set(KLIBS_PATH "${LIB_X86_PATH}") | |||||
| elseif("${TARGS}" STREQUAL "arm64") | |||||
| set(KLIBS_PATH "${LIB_ARM64_PATH}") | |||||
| elseif("${TARGS}" STREQUAL "arm32") | |||||
| set(KLIBS_PATH "${LIB_ARM32_PATH}") | |||||
| else() | |||||
| message(ERROR " not suport ${TARGS}") | |||||
| endif() | |||||
| file(GLOB_RECURSE KERNEL_LIBS "${KLIBS_PATH}/*.o") | |||||
| message("KERNEL_PATH= ${KLIBS_PATH}") | |||||
| add_compile_options(-DTVM_CUDA_RUNTIM=0) | |||||
| add_compile_options(-DTVM_METAL_RUNTIM=0) | |||||
| add_compile_options(-DTVM_OPENCL_RUNTIM=0) | |||||
| link_directories(${KLIBS_PATH}) | |||||
| add_library(tvm_runtime_pack STATIC ${TVM_RUNTIME_SRCS}) | |||||
| add_library(kernel_manager STATIC ${TVM_KERNEL_SRC}) | |||||
| add_library(tvm_kernel_static STATIC ${TVM_KERNEL_SRC} ${KERNEL_LIBS}) | |||||
| add_library(tvm_kernel SHARED ${TVM_KERNEL_SRC} ${KERNEL_LIBS}) | |||||
| set_target_properties(tvm_kernel PROPERTIES LINK_FLAGS "-Wl,-z,relro,-z,now -Wl,-z,noexecstack") | |||||
| set(KERNEL_LD_LIB tvm_runtime_pack dl) | |||||
| if("${TARGS}" STREQUAL "x86") | |||||
| set(KERNEL_LD_LIB ${KERNEL_LD_LIB} pthread) | |||||
| else() | |||||
| set(ANDROID_ALLOW_UNDEFINED_SYMBOLS TRUE) | |||||
| endif() | |||||
| target_link_libraries(tvm_kernel ${KERNEL_LD_LIB} libsecurec.a) | |||||
| target_link_libraries(tvm_kernel_static OBJECT tvm_runtime_pack libsecurec.a) | |||||
| add_dependencies(tvm_kernel securec) | |||||
| @@ -1,94 +0,0 @@ | |||||
| /** | |||||
| * Copyright 2019 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this ${file} except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef PREDICT_MODULE_TVM_KERNEL_LITE_INCLUDE_LITE_API_KM_API_H_ | |||||
| #define PREDICT_MODULE_TVM_KERNEL_LITE_INCLUDE_LITE_API_KM_API_H_ | |||||
| #include <dlpack/dlpack.h> | |||||
| #include <functional> | |||||
| #include <string> | |||||
| #include <vector> | |||||
| #include "schema/inner/ms_generated.h" | |||||
| #include "schema/inner/op_generated.h" | |||||
| #define PUBLIC __attribute__((visibility("default"))) | |||||
| /*! | |||||
| * \brief Call tvm kernel. | |||||
| * \param fid tvm kernel id. | |||||
| * \param tensors tvm kernel arguments. | |||||
| * \return 0 if SUCCESS. | |||||
| */ | |||||
| PUBLIC int CallKernel(const std::string &fid, const std::vector<DLTensor *> &tensors); | |||||
| /*! | |||||
| * \brief Get tvm kernel by id. | |||||
| * \param fid tvm kernel id. | |||||
| * \return std::function if SUCCESS else nullptr. | |||||
| */ | |||||
| PUBLIC std::function<int(const std::vector<DLTensor *> &)> GetKernel(const std::string &fid); | |||||
| /*! | |||||
| * \brief Get tvm kernel by OpDef. | |||||
| * \param opdef defined by predict schema. | |||||
| * \param tensors. | |||||
| * \param option. | |||||
| * \return std::function if SUCCESS else nullptr. | |||||
| */ | |||||
| struct PUBLIC KernelOption { | |||||
| int numThreads = 0; | |||||
| std::string device; | |||||
| }; | |||||
| PUBLIC std::function<int(const std::vector<DLTensor *> &)> GetKernel(const mindspore::predict::OpDef &opdef, | |||||
| const std::vector<DLTensor *> &tensors, | |||||
| const KernelOption &option); | |||||
| /*! | |||||
| * \brief load TVM Kernel lib | |||||
| * \param mode 0 indicate shared lib | |||||
| * \param fname shared lib path when mode equals 0 | |||||
| * \return 0 if SUCCESS | |||||
| */ | |||||
| PUBLIC void InitKernelManager(int mode, const std::string &fname); | |||||
| /* | |||||
| * \brief config ThreadPool using mode | |||||
| * \param mode: -1 using mid speed cpu first, 1 using higher speed cpu first | |||||
| * \param nthreads: threads num to be used, can't exceed cpu num | |||||
| * if mode==-1 bind mid cpu first | |||||
| * if mode==1 bind higher cpu first | |||||
| * if mode==0 no bind | |||||
| * \param execute_self: cur thread do arithmetic or not | |||||
| * execute_self: true cur thread do arithmetic work | |||||
| * execute_self: false cur thread not do arithmetic work | |||||
| */ | |||||
| PUBLIC void ConfigThreadPool(int mode = -1, int nthreads = 2, bool execute_self = true); | |||||
| /* | |||||
| * \brief provid simple api for mslite, mslite not care mode | |||||
| */ | |||||
| inline void CfgThreadPool(int nthread) { ConfigThreadPool(-1, nthread, true); } | |||||
| /* | |||||
| * the Callback function to do cpu bind for master thread. | |||||
| */ | |||||
| PUBLIC void DoMasterThreadBind(bool bindflg); | |||||
| PUBLIC void DoAllThreadBind(bool ifBind); | |||||
| #undef PUBLIC | |||||
| #endif // PREDICT_MODULE_TVM_KERNEL_LITE_INCLUDE_LITE_API_KM_API_H_ | |||||
| @@ -1,17 +0,0 @@ | |||||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||||
| # | |||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| # you may not use this file except in compliance with the License. | |||||
| # You may obtain a copy of the License at | |||||
| # | |||||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||||
| # | |||||
| # Unless required by applicable law or agreed to in writing, software | |||||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| # See the License for the specific language governing permissions and | |||||
| # limitations under the License. | |||||
| # ============================================================================ | |||||
| """Neural network operators""" | |||||
| # from . import arm_cpu | |||||
| # from . import at_ops | |||||
| @@ -1,17 +0,0 @@ | |||||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||||
| # | |||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| # you may not use this file except in compliance with the License. | |||||
| # You may obtain a copy of the License at | |||||
| # | |||||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||||
| # | |||||
| # Unless required by applicable law or agreed to in writing, software | |||||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| # See the License for the specific language governing permissions and | |||||
| # limitations under the License. | |||||
| # ============================================================================ | |||||
| """Schedule for ARM CPU""" | |||||
| from . import conv2d | |||||
| @@ -1,470 +0,0 @@ | |||||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||||
| # | |||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| # you may not use this file except in compliance with the License. | |||||
| # You may obtain a copy of the License at | |||||
| # | |||||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||||
| # | |||||
| # Unless required by applicable law or agreed to in writing, software | |||||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| # See the License for the specific language governing permissions and | |||||
| # limitations under the License. | |||||
| # ============================================================================ | |||||
| """Conv2D schedule for ARM CPU""" | |||||
| from __future__ import absolute_import as _abs | |||||
| import functools | |||||
| import tvm | |||||
| from tvm import autotvm | |||||
| import tvm.contrib.nnpack | |||||
| from topi.generic import schedule_conv2d_nchw | |||||
| from topi.util import traverse_inline, get_const_tuple | |||||
| from topi.nn import pad, conv2d | |||||
| from topi.nn.util import get_const_int, get_pad_tuple | |||||
| @autotvm.register_topi_compute(conv2d, "arm_cpu", ["asm"]) | |||||
| def conv2d_arm_cpu(cfg, data, kernel, strides, padding, dilation, out_dtype): | |||||
| """TOPI compute callback for conv2d | |||||
| Parameters | |||||
| ---------- | |||||
| cfg: ConfigEntity | |||||
| The config for this template | |||||
| data : tvm.Tensor | |||||
| 4-D with shape [batch, in_channel, in_height, in_width] | |||||
| kernel : tvm.Tensor | |||||
| 4-D with shape [num_filter, in_channel, filter_height, filter_width] or | |||||
| pre-packed 5-D with shape [num_filter_chunk, in_channel, filter_height, | |||||
| filter_width, num_filter_block] | |||||
| strides : list of two ints | |||||
| [stride_height, stride_width] | |||||
| padding : list of two ints | |||||
| [pad_height, pad_width] | |||||
| dilation : list of two ints | |||||
| [dilation_height, dilation_width] | |||||
| out_dtype: str | |||||
| The output type. This is used for mixed precision. | |||||
| Returns | |||||
| ------- | |||||
| output : tvm.Tensor | |||||
| 4-D with shape [batch, out_channel, out_height, out_width] | |||||
| """ | |||||
| args = _gen_cfg(cfg, data, kernel, strides, padding, dilation, num_tile=2) | |||||
| return _conv_spatial_pack_asm( | |||||
| args, data, kernel, strides, padding, dilation, out_dtype | |||||
| ) | |||||
@autotvm.register_topi_schedule(schedule_conv2d_nchw, "arm_cpu", ["asm"])
def schedule_conv2d_nchw_arm_cpu(outs):
    """TOPI schedule callback for conv2d on ARM CPU ("asm" template).

    Parameters
    ----------
    outs: Array of Tensor
        The computation graph description of conv2d
        in the format of an array of tensors.

    Returns
    -------
    s: Schedule
        The computation schedule for conv2d.
    """
    return _conv_schedule_asm(outs)
def _gen_cfg(cfg, data, kernel, strides, padding, dilation, num_tile):
    """Define the autotvm tuning space for the conv2d template and return
    the selected tile sizes as ``(is_var, vh_, vw_, vc_)``."""
    # Kernel is either plain 4-D or pre-packed 5-D with a trailing block dim.
    if len(kernel.shape) == 4:
        out_ch, _, k_h, k_w = get_const_tuple(kernel.shape)
    else:  # pre-packed kernel: fold the block dim back into the channel count
        out_ch, _, k_h, k_w, blk = get_const_tuple(kernel.shape)
        out_ch = out_ch * blk

    if isinstance(dilation, int):
        dil_h = dil_w = dilation
    else:
        dil_h, dil_w = dilation

    batch, in_ch, in_h, in_w = get_const_tuple(data.shape)

    # Effective (dilated) kernel extents drive padding and output size.
    dk_h = (k_h - 1) * dil_h + 1
    dk_w = (k_w - 1) * dil_w + 1
    pad_top, pad_left, pad_bottom, pad_right = get_pad_tuple(
        padding, (dk_h, dk_w)
    )
    if isinstance(strides, (tuple, list)):
        s_h, s_w = strides
    else:
        s_h, s_w = strides, strides
    out_h = (in_h + pad_top + pad_bottom - dk_h) // s_h + 1
    out_w = (in_w + pad_left + pad_right - dk_w) // s_w + 1

    n = cfg.axis(batch)
    co = cfg.axis(out_ch)
    oh = cfg.axis(out_h)
    ow = cfg.axis(out_w)
    ci = cfg.reduce_axis(in_ch)
    kh = cfg.reduce_axis(k_h)
    kw = cfg.reduce_axis(k_w)

    if num_tile == 2:  # for arm cpu
        # Every exact factorization of out_ch with block >= 3, plus no-split.
        candidate_vc = [
            [out_ch // factor, factor]
            for factor in range(3, out_ch)
            if out_ch % factor == 0
        ]
        candidate_vc.append([1, out_ch])
        co, vc = cfg.define_split(
            "tile_co", co, num_outputs=2, policy="candidate", candidate=candidate_vc
        )
        oh, vh = cfg.define_split("tile_oh", oh, num_outputs=2)
        ow, vw = cfg.define_split("tile_ow", ow, num_outputs=2)
    elif num_tile == 3:  # for mali gpu
        co, _, vc = cfg.define_split("tile_co", co, num_outputs=3)
        oh, _, vh = cfg.define_split("tile_oh", oh, num_outputs=3)
        ow, _, vw = cfg.define_split("tile_ow", ow, num_outputs=3)
    else:
        raise RuntimeError("Invalid num_tile")

    # Only one legal ordering is offered to the tuner.
    axis_order = [n, co, oh, ow, ci, kh, kw, vh, vw, vc]
    cfg.define_reorder(
        "reorder_0",
        axis_order,
        policy="candidate",
        candidate=[list(axis_order)],
    )

    vc_ = cfg["tile_co"].size[-1]
    vh_ = cfg["tile_oh"].size[-1]
    vw_ = cfg["tile_ow"].size[-1]
    is_var = False
    return (is_var, vh_, vw_, vc_)
def _conv_spatial_pack_asm(args, data, kernel, strides, padding,
                           dilation, out_dtype):
    """Spatial-pack conv2d compute for the "asm" template.

    Packs data and kernel into tiled layouts (sized by the vh_/vw_/vc_ tile
    factors carried in ``args``), performs the convolution over the packed
    tensors, and unpacks back to NCHW.

    Parameters
    ----------
    args : tuple
        (is_var, vh_, vw_, vc_) as produced by ``_gen_cfg``.
    data, kernel, strides, padding, dilation, out_dtype
        Same meaning as in ``conv2d_arm_cpu``.

    Returns
    -------
    tvm.Tensor
        4-D NCHW output tagged "asm_conv2d_output".
    """
    is_var, vh_, vw_, vc_ = args
    # create workload according to raw arguments
    out_dtype = out_dtype or data.dtype
    n_, ci_, ih_, iw_ = data.shape if is_var else get_const_tuple(data.shape)
    if isinstance(dilation, int):
        dilation_h = dilation_w = dilation
    else:
        dilation_h, dilation_w = dilation
    if len(kernel.shape) == 4:
        pre_packed = False
        co_, _, kh_, kw_ = kernel.shape if is_var else get_const_tuple(kernel.shape)
    else:  # kernel tensor is pre packed
        pre_packed = True
        co_, _, kh_, kw_, vc_ = kernel.shape if is_var else get_const_tuple(kernel.shape)
        co_ = co_ * vc_
    # effective kernel extent after dilation
    dilated_kernel_h = (kh_ - 1) * dilation_h + 1
    dilated_kernel_w = (kw_ - 1) * dilation_w + 1
    pad_top, pad_left, pad_bottom, pad_right = get_pad_tuple(
        padding, (dilated_kernel_h, dilated_kernel_w)
    )
    hstr, wstr = strides if isinstance(strides, (tuple, list)) else (strides, strides)
    oh_ = (ih_ + pad_top + pad_bottom - dilated_kernel_h) // hstr + 1
    ow_ = (iw_ + pad_left + pad_right - dilated_kernel_w) // wstr + 1
    data_pad = pad(data, [0, 0, pad_top, pad_left], [0, 0, pad_bottom, pad_right])
    # NOTE(review): assumes oh_ % vh_ == 0 and ow_ % vw_ == 0 — otherwise the
    # unpacked oshape below is smaller than (oh_, ow_); confirm with callers.
    oh_div = oh_ // vh_
    ow_div = ow_ // vw_
    kvshape = (co_ // vc_, ci_, kh_, kw_, vc_)
    ovshape = (n_, co_ // vc_, oh_div, ow_div, vh_, vw_, vc_)
    oshape = (n_, co_, oh_div * vh_, ow_div * vw_)
    if dilation_h != 1 or dilation_w != 1:
        # undilate input data
        dvshape = (n_, oh_ // vh_, ow_ // vw_, kh_, kw_, vh_, vw_, ci_)
        data_vec = tvm.compute(
            dvshape,
            lambda n, h, w, kh, kw, vh, vw, ci: data_pad[n][ci][
                (h * vh_ + vh) * hstr + kh * dilation_h
            ][(w * vw_ + vw) * wstr + kw * dilation_w],
            name="data_vec_undilated",
        )
    else:
        # packed tile covers the full receptive field of a vh_ x vw_ output tile
        dvshape = (
            n_,
            oh_ // vh_,
            ow_ // vw_,
            (vh_ - 1) * hstr + kh_,
            (vw_ - 1) * wstr + kw_,
            ci_,
        )
        data_vec = tvm.compute(
            dvshape,
            lambda n, h, w, vh, vw, ci: data_pad[n][ci][h * vh_ * hstr + vh][
                w * vw_ * wstr + vw
            ],
            name="data_vec",
        )
    if pre_packed:
        kernel_vec = kernel
    else:
        kernel_vec = tvm.compute(
            kvshape,
            lambda co, ci, kh, kw, vc: kernel[co * vc_ + vc][ci][kh][kw],
            name="kernel_vec",
        )
    ci = tvm.reduce_axis((0, ci_), name="ci")
    kh = tvm.reduce_axis((0, kh_), name="kh")
    kw = tvm.reduce_axis((0, kw_), name="kw")
    # asm begin----
    # attrs are consumed by the downstream asm code generator (see _schedule_deconv
    # which reads conv.op.attrs); ACUM_DTYPE is the accumulator type per input dtype.
    type_map = {
        "int8": "int32",
        "uint8": "uint32",
        "float32": "float32",
        "float16": "float16",
    }
    acum_dtype = type_map[data.dtype]
    attrs = {
        "SH": hstr,
        "SW": wstr,
        "PH": pad_top,
        "PW": pad_left,
        "DILA_H": dilation_h,
        "DILA_W": dilation_w,
        "VH": vh_,
        "VW": vw_,
        "VC": vc_,
        "ACUM_DTYPE": acum_dtype,
    }
    # asm end----
    if dilation_h != 1 or dilation_w != 1:
        conv = tvm.compute(
            ovshape,
            lambda n, co, h, w, vh, vw, vc: tvm.sum(
                data_vec[n, h, w, kh, kw, vh, vw, ci].astype(out_dtype)
                * kernel_vec[co, ci, kh, kw, vc].astype(out_dtype),
                axis=[ci, kh, kw],
            ),
            name="conv",
            attrs=attrs,
        )
    else:
        conv = tvm.compute(
            ovshape,
            lambda n, co, h, w, vh, vw, vc: tvm.sum(
                data_vec[n, h, w, vh * hstr + kh, vw * wstr + kw, ci].astype(out_dtype)
                * kernel_vec[co, ci, kh, kw, vc].astype(out_dtype),
                axis=[ci, kh, kw],
            ),
            name="conv",
            attrs=attrs,
        )
    # unpack the tiled layout back to plain NCHW
    output = tvm.compute(
        oshape,
        lambda n, co, h, w: conv[n][co // vc_][h // vh_][w // vw_][h % vh_][w % vw_][
            co % vc_
        ],
        name="output_unpack",
        tag="asm_conv2d_output",
    )
    return output
def intrin_conv(args):
    """Declare a tensor intrinsic that lowers the innermost conv micro-kernel
    to a call to the external routine ``opname``.

    Parameters
    ----------
    args : tuple
        (ci_, vh_, vw_, vc_, kh_, kw_, sh_, sw_, dila_h, dila_w,
         dtype, acum_dtype, opname, core_id).  ``ci_`` may be None, in which
        case a symbolic extent is used.

    Returns
    -------
    TensorIntrin binding the inner conv computation to ``opname``.
    """
    (
        ci_,
        vh_,
        vw_,
        vc_,
        kh_,
        kw_,
        sh_,
        sw_,
        dila_h,
        dila_w,
        dtype,
        acum_dtype,
        opname,
        core_id,
    ) = args
    hstr, wstr = sh_, sw_
    # symbolic input-channel extent when not fixed at declaration time
    ci_ = tvm.var("ci_") if ci_ is None else ci_
    kvshape = (ci_, kh_, kw_, vc_)
    ovshape = (vh_, vw_, vc_)
    if dila_h != 1 or dila_w != 1:
        # dilated path consumes the pre-undilated data layout
        dvshape = (kh_, kw_, vh_, vw_, ci_)
    else:
        dvshape = ((vh_ - 1) * hstr + kh_, (vw_ - 1) * wstr + kw_, ci_)
    data_vec = tvm.placeholder(dvshape, name="a", dtype=dtype)
    kernel_vec = tvm.placeholder(kvshape, name="b", dtype=dtype)
    ci = tvm.reduce_axis((0, ci_), name="ci")
    kh = tvm.reduce_axis((0, kh_), name="kh")
    kw = tvm.reduce_axis((0, kw_), name="kw")
    # The compute below only describes the semantics the external routine must
    # match; codegen replaces it with the call emitted in intrin_func.
    if dila_h != 1 or dila_w != 1:
        conv = tvm.compute(
            ovshape,
            lambda vh, vw, vc: tvm.sum(
                data_vec[kh, kw, vh, vw, ci].astype(acum_dtype)
                * kernel_vec[ci, kh, kw, vc].astype(acum_dtype),
                axis=[ci, kh, kw],
            ),
            name="conv",
        )
    else:
        conv = tvm.compute(
            ovshape,
            lambda vh, vw, vc: tvm.sum(
                data_vec[vh * hstr + kh, vw * wstr + kw, ci].astype(acum_dtype)
                * kernel_vec[ci, kh, kw, vc].astype(acum_dtype),
                axis=[ci, kh, kw],
            ),
            name="conv",
        )
    # row-major strides for each buffer: stride[i] = product of trailing dims
    stride_a = [
        functools.reduce(lambda x, y: x * y, dvshape[i + 1: len(dvshape)])
        for i in range(0, len(dvshape) - 1)
    ]
    stride_a.append(1)
    stride_b = [
        functools.reduce(lambda x, y: x * y, kvshape[i + 1: len(kvshape)])
        for i in range(0, len(kvshape) - 1)
    ]
    stride_b.append(1)
    stride_c = [
        functools.reduce(lambda x, y: x * y, ovshape[i + 1: len(ovshape)])
        for i in range(0, len(ovshape) - 1)
    ]
    stride_c.append(1)
    a_buffer = tvm.decl_buffer(
        data_vec.shape, data_vec.dtype, name="A", offset_factor=1, strides=stride_a
    )
    b_buffer = tvm.decl_buffer(
        kernel_vec.shape, kernel_vec.dtype, name="B", offset_factor=1, strides=stride_b
    )
    c_buffer = tvm.decl_buffer(
        conv.shape, conv.dtype, name="C", offset_factor=1, strides=stride_c
    )

    def intrin_func(ins, outs):
        # Emit a single extern call: opname(C, A, B, ci, vh, vw, vc, kh, sh, core_id).
        # NOTE(review): kw_, sw_ and the dilation factors are not passed to the
        # extern routine — presumably baked into opname; confirm against the asm side.
        aa, bb = ins
        cc = outs[0]

        def _body():
            ib = tvm.ir_builder.create()
            ib.emit(
                tvm.call_extern(
                    "int32",
                    opname,
                    cc.access_ptr("w"),
                    aa.access_ptr("r"),
                    bb.access_ptr("r"),
                    ci_,
                    vh_,
                    vw_,
                    vc_,
                    kh_,
                    sh_,
                    core_id,
                )
            )
            return ib.get()

        return _body()

    return tvm.decl_tensor_intrin(
        conv.op, intrin_func, binds={data_vec: a_buffer, kernel_vec: b_buffer, conv: c_buffer}
    )
def _schedule_asm(s, data_vec, kernel_vec, conv, output, last):
    """Schedule the packed conv2d stages produced by ``_conv_spatial_pack_asm``.

    Tiles the fused output to match the packed conv layout, computes conv at
    the output tile, and parallelizes the packing stages.  Returns the
    (mutated) schedule ``s``.
    """
    n, co, oh, ow, vh, vw, vc = s[conv].op.axis
    # inner-tile extents (vh_, vw_, vc_) recovered from the conv stage itself
    axis_extent = []
    for i in (vh, vw, vc):
        axis_extent.append(get_const_int(i.dom.extent))
    reduce_extent = []
    for i in s[conv].op.reduce_axis[1:]:
        reduce_extent.append(get_const_int(i.dom.extent))
    vh_, vw_, vc_ = axis_extent
    # schedule fusion: re-tile the final NCHW tensor with the same factors
    n, co, h, w = s[last].op.axis
    co, vc = s[last].split(co, vc_)
    oh, vh = s[last].split(h, vh_)
    ow, vw = s[last].split(w, vw_)
    s[last].reorder(n, co, oh, ow, vh, vw, vc)
    if last != output:
        # unpack stage is fused away when something else is the final consumer
        s[output].compute_inline()
    s[conv].compute_at(s[last], ow)
    # mark parallel
    s[last].parallel(co)
    if data_vec.op.name == "data_vec_undilated":
        # undilated packing has 8 axes; plain packing has 6
        _, h, _, _, _, _, _, _ = s[data_vec].op.axis
    else:
        _, h, _, _, _, _ = s[data_vec].op.axis
    s[data_vec].parallel(h)
    if kernel_vec.op.name == "kernel_vec":
        co, _, _, _, _ = s[kernel_vec].op.axis
        if autotvm.GLOBAL_SCOPE.in_tuning:
            # kernel packing will be pre-computed during compilation, so we skip
            # this part to make tuning records correct
            s[kernel_vec].pragma(co, "debug_skip_region")
        else:
            s[kernel_vec].parallel(co)
    elif kernel_vec.op.name == "kernel_vec_conv2d_transpose":  # for conv2d transpose
        co, _, _, _, _ = s[kernel_vec].op.axis
        s[kernel_vec].parallel(co)
    return s
def _conv_schedule_asm(outs):
    """Create the schedule for tensors tagged "asm_conv2d_output".

    Parameters
    ----------
    outs : list of tvm.Tensor
        Output tensors of the conv2d computation graph.

    Returns
    -------
    s : Schedule
        The scheduled computation.
    """
    s = tvm.create_schedule([x.op for x in outs])

    def _callback(op):
        if "asm_conv2d_output" in op.tag:
            # schedule conv2d
            output = op.output(0)
            conv = op.input_tensors[0]
            # when the first input is the "attr" tensor, the real inputs start at 1
            sidx = 1 if conv.op.input_tensors[0].name == "attr" else 0
            data_vec = conv.op.input_tensors[sidx]
            data_pad = data_vec.op.input_tensors[0]
            s[data_pad].compute_inline()
            kernel_vec = conv.op.input_tensors[sidx + 1]
            if kernel_vec.op.name == "kernel_vec":
                kernel = kernel_vec.op.input_tensors[0]
            else:
                kernel = kernel_vec
            if isinstance(kernel.op, tvm.tensor.ComputeOp) and "dilate" in kernel.op.tag:
                s[kernel].compute_inline()
            # Fix: the original if/else around this call had two byte-identical
            # branches; the duplicate call is collapsed into one.
            _schedule_asm(s, data_vec, kernel_vec, conv, output, outs[0])

    traverse_inline(s, outs[0].op, _callback)
    return s
| @@ -1,477 +0,0 @@ | |||||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||||
| # | |||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| # you may not use this file except in compliance with the License. | |||||
| # You may obtain a copy of the License at | |||||
| # | |||||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||||
| # | |||||
| # Unless required by applicable law or agreed to in writing, software | |||||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| # See the License for the specific language governing permissions and | |||||
| # limitations under the License. | |||||
| # ============================================================================ | |||||
| """Conv2D_transpose of stride=2, kernel=2*2 schedule for ARM CPU""" | |||||
| from __future__ import absolute_import as _abs | |||||
| import functools | |||||
| import tvm | |||||
| from tvm import autotvm | |||||
| import tvm.contrib.nnpack | |||||
| from topi.generic import schedule_conv2d_nchw | |||||
| from topi.util import traverse_inline, get_const_tuple | |||||
| from topi.nn import conv2d | |||||
@autotvm.register_topi_compute(conv2d, "arm_cpu", ["deconv"])
def conv2d_arm_cpu_deconv(cfg, data, kernel, out_dtype):
    """TOPI compute callback for the stride-2, 2x2-kernel conv2d-transpose
    ("deconv") template on ARM CPU.

    Parameters
    ----------
    cfg: ConfigEntity
        The config for this template
    data : tvm.Tensor
        4-D with shape [batch, in_channel, in_height, in_width]
    kernel : tvm.Tensor
        4-D with shape [num_filter, in_channel, filter_height, filter_width] or
        pre-packed 5-D with shape [num_filter_chunk, in_channel, filter_height,
        filter_width, num_filter_block]
    out_dtype: str
        The output type. This is used for mixed precision.

    Returns
    -------
    output : tvm.Tensor
        4-D with shape [batch, out_channel, out_height, out_width]
    """
    packed_args = _gen_cfg_deconv(cfg, data, kernel, num_tile=2)
    return _conv_spatial_pack_deconv(packed_args, data, kernel, out_dtype)
@autotvm.register_topi_schedule(schedule_conv2d_nchw, "arm_cpu", ["deconv"])
def schedule_conv2d_nchw_arm_cpu_deconv(cfg, outs):
    """TOPI schedule callback for the "deconv" conv2d template on ARM CPU.

    Parameters
    ----------
    cfg: ConfigEntity
        The config for this template
    outs: Array of Tensor
        The computation graph description of conv2d
        in the format of an array of tensors.

    Returns
    -------
    s: Schedule
        The computation schedule for conv2d.
    """
    return _conv_schedule_deconv(cfg, outs)
def _gen_cfg_deconv(cfg, data, kernel, num_tile):
    """Define the autotvm tuning space for the deconv template and return the
    chosen tile sizes as ``(is_var, vh_, vw_, vc_)``."""
    # kernel is 4-D, or pre-packed 5-D with a trailing block dim
    if len(kernel.shape) == 4:
        co_, _, _, _ = get_const_tuple(kernel.shape)
    else:  # kernel tensor is pre packed
        co_, _, _, _, vc_ = get_const_tuple(kernel.shape)
        co_ = co_ * vc_
    if len(data.shape) == 4:
        # plain NCHW input: treat channels as ci/4 groups of 4
        _, ci_, ih_, iw_ = get_const_tuple(data.shape)
        c4 = 4
        ci_ = ci_ // 4
    else:
        # already channel-blocked NCHWc input
        _, ci_, ih_, iw_, c4 = get_const_tuple(data.shape)
    # stride-2 transpose conv doubles the spatial extent
    oh_ = ih_ * 2
    ow_ = iw_ * 2
    co, oh, ow = cfg.axis(co_), cfg.axis(oh_), cfg.axis(ow_)
    ci, ki = cfg.reduce_axis(ci_), cfg.reduce_axis(c4)
    if num_tile == 2:  # for arm cpu
        # channel block fixed at c4; width block is any multiple of 4 dividing ow_;
        # height block fixed at 2
        candidate_vc = [[co_ // c4, c4]]
        co, vc = cfg.define_split(
            "tile_co", co, num_outputs=2, policy="candidate", candidate=candidate_vc
        )
        candidate_vw = []
        for iv in range(4, ow_ + 1):  # [4, 6, 8, 12, 16, 24, 32, 40]:
            if iv % 4 == 0 and (ow_ % iv == 0):
                candidate_vw.append([ow_ // iv, iv])
        ow, vw = cfg.define_split(
            "tile_ow", ow, num_outputs=2, policy="candidate", candidate=candidate_vw
        )
        candidate_vh = [[1, 2]]
        oh, vh = cfg.define_split(
            "tile_oh", oh, num_outputs=2, policy="candidate", candidate=candidate_vh
        )
    elif num_tile == 3:  # for mali gpu
        co, _, vc = cfg.define_split("tile_co", co, num_outputs=3)
        oh, _, vh = cfg.define_split("tile_oh", oh, num_outputs=3)
        ow, _, vw = cfg.define_split("tile_ow", ow, num_outputs=3)
    else:
        raise RuntimeError("Invalid num_tile")
    # unroll/vectorize annotations for the reduce and spatial inner axes
    cfg.define_annotate("ann_reduce", [ci, ki], policy="try_unroll")
    cfg.define_annotate("ann_spatial", [vh, vw, vc], policy="try_unroll_vec")
    vc_ = cfg["tile_co"].size[-1]
    vh_ = cfg["tile_oh"].size[-1]
    vw_ = cfg["tile_ow"].size[-1]
    is_var = False
    return (is_var, vh_, vw_, vc_)
def _conv_spatial_pack_deconv(args, data, kernel, out_dtype):
    """conv2d_arm_cpu_deconv inner implement.

    Spatial-pack compute for the fixed stride-2, 2x2-kernel conv2d-transpose:
    packs data (NCHW or NCHWc) and kernel, contracts over input channels, and
    unpacks to the doubled spatial resolution.

    Parameters
    ----------
    args : tuple
        (is_var, vh_, vw_, vc_) from ``_gen_cfg_deconv``.
    data, kernel, out_dtype
        Same meaning as in ``conv2d_arm_cpu_deconv``.

    Returns
    -------
    tvm.Tensor tagged "deconv_conv2d_output".
    """
    is_var, vh_, vw_, vc_ = args
    # create workload according to raw arguments
    out_dtype = out_dtype or data.dtype
    if len(data.shape) == 4:
        n_, ci_, ih_, iw_ = data.shape if is_var else get_const_tuple(data.shape)
        c4 = 4
        ci_ = ci_ // c4
    else:
        n_, ci_, ih_, iw_, c4 = data.shape if is_var else get_const_tuple(data.shape)
    if len(kernel.shape) == 4:
        pre_packed = False
        _, co_, kh_, kw_ = kernel.shape if is_var else get_const_tuple(kernel.shape)
    else:  # kernel tensor is pre packed
        pre_packed = True
        _, co_, kh_, kw_, vc_ = kernel.shape if is_var else get_const_tuple(kernel.shape)
        co_ = co_ * c4
    # stride-2 transpose conv doubles the spatial size
    oh_ = ih_ * 2
    ow_ = iw_ * 2
    ow_div = ow_ // vw_
    oh_div = oh_ // vh_
    kvshape = (co_ // vc_, kh_, kw_, ci_, c4, c4)
    ovshape = (n_, co_ // vc_, oh_div, ow_div, vh_, vw_, c4)
    # each output tile of vh_ x vw_ consumes a vh_/2 x vw_/2 input patch
    dvshape = (n_, ih_ // (vh_ // 2), iw_ // (vw_ // 2), vh_ // 2, ci_, vw_ // 2, c4)
    if len(data.shape) == 4:
        data_vec = tvm.compute(
            dvshape,
            lambda n, h, w, vh, ci, vw, ki: data[n][ci * c4 + ki][h * vh_ // 2 + vh][
                w * vw_ // 2 + vw
            ],
            name="data_vec",
        )
    else:
        data_vec = tvm.compute(
            dvshape,
            lambda n, h, w, vh, ci, vw, ki: data[n][ci][h * vh_ // 2 + vh][
                w * vw_ // 2 + vw
            ][ki],
            name="data_vec",
        )
    if pre_packed:
        kernel_vec = kernel
    else:
        kernel_vec = tvm.compute(
            kvshape,
            lambda co, kh, kw, ci, ki, vc: kernel[ci * c4 + ki][co * vc_ + vc][kh][kw],
            name="kernel_vec",
        )
    ci = tvm.reduce_axis((0, ci_), name="ci")
    ki = tvm.reduce_axis((0, c4), name="ki")
    # accumulator type per input dtype; attrs are consumed downstream
    type_map = {
        "int8": "int32",
        "uint8": "uint32",
        "float32": "float32",
        "float16": "float16",
    }
    acum_dtype = type_map[data.dtype]
    attrs = {
        "SH": 2,
        "SW": 2,
        "PH": 0,
        "PW": 0,
        "DILA_H": 1,
        "DILA_W": 1,
        "VH": vh_,
        "VW": vw_,
        "VC": vc_,
        "ACUM_DTYPE": acum_dtype,
    }
    # each output pixel selects one of the 2x2 kernel taps by output parity
    conv = tvm.compute(
        ovshape,
        lambda n, co, h, w, vh, vw, vc: tvm.sum(
            data_vec[n, h, w, vh // 2, ci, vw // 2, ki].astype(out_dtype)
            * kernel_vec[co, (h * vh_ + vh) % 2, (w * vw_ + vw) % 2, ci, ki, vc].astype(
                out_dtype
            ),
            axis=[ci, ki],
        ),
        name="conv",
        attrs=attrs,
    )
    if len(data.shape) == 4:
        # unpack to plain NCHW
        osshape = (n_, co_, oh_, ow_div * vw_)
        output = tvm.compute(
            osshape,
            lambda n, co, h, w: conv[n][co // c4][h][w // vw_][w % vw_][co % c4],
            name="output_unpack",
            tag="deconv_conv2d_output",
        )
    else:
        # keep the channel-blocked NCHWc layout
        osshape = (n_, co_ // c4, oh_, ow_div * vw_, c4)
        output = tvm.compute(
            osshape,
            lambda n, co, h, w, vc: conv[n][co][h // vh_][w // vw_][h % vh_][w % vw_][vc],
            name="output_unpack",
            tag="deconv_conv2d_output",
        )
    return output
def intrin_deconv(args):
    """Declare a tensor intrinsic that lowers the innermost deconv micro-kernel
    to a call to the external routine ``opname``.

    Parameters
    ----------
    args : tuple
        (ci_, vh_, vw_, vc_, kh_, kw_, sh_, sw_, dila_h, dila_w,
         dtype, acum_dtype, opname, core_id).  ``ci_`` may be None, in which
        case a symbolic extent is used.

    Returns
    -------
    TensorIntrin binding the inner deconv computation to ``opname``.
    """
    (
        ci_,
        vh_,
        vw_,
        vc_,
        kh_,
        kw_,
        sh_,
        sw_,
        dila_h,
        dila_w,
        dtype,
        acum_dtype,
        opname,
        core_id,
    ) = args
    hstr, wstr = sh_, sw_
    # symbolic input-channel extent when not fixed at declaration time
    ci_ = tvm.var("ci_") if ci_ is None else ci_
    kvshape = (ci_, kh_, kw_, vc_)
    ovshape = (vh_, vw_, vc_)
    if dila_h != 1 or dila_w != 1:
        dvshape = (kh_, kw_, vh_, vw_, ci_)
    else:
        dvshape = ((vh_ - 1) * hstr + kh_, (vw_ - 1) * wstr + kw_, ci_)
    data_vec = tvm.placeholder(dvshape, name="a", dtype=dtype)
    kernel_vec = tvm.placeholder(kvshape, name="b", dtype=dtype)
    ci = tvm.reduce_axis((0, ci_), name="ci")
    kh = tvm.reduce_axis((0, kh_), name="kh")
    kw = tvm.reduce_axis((0, kw_), name="kw")
    # Bug fix: the original tested the undefined name `DILA_H` here, which
    # raises NameError at trace time; the lowercase local `dila_h` is intended
    # (matching intrin_conv above).
    if dila_h != 1 or dila_w != 1:
        conv = tvm.compute(
            ovshape,
            lambda vh, vw, vc: tvm.sum(
                data_vec[kh, kw, vh, vw, ci].astype(acum_dtype)
                * kernel_vec[ci, kh, kw, vc].astype(acum_dtype),
                axis=[ci, kh, kw],
            ),
            name="conv",
        )
    else:
        conv = tvm.compute(
            ovshape,
            lambda vh, vw, vc: tvm.sum(
                data_vec[vh * hstr + kh, vw * wstr + kw, ci].astype(acum_dtype)
                * kernel_vec[ci, kh, kw, vc].astype(acum_dtype),
                axis=[ci, kh, kw],
            ),
            name="conv",
        )
    # row-major strides for each buffer: stride[i] = product of trailing dims
    stride_a = [
        functools.reduce(lambda x, y: x * y, dvshape[i + 1: len(dvshape)])
        for i in range(0, len(dvshape) - 1)
    ]
    stride_a.append(1)
    stride_b = [
        functools.reduce(lambda x, y: x * y, kvshape[i + 1: len(kvshape)])
        for i in range(0, len(kvshape) - 1)
    ]
    stride_b.append(1)
    stride_c = [
        functools.reduce(lambda x, y: x * y, ovshape[i + 1: len(ovshape)])
        for i in range(0, len(ovshape) - 1)
    ]
    stride_c.append(1)
    a_buffer = tvm.decl_buffer(
        data_vec.shape, data_vec.dtype, name="A", offset_factor=1, strides=stride_a
    )
    b_buffer = tvm.decl_buffer(
        kernel_vec.shape, kernel_vec.dtype, name="B", offset_factor=1, strides=stride_b
    )
    c_buffer = tvm.decl_buffer(
        conv.shape, conv.dtype, name="C", offset_factor=1, strides=stride_c
    )

    def intrin_func(ins, outs):
        # Emit a single extern call: opname(C, A, B, ci, vh, vw, vc, kh, sh, core_id).
        # NOTE(review): kw_, sw_ and the dilation factors are not forwarded —
        # presumably baked into opname; confirm against the asm routine.
        aa, bb = ins
        cc = outs[0]

        def _body():
            ib = tvm.ir_builder.create()
            ib.emit(
                tvm.call_extern(
                    "int32",
                    opname,
                    cc.access_ptr("w"),
                    aa.access_ptr("r"),
                    bb.access_ptr("r"),
                    ci_,
                    vh_,
                    vw_,
                    vc_,
                    kh_,
                    sh_,
                    core_id,
                )
            )
            return ib.get()

        return _body()

    return tvm.decl_tensor_intrin(
        conv.op, intrin_func, binds={data_vec: a_buffer, kernel_vec: b_buffer, conv: c_buffer}
    )
def _schedule_deconv(cfg, s, data_vec, kernel_vec, conv, output, last):
    """Schedule the packed deconv stages produced by ``_conv_spatial_pack_deconv``.

    When ``cfg`` is a live autotvm config, it is flattened into a plain dict of
    the values this function reads (tile sizes and the unroll/vectorize
    annotations); ``cfg`` is then used only through that dict below.  Returns
    the (mutated) schedule ``s``.
    """
    is_tune = bool(isinstance(cfg, (tvm.autotvm.ConfigEntity, tvm.autotvm.ConfigSpace)))
    if is_tune:
        vh_ = cfg["tile_oh"].size[-1]
        vw_ = cfg["tile_ow"].size[-1]
        vc_ = cfg["tile_co"].size[-1]
        # rebind cfg to a plain dict; downstream code indexes it like a mapping
        cfg = {
            "ci_": tvm.var("ci_"),
            "VH": vh_,
            "VW": vw_,
            "VC": vc_,
            "tile_oh": vh_,
            "tile_ow": vw_,
            "tile_co": vc_,
            "tile_ci": 4,
            "ann_reduce": cfg["ann_reduce"].anns,
            "ann_spatial": cfg["ann_spatial"].anns,
        }  # ,'reorder_0':cfg['reorder_0'].perm}
    else:
        # cfg is already a dict-like object with the keys accessed below
        pass
    n, co, oh, ow, vh, vw, vc = s[conv].op.axis
    ci, ki = s[conv].op.reduce_axis
    s[conv].reorder(n, co, oh, ow, ci, vw, ki, vc)
    # apply the tuner's unroll/vectorize choices to the reduce axes
    if cfg["ann_reduce"][0] == "unroll":
        s[conv].unroll(ci)
    elif cfg["ann_reduce"][0] == "vec":
        s[conv].vectorize(ci)
    if cfg["ann_reduce"][1] == "unroll":
        s[conv].unroll(ki)
    elif cfg["ann_reduce"][1] == "vec":
        s[conv].vectorize(ki)
    # and to the spatial inner axes of the conv stage
    if cfg["ann_spatial"][0] == "vec":
        s[conv].vectorize(vh)
    elif cfg["ann_spatial"][0] == "unroll":
        s[conv].unroll(vh)
    if cfg["ann_spatial"][1] == "vec":
        s[conv].vectorize(vw)
    elif cfg["ann_spatial"][1] == "unroll":
        s[conv].unroll(vw)
    if cfg["ann_spatial"][2] == "vec":
        s[conv].vectorize(vc)
    elif cfg["ann_spatial"][2] == "unroll":
        s[conv].unroll(vc)
    # schedule conv: tile factors are read back from the compute-stage attrs
    attrs = conv.op.attrs
    vh_, vw_, vc_ = (attrs["VH"].value, attrs["VW"].value, attrs["VC"].value)
    # schedule fusion: re-tile the final tensor (NCHW or NCHWc) with those factors
    if len(s[last].op.axis) == 4:
        n, co, h, w = s[last].op.axis
        co, vc = s[last].split(co, vc_)
        ow, vw = s[last].split(w, vw_)
        oh, vh = s[last].split(h, vh_)
        s[last].reorder(n, co, oh, ow, vh, vw, vc)
    else:
        n, co, h, w, vc = s[last].op.axis
        oh, vh = s[last].split(h, vh_)
        ow, vw = s[last].split(w, vw_)
        s[last].reorder(n, co, oh, ow, vh, vw, vc)
    if last != output and isinstance(output.op, tvm.tensor.ComputeOp):
        s[output].compute_inline()
    # same annotations on the fused output stage
    if cfg["ann_spatial"][0] == "vec":
        s[last].vectorize(vh)
    elif cfg["ann_spatial"][0] == "unroll":
        s[last].unroll(vh)
    if cfg["ann_spatial"][1] == "vec":
        s[last].vectorize(vw)
    elif cfg["ann_spatial"][1] == "unroll":
        s[last].unroll(vw)
    if cfg["ann_spatial"][2] == "vec":
        s[last].vectorize(vc)
    elif cfg["ann_spatial"][2] == "unroll":
        s[last].unroll(vc)
    s[conv].compute_at(s[last], ow)
    # mark parallel
    s[last].parallel(co)
    if data_vec.op.name == "data_vec_undilated":
        _, h, _, _, _, _, _, _, _ = s[data_vec].op.axis
    else:
        _, h, _, _, _, _, _ = s[data_vec].op.axis
    s[data_vec].parallel(h)
    co, _, _, _, _, vc = s[kernel_vec].op.axis
    s[kernel_vec].parallel(co)
    if cfg["ann_spatial"][2] == "vec":
        s[kernel_vec].vectorize(vc)
    elif cfg["ann_spatial"][2] == "unroll":
        s[kernel_vec].unroll(vc)
    return s
def _conv_schedule_deconv(cfg, outs):
    """schedule_conv2d_nchw_arm_cpu_deconv inner implementation.

    Walks the graph rooted at ``outs[0]`` and schedules every stage tagged
    "deconv_conv2d_output".
    """
    sch = tvm.create_schedule([t.op for t in outs])

    def _visit(op):
        if "deconv_conv2d_output" not in op.tag:
            return
        # schedule conv2d
        out_tensor = op.output(0)
        conv = op.input_tensors[0]
        # when the first input is the "attr" tensor, real inputs start at 1
        base = 1 if conv.op.input_tensors[0].name == "attr" else 0
        data_vec = conv.op.input_tensors[base]
        kernel_vec = conv.op.input_tensors[base + 1]
        if kernel_vec.op.name == "kernel_vec":
            kernel = kernel_vec.op.input_tensors[0]
        else:
            kernel = kernel_vec
        if isinstance(kernel.op, tvm.tensor.ComputeOp) and "dilate" in kernel.op.tag:
            sch[kernel].compute_inline()
        _schedule_deconv(cfg, sch, data_vec, kernel_vec, conv, out_tensor, outs[0])

    traverse_inline(sch, outs[0].op, _visit)
    return sch
| @@ -1,289 +0,0 @@ | |||||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||||
| # | |||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| # you may not use this file except in compliance with the License. | |||||
| # You may obtain a copy of the License at | |||||
| # | |||||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||||
| # | |||||
| # Unless required by applicable law or agreed to in writing, software | |||||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| # See the License for the specific language governing permissions and | |||||
| # limitations under the License. | |||||
| # ============================================================================ | |||||
| """Depthwise convolution schedule for ARM CPU""" | |||||
| import tvm | |||||
| from tvm import autotvm | |||||
| from topi.generic import schedule_depthwise_conv2d_nchw | |||||
| from topi.nn import depthwise_conv2d_nchw, pad | |||||
| from topi.util import traverse_inline, get_const_tuple | |||||
| from topi.nn.util import get_pad_tuple | |||||
| # register customized schedule for arm cpu. | |||||
@autotvm.register_topi_schedule(
    schedule_depthwise_conv2d_nchw, ["arm_cpu", "cpu"], ["custom"]
)
def schedule_depthwise_conv2d_nchw_arm(cfg, outs):
    """Schedule depthwise conv2d (NCHW) for ARM CPU, "custom" template.

    Parameters
    ----------
    cfg: ConfigEntity
        The configuration of this template
    outs: Array of Tensor
        The computation graph description of depthwise convolution2d
        in the format of an array of tensors.

    Returns
    -------
    s: Schedule
        The computation schedule for depthwise_conv2d nchw.
    """
    return _depthwise_schedule_spatial_pack(cfg, outs)
@autotvm.register_topi_compute(depthwise_conv2d_nchw, ["arm_cpu", "cpu"], ["custom"])
def depthwise_conv2d_arm_cpu(cfg, data, kernel, strides, padding, dilation, out_dtype):
    """TOPI compute callback for depthwise_conv2d (NCHW), "custom" template.

    Thin wrapper that forwards every argument to the spatial-pack
    implementation.

    Parameters
    ----------
    cfg: ConfigEntity
        The config for this template
    data : tvm.Tensor
        4-D with shape [batch, in_channel, in_height, in_width]
    kernel : tvm.Tensor
        4-D with shape [num_filter, multiplier, filter_height, filter_width] or
        pre-packed 5-D with shape [num_filter_chunk, multiplier, filter_height,
        filter_width, num_filter_block]
    strides : list of two ints
        [stride_height, stride_width]
    padding : list of two ints
        [pad_height, pad_width]
    dilation : list of two ints
        [dilation_height, dilation_width]
    out_dtype: str
        The output type. This is used for mixed precision.

    Returns
    -------
    output : tvm.Tensor
        4-D with shape [batch, out_channel, out_height, out_width]
    """
    return _depthwise_spatial_pack(
        cfg, data, kernel, strides, padding, dilation, out_dtype
    )
def _depthwise_spatial_pack(args, data, kernel, strides, padding, dilation, out_dtype):
    """depthwise_conv2d_arm_cpu's inner implement.

    Builds the spatial-pack compute graph: pad -> pack data into tiles ->
    (optionally) pack kernel -> tiled reduction -> unpack to NCHW output.
    """
    # args: (is_var, u_vh, u_vw, u_vc) — symbolic-shape flag plus the tile
    # sizes along output height, output width and output channel.
    is_var, u_vh, u_vw, u_vc = args
    out_dtype = out_dtype or data.dtype
    u_n, u_c, ih, iw = data.shape if is_var else get_const_tuple(data.shape)
    # Normalize dilation to per-axis values.
    if isinstance(dilation, int):
        dilation_h = dilation_w = dilation
    else:
        dilation_h, dilation_w = dilation
    # A 4-D kernel is raw [C, M, KH, KW]; 5-D means it was already packed
    # into [C/VC, M, KH, KW, VC], in which case VC is taken from the kernel.
    if len(kernel.shape) == 4:
        pre_packed = False
        u_c, um, ukh, ukw = kernel.shape if is_var else get_const_tuple(kernel.shape)
    else:  # kernel tensor is pre packed
        pre_packed = True
        u_c, um, ukh, ukw, u_vc = kernel.shape if is_var else get_const_tuple(kernel.shape)
        u_c = u_c * u_vc
    # Effective receptive field after dilation.
    dilated_kernel_h = (ukh - 1) * dilation_h + 1
    dilated_kernel_w = (ukw - 1) * dilation_w + 1
    pad_top, pad_left, pad_down, pad_right = get_pad_tuple(
        padding, (dilated_kernel_h, dilated_kernel_w)
    )
    hstr, wstr = strides if isinstance(strides, (tuple, list)) else (strides, strides)
    # Standard convolution output extents.
    u_oh = (ih + pad_top + pad_down - dilated_kernel_h) // hstr + 1
    u_ow = (iw + pad_left + pad_right - dilated_kernel_w) // wstr + 1
    # pack data
    hpad = pad_top + pad_down
    wpad = pad_left + pad_right
    dopad = hpad != 0 or wpad != 0
    if dopad:
        data_pad = pad(
            data,
            (0, 0, pad_top, pad_left),
            (0, 0, pad_down, pad_right),
            name="data_pad",
        )
    else:
        data_pad = data
    # Number of tiles along output height/width.
    # NOTE(review): u_oh/u_ow are assumed divisible by u_vh/u_vw; otherwise
    # the trailing remainder rows/cols are dropped — confirm with callers.
    oh_div = u_oh // u_vh
    ow_div = u_ow // u_vw
    kvshape = (u_c // u_vc, um, ukh, ukw, u_vc)
    ovshape = (u_n, u_c * um // u_vc, oh_div, u_ow // u_vw, u_vh, u_vw, u_vc)
    oshape = (u_n, u_c * um, oh_div * u_vh, ow_div * u_vw)
    if dilation_h != 1 or dilation_w != 1:
        # undilate input data: gather the dilated taps explicitly so the
        # reduction below can index [kh, kw] densely.
        dvshape = (u_n, oh_div, ow_div, u_c, ukh, ukw, u_vh, u_vw)
        data_vec = tvm.compute(
            dvshape,
            lambda n, h, w, c, kh, kw, vh, vw: data_pad[n][c][
                (h * u_vh + vh) * hstr + kh * dilation_h
            ][(w * u_vw + vw) * wstr + kw * dilation_w],
            name="data_vec_undilated",
        )
    else:
        # Dense case: each tile carries the full sliding-window footprint.
        dvshape = (u_n, oh_div, ow_div, u_c, u_vh * hstr + ukh - 1, u_vw * wstr + ukw - 1)
        data_vec = tvm.compute(
            dvshape,
            lambda n, h, w, c, vh, vw: data_pad[n][c][h * u_vh * hstr + vh][
                w * u_vw * wstr + vw
            ],
            name="data_vec",
        )
    # Pack the kernel into [C/VC, M, KH, KW, VC] unless it already arrived packed.
    if pre_packed:
        kernel_vec = kernel
    else:
        kernel_vec = tvm.compute(
            kvshape,
            lambda co, m, kh, kw, vc: kernel[co * u_vc + vc][m][kh][kw],
            name="kernel_vec",
        )
    kh = tvm.reduce_axis((0, ukh), name="kh")
    kw = tvm.reduce_axis((0, ukw), name="kw")
    # Tiled depthwise reduction over the kernel window; the two branches only
    # differ in how data_vec is indexed (explicit taps vs. strided window).
    if dilation_h != 1 or dilation_w != 1:
        conv = tvm.compute(
            ovshape,
            lambda n, co, h, w, vh, vw, vc: tvm.sum(
                data_vec[n, h, w, (co * u_vc + vc) // um, kh, kw, vh, vw].astype(out_dtype)
                * kernel_vec[co // um, co % um, kh, kw, vc].astype(out_dtype),
                axis=[kh, kw],
            ),
            name="depthwise_conv",
        )
    else:
        conv = tvm.compute(
            ovshape,
            lambda n, co, h, w, vh, vw, vc: tvm.sum(
                data_vec[
                    n, h, w, (co * u_vc + vc) // um, vh * hstr + kh, vw * wstr + kw
                ].astype(out_dtype)
                * kernel_vec[co // um, co % um, kh, kw, vc].astype(out_dtype),
                axis=[kh, kw],
            ),
            name="depthwise_conv",
        )
    # Unpack tiles back into plain NCHW layout; the tag is what the schedule
    # traversal callback keys on.
    output = tvm.compute(
        oshape,
        lambda n, co, h, w: conv[n][co // u_vc][h // u_vh][w // u_vw][h % u_vh][w % u_vw][
            co % u_vc
        ],
        name="output_unpack",
        tag="spatial_depthwise_conv_nchw_output",
    )
    return output
def _schedule_spatial_pack(cfg, s, data_vec, kernel_vec, conv, output, last):
    """schedule implementation.

    Applies the spatial-pack schedule: vectorize packing stages, tile and
    reorder the reduction, then split/parallelize the unpacked output.
    """
    # Tile sizes come either from an AutoTVM config entity or from a plain
    # dict (keys "VC"/"VH"/"VW") when the caller supplies fixed values.
    u_vc = cfg["tile_co"].size[-1] if not isinstance(cfg, dict) else cfg["VC"]
    u_vh = cfg["tile_oh"].size[-1] if not isinstance(cfg, dict) else cfg["VH"]
    u_vw = cfg["tile_ow"].size[-1] if not isinstance(cfg, dict) else cfg["VW"]
    n, co, oh, ow, vh, vw, vc = s[conv].op.axis
    kh, kw = s[conv].op.reduce_axis
    # The undilated packing stage has two extra (kh, kw) axes.
    if data_vec.op.name == "data_vec_undilated":
        _, _, dv_ow, _, _, _, _, _ = s[data_vec].op.axis
    else:
        _, _, dv_ow, _, _, _ = s[data_vec].op.axis
    data_pad = data_vec.op.input_tensors[0]
    # data_pad is a ComputeOp only when padding was actually inserted.
    if isinstance(data_pad.op, tvm.tensor.ComputeOp):
        s[data_pad].vectorize(list(s[data_pad].op.axis)[-1])
        s[data_pad].compute_at(s[data_vec], dv_ow)
    s[data_vec].vectorize(list(s[data_vec].op.axis)[-1])
    s[data_vec].compute_at(s[conv], ow)
    # schedule conv
    s[conv].reorder(n, co, oh, ow, kh, kw, vh, vw, vc)
    s[conv].unroll(kh)
    s[conv].unroll(vh)
    s[conv].vectorize(vw)
    s[conv].unroll(vc)
    s[conv].parallel(co)
    # Re-split the unpacked output with the same tile sizes so conv can be
    # computed at the outer height axis.
    n, co, h, w = s[last].op.axis
    co, vc = s[last].split(co, u_vc)
    oh, vh = s[last].split(h, u_vh)
    ow, vw = s[last].split(w, u_vw)
    if last != output:
        # A fused consumer follows: inline the unpack stage into it.
        s[output].compute_inline()
        s[last].vectorize(vw)
        s[last].unroll(vc)
    else:
        s[last].vectorize(vw)
    s[conv].compute_at(s[last], oh)
    # mark parallel
    s[last].parallel(co)
    if data_vec.op.name == "data_vec_undilated":
        _, h, _, _, _, _, _, _ = s[data_vec].op.axis
    else:
        _, h, _, _, _, _ = s[data_vec].op.axis
    s[data_vec].parallel(h)
    if kernel_vec.op.name == "kernel_vec":
        co, _, _, _, _ = s[kernel_vec].op.axis
        if autotvm.GLOBAL_SCOPE.in_tuning:
            # kernel packing will be pre-computed during compilation, so we skip
            # this part to make tuning records correct
            s[kernel_vec].pragma(co, "debug_skip_region")
        else:
            s[kernel_vec].parallel(co)
    return s
def _depthwise_schedule_spatial_pack(cfg, outs):
    """schedule_depthwise_conv2d_nchw_arm's inner implement"""
    # Accept either a single output tensor or a list of them.
    if isinstance(outs, tvm.tensor.Tensor):
        outs = [outs]
    sch = tvm.create_schedule([t.op for t in outs])

    def _callback(op):
        # Only the unpack stage of the spatial-pack template is scheduled here.
        if op.tag != "spatial_depthwise_conv_nchw_output":
            return
        out_tensor = op.output(0)
        conv = op.input_tensors[0]
        data_vec = conv.op.input_tensors[0]
        kernel_vec = conv.op.input_tensors[1]
        # If the kernel was packed by this template, look through the packing
        # stage to reach the raw kernel tensor.
        if kernel_vec.op.name == "kernel_vec":
            kernel = kernel_vec.op.input_tensors[0]
        else:
            kernel = kernel_vec
        # Fold an explicit dilation stage into its consumer rather than
        # materializing it.
        if isinstance(kernel.op, tvm.tensor.ComputeOp) and "dilate" in kernel.op.tag:
            sch[kernel].compute_inline()
        _schedule_spatial_pack(cfg, sch, data_vec, kernel_vec, conv, out_tensor, outs[0])

    traverse_inline(sch, outs[0].op, _callback)
    return sch
| @@ -1,472 +0,0 @@ | |||||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||||
| # | |||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| # you may not use this file except in compliance with the License. | |||||
| # You may obtain a copy of the License at | |||||
| # | |||||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||||
| # | |||||
| # Unless required by applicable law or agreed to in writing, software | |||||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| # See the License for the specific language governing permissions and | |||||
| # limitations under the License. | |||||
| # ============================================================================ | |||||
| """Conv2D schedule for ARM CPU""" | |||||
from __future__ import absolute_import as _abs
import functools
import tvm
from tvm import autotvm
import tvm.contrib.nnpack
from topi.generic import schedule_conv2d_nchw
from topi.util import traverse_inline, get_const_tuple
from topi.nn import conv2d
@autotvm.register_topi_compute(conv2d, "arm_cpu", ["matmul"])
def matmul_arm_cpu(cfg, a_, b_, layout, out_dtype):
    """TOPI compute callback for the ARM-CPU matmul template.

    Defines the tuning space on ``cfg`` and builds the spatial-pack
    matmul compute graph.

    Parameters
    ----------
    cfg: ConfigEntity
        The config for this template.
    a_ : tvm.Tensor
        2-D with shape [M, k_]
    b_ : tvm.Tensor
        2-D with shape [k_, N]
    layout : str
        Input layout, one of "NCHW" / "NCH" / "NC".
    out_dtype: str
        The output type. This is used for mixed precision.

    Returns
    -------
    output : tvm.Tensor
        The packed-matmul output tensor.
    """
    spatial_args = _gen_cfg(cfg, a_, b_)
    return _matmul_spatial_pack_asm(spatial_args, a_, b_, layout, out_dtype)
@autotvm.register_topi_schedule(schedule_conv2d_nchw, "arm_cpu", ["matmul"])
def schedule_matmul_arm_cpu(cfg, outs):
    """TOPI schedule callback for the ARM-CPU matmul template.

    Thin AutoTVM entry point that delegates to ``_matmul_schedule_asm``.

    Parameters
    ----------
    cfg: ConfigEntity
        The config for this template.
    outs: Array of Tensor
        Output tensors describing the computation graph.

    Returns
    -------
    s: Schedule
        The computation schedule.
    """
    return _matmul_schedule_asm(cfg, outs)
def _gen_cfg(cfg, a_, b_):
    """Define the tuning space on ``cfg`` and return the chosen tile sizes.

    Parameters
    ----------
    cfg: ConfigEntity
        AutoTVM config; tile_oh / tile_ow / tile_co splits are defined here.
    a_ : tvm.Tensor
        Input tensor of rank 2, 3 or 4.
    b_ : tvm.Tensor
        Weight tensor of shape [co_, k_].

    Returns
    -------
    tuple
        (is_var, is_transpose, ci_, vh_, vw_, vc_) — flags plus the selected
        tile sizes along output height, width and channel.

    Raises
    ------
    ValueError
        If ``a_`` has an unsupported rank.
    """
    # Derive (h_, w_, ci_) from the input rank; 2-D and 3-D inputs are
    # treated as a single spatial row (h_ == 1).
    if len(a_.shape) == 2:
        w_, ci_ = get_const_tuple(a_.shape)
        h_ = 1
    elif len(a_.shape) == 3:
        _, ci_, w_ = get_const_tuple(a_.shape)
        h_ = 1
    elif len(a_.shape) == 4:
        _, ci_, h_, w_ = get_const_tuple(a_.shape)
    else:
        # BUG FIX: a_.shape is a shape container, not a str; the original
        # `"..." + a_.shape` raised TypeError instead of this ValueError.
        raise ValueError("not support shape: " + str(a_.shape))
    co_, k_ = get_const_tuple(b_.shape)
    # Declare the knob axes and their 2-way splits.
    oh, ow = cfg.axis(h_), cfg.axis(w_)
    co = cfg.axis(co_)
    k = cfg.reduce_axis(k_)
    oh, vh = cfg.define_split("tile_oh", oh, num_outputs=2)
    ow, vw = cfg.define_split("tile_ow", ow, num_outputs=2)
    oc, vc = cfg.define_split("tile_co", co, num_outputs=2)
    # A single fixed loop order is allowed; the knob exists so tuning
    # records keep a uniform schema.
    cfg.define_reorder(
        "reorder_0",
        [oc, oh, ow, k, vh, vw, vc],
        policy="candidate",
        candidate=[[oc, oh, ow, k, vh, vw, vc]],
    )
    # Inner tile sizes selected by the current config.
    vh_ = cfg["tile_oh"].size[-1]
    vw_ = cfg["tile_ow"].size[-1]
    vc_ = cfg["tile_co"].size[-1]
    is_var = False
    is_transpose = False
    return (is_var, is_transpose, ci_, vh_, vw_, vc_)
def _matmul_spatial_pack_asm(args, a_, b_, layout, out_dtype):
    """matmul_spatial_pack_asm's inner implementation.

    Packs both operands into tiled layouts, performs the tiled reduction,
    and unpacks the result back to the requested layout.
    """
    # args: (is_var, is_transpose, ci_, vh_, vw_, vc_) as produced by _gen_cfg.
    is_var, is_transpose, ci_, vh_, vw_, vc_ = args
    # create workload according to raw arguments
    out_dtype = out_dtype or a_.dtype
    if layout == "NCHW":
        batch, k_, h_, w_ = a_.shape if is_var else get_const_tuple(a_.shape)
        n_, _ = b_.shape if is_var else get_const_tuple(b_.shape)
    elif layout == "NCH":
        batch, k_, h_ = a_.shape if is_var else get_const_tuple(a_.shape)
        n_, _ = b_.shape if is_var else get_const_tuple(b_.shape)
        w_ = 1
    elif layout == "NC":
        w_, k_ = a_.shape if is_var else get_const_tuple(a_.shape)
        n_, _ = b_.shape if is_var else get_const_tuple(b_.shape)
        h_ = 1
    else:
        raise ValueError("not support layout: " + layout)
    ki = tvm.reduce_axis((0, k_), name="ki")
    # Accumulator dtype per input dtype; integer inputs widen, floats keep width.
    type_map = {
        "int8": "int32",
        "uint8": "uint32",
        "float32": "float32",
        "float16": "float16",
    }
    acum_dtype = type_map[a_.dtype]
    # Attrs are attached to the matmul stage so the schedule can read the
    # tile sizes back (see _schedule_asm).
    attrs = {"ci_": ci_, "vh_": vh_, "vw_": vw_, "vc_": vc_, "ACUM_DTYPE": acum_dtype}
    # NOTE(review): all divisions below assume h_/w_/n_ are divisible by the
    # corresponding tile sizes — remainders are dropped; confirm with callers.
    if layout == "NCHW":
        h_div = h_ // vh_
        w_div = w_ // vw_
        n_div = n_ // vc_
        avshape = (batch, h_div, w_div, vh_, vw_, k_)
        bvshape = (n_div, k_, vc_)
        ovshape = (batch, n_div, h_div, w_div, vh_, vw_, vc_)
        # Pack A into spatial tiles, B into channel tiles.
        a_vec = tvm.compute(
            avshape,
            lambda n, oh, ow, vh, vw, ci: a_[n][ci][oh * vh_ + vh][ow * vw_ + vw],
            name="a_vec",
        )
        b_vec = tvm.compute(
            bvshape, lambda oc, ci, vc: b_[oc * vc_ + vc][ci], name="b_vec"
        )
        # Tiled matmul reduction over ki.
        ma = tvm.compute(
            ovshape,
            lambda n, oc, oh, ow, vh, vw, vc: tvm.sum(
                a_vec[n, oh, ow, vh, vw, ki].astype(out_dtype)
                * b_vec[oc, ki, vc].astype(out_dtype),
                axis=[ki],
            ),
            name="matmul",
            attrs=attrs,
        )
        # Unpack either to NHWC (is_transpose) or NCHW; the tag is what the
        # schedule traversal keys on.
        if is_transpose:
            oshape = (batch, h_div * vh_, w_div * vw_, n_div * vc_)
            output = tvm.compute(
                oshape,
                lambda n, h, w, c: ma[n][c // vc_][h // vh_][w // vw_][h % vh_][w % vw_][
                    c % vc_
                ],
                name="output_unpack",
                tag="asm_matmul_output",
            )
        else:
            oshape = (batch, n_div * vc_, h_div * vh_, w_div * vw_)
            output = tvm.compute(
                oshape,
                lambda n, c, h, w: ma[n][c // vc_][h // vh_][w // vw_][h % vh_][w % vw_][
                    c % vc_
                ],
                name="output_unpack",
                tag="asm_matmul_output",
            )
    elif layout == "NCH":
        # Same pipeline with a single spatial axis.
        w_div = w_ // vw_
        n_div = n_ // vc_
        avshape = (batch, w_div, vw_, k_)
        bvshape = (n_div, k_, vc_)
        ovshape = (batch, n_div, w_div, vw_, vc_)
        oshape = (batch, n_div * vc_, w_div * vw_)
        a_vec = tvm.compute(
            avshape, lambda b, om, vw, ci: a_[b][ci][om * vw_ + vw], name="a_vec"
        )
        b_vec = tvm.compute(
            bvshape, lambda on, ci, vc: b_[on * vc_ + vc][ci], name="b_vec"
        )
        ma = tvm.compute(
            ovshape,
            lambda b, on, om, vm, vn: tvm.sum(
                a_vec[b, om, vm, ki].astype(out_dtype)
                * b_vec[on, ki, vn].astype(out_dtype),
                axis=[ki],
            ),
            name="matmul",
            attrs=attrs,
        )
        output = tvm.compute(
            oshape,
            lambda b, n, m: ma[b][n // vc_][m // vw_][m % vw_][n % vc_],
            name="output_unpack",
            tag="asm_matmul_output",
        )
    elif layout == "NC":
        # Plain 2-D matmul, batchless.
        w_div = w_ // vw_
        n_div = n_ // vc_
        avshape = (w_div, vw_, k_)
        bvshape = (n_div, k_, vc_)
        ovshape = (w_div, n_div, vw_, vc_)
        oshape = (w_div * vw_, n_div * vc_)
        a_vec = tvm.compute(
            avshape, lambda om, vw, ci: a_[om * vw_ + vw][ci], name="a_vec"
        )
        b_vec = tvm.compute(
            bvshape, lambda on, ci, vc: b_[on * vc_ + vc][ci], name="b_vec"
        )
        ma = tvm.compute(
            ovshape,
            lambda om, on, vm, vn: tvm.sum(
                a_vec[om, vm, ki].astype(out_dtype)
                * b_vec[on, ki, vn].astype(out_dtype),
                axis=[ki],
            ),
            name="matmul",
            attrs=attrs,
        )
        output = tvm.compute(
            oshape,
            lambda m, n: ma[m // vw_][n // vc_][m % vw_][n % vc_],
            name="output_unpack",
            tag="asm_matmul_output",
        )
    else:
        raise ValueError("not support layout: " + layout)
    return output
def intrin_conv(args):
    """intrin_conv is a conv inner interface.

    Declares a tensor intrinsic whose body is a single ``call_extern`` to
    ``opname``; TVM replaces the matched compute region with that call.
    """
    # Only the leading shape fields, the dtypes, the extern symbol name and
    # the core id are used here; the eight ignored slots are other template
    # parameters not needed by the intrinsic.
    (
        ndim,
        ci_,
        vh_,
        vw_,
        vc_,
        _,
        _,
        _,
        _,
        _,
        _,
        _,
        _,
        dtype,
        acum_dtype,
        opname,
        core_id,
    ) = args
    # A None reduction extent becomes a symbolic var so the intrinsic can
    # match any ci_.
    ci_ = tvm.var("ci_") if ci_ is None else ci_
    kvshape = (ci_, vc_)
    # Describe the compute pattern the intrinsic replaces: a small matmul
    # over ci, in 2-D (vw x vc) or 3-D (vh x vw x vc) form.
    if ndim == 2:
        dvshape = (vw_, ci_)
        ovshape = (vw_, vc_)
        data_vec = tvm.placeholder(dvshape, name="a", dtype=dtype)
        kernel_vec = tvm.placeholder(kvshape, name="b", dtype=dtype)
        ci = tvm.reduce_axis((0, ci_), name="ci")
        conv = tvm.compute(
            ovshape,
            lambda vw, vc: tvm.sum(
                data_vec[vw, ci].astype(acum_dtype)
                * kernel_vec[ci, vc].astype(acum_dtype),
                axis=[ci],
            ),
            name="conv",
        )
    else:
        dvshape = (vh_, vw_, ci_)
        ovshape = (vh_, vw_, vc_)
        data_vec = tvm.placeholder(dvshape, name="a", dtype=dtype)
        kernel_vec = tvm.placeholder(kvshape, name="b", dtype=dtype)
        ci = tvm.reduce_axis((0, ci_), name="ci")
        conv = tvm.compute(
            ovshape,
            lambda vh, vw, vc: tvm.sum(
                data_vec[vh, vw, ci].astype(acum_dtype)
                * kernel_vec[ci, vc].astype(acum_dtype),
                axis=[ci],
            ),
            name="conv",
        )
    # Row-major strides for each buffer: product of the trailing extents,
    # with 1 for the innermost axis.
    stride_a = [
        functools.reduce(lambda x, y: x * y, dvshape[i + 1: len(dvshape)])
        for i in range(0, len(dvshape) - 1)
    ]
    stride_a.append(1)
    stride_b = [
        functools.reduce(lambda x, y: x * y, kvshape[i + 1: len(kvshape)])
        for i in range(0, len(kvshape) - 1)
    ]
    stride_b.append(1)
    stride_c = [
        functools.reduce(lambda x, y: x * y, ovshape[i + 1: len(ovshape)])
        for i in range(0, len(ovshape) - 1)
    ]
    stride_c.append(1)
    # Explicit buffer declarations so the extern call sees known strides.
    ab_ = tvm.decl_buffer(
        data_vec.shape, data_vec.dtype, name="a_", offset_factor=1, strides=stride_a
    )
    bb_ = tvm.decl_buffer(
        kernel_vec.shape, kernel_vec.dtype, name="b_", offset_factor=1, strides=stride_b
    )
    cb_ = tvm.decl_buffer(
        conv.shape, conv.dtype, name="C", offset_factor=1, strides=stride_c
    )
    def intrin_func(ins, outs):
        # Lowered body: one extern call taking (out, a, b, ci, vh, vw, vc, core).
        aa, bb = ins
        cc = outs[0]
        def _body():
            b_ = tvm.ir_builder.create()
            b_.emit(
                tvm.call_extern(
                    "int32",
                    opname,
                    cc.access_ptr("w"),
                    aa.access_ptr("r"),
                    bb.access_ptr("r"),
                    ci_,
                    vh_,
                    vw_,
                    vc_,
                    core_id,
                )
            )
            return b_.get()
        return _body()
    return tvm.decl_tensor_intrin(
        conv.op, intrin_func, binds={data_vec: ab_, kernel_vec: bb_, conv: cb_}
    )
def _schedule_asm(cfg, s, a_vec, b_vec, mat, output, last):
    """schedule implementation.

    Splits/reorders the final stage by the tile sizes recorded in the
    matmul stage's attrs, then inlines the packing stages.
    """
    # is_transpose is only meaningful when cfg is a plain dict.
    is_transpose = 0 if not isinstance(cfg, dict) else cfg["is_transpose"]
    attrs = mat.op.attrs
    vh_, vw_, vc_ = (attrs["vh_"].value, attrs["vw_"].value, attrs["vc_"].value)
    # axis split and reorder — branch on the packed-A rank, which encodes
    # the original layout ("NC" -> 3, "NCH" -> 4, "NCHW" -> 6).
    if len(a_vec.shape) == 3:
        ow, oc = s[last].op.axis
        oc, vc = s[last].split(oc, vc_)
        ow, vw = s[last].split(ow, vw_)
        s[last].reorder(ow, oc, vw, vc)
        s[last].vectorize(vc)
        # Alias oh/ow to oc so the shared code below has valid axes to use.
        oh = ow = oc
    elif len(a_vec.shape) == 4:
        # NOTE(review): five axes are unpacked here for a 4-D a_vec case —
        # this presumes `last` is a packed 5-axis stage; confirm with callers.
        n, oc, ow, vw, vc = s[last].op.axis
        oc, vc = s[last].split(oc, vc_)
        ow, vw = s[last].split(ow, vw_)
        s[last].reorder(n, oc, ow, vw, vc)
    elif len(a_vec.shape) == 6:
        if is_transpose:
            n, oh, ow, oc = s[last].op.axis
        else:
            n, oc, oh, ow = s[last].op.axis
        oc, vc = s[last].split(oc, vc_)
        oh, vh = s[last].split(oh, vh_)
        ow, vw = s[last].split(ow, vw_)
        s[last].reorder(n, oc, oh, ow, vh, vw, vc)
    else:
        raise ValueError("not support a_vec: " + str(len(a_vec.shape)))
    # Fuse the unpack stage into the fused consumer when one exists.
    if last != output and isinstance(output.op, tvm.tensor.ComputeOp):
        s[output].compute_inline()
    s[mat].compute_at(s[last], ow)
    s[mat].vectorize(s[mat].op.axis[-1])
    # mark parallel
    s[last].parallel(oh)
    if len(a_vec.shape) == 3:
        om, _, _ = s[a_vec].op.axis
        s[a_vec].compute_at(s[last], ow)
        s[a_vec].parallel(om)
    elif len(a_vec.shape) == 4:
        _, om, _, _ = s[a_vec].op.axis
        s[a_vec].compute_at(s[last], ow)
        s[a_vec].parallel(om)
    else:
        _, oh, _, _, _, _ = s[a_vec].op.axis
        s[a_vec].parallel(oh)
    s[a_vec].vectorize(s[a_vec].op.axis[-1])
    # NOTE(review): compute_inline after vectorize/parallel discards those
    # directives for a_vec/b_vec — presumably intentional (inline wins);
    # confirm against the lowered IR.
    s[a_vec].compute_inline()
    oc, _, _ = s[b_vec].op.axis
    s[b_vec].parallel(oc)
    s[b_vec].vectorize(s[b_vec].op.axis[-1])
    s[b_vec].compute_inline()
    return s
def _matmul_schedule_asm(cfg, outs):
    """schedule_conv2d_nchw schedule implementation.

    Walks the output graph, finds the "asm_matmul_output" stage, inlines
    redundant upstream stages of the packing tensors and applies
    ``_schedule_asm``.

    Parameters
    ----------
    cfg: ConfigEntity
        The config for this template.
    outs: Tensor or Array of Tensor
        Output tensor(s) of the computation graph.

    Returns
    -------
    s: Schedule
        The computation schedule.
    """
    # Accept a bare tensor for consistency with _depthwise_schedule_spatial_pack.
    outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
    s = tvm.create_schedule([x.op for x in outs])

    def _callback(op):
        if "asm_matmul_output" in op.tag:
            # schedule conv2d
            output = op.output(0)
            mat = op.input_tensors[0]
            # Skip a leading "attr" tensor when present.
            sidx = 0
            if mat.op.input_tensors[0].name == "attr":
                sidx = 1
            a_vec = mat.op.input_tensors[sidx]
            b_vec = mat.op.input_tensors[sidx + 1]

            def recurs_inline_(tensor):
                # Inline every upstream ComputeOp stage of a packing tensor.
                # BUG FIX: the original tested isinstance(tensor, ComputeOp)
                # on the Tensor itself, which is never true, so the whole
                # inlining pass was a silent no-op. Check the producer op
                # instead, and only inline actual compute stages (never
                # placeholders).
                if tensor.op.input_tensors:
                    producer = tensor.op.input_tensors[0]
                    if isinstance(producer.op, tvm.tensor.ComputeOp):
                        s[producer].compute_inline()
                        recurs_inline_(producer)

            recurs_inline_(a_vec)
            recurs_inline_(b_vec)
            _schedule_asm(cfg, s, a_vec, b_vec, mat, output, outs[0])

    traverse_inline(s, outs[0].op, _callback)
    return s
| @@ -1,17 +0,0 @@ | |||||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||||
| # | |||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| # you may not use this file except in compliance with the License. | |||||
| # You may obtain a copy of the License at | |||||
| # | |||||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||||
| # | |||||
| # Unless required by applicable law or agreed to in writing, software | |||||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| # See the License for the specific language governing permissions and | |||||
| # limitations under the License. | |||||
| # ============================================================================ | |||||
| """Neural network operators""" | |||||
| # from .at_lib import * | |||||
| # from .at_gen import * | |||||