Merge pull request !5685 from guozhijian/udpate_run_from_c75b100_to_c75b150_master
@@ -1 +1 @@
-Subproject commit 622af6c1c50034bea5a08bd409c5a410782bfe53
+Subproject commit 2dbfefcdd0d4b958801403dbaf9efe46447dccd2
@@ -17,8 +17,6 @@ import json
 import os
 import sys
 from te.platform.cce_conf import te_set_version
-from te.platform.fusion_manager import op_build_cfg_dis, op_build_cfg_en, set_current_op_name, \
-    init_op_pattern, set_op_params, set_op_build_type, get_op_pattern, set_current_op_func_name
 from te.platform.fusion_util import fusion_op
 from common import check_kernel_info, get_args, get_build_in_impl_path, get_ddk_version
@@ -27,7 +25,6 @@ build_in_impl_path = get_build_in_impl_path()
 # op function list
 op_build = "compile"
-op_pre_build = "pre_build"
 fusion_pattern_start_flag = "fusion_pattern_start"
 fusion_pattern_end_flag = "fusion_pattern_end"
@@ -83,19 +80,7 @@ def build_op(build_type, json_str):
         else:
             op_module = __import__("impl."+op_name, globals(), locals(), [op_name], 0)
         # get function
-        if build_type == op_pre_build:
-            # set op parameter
-            op_build_cfg_dis()
-            set_current_op_func_name(op_name)
-            set_current_op_name(kernel_name)
-            init_op_pattern()
-            set_op_params(*outputs_args, *attrs_args, kernel_name=kernel_name)
-            set_op_build_type('prebuild')
-            if custom_flag:
-                py_fn_name = kernel_info['op_info']['name']
-            else:
-                py_fn_name = op_name
-        elif build_type == op_build:
+        if build_type == op_build:
             if custom_flag:
                 py_fn_name = kernel_info['op_info']['name']
             else:
@@ -106,13 +91,6 @@ def build_op(build_type, json_str):
         if op_func is None:
             raise ValueError("Op:{} function {} is not supported by Tbe.".format(op_name, build_type))
-        # pre build
-        if build_type == op_pre_build:
-            op_func(*inputs_args, *outputs_args, *attrs_args, kernel_name=kernel_name)
-            # disable only pattern configuration
-            op_build_cfg_en()
-            return get_op_pattern()
         # call function
         if kernel_name[0:19] == "bounding_box_encode":
             return op_func(*inputs_args, *outputs_args, *attrs_args, kernel_name_val=kernel_name)
@@ -120,8 +98,6 @@ def build_op(build_type, json_str):
         return op_func(*inputs_args, *outputs_args, *attrs_args, kernel_name=kernel_name)
     except Exception as e:
-        if build_type == op_pre_build:
-            op_build_cfg_en()
         raise RuntimeError(e)
@@ -136,14 +112,9 @@ def compile_fusion_op(json_str):
         Exception: If specific keyword is not found.
     """
     args = json.loads(json_str)
+    te_set_version(ddk_version)
     if 'fusion_op' not in args or not args['fusion_op']:
         raise ValueError("Json string Errors, key:fusion_op not found.")
-    if 'prebuild_ops' not in args or not args['prebuild_ops']:
-        raise ValueError("Json string Errors, key:prebuild_ops not found.")
-    pre_build_op_list = args['prebuild_ops']
-    for op in pre_build_op_list:
-        build_op(op_pre_build, json.dumps(op))
     fusion_op_arg = args['fusion_op']
     return fusion_op(json.dumps(fusion_op_arg))
@@ -159,8 +130,6 @@ def compile_with_json(json_str):
     json_info = json.loads(json_str)
     if "fusion_op" in json_info:
         ret = compile_fusion_op(json_str)
-    elif "compile_type" in json_info:
-        ret = build_op(op_pre_build, json_str)
     else:
         ret = build_op(op_build, json_str)
     return ret
@@ -20,6 +20,8 @@
 #include <vector>
 #include <memory>
 #include <algorithm>
+#include <map>
+#include <climits>
 #include "runtime/device/kernel_runtime.h"
 #include "backend/kernel_compiler/aicpu/aicpu_kernel_mod.h"
 #include "backend/kernel_compiler/akg/akg_kernel_build.h"
@@ -218,7 +220,7 @@ void SetNodeInputs(const std::shared_ptr<AnfNode> &anf_node, mindspore::NodeDef
       mindspore::TensorShape_Dim *dim = tensorShape->add_dim();
       dim->set_size((::google::protobuf::int64)item);
     }
-    node_inputs->set_tensor_type((mindspore::DataType)input_data_type);
+    node_inputs->set_tensor_type(input_data_type);
     node_inputs->set_mem_device("HBM");
   }
 }
@@ -245,7 +247,7 @@ void SetNodeOutputs(const std::shared_ptr<AnfNode> &anf_node, mindspore::NodeDef
     }
     TypeId output_type = AnfAlgo::GetOutputDeviceDataType(anf_node, output_index);
     int32_t output_data_type = AicpuOpUtil::MsTypeToProtoType(output_type);
-    node_outputs->set_tensor_type((mindspore::DataType)output_data_type);
+    node_outputs->set_tensor_type(output_data_type);
     node_outputs->set_mem_device("HBM");
   }
 }
@@ -287,6 +289,109 @@ bool CreateNodeDefBytes(const std::shared_ptr<AnfNode> &anf_node,
   return true;
 }
 
+bool CreateExtInfo(const std::shared_ptr<AnfNode> &anf_node, const std::shared_ptr<AicpuOpKernelMod> &kernel_mod_ptr) {
+  if (!anf_node->isa<CNode>()) {
+    return true;
+  }
+
+  if (!AnfAlgo::IsDynamicShape(anf_node)) {
+    return true;
+  }
+
+  MS_LOG(INFO) << "CreateExtInfo start, " << anf_node->fullname_with_scope();
+  int32_t unknown_shape_type = UnknowShapeOpType::DEPEND_COMPUTE;
+  uint64_t ext_info_head_len = kExtInfoHeadSize;
+  std::string ext_info;
+  size_t input_num = AnfAlgo::GetInputTensorNum(anf_node);
+  size_t output_num = AnfAlgo::GetOutputTensorNum(anf_node);
+
+  // 1.addr:unknown shape type
+  uint64_t ext_info_len = ext_info.size();
+  ext_info_len += ext_info_head_len + sizeof(int32_t);
+
+  // 2.addr:input ShapeAndType
+  ext_info_len += ext_info_head_len + input_num * sizeof(ShapeAndType);
+
+  // 3.addr:output ShapeAndType
+  ext_info_len += ext_info_head_len + output_num * sizeof(ShapeAndType);
+
+  uint64_t ext_info_offset = ext_info.size();
+  ext_info.resize(ext_info_len, 0);
+  char *ext_info_buf = ext_info.data();
+
+  // deal1: unknown shape type
+  ExtInfo *info = reinterpret_cast<ExtInfo *>(ext_info_buf + ext_info_offset);
+  info->infoType = FWK_ADPT_EXT_SHAPE_TYPE;
+  info->infoLen = sizeof(int32_t);
+  ext_info_offset += ext_info_head_len;
+  int32_t *shape_type = reinterpret_cast<int32_t *>(ext_info_buf + ext_info_offset);
+  *shape_type = unknown_shape_type;
+  ext_info_offset += info->infoLen;
+
+  // deal2:input ShapeAndType
+  info = reinterpret_cast<ExtInfo *>(ext_info_buf + ext_info_offset);
+  info->infoType = FWK_ADPT_EXT_INPUT_SHAPE;
+  info->infoLen = input_num * sizeof(ShapeAndType);
+  ext_info_offset += ext_info_head_len;
+  ShapeAndType *inputs = reinterpret_cast<ShapeAndType *>(ext_info_buf + ext_info_offset);
+  for (size_t input_index = 0; input_index < input_num; input_index++) {
+    TypeId input_type = AnfAlgo::GetInputDeviceDataType(anf_node, input_index);
+    std::vector<size_t> input_shape;
+    int32_t input_data_type;
+    if (input_type == kObjectTypeString) {
+      auto cnode = anf_node->cast<CNodePtr>();
+      MS_EXCEPTION_IF_NULL(cnode);
+      auto input_node = cnode->inputs()[input_index + 1];
+      auto value_ptr = GetValueNode(input_node);
+      auto value = GetValue<std::string>(value_ptr);
+      input_shape.push_back(1);
+      input_shape.push_back(value.size());
+      input_data_type = AicpuOpUtil::MsTypeToProtoType(kTypeUnknown);
+    } else {
+      input_shape = AnfAlgo::GetInputDeviceShape(anf_node, input_index);
+      input_data_type = AicpuOpUtil::MsTypeToProtoType(input_type);
+    }
+    inputs[input_index].type = input_data_type;
+    size_t input_shape_index = 0;
+    for (; input_shape_index < input_shape.size(); input_shape_index++) {
+      inputs[input_index].dims[input_shape_index] = SizeToLong(input_shape[input_shape_index]);
+    }
+    if (input_shape.size() < kMaxShapeDims) {
+      inputs[input_index].dims[input_shape_index] = LLONG_MIN;
+    }
+  }
+  ext_info_offset += info->infoLen;
+
+  // deal3:output ShapeAndType
+  info = reinterpret_cast<ExtInfo *>(ext_info_buf + ext_info_offset);
+  info->infoType = FWK_ADPT_EXT_OUTPUT_SHAPE;
+  info->infoLen = output_num * sizeof(ShapeAndType);
+  ext_info_offset += ext_info_head_len;
+  ShapeAndType *outputs = reinterpret_cast<ShapeAndType *>(ext_info_buf + ext_info_offset);
+  for (size_t output_index = 0; output_index < output_num; output_index++) {
+    std::vector<size_t> output_shape = AnfAlgo::GetOutputDeviceShape(anf_node, output_index);
+    TypeId output_type = AnfAlgo::GetOutputDeviceDataType(anf_node, output_index);
+    int32_t output_data_type = AicpuOpUtil::MsTypeToProtoType(output_type);
+    outputs[output_index].type = output_data_type;
+    size_t output_shape_index = 0;
+    for (; output_shape_index < output_shape.size(); output_shape_index++) {
+      outputs[output_index].dims[output_shape_index] = SizeToLong(output_shape[output_shape_index]);
+    }
+    if (output_shape_index < kMaxShapeDims) {
+      outputs[output_index].dims[output_shape_index] = LLONG_MIN;
+    }
+  }
+
+  // set ext info
+  kernel_mod_ptr->SetExtInfo(ext_info);
+  return true;
+}
+
 KernelModPtr AicpuOpBuild(const std::shared_ptr<AnfNode> &anf_node) {
   MS_EXCEPTION_IF_NULL(anf_node);
   std::string op_name = AnfAlgo::GetCNodeName(anf_node);
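Note on the ext-info layout: CreateExtInfo serializes three back-to-back records into one byte string, each a packed 8-byte header (type plus payload length) followed by its payload: the unknown-shape type, then one ShapeAndType per input, then one per output, with LLONG_MIN terminating any shape shorter than kMaxShapeDims. A minimal standalone sketch of the same layout math (struct shapes copied from the aicpu_util.h hunk below, flexible array member dropped; the buffer values here are illustrative only, not what the kernel actually receives):

```cpp
#include <climits>
#include <cstdint>
#include <cstdio>
#include <string>

// Packed records mirroring the aicpu_util.h hunk below.
struct ExtInfo {
  int32_t infoType;
  uint32_t infoLen;
} __attribute__((packed));

struct ShapeAndType {
  int32_t type;
  int64_t dims[8];  // kMaxShapeDims == 8
} __attribute__((packed));

int main() {
  const uint64_t head = sizeof(ExtInfo);  // kExtInfoHeadSize == 8
  const size_t input_num = 2;
  const size_t output_num = 1;

  // Same three-record length computation as CreateExtInfo:
  // [head|int32 shape type][head|input ShapeAndType...][head|output ShapeAndType...]
  const uint64_t len = (head + sizeof(int32_t)) + (head + input_num * sizeof(ShapeAndType)) +
                       (head + output_num * sizeof(ShapeAndType));
  std::string ext_info(len, '\0');
  char *p = &ext_info[0];

  // Record 1: unknown-shape type.
  auto *rec = reinterpret_cast<ExtInfo *>(p);
  rec->infoType = 0;  // FWK_ADPT_EXT_SHAPE_TYPE
  rec->infoLen = sizeof(int32_t);
  *reinterpret_cast<int32_t *>(p + head) = 4;  // DEPEND_COMPUTE

  // Record 2: one ShapeAndType per input; a LLONG_MIN dim terminates any
  // shape shorter than 8 dims, exactly as in the loops above.
  rec = reinterpret_cast<ExtInfo *>(p + head + sizeof(int32_t));
  rec->infoType = 1;  // FWK_ADPT_EXT_INPUT_SHAPE
  rec->infoLen = static_cast<uint32_t>(input_num * sizeof(ShapeAndType));
  auto *in = reinterpret_cast<ShapeAndType *>(reinterpret_cast<char *>(rec) + head);
  in[0].type = 0;             // proto type id (illustrative)
  in[0].dims[0] = 16;         // a 1-D shape: {16}
  in[0].dims[1] = LLONG_MIN;  // end-of-shape sentinel
  // ... record 3 (outputs) is laid out the same way.

  printf("head=%llu bytes, total=%llu bytes\n", (unsigned long long)head, (unsigned long long)len);
  return 0;
}
```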
@@ -300,6 +405,11 @@ KernelModPtr AicpuOpBuild(const std::shared_ptr<AnfNode> &anf_node) {
   if (!CreateNodeDefBytes(anf_node, kernel_mod_ptr)) {
     MS_LOG(EXCEPTION) << "Create nodeDefBytes faild!";
   }
+
+  if (!CreateExtInfo(anf_node, kernel_mod_ptr)) {
+    MS_LOG(EXCEPTION) << "Create nodeDefBytes faild!";
+  }
+
   if (!SetIOSize(anf_node, kernel_mod_ptr)) {
     MS_LOG(EXCEPTION) << "Set input output size list failed.";
   }
@@ -43,6 +43,7 @@ AicpuOpKernelMod::~AicpuOpKernelMod() {
   input_size_list_.clear();
   output_size_list_.clear();
   workspace_size_list_.clear();
+  ext_info_.clear();
 }
 
 void AicpuOpKernelMod::SetInputSizeList(const std::vector<size_t> &size_list) { input_size_list_ = size_list; }
@@ -54,6 +55,7 @@ const std::vector<size_t> &AicpuOpKernelMod::GetWorkspaceSizeList() const { retu
 void AicpuOpKernelMod::SetInputList(const std::vector<int64_t> &inputList) { inputList_ = inputList; }
 void AicpuOpKernelMod::SetOutputList(const std::vector<int64_t> &outputList) { outputList_ = outputList; }
 void AicpuOpKernelMod::SetNodeDef(const std::string &nodeDef) { (void)node_def_str_.assign(nodeDef); }
+void AicpuOpKernelMod::SetExtInfo(const std::string &ext_info) { ext_info_ = ext_info; }
 void AicpuOpKernelMod::SetNodeName(const std::string &node_name) { node_name_ = node_name; }
 
 void AicpuOpKernelMod::SetAnfNode(const mindspore::AnfNodePtr &anf_node) {
   MS_EXCEPTION_IF_NULL(anf_node);
@@ -84,16 +86,30 @@ void AicpuOpKernelMod::CreateCpuKernelInfo(const std::vector<AddressPtr> &inputs
   auto node_def_len = node_def_str_.length();
   param_len += node_def_len;
+  param_len += sizeof(uint32_t);
+
+  AicpuParamHead aicpu_param_head;
+  aicpu_param_head.length = param_len;
+  aicpu_param_head.ioAddrNum = io_addrs_num;
+
+  if (ext_info_.empty()) {
+    MS_LOG(INFO) << "Static Shape Kernel";
+    aicpu_param_head.extInfoLength = 0;
+    aicpu_param_head.extInfoAddr = 0;
+  } else {
+    MS_LOG(INFO) << "Dynamic Kernel Ext Info size:" << ext_info_.size();
+  }
 
-  // Create taskArgs: AicpuParamHead + ioAddrs + notifyId + customizedAttr
-  AicpuParamHead paramHead = {static_cast<uint32_t>(param_len), static_cast<uint32_t>(io_addrs_num)};
   args_.clear();
-  (void)args_.append(reinterpret_cast<const char *>(&paramHead), sizeof(AicpuParamHead));
+  (void)args_.append(reinterpret_cast<const char *>(&aicpu_param_head), sizeof(AicpuParamHead));
+
   // TaskArgs append ioAddrs
   if (io_addrs_size != 0) {
     (void)args_.append(reinterpret_cast<const char *>(io_addrs.data()), io_addrs_size);
   }
+
+  // size for node_def
+  args_.append(reinterpret_cast<const char *>(&node_def_len), sizeof(uint32_t));
+
   // When it's aicpu customized ops, taskArgs should append customized attr
   if (node_def_len != 0) {
     (void)args_.append(reinterpret_cast<const char *>(node_def_str_.data()), node_def_len);
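With the length slot added above, the task args that CreateCpuKernelInfo serializes are: AicpuParamHead, then the raw io addresses, then a uint32 node_def length, then the node_def bytes. A sketch of that framing (AicpuParamHead fields taken from the aicpu_util.h hunk below; shown with the static-shape branch, extInfoLength = extInfoAddr = 0):

```cpp
#include <cstdint>
#include <cstdio>
#include <string>
#include <vector>

// Packed head mirroring AicpuParamHead in aicpu_util.h.
struct AicpuParamHead {
  uint32_t length;
  uint32_t ioAddrNum;
  uint32_t extInfoLength;
  uint64_t extInfoAddr;
} __attribute__((packed));

// Sketch of the framing CreateCpuKernelInfo performs:
// head + ioAddrs + uint32 node_def length + node_def bytes.
std::string PackTaskArgs(const std::vector<uint64_t> &io_addrs, const std::string &node_def) {
  const uint32_t node_def_len = static_cast<uint32_t>(node_def.size());

  AicpuParamHead head{};
  head.ioAddrNum = static_cast<uint32_t>(io_addrs.size());
  // param_len now also counts the uint32 length slot added in this change.
  head.length = static_cast<uint32_t>(sizeof(AicpuParamHead) + io_addrs.size() * sizeof(uint64_t) +
                                      sizeof(uint32_t) + node_def_len);
  head.extInfoLength = 0;  // static-shape branch
  head.extInfoAddr = 0;

  std::string args;
  args.append(reinterpret_cast<const char *>(&head), sizeof(head));
  args.append(reinterpret_cast<const char *>(io_addrs.data()), io_addrs.size() * sizeof(uint64_t));
  args.append(reinterpret_cast<const char *>(&node_def_len), sizeof(node_def_len));
  args.append(node_def);
  return args;
}

int main() {
  const std::string args = PackTaskArgs({0x1000, 0x2000}, "node-def-bytes");
  printf("task args: %zu bytes\n", args.size());
  return 0;
}
```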
@@ -145,8 +161,9 @@ std::vector<TaskInfoPtr> AicpuOpKernelMod::GenTask(const std::vector<AddressPtr>
     node_name_ = kTopKV2;
   }
 
-  AicpuTaskInfoPtr task_info_ptr = make_shared<ge::model_runner::AicpuTaskInfo>(
-    kernel_name_, stream_id, node_so_, node_name_, node_def_str_, input_data_addrs, output_data_addrs, NeedDump());
+  AicpuTaskInfoPtr task_info_ptr =
+    make_shared<ge::model_runner::AicpuTaskInfo>(kernel_name_, stream_id, node_so_, node_name_, node_def_str_,
+                                                 ext_info_, input_data_addrs, output_data_addrs, NeedDump());
 
   MS_LOG(INFO) << "AicpuOpKernelMod GenTask end";
   return {task_info_ptr};
@@ -36,6 +36,7 @@ class AicpuOpKernelMod : public AscendKernelMod {
   void SetOutputList(const std::vector<int64_t> &outputList);
   void SetAnfNode(const AnfNodePtr &anf_node);
   void SetNodeDef(const std::string &nodeDef);
+  void SetExtInfo(const std::string &ext_info);
   void SetNodeName(const std::string &node_name);
 
   /**
@@ -58,6 +59,7 @@ class AicpuOpKernelMod : public AscendKernelMod {
   std::string node_def_str_;
   std::string node_name_;
   std::string node_so_;
+  std::string ext_info_;
   std::vector<int64_t> inputList_;
   std::vector<int64_t> outputList_;
   AnfNodePtr anf_node_;
@@ -21,7 +21,6 @@
 #include <map>
 #include <string>
 #include "backend/kernel_compiler/kernel.h"
-
 namespace mindspore {
 namespace kernel {
 constexpr auto kInitDataSetQueue = "InitDataSetQueue";
@@ -50,6 +49,36 @@ struct AicpuParamHead {
   uint64_t extInfoAddr;  // extInfo address
 } __attribute__((packed));
 
+const uint32_t kExtInfoHeadSize = 8;
+struct ExtInfo {
+  int32_t infoType;  // extend type
+  uint32_t infoLen;  // length for infoMsg
+  char infoMsg[0];   // extend value
+} __attribute__((packed));
+
+// Extent info ShapeAndType
+const uint32_t kMaxShapeDims = 8;
+struct ShapeAndType {
+  int32_t type;
+  int64_t dims[kMaxShapeDims];
+} __attribute__((packed));
+
+// Extend Info type for task
+enum FWKTaskExtInfoType {
+  FWK_ADPT_EXT_SHAPE_TYPE = 0,
+  FWK_ADPT_EXT_INPUT_SHAPE,
+  FWK_ADPT_EXT_OUTPUT_SHAPE,
+  FWK_ADPT_EXT_INVALID
+};
+
+// for unknown shape op type
+enum UnknowShapeOpType {
+  DEPEND_IN_SHAPE = 1,     // op out shape get by input shape
+  DEPEND_CONST_VALUE = 2,  // op out shape get by const op value
+  DEPEND_SHAPE_RANGE = 3,  // op out shape get by range
+  DEPEND_COMPUTE = 4       // op out shape get by totally computing
+};
+
 class AicpuOpUtil {
  public:
   static int MsTypeToProtoType(TypeId ms_type);
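These records are parsed back out of a flat byte buffer on the device side, so the packed sizes are load-bearing: kExtInfoHeadSize is exactly the two fixed ExtInfo fields, and each ShapeAndType entry is 4 + 8 * 8 = 68 bytes with no padding. A compile-time check of that, as a sketch (assuming GNU-style packed attributes and zero-length arrays, which the header itself already uses):

```cpp
#include <cstdint>

struct ExtInfoChk {
  int32_t infoType;
  uint32_t infoLen;
  char infoMsg[0];
} __attribute__((packed));

struct ShapeAndTypeChk {
  int32_t type;
  int64_t dims[8];  // kMaxShapeDims
} __attribute__((packed));

// The device parses these out of a flat byte buffer, so the sizes must not drift.
static_assert(sizeof(ExtInfoChk) == 8, "ext info head must stay 8 bytes (kExtInfoHeadSize)");
static_assert(sizeof(ShapeAndTypeChk) == 68, "ShapeAndType must stay 4 + 8 * 8 = 68 bytes");

int main() { return 0; }
```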
@@ -26,7 +26,7 @@ message AttrValue {
    repeated int64 i = 3 [ packed = true ];        //"array(int)"
    repeated float f = 4 [ packed = true ];        //"array(float)"
    repeated bool b = 5 [ packed = true ];         //"array(bool)"
-   repeated DataType type = 6 [ packed = true ];  //"array(type)"
+   repeated int32 type = 6 [ packed = true ];     //"array(type)"
    repeated TensorShape shape = 7;                //"array(shape)"
    repeated Tensor tensor = 8;                    //"array(tensor)"
  }
@@ -18,9 +18,16 @@ package mindspore;
 import "attr.proto";
 import "tensor.proto";
 
+message DynamicIdx {
+  int32 idx = 1;
+  int32 num = 2;
+}
+
 message NodeDef {
   string op = 2;
   map<string, AttrValue> attrs = 3;
   repeated Tensor inputs = 4;
   repeated Tensor outputs = 5;
+  map<string, DynamicIdx> dym_inputs = 6;
+  map<string, DynamicIdx> dym_outputs = 7;
 }
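Reading DynamicIdx as (start index, element count) into the flat inputs/outputs lists is my inference from the field names. On that assumption, registering a dynamic input group through the generated protobuf API would look roughly like this (node_def.pb.h is a stand-in name for whatever header the build generates from this proto):

```cpp
#include "node_def.pb.h"  // stand-in name for the generated header

// Sketch: describe a 3-tensor dynamic input group "x" that occupies
// NodeDef.inputs[1..3], assuming DynamicIdx = (start index, count).
void AddDynamicInputGroup(mindspore::NodeDef *node_def) {
  mindspore::DynamicIdx dyn;
  dyn.set_idx(1);  // first tensor of the group in the flat inputs list
  dyn.set_num(3);  // number of tensors belonging to the group
  (*node_def->mutable_dym_inputs())["x"] = dyn;
}
```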
@@ -26,9 +26,12 @@ message Tensor {
   TensorShape tensor_shape = 1;
 
   // tensor content data type
-  DataType tensor_type = 2;
+  int32 tensor_type = 2;
 
   // tensor memory device
   // data located memory device , "DDR" "HBM" OR "NONE"
   string mem_device = 3;
+  string name = 4;
+  uint64 data_ptr = 5;
+  uint64 data_size = 6;
 }
@@ -31,5 +31,5 @@ message TensorShape {
   bool unknown_rank = 3;
 
   // data format "NHWC" "NCHW" "NC1HWC0" OR "NONE"
-  string data_format = 4;
+  int32 data_format = 4;
 };
@@ -19,17 +19,30 @@ option cc_enable_arenas = true;
 package mindspore;
 
 enum DataType {
-  MS_UNKNOWN = 0;
-  MS_BOOL = 1;
+  MS_FLOAT32 = 0;
+  MS_FLOAT16 = 1;
   MS_INT8 = 2;
-  MS_UINT8 = 3;
-  MS_INT16 = 4;
-  MS_UINT16 = 5;
-  MS_INT32 = 6;
-  MS_UINT32 = 7;
-  MS_INT64 = 8;
-  MS_UINT64 = 9;
-  MS_FLOAT16 = 10;
-  MS_FLOAT32 = 11;
-  MS_FLOAT64 = 12;
+  MS_INT32 = 3;
+  MS_UINT8 = 4;
+  MS_INT16 = 6;
+  MS_UINT16 = 7;
+  MS_UINT32 = 8;
+  MS_INT64 = 9;
+  MS_UINT64 = 10;
+  MS_FLOAT64 = 11;
+  MS_BOOL = 12;
+  MS_STRING = 13;
+  MS_DUAL_SUB_INT8 = 14;
+  MS_DUAL_SUB_UINT8 = 15;
+  MS_COMPLEX64 = 16;
+  MS_COMPLEX128 = 17;
+  MS_QINT8 = 18;
+  MS_QINT16 = 19;
+  MS_QINT32 = 20;
+  MS_QUINT8 = 21;
+  MS_QUINT16 = 22;
+  MS_RESOURCE = 23;
+  MS_STRING_REF = 24;
+  MS_DUAL = 25;
+  MS_UNKNOWN = 26;
 }
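The enum is renumbered wholesale (MS_FLOAT32 moves from 11 to 0, MS_BOOL from 1 to 12, value 5 is left unused), which is presumably why the tensor_type, data_format, and attr type fields above switch from enum types to int32: a raw int32 survives renumbering on the wire, and each side maps through MsTypeToProtoType-style tables instead of trusting enum identity. That shifts range checking to the reader; a hedged sketch of the kind of guard an int32 field then wants:

```cpp
#include <cstdint>

// Mirrors a few values of the renumbered proto enum above.
enum class ProtoDataType : int32_t { MS_FLOAT32 = 0, MS_FLOAT16 = 1, MS_INT8 = 2, MS_UNKNOWN = 26 };

// With tensor_type now a plain int32 on the wire, the reader has to
// range-check before treating the value as a DataType.
ProtoDataType ToDataType(int32_t wire_value) {
  if (wire_value < 0 || wire_value > 26) {
    return ProtoDataType::MS_UNKNOWN;
  }
  return static_cast<ProtoDataType>(wire_value);
}

int main() { return ToDataType(0) == ProtoDataType::MS_FLOAT32 ? 0 : 1; }
```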
@@ -37,7 +37,6 @@ enum FusionType {
   COMMREDUCE,
   SEGMENT,
   OPAQUE,
-  DYNAMIC,
   UNKNOWN_FUSION_TYPE = -1,
 };
 enum OpPattern {
@@ -80,8 +79,8 @@ class KernelPack {
   bool LoadKernelMeta(const std::string &json_f, const std::string &processor);
   bool ReadFromJsonFile(const std::string &json_f, const std::string &processor);
   const std::string Serialize() const;
-  const FlexArray *const GetJson() const { return json_; }
-  const FlexArray *const GetKernel() const { return kernel_; }
+  const FlexArray *GetJson() const { return json_; }
+  const FlexArray *GetKernel() const { return kernel_; }
   ~KernelPack() {
     if (json_) {
       delete[] json_;
@@ -19,53 +19,36 @@
 #include <map>
 #include <string>
 #include <memory>
+#include <utility>
 #include "backend/kernel_compiler/tbe/tbe_kernel_build.h"
 #include "backend/kernel_compiler/tbe/tbe_kernel_parallel_build.h"
 #include "backend/kernel_compiler/tbe/tbe_utils.h"
 #include "backend/kernel_compiler/tbe/tbe_convert_utils.h"
+#include "utils/ms_context.h"
 namespace mindspore {
 namespace kernel {
 using mindspore::kernel::tbe::TbeUtils;
-static bool GenPreBuildKernelJson(const std::vector<AnfNodePtr> &compute_nodes,
-                                  std::vector<nlohmann::json> *prebuild_op_list) {
-  MS_EXCEPTION_IF_NULL(prebuild_op_list);
-  TbeKernelJsonCreator creator(PREBUILD);
-  for (const auto &anf_node : compute_nodes) {
-    nlohmann::json prebuild;
-    if (!creator.GenTbeSingleKernelJson(anf_node, &prebuild)) {
-      MS_LOG(ERROR) << "GenTbeSingleKernelJson failed";
-      return false;
-    }
-    (*prebuild_op_list).push_back(prebuild);
-  }
-  return true;
-}
 std::map<int32_t, KernelModPtr> KernelFusion(const std::vector<FusionScopeInfo> &fusion_scopes) {
   MS_LOG(INFO) << "kernel fusion build start, scope size:" << fusion_scopes.size();
   std::map<int32_t, KernelModPtr> kernel_mod_ret;
   auto build_manger = std::make_shared<ParallelBuildManager>();
   MS_EXCEPTION_IF_NULL(build_manger);
   for (const auto &fusion_scope_iter : fusion_scopes) {
-    auto scope_id = fusion_scope_iter.scope_id;
+    string fusion_kernel_name;
     nlohmann::json fusion_op;
-    string fusion_kernel = "te_fusion";
     if (!TbeKernelBuild::GenFusionScopeJson(fusion_scope_iter.input_nodes, fusion_scope_iter.compute_nodes, &fusion_op,
-                                            &fusion_kernel)) {
+                                            &fusion_kernel_name)) {
       continue;
     }
     // gen kernel_name & check cache
     std::string json_str = fusion_op.dump();
     size_t hash_id = std::hash<std::string>()(json_str);
-    auto json_name = fusion_kernel.append("_").append(std::to_string(hash_id));
+    auto context_ptr = MsContext::GetInstance();
+    MS_EXCEPTION_IF_NULL(context_ptr);
+    auto device_id = context_ptr->get_param<uint32_t>(MS_CTX_DEVICE_ID);
+    auto json_name =
+      fusion_kernel_name.append("_").append(std::to_string(hash_id)).append("_").append(std::to_string(device_id));
     fusion_op["fusion_op_name"] = json_name;
-    // gen json for prebuild
-    std::vector<nlohmann::json> prebuild_op_list;
-    if (!GenPreBuildKernelJson(fusion_scope_iter.compute_nodes, &prebuild_op_list)) {
-      continue;
-    }
     // get io size
     std::vector<size_t> input_size_list;
    std::vector<size_t> output_size_list;
@@ -80,20 +63,20 @@ std::map<int32_t, KernelModPtr> KernelFusion(const std::vector<FusionScopeInfo>
       auto kernel_mod =
         build_manger->GenKernelMod(json_name, tbe::kProcessorAiCore, input_size_list, output_size_list, kernel_pack);
       if (kernel_mod != nullptr) {
-        kernel_mod_ret[scope_id] = kernel_mod;
+        kernel_mod_ret[fusion_scope_iter.scope_id] = kernel_mod;
         continue;
       }
     }
     // fusion build
     nlohmann::json fusion_json;
     fusion_json["fusion_op"] = fusion_op;
-    fusion_json["prebuild_ops"] = prebuild_op_list;
     auto task_id = build_manger->StartCompileOp(fusion_json);
     TbeUtils::SaveJsonInfo(json_name, fusion_json.dump());
     if (task_id < 0) {
       MS_EXCEPTION(ArgumentError) << "start compile failed.";
     }
-    build_manger->SaveTaskInfo(task_id, nullptr, json_name, input_size_list, output_size_list, scope_id);
+    build_manger->SaveTaskInfo(task_id, nullptr, json_name, input_size_list, output_size_list,
+                               fusion_scope_iter.scope_id);
   }
 
   int build_failed_num = 0;
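Here, and again in GenTbeSingleKernelJson further down, the kernel json_name that doubles as the compile-cache key now ends with the device id, so identical ops compiled on different devices stop colliding on one cache entry. The key shape, as a sketch:

```cpp
#include <cstdint>
#include <cstdio>
#include <functional>
#include <string>

// Sketch of the cache-key construction used above: kernel name + json hash + device id.
std::string MakeJsonName(const std::string &fusion_kernel_name, const std::string &json_str, uint32_t device_id) {
  const size_t hash_id = std::hash<std::string>()(json_str);
  return fusion_kernel_name + "_" + std::to_string(hash_id) + "_" + std::to_string(device_id);
}

int main() {
  // Same fusion json on two devices now yields two distinct kernel names.
  printf("%s\n", MakeJsonName("te_fusion_conv2d", "{...}", 0).c_str());
  printf("%s\n", MakeJsonName("te_fusion_conv2d", "{...}", 1).c_str());
  return 0;
}
```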
@@ -16,6 +16,7 @@
 #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_KERNELFUSION_H_
 #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_KERNELFUSION_H_
+#include <utility>
 #include <vector>
 #include <map>
 #include "backend/kernel_compiler/kernel.h"
@@ -25,11 +26,9 @@ namespace kernel {
  * @brief fuse op and return a callable mod
  */
 struct FusionScopeInfo {
-  FusionScopeInfo() {}
-  FusionScopeInfo(int32_t id, const std::vector<AnfNodePtr> &in, const std::vector<AnfNodePtr> &comp,
-                  const std::vector<AnfNodePtr> &out)
-      : scope_id(id), input_nodes(in), compute_nodes(comp), output_nodes(out) {}
-  int32_t scope_id;
+  FusionScopeInfo(int32_t id, std::vector<AnfNodePtr> in, std::vector<AnfNodePtr> comp, std::vector<AnfNodePtr> out)
+      : scope_id(id), input_nodes(std::move(in)), compute_nodes(std::move(comp)), output_nodes(std::move(out)) {}
+  int32_t scope_id{};
   std::vector<AnfNodePtr> input_nodes;
   std::vector<AnfNodePtr> compute_nodes;
   std::vector<AnfNodePtr> output_nodes;
@@ -40,14 +40,13 @@ class OpLib {
  private:
   static bool RegOpFromLocalInfo();
-  static bool DecodeOpInfo(const nlohmann::json &obj, const OpImplyType imply_type, const std::string &impl_path);
-  static bool DecodeAttr(const nlohmann::json &obj, const OpImplyType imply_type,
-                         const std::shared_ptr<OpInfo> &op_info);
+  static bool DecodeOpInfo(const nlohmann::json &obj, OpImplyType imply_type, const std::string &impl_path);
+  static bool DecodeAttr(const nlohmann::json &obj, OpImplyType imply_type, const std::shared_ptr<OpInfo> &op_info);
   static bool DecodeDtypeFormat(const nlohmann::json &dtype_format, const std::shared_ptr<OpIOInfo> &op_io,
                                 size_t index);
   static void DecodeTBESpecificInfo(const nlohmann::json &obj, const std::shared_ptr<OpInfo> &op_info);
   static void DecodeAKGSpecificInfo(const nlohmann::json &obj, const std::shared_ptr<OpInfo> &op_info);
-  static bool DecodeInputOutput(const nlohmann::json &obj, const OpImplyType imply_type, const OpIOType io_type,
+  static bool DecodeInputOutput(const nlohmann::json &obj, OpImplyType imply_type, OpIOType io_type,
                                 const std::shared_ptr<OpInfo> &op_info, const nlohmann::json &dtype_format);
   static bool GetRefInfo(const std::shared_ptr<OpInfo> &op_info);
   static bool CheckRepetition(const std::shared_ptr<OpInfo> &op_info);
@@ -173,7 +173,7 @@ void TbeAdapter::NormalizeFuncName(std::string *func_name) {
   *func_name = name_tmp;
   auto iter = tbe_func_adapter_map.find(*func_name);
   if (iter != tbe_func_adapter_map.end()) {
-    MS_LOG(INFO) << "map actual op from me " << *func_name << " to tbe op" << iter->second;
+    MS_LOG(INFO) << "Map actual op from me: " << *func_name << " to tbe op: " << iter->second;
     *func_name = iter->second;
   }
 }
@@ -27,7 +27,7 @@
 // the TBE back-end operator implementation difference
 namespace mindspore {
 namespace kernel {
-enum kCreaterType : int { SINGLE_BUILD = 0, PREBUILD, OP_SELECT_FORMAT, CHECK_SUPPORTED, OP_PRE_COMPILE };
+enum kCreaterType : int { SINGLE_BUILD = 0, OP_SELECT_FORMAT, CHECK_SUPPORTED, OP_PRE_COMPILE };
 namespace tbe {
 using FAttrsPass = void (*)(const AnfNodePtr &anf_node, const std::vector<std::shared_ptr<OpAttr>> &op_info_attrs,
                             nlohmann::json *attrs_json);
@@ -63,7 +63,7 @@ const std::unordered_map<std::string, size_t> type_nbyte_maps = {
 const std::unordered_map<std::string, FusionType> fusion_type_maps = {
   {"CONVLUTION", FusionType::CONVLUTION}, {"ELEMWISE", FusionType::ELEMWISE}, {"COMMREDUCE", FusionType::COMMREDUCE},
-  {"SEGMENT", FusionType::SEGMENT},       {"DYNAMIC", FusionType::DYNAMIC},   {"OPAQUE", FusionType::OPAQUE},
+  {"SEGMENT", FusionType::SEGMENT},       {"OPAQUE", FusionType::OPAQUE},
 };
 
 TypeId DtypeToTypeId(const std::string &dtypes) {
@@ -24,6 +24,7 @@
 #include "backend/kernel_compiler/tbe/tbe_adapter.h"
 #include "backend/kernel_compiler/tbe/tbe_convert_utils.h"
 #include "backend/kernel_compiler/tbe/tbe_utils.h"
+#include "utils/ms_context.h"
 namespace mindspore {
 namespace kernel {
@@ -71,14 +72,20 @@ constexpr auto kVTypeListListInt = "listListInt";
 constexpr auto kJValue = "value";
 constexpr auto kJDynIndex = "dyn_index";
 constexpr auto kJFuncName = "func_name";
-std::string NormalizeFullScopeName(const string &full_scope_name) {
-  // exp:Default/ReLU-op0 -->Default_ReLU_op0
-  string normal_ret = full_scope_name;
-  std::replace(normal_ret.begin(), normal_ret.end(), '/', '_');
-  std::replace(normal_ret.begin(), normal_ret.end(), '-', '_');
-  return normal_ret;
-}
+constexpr auto kJL1AddrOffset = "L1_addr_offset";
+constexpr auto kJL1FusionType = "L1_fusion_type";
+constexpr auto kJL1WorkspaceSize = "L1_workspace_size";
+constexpr auto kJAddrType = "addr_type";
+constexpr auto kJSliceOffset = "slice_offset";
+constexpr auto kJSplitIndex = "split_index";
+constexpr auto kJTotalShape = "total_shape";
+constexpr auto kJValidShape = "valid_shape";
+constexpr auto kJModuleName = "module_name";
+constexpr auto kJPattern = "pattern";
+constexpr auto kJPyModulePath = "py_module_path";
+constexpr auto kJPreBuildOutsAttrs = "prebuild_outs_attrs";
+constexpr auto kJKwdArgs = "kwds_args";
+constexpr auto kJListArgs = "list_args";
 
 bool TbeKernelJsonCreator::GenTbeSingleKernelJson(const std::shared_ptr<mindspore::AnfNode> &anf_node,
                                                   nlohmann::json *kernel_json) {
@@ -117,13 +124,12 @@ bool TbeKernelJsonCreator::GenTbeSingleKernelJson(const std::shared_ptr<mindspor
   op_info_json[kJAttrs] = attrs_json;
   std::string json_str = op_info_json.dump();
   size_t hash_id = std::hash<std::string>()(json_str);
-  json_name_ = op_name + "_" + std::to_string(hash_id);
+  auto context_ptr = MsContext::GetInstance();
+  MS_EXCEPTION_IF_NULL(context_ptr);
+  auto device_id = context_ptr->get_param<uint32_t>(MS_CTX_DEVICE_ID);
+  json_name_ = op_name + "_" + std::to_string(hash_id) + "_" + std::to_string(device_id);
   json_info_ = json_str;
-  if (creater_type_ == PREBUILD) {
-    op_info_json[kJKernelName] = NormalizeFullScopeName(anf_node->fullname_with_scope());
-  } else {
-    op_info_json[kJKernelName] = json_name_;
-  }
+  op_info_json[kJKernelName] = json_name_;
   (*kernel_json)[kJOpInfo] = op_info_json;
   (*kernel_json)[kJFullName] = anf_node->fullname_with_scope();
   if (creater_type_ == SINGLE_BUILD) {
@@ -581,25 +587,25 @@ bool TbeKernelBuild::GetIOSize(const nlohmann::json &kernel_json, std::vector<si
 bool TbeKernelBuild::GenFusionScopeJson(const std::vector<mindspore::AnfNodePtr> &input_nodes,
                                         const std::vector<mindspore::AnfNodePtr> &compute_nodes,
-                                        nlohmann::json *fusion_str, std::string *fusion_kernel) {
-  MS_EXCEPTION_IF_NULL(fusion_str);
-  MS_EXCEPTION_IF_NULL(fusion_kernel);
+                                        nlohmann::json *fusion_json, std::string *fusion_kernel_name) {
+  MS_EXCEPTION_IF_NULL(fusion_json);
+  MS_EXCEPTION_IF_NULL(fusion_kernel_name);
   // get input layer info
   std::vector<std::vector<mindspore::AnfNodePtr>> input_layers;
   std::map<const AnfNodePtr, FusionDataType> spec_data_input;
   if (!GetInputLayers(input_nodes, compute_nodes, &input_layers, &spec_data_input)) {
     return false;
   }
-  // gen fusion scopre_op jsom
+  // gen fusion scopre_op json
   std::vector<nlohmann::json> compute_list;
-  (*fusion_kernel) = kFusionKernelNamePrfix;
+  (*fusion_kernel_name) = kFusionKernelNamePrfix;
   // index: fusion build option input record, next one from 0
   static size_t index = 0;
   auto layer_iter = input_layers.begin();
   auto compute_op_iter = compute_nodes.begin();
   for (; compute_op_iter != compute_nodes.end(); ++compute_op_iter, ++layer_iter) {
     nlohmann::json compute_op_str;
-    (void)GenFusionComputeJson(*compute_op_iter, &layer_iter, &compute_op_str, fusion_kernel, &index);
+    (void)GenFusionComputeJson(*compute_op_iter, &layer_iter, &compute_op_str, fusion_kernel_name, &index);
     compute_list.push_back(compute_op_str);
   }
   index = 0;
@@ -617,36 +623,122 @@ bool TbeKernelBuild::GenFusionScopeJson(const std::vector<mindspore::AnfNodePtr>
   }
   index = 0;
   data_list.insert(data_list.end(), compute_list.begin(), compute_list.end());
-  (*fusion_str)[kFusionOpList] = data_list;
+  (*fusion_json)[kFusionOpList] = data_list;
   return true;
 }
 
+void TbeKernelBuild::GenPreDescJson(nlohmann::json *output_desc) {
+  MS_EXCEPTION_IF_NULL(output_desc);
+  (*output_desc)[kJL1AddrOffset] = 0;
+  (*output_desc)[kJL1FusionType] = -1;
+  (*output_desc)[kJL1WorkspaceSize] = -1;
+  (*output_desc)[kJAddrType] = 0;
+}
+
+void TbeKernelBuild::GenFusionComputeCommonJson(const mindspore::CNodePtr &cnode, nlohmann::json *compute_op_str,
+                                                std::string *fusion_kernel_name) {
+  MS_EXCEPTION_IF_NULL(compute_op_str);
+  MS_EXCEPTION_IF_NULL(fusion_kernel_name);
+  // gen others
+  auto origin_type = AnfAlgo::GetCNodeName(cnode);
+  // replace special op type for buffer fusion op
+  auto type = GetRealOpType(origin_type);
+  (*compute_op_str)[kJtype] = type;
+  tbe::TbeAdapter::NormalizeFuncName(&type);
+  (*compute_op_str)[kJFuncName] = type;
+  (*compute_op_str)[kJModuleName] = std::string("impl.") + type;
+  (*compute_op_str)[kJName] = cnode->fullname_with_scope();
+  (*compute_op_str)[kJPattern] = GetNodeFusionType(cnode);
+  (*compute_op_str)[kJPyModulePath] = "/usr/local/Ascend/opp/op_impl/build_in/ai_core/tbe";
+  (void)(*fusion_kernel_name).append("_");
+  (void)(*fusion_kernel_name).append(type);
+}
+
+void TbeKernelBuild::GenFusionComputePreBuildJson(const mindspore::CNodePtr &cnode, nlohmann::json *compute_op_str) {
+  MS_EXCEPTION_IF_NULL(cnode);
+  MS_EXCEPTION_IF_NULL(compute_op_str);
+  // kwds args
+  nlohmann::json json_prebuild_args;
+  json_prebuild_args[kJKwdArgs] = nlohmann::json::object();
+  // list_args
+  nlohmann::json json_list_args;
+  // list_args: output args
+  auto output_size = AnfAlgo::GetOutputTensorNum(cnode);
+  for (size_t i = 0; i < output_size; ++i) {
+    nlohmann::json output_desc;
+    GenDescJson(cnode, i, i, &output_desc);
+    output_desc[kJDtype] = output_desc[kJDataType];
+    json_list_args.push_back(output_desc);
+  }
+  // list_args: attr args
+  auto op_name = AnfAlgo::GetCNodeName(cnode);
+  auto opinfo = OpLib::FindOp(op_name, OpImplyType::kTBE);
+  MS_EXCEPTION_IF_NULL(opinfo);
+  TbeKernelJsonCreator json_creater(SINGLE_BUILD);
+  nlohmann::json json_attr_args;
+  if (!json_creater.GenTbeAttrJson(cnode, opinfo, &json_attr_args)) {
+    MS_LOG(INFO) << "Fusion warning: get prebuild args of attr failed.";
+  }
+  for (const auto &attr : json_attr_args) {
+    if (attr[kJName] != "isRef" && attr[kJValid] == true) {
+      json_list_args.push_back(attr[kJValue]);
+    }
+  }
+  json_prebuild_args[kJListArgs] = json_list_args;
+  (*compute_op_str)[kJPreBuildOutsAttrs] = json_prebuild_args;
+}
+
+void TbeKernelBuild::GenSuffixDescJson(nlohmann::json *output_desc) {
+  MS_EXCEPTION_IF_NULL(output_desc);
+  (*output_desc)[kJSliceOffset] = nlohmann::json::array();
+  (*output_desc)[kJSplitIndex] = 0;
+  (*output_desc)[kJTotalShape] = nlohmann::json::array();
+  (*output_desc)[kJValidShape] = nlohmann::json::array();
+}
+
+// anf_node: this node is used to get output desc(type\foramt\shape ...)
+// node_out_idx: node output index
+// desc_output_idx: this index use to add json
+// nlohmann::json *output_desc: for return
+// FusionDataType fusion_data_type: speceial process json desc output shape [kFusionAddN, kFusionReLUGradV2]
 void TbeKernelBuild::GenDescJson(const std::shared_ptr<mindspore::AnfNode> &anf_node, size_t node_out_idx,
                                  size_t desc_output_idx, nlohmann::json *output_desc, FusionDataType fusion_data_type) {
+  GenPreDescJson(output_desc);
+  // data_type
+  auto type_id = AnfAlgo::GetOutputDeviceDataType(anf_node, node_out_idx);
+  (*output_desc)[kJDataType] = tbe::TypeIdToString(type_id);
+  // name
   std::string output_desc_name = anf_node->fullname_with_scope();
   if (node_out_idx > 0) {
     output_desc_name = output_desc_name + "_" + std::to_string(node_out_idx);
   }
-  (*output_desc)[kJName] = NormalizeFullScopeName(output_desc_name);
-  auto type_id = AnfAlgo::GetOutputDeviceDataType(anf_node, node_out_idx);
-  (*output_desc)[kJDataType] = tbe::TypeIdToString(type_id);
+  (*output_desc)[kJName] = output_desc_name;
+  // ori_format
+  (*output_desc)[kJOriFormat] = kOpFormat_NCHW;
+  // ori_shape
   auto ori_shape = AnfAlgo::GetOutputInferShape(anf_node, node_out_idx);
   if (ori_shape.empty()) {
     ori_shape.emplace_back(1);
   }
   (*output_desc)[kJOriShape] = ori_shape;
+  // !! Note: output_index, only node's output use it
+  (*output_desc)[kJOutputIndex] = desc_output_idx;
+  // shape
   auto shape = AnfAlgo::GetOutputDeviceShape(anf_node, node_out_idx);
   if (shape.empty()) {
     shape.emplace_back(1);
   }
   (*output_desc)[kJShape] = shape;
+  // !! Note: format: only data node's output use it
   auto format = AnfAlgo::GetOutputFormat(anf_node, node_out_idx);
   if (format == kOpFormat_DEFAULT) {
     format = ori_shape.size() == 4 ? kOpFormat_NCHW : kOpFormat_ND;
+  } else if (format == kOpFormat_FRAC_Z) {
+    format = kOpFormat_FRACTAL_Z;
   }
   (*output_desc)[kJFormat] = format;
-  (*output_desc)[kJOriFormat] = kOpFormat_NCHW;
-  (*output_desc)[kJOutputIndex] = desc_output_idx;
+  // special node
   if (fusion_data_type == kFusionAddN && format == kOpFormat_NC1HWC0) {
     std::vector<size_t> spec_shape = {};
     spec_shape.emplace_back(shape[0]);
@@ -663,12 +755,13 @@ void TbeKernelBuild::GenDescJson(const std::shared_ptr<mindspore::AnfNode> &anf_
     (*output_desc)[kJShape] = spec_shape;
     (*output_desc)[kJDataType] = kVTypeBool;
   }
+  GenSuffixDescJson(output_desc);
 }
 
 void TbeKernelBuild::GenReusedOutputDesc(const std::shared_ptr<mindspore::AnfNode> &anf_node, size_t index,
                                          size_t output_index, nlohmann::json *output_desc) {
   std::string output_desc_name = anf_node->fullname_with_scope() + "_" + std::to_string(index);
-  (*output_desc)[kJName] = NormalizeFullScopeName(output_desc_name);
+  (*output_desc)[kJName] = output_desc_name;
   (*output_desc)[kJOutputIndex] = output_index;
   std::vector<size_t> shape;
   (*output_desc)[kJShape] = shape;
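Taken together, GenPreDescJson, the reordered body of GenDescJson, and GenSuffixDescJson emit one flat JSON object per tensor. A sketch of the result for a single float16 NC1HWC0 output; the string values behind the pre-existing kJ* keys (name, data_type, shape, ...) are my guesses from the constant names, while the L1_*/slice_offset family is spelled out in the constants added above:

```cpp
#include <iostream>
#include <nlohmann/json.hpp>

int main() {
  // Illustrative output_desc as assembled by GenPreDescJson + GenDescJson + GenSuffixDescJson.
  const nlohmann::json output_desc = {
      {"L1_addr_offset", 0},  // GenPreDescJson
      {"L1_fusion_type", -1},
      {"L1_workspace_size", -1},
      {"addr_type", 0},
      {"data_type", "float16"},                 // GenDescJson body
      {"name", "Default/network/Conv2D-op42"},  // full scope name, no longer normalized
      {"ori_format", "NCHW"},
      {"ori_shape", {32, 64, 14, 14}},
      {"output_index", 0},
      {"shape", {32, 4, 14, 14, 16}},
      {"format", "NC1HWC0"},
      {"slice_offset", nlohmann::json::array()},  // GenSuffixDescJson
      {"split_index", 0},
      {"total_shape", nlohmann::json::array()},
      {"valid_shape", nlohmann::json::array()},
  };
  std::cout << output_desc.dump(2) << std::endl;
  return 0;
}
```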
@@ -692,6 +785,9 @@ bool TbeKernelBuild::GetSpecInputLayers(const std::string &op_name,
   return true;
 }
 
+// <input_nodes> : contains parameter/data node, input order may doesn't match tbe input order;
+// <compute_nodes> : contains cnode, inputs order may doesn't match tbe input order;
+// Special process node list: reference tbe_adapter.cc [except: Conv2DBackpropInput]
 bool TbeKernelBuild::GetInputLayers(const std::vector<mindspore::AnfNodePtr> &input_nodes,
                                     const std::vector<mindspore::AnfNodePtr> &compute_nodes,
                                     std::vector<std::vector<mindspore::AnfNodePtr>> *input_layers,
@@ -722,7 +818,7 @@ bool TbeKernelBuild::GetInputLayers(const std::vector<mindspore::AnfNodePtr> &in
         MS_LOG(INFO) << "Fusion info: add compute node's [" << i << "] input: " << input->fullname_with_scope();
         layer.emplace_back((*find_iter));
       } else {
-        MS_LOG(INFO) << "Fusion warnig: this input [" << i << "] may be pre compute(" << input->fullname_with_scope()
+        MS_LOG(INFO) << "Fusion warning: this input [" << i << "] may be pre compute(" << input->fullname_with_scope()
                      << ") node's output.";
       }
     }
@@ -750,8 +846,9 @@ bool TbeKernelBuild::GenFusionDataInputJson(const std::shared_ptr<mindspore::Anf
   MS_EXCEPTION_IF_NULL(data_str);
   MS_EXCEPTION_IF_NULL(index);
   std::vector<nlohmann::json> output_desc_list;
+  // if data_input is null, this is optional input.
   if (!data_input) {
-    MS_LOG(INFO) << "Data input is optional node";
+    MS_LOG(INFO) << "Fusion info: data input is optional node";
     auto name = std::string(kOptional) + std::to_string(*index);
     (*data_str)[kJName] = name;
     nlohmann::json output_desc;
@@ -767,12 +864,16 @@ bool TbeKernelBuild::GenFusionDataInputJson(const std::shared_ptr<mindspore::Anf
     auto kernel_idx = AnfAlgo::VisitKernel(data_input, 0);
     auto real_node = kernel_idx.first;
     size_t real_idx = kernel_idx.second;
-    MS_LOG(INFO) << "Real name " << real_node->fullname_with_scope() << " index:" << real_idx;
+    MS_LOG(INFO) << "Fusion info: Real name: " << real_node->fullname_with_scope() << ". index:" << real_idx;
     // kJOutputDesc
     nlohmann::json output_desc;
     GenDescJson(real_node, real_idx, real_idx, &output_desc, fusion_data_type);
     output_desc_list.push_back(output_desc);
-    (*data_str)[kJName] = NormalizeFullScopeName(real_node->fullname_with_scope());
+    auto full_name = real_node->fullname_with_scope();
+    if (real_idx > 0) {
+      full_name = full_name.append("_").append(std::to_string(real_idx));
+    }
+    (*data_str)[kJName] = full_name;
   }
   (*data_str)[kJOutputDesc] = output_desc_list;
   (*data_str)[kJtype] = "Data";
@@ -808,6 +909,7 @@ bool TbeKernelBuild::IsDynamicInput(const mindspore::CNodePtr &cnode) {
 size_t TbeKernelBuild::GetOptionalInput(const mindspore::CNodePtr &cnode, bool is_dynamic_input) {
   MS_EXCEPTION_IF_NULL(cnode);
   if (is_dynamic_input) {
+    // Node can not have optional & dynamic input.
     return 0;
   }
   MS_EXCEPTION_IF_NULL(cnode);
| @@ -831,22 +933,46 @@ std::string TbeKernelBuild::GetRealOpType(const std::string &origin_type) { | |||||
| return result; | return result; | ||||
| } | } | ||||
| std::string TbeKernelBuild::GetNodeFusionType(const mindspore::CNodePtr &cnode) { | |||||
| MS_EXCEPTION_IF_NULL(cnode); | |||||
| auto node_type = AnfAlgo::GetCNodeName(cnode); | |||||
| static std::map<std::string, std::string> fusion_type_map = {{kConv2DOpName, "Convolution"}, | |||||
| {kBNTrainingReduceOpName, "bn_reduce"}, | |||||
| {kBNTrainingUpdateOpName, "bn_update"}, | |||||
| {kReluV2OpName, "ElemWise"}, | |||||
| {kTensorAddOpName, "ElemWise"}, | |||||
| {kConv2DBackpropInputOpName, "Conv2d_backprop_input"}, | |||||
| {kAddNOpName, "ElemWise"}, | |||||
| {kReluGradV2OpName, "ElemWise"}, | |||||
| {kRealDivOpName, "ElemWise"}}; | |||||
| auto find = fusion_type_map.find(node_type); | |||||
| if (find == fusion_type_map.end()) { | |||||
| MS_LOG(INFO) << "Fusion warning: get node fusion type failed, origin node type: " << node_type | |||||
| << " return null string."; | |||||
| return ""; | |||||
| } else { | |||||
| return find->second; | |||||
| } | |||||
| } | |||||
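The new GetNodeFusionType is a plain table lookup with an empty-string fallback. A self-contained rendering for experimentation; apart from Conv2D (whose constant is added below as "Conv2D"), the literal op-type strings are assumptions read off the kXxxOpName constant names:

```cpp
#include <iostream>
#include <map>
#include <string>

// Mirrors fusion_type_map above; unknown op types fall back to "",
// matching the new function's warning branch.
std::string LookupFusionType(const std::string &node_type) {
  static const std::map<std::string, std::string> fusion_type_map = {
      {"Conv2D", "Convolution"},
      {"BNTrainingReduce", "bn_reduce"},
      {"BNTrainingUpdate", "bn_update"},
      {"ReLUV2", "ElemWise"},
      {"TensorAdd", "ElemWise"},
      {"Conv2DBackpropInput", "Conv2d_backprop_input"},
      {"AddN", "ElemWise"},
      {"ReluGradV2", "ElemWise"},
      {"RealDiv", "ElemWise"}};
  auto find = fusion_type_map.find(node_type);
  return find == fusion_type_map.end() ? "" : find->second;
}

int main() {
  std::cout << LookupFusionType("Conv2D") << "\n";  // Convolution
  std::cout << LookupFusionType("MatMul") << "\n";  // empty: not in the map
  return 0;
}
```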
| bool TbeKernelBuild::GenFusionComputeInputJson(const mindspore::CNodePtr &cnode, | bool TbeKernelBuild::GenFusionComputeInputJson(const mindspore::CNodePtr &cnode, | ||||
| std::vector<std::vector<mindspore::AnfNodePtr>>::iterator *layer_iter, | std::vector<std::vector<mindspore::AnfNodePtr>>::iterator *layer_iter, | ||||
| std::vector<nlohmann::json> *input_desc_list, size_t *index) { | std::vector<nlohmann::json> *input_desc_list, size_t *index) { | ||||
| MS_EXCEPTION_IF_NULL(cnode); | MS_EXCEPTION_IF_NULL(cnode); | ||||
| MS_EXCEPTION_IF_NULL(input_desc_list); | MS_EXCEPTION_IF_NULL(input_desc_list); | ||||
| std::vector<nlohmann::json> input_desc_list_tmp = {}; | std::vector<nlohmann::json> input_desc_list_tmp = {}; | ||||
| // 1. input json | |||||
| bool is_dynamic_input = IsDynamicInput(cnode); | bool is_dynamic_input = IsDynamicInput(cnode); | ||||
| for (size_t i = 1; i < cnode->inputs().size(); ++i) { | for (size_t i = 1; i < cnode->inputs().size(); ++i) { | ||||
| auto input = cnode->input(i); | auto input = cnode->input(i); | ||||
| auto kernel_idx = AnfAlgo::VisitKernel(input, 0); | auto kernel_idx = AnfAlgo::VisitKernel(input, 0); | ||||
| auto real_node = kernel_idx.first; | auto real_node = kernel_idx.first; | ||||
| size_t real_idx = kernel_idx.second; | size_t real_idx = kernel_idx.second; | ||||
| MS_LOG(INFO) << "Real name" << real_node->fullname_with_scope() << "index:" << real_idx; | |||||
| MS_LOG(INFO) << "Fusion info: real name: " << real_node->fullname_with_scope() << ". index:" << real_idx; | |||||
| nlohmann::json input_desc; | nlohmann::json input_desc; | ||||
| GenDescJson(real_node, real_idx, real_idx, &input_desc); | GenDescJson(real_node, real_idx, real_idx, &input_desc); | ||||
| if (is_dynamic_input) { | if (is_dynamic_input) { | ||||
| // 2. dynamic input json | |||||
| MS_LOG(INFO) << "Node has dynamic input."; | MS_LOG(INFO) << "Node has dynamic input."; | ||||
| input_desc[kJDynIndex] = (i - 1); | input_desc[kJDynIndex] = (i - 1); | ||||
| } | } | ||||
| @@ -854,7 +980,8 @@ bool TbeKernelBuild::GenFusionComputeInputJson(const mindspore::CNodePtr &cnode, | |||||
| } | } | ||||
| size_t optional_num = GetOptionalInput(cnode, is_dynamic_input); | size_t optional_num = GetOptionalInput(cnode, is_dynamic_input); | ||||
| if (optional_num > 0) { | if (optional_num > 0) { | ||||
| MS_LOG(INFO) << "Node has optional input."; | |||||
| // 3. optional input | |||||
| MS_LOG(INFO) << "Fusion info: node has optional input."; | |||||
| for (size_t i = 0; i < optional_num; ++i) { | for (size_t i = 0; i < optional_num; ++i) { | ||||
| nlohmann::json optional_input_desc; | nlohmann::json optional_input_desc; | ||||
| optional_input_desc[kJName] = std::string(kOptional) + std::to_string(*index); | optional_input_desc[kJName] = std::string(kOptional) + std::to_string(*index); | ||||
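Between them, the last two hunks handle the compute node's irregular inputs: real inputs of a dynamic-input node are tagged with a dyn_index, and optional inputs become named placeholder entries. A sketch, assuming kJDynIndex, kJName, and kOptional expand to "dyn_index", "name", and "optional_" (illustrative values only):

```cpp
#include <cstddef>
#include <nlohmann/json.hpp>
#include <string>
#include <vector>

// Sketch of GenFusionComputeInputJson's two special cases. The real code
// fills each desc's shape/dtype via GenDescJson first; a node never has
// both dynamic and optional inputs (see GetOptionalInput above).
void BuildComputeInputDescs(std::size_t real_input_num, bool is_dynamic_input,
                            std::size_t optional_num, std::size_t *index,
                            std::vector<nlohmann::json> *input_desc_list) {
  for (std::size_t i = 0; i < real_input_num; ++i) {
    nlohmann::json input_desc;
    if (is_dynamic_input) {
      input_desc["dyn_index"] = i;  // the hunk uses (i - 1): input 0 is the primitive
    }
    input_desc_list->push_back(input_desc);
  }
  for (std::size_t i = 0; i < optional_num; ++i) {
    nlohmann::json optional_input_desc;
    optional_input_desc["name"] = std::string("optional_") + std::to_string(*index);
    (*index)++;  // assumed: the running index advances per placeholder
    input_desc_list->push_back(optional_input_desc);
  }
}
```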
| @@ -872,7 +999,7 @@ std::vector<size_t> TbeKernelBuild::GetDescOutputIndex(const std::vector<int> &o | |||||
| std::vector<size_t> desc_output_index = {}; | std::vector<size_t> desc_output_index = {}; | ||||
| for (size_t idx = 0; idx < output_used_nums.size(); ++idx) { | for (size_t idx = 0; idx < output_used_nums.size(); ++idx) { | ||||
| auto output_use_num_item = output_used_nums[idx]; | auto output_use_num_item = output_used_nums[idx]; | ||||
| MS_LOG(INFO) << "Output used num[" << idx << "] = " << output_use_num_item; | |||||
| MS_LOG(INFO) << "Fusion info: output used num[" << idx << "] = " << output_use_num_item; | |||||
| desc_output_index.emplace_back(idx); | desc_output_index.emplace_back(idx); | ||||
| if (output_use_num_item > 1) { | if (output_use_num_item > 1) { | ||||
| desc_output_index.emplace_back(idx); | desc_output_index.emplace_back(idx); | ||||
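GetDescOutputIndex expands the output list so that an output consumed more than once gets a second descriptor slot. The rule, extracted into a standalone helper:

```cpp
#include <cstddef>
#include <vector>

// One desc slot per output, plus a duplicate slot for any output whose
// use count exceeds one, exactly as the loop above does.
std::vector<std::size_t> DescOutputIndex(const std::vector<int> &output_used_nums) {
  std::vector<std::size_t> desc_output_index;
  for (std::size_t idx = 0; idx < output_used_nums.size(); ++idx) {
    desc_output_index.emplace_back(idx);
    if (output_used_nums[idx] > 1) {
      desc_output_index.emplace_back(idx);
    }
  }
  return desc_output_index;
}
```

For output_used_nums = {1, 2} this yields {0, 1, 1}.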
| @@ -887,7 +1014,7 @@ bool TbeKernelBuild::GenFusionComputeOutputJson(const mindspore::CNodePtr &cnode | |||||
| auto output_size = AnfAlgo::GetOutputTensorNum(cnode); | auto output_size = AnfAlgo::GetOutputTensorNum(cnode); | ||||
| if (AnfAlgo::HasNodeAttr(kAttrOutputUsedNum, cnode)) { | if (AnfAlgo::HasNodeAttr(kAttrOutputUsedNum, cnode)) { | ||||
| auto output_used_nums = AnfAlgo::GetNodeAttr<std::vector<int>>(cnode, kAttrOutputUsedNum); | auto output_used_nums = AnfAlgo::GetNodeAttr<std::vector<int>>(cnode, kAttrOutputUsedNum); | ||||
| MS_LOG(INFO) << "This node's output has been reused, node name: " << cnode->fullname_with_scope(); | |||||
| MS_LOG(INFO) << "Fusion info: this node's output has been reused, node name: " << cnode->fullname_with_scope(); | |||||
| if (output_used_nums.size() != output_size) { | if (output_used_nums.size() != output_size) { | ||||
| MS_LOG(INFO) << "Fusion error: output tenor num(" << output_size << ")" | MS_LOG(INFO) << "Fusion error: output tenor num(" << output_size << ")" | ||||
| << " is not match output used num(" << output_used_nums.size() << ")"; | << " is not match output used num(" << output_used_nums.size() << ")"; | ||||
| @@ -930,20 +1057,14 @@ bool TbeKernelBuild::GenFusionComputeJson(const mindspore::AnfNodePtr &compute_n | |||||
| // gen output desc | // gen output desc | ||||
| std::vector<nlohmann::json> output_desc_list; | std::vector<nlohmann::json> output_desc_list; | ||||
| if (!GenFusionComputeOutputJson(cnode, &output_desc_list)) { | if (!GenFusionComputeOutputJson(cnode, &output_desc_list)) { | ||||
| MS_LOG(INFO) << "Fusion Error: gen fusion output desc faild, node full name: " << cnode->fullname_with_scope(); | |||||
| MS_LOG(INFO) << "Fusion Error: gen fusion output desc failed, node full name: " << cnode->fullname_with_scope(); | |||||
| return false; | return false; | ||||
| } | } | ||||
| (*compute_op_str)[kJOutputDesc] = output_desc_list; | (*compute_op_str)[kJOutputDesc] = output_desc_list; | ||||
| // gen others | |||||
| auto origin_type = AnfAlgo::GetCNodeName(cnode); | |||||
| // replace special op type for buffer fusion op | |||||
| auto type = GetRealOpType(origin_type); | |||||
| (*compute_op_str)[kJtype] = type; | |||||
| tbe::TbeAdapter::NormalizeFuncName(&type); | |||||
| (*compute_op_str)[kJFuncName] = type; | |||||
| (*compute_op_str)[kJName] = NormalizeFullScopeName(cnode->fullname_with_scope()); | |||||
| (void)(*fusion_kernel_name).append("_"); | |||||
| (void)(*fusion_kernel_name).append(type); | |||||
| // gen common desc | |||||
| GenFusionComputeCommonJson(cnode, compute_op_str, fusion_kernel_name); | |||||
| // gen prebuild args | |||||
| GenFusionComputePreBuildJson(cnode, compute_op_str); | |||||
| return true; | return true; | ||||
| } | } | ||||
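The inlined tail of GenFusionComputeJson (real op type, normalized func name, kernel-name concatenation) moves into the new GenFusionComputeCommonJson. A hypothetical sketch of what that helper covers, with lower-casing standing in for TbeAdapter::NormalizeFuncName and "type"/"func_name"/"name" assumed for the kJ* keys:

```cpp
#include <cctype>
#include <nlohmann/json.hpp>
#include <string>

// Hypothetical shape of the factored-out step, modeled on the removed
// inline code: fill the compute op's common fields and extend the fused
// kernel name with "_<func_name>".
void FillCommonDesc(const std::string &real_op_type, const std::string &full_scope_name,
                    nlohmann::json *compute_op_str, std::string *fusion_kernel_name) {
  std::string func_name = real_op_type;
  for (auto &c : func_name) {
    c = static_cast<char>(std::tolower(static_cast<unsigned char>(c)));  // stand-in normalization
  }
  (*compute_op_str)["type"] = real_op_type;
  (*compute_op_str)["func_name"] = func_name;
  (*compute_op_str)["name"] = full_scope_name;
  (void)fusion_kernel_name->append("_").append(func_name);
}
```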
| @@ -965,7 +1086,7 @@ bool TbeKernelBuild::GetIOSize(const nlohmann::json &fusion_op_list, | |||||
| MS_EXCEPTION_IF_NULL(output_size_list); | MS_EXCEPTION_IF_NULL(output_size_list); | ||||
| input_size_list->clear(); | input_size_list->clear(); | ||||
| output_size_list->clear(); | output_size_list->clear(); | ||||
| // cal input size for malloc | |||||
| for (const auto &op : fusion_op_list) { | for (const auto &op : fusion_op_list) { | ||||
| if (op[kJtype] == "Data") { | if (op[kJtype] == "Data") { | ||||
| const auto &data_output_desc = op[kJOutputDesc]; | const auto &data_output_desc = op[kJOutputDesc]; | ||||
| @@ -975,23 +1096,23 @@ bool TbeKernelBuild::GetIOSize(const nlohmann::json &fusion_op_list, | |||||
| } | } | ||||
| auto ret = GetIOSizeImpl(data_output); | auto ret = GetIOSizeImpl(data_output); | ||||
| input_size_list->push_back(ret); | input_size_list->push_back(ret); | ||||
| MS_LOG(INFO) << "Fusion info: scope input name: " << op[kJName] << ", size: " << ret; | |||||
| MS_LOG(INFO) << "Fusion info: input node name: " << op[kJName] << ", size: " << ret; | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| // cal output size for malloc | |||||
| for (const auto &output_node : output_nodes) { | for (const auto &output_node : output_nodes) { | ||||
| auto kernel_idx = AnfAlgo::VisitKernel(output_node, 0); | auto kernel_idx = AnfAlgo::VisitKernel(output_node, 0); | ||||
| auto real_node = kernel_idx.first; | auto real_node = kernel_idx.first; | ||||
| size_t real_idx = kernel_idx.second; | size_t real_idx = kernel_idx.second; | ||||
| auto normal_name = NormalizeFullScopeName(real_node->fullname_with_scope()); | |||||
| MS_LOG(INFO) << "Fusion info: real node name: " << normal_name << ", real output index: " << real_idx; | |||||
| auto full_name = real_node->fullname_with_scope(); | |||||
| MS_LOG(INFO) << "Fusion info: real output node name: " << full_name << ", real output index: " << real_idx; | |||||
| for (const auto &op : fusion_op_list) { | for (const auto &op : fusion_op_list) { | ||||
| if (op[kJName] == normal_name) { | |||||
| if (op[kJName] == full_name) { | |||||
| auto op_output_desces = op[kJOutputDesc]; | auto op_output_desces = op[kJOutputDesc]; | ||||
| if (output_node != real_node) { | if (output_node != real_node) { | ||||
| // tuple_get item | // tuple_get item | ||||
| MS_LOG(INFO) << "Output is a tuple getitem node"; | |||||
| MS_LOG(INFO) << "Fusion info: output is a tuple get_item node"; | |||||
| auto output_desc = op_output_desces[real_idx]; | auto output_desc = op_output_desces[real_idx]; | ||||
| if (output_desc[kJShape].empty()) { | if (output_desc[kJShape].empty()) { | ||||
| MS_LOG(INFO) << "Fusion error: output_desc's shape is empty. real_index " << real_idx; | MS_LOG(INFO) << "Fusion error: output_desc's shape is empty. real_index " << real_idx; | ||||
| @@ -1001,6 +1122,7 @@ bool TbeKernelBuild::GetIOSize(const nlohmann::json &fusion_op_list, | |||||
| output_size_list->push_back(ret); | output_size_list->push_back(ret); | ||||
| MS_LOG(INFO) << "Fusion info: scope output index: " << real_idx << ", size: " << ret; | MS_LOG(INFO) << "Fusion info: scope output index: " << real_idx << ", size: " << ret; | ||||
| } else { | } else { | ||||
| MS_LOG(INFO) << "Fusion info: output is self."; | |||||
| for (const auto &output_desc : op_output_desces) { | for (const auto &output_desc : op_output_desces) { | ||||
| if (output_desc[kJShape].empty()) { | if (output_desc[kJShape].empty()) { | ||||
| MS_LOG(INFO) << "Fusion info: output_desc's shape is empty, may be this node output"; | MS_LOG(INFO) << "Fusion info: output_desc's shape is empty, may be this node output"; | ||||
| @@ -41,8 +41,8 @@ class TbeKernelBuild { | |||||
| std::vector<size_t> *output_size_list); | std::vector<size_t> *output_size_list); | ||||
| // Ub Fusion | // Ub Fusion | ||||
| static bool GenFusionScopeJson(const std::vector<AnfNodePtr> &input_nodes, | static bool GenFusionScopeJson(const std::vector<AnfNodePtr> &input_nodes, | ||||
| const std::vector<AnfNodePtr> &compute_nodes, nlohmann::json *fusion_str, | |||||
| std::string *fusion_kernel); | |||||
| const std::vector<AnfNodePtr> &compute_nodes, nlohmann::json *fusion_json, | |||||
| std::string *fusion_kernel_name); | |||||
| static bool GetIOSize(const nlohmann::json &fusion_op_list, const std::vector<AnfNodePtr> &output_nodes, | static bool GetIOSize(const nlohmann::json &fusion_op_list, const std::vector<AnfNodePtr> &output_nodes, | ||||
| std::vector<size_t> *input_size_list, std::vector<size_t> *output_size_list); | std::vector<size_t> *input_size_list, std::vector<size_t> *output_size_list); | ||||
| @@ -61,9 +61,14 @@ class TbeKernelBuild { | |||||
| static std::vector<size_t> GetDescOutputIndex(const std::vector<int> &output_used_nums); | static std::vector<size_t> GetDescOutputIndex(const std::vector<int> &output_used_nums); | ||||
| static bool GenFusionComputeOutputJson(const mindspore::CNodePtr &cnode, | static bool GenFusionComputeOutputJson(const mindspore::CNodePtr &cnode, | ||||
| std::vector<nlohmann::json> *output_desc_list); | std::vector<nlohmann::json> *output_desc_list); | ||||
| static void GenPreDescJson(nlohmann::json *output_desc); | |||||
| static void GenFusionComputeCommonJson(const mindspore::CNodePtr &cnode, nlohmann::json *compute_op_str, | |||||
| std::string *fusion_kernel_name); | |||||
| static void GenFusionComputePreBuildJson(const mindspore::CNodePtr &cnode, nlohmann::json *compute_op_str); | |||||
| static void GenDescJson(const std::shared_ptr<mindspore::AnfNode> &anf_node, size_t node_out_idx, | static void GenDescJson(const std::shared_ptr<mindspore::AnfNode> &anf_node, size_t node_out_idx, | ||||
| size_t desc_output_idx, nlohmann::json *output_desc, | size_t desc_output_idx, nlohmann::json *output_desc, | ||||
| FusionDataType fusion_data_type = kFusionNormal); | FusionDataType fusion_data_type = kFusionNormal); | ||||
| static void GenSuffixDescJson(nlohmann::json *output_desc); | |||||
| static void GenReusedOutputDesc(const std::shared_ptr<mindspore::AnfNode> &anf_node, size_t index, | static void GenReusedOutputDesc(const std::shared_ptr<mindspore::AnfNode> &anf_node, size_t index, | ||||
| size_t output_index, nlohmann::json *output_desc); | size_t output_index, nlohmann::json *output_desc); | ||||
| static size_t GetIOSizeImpl(const nlohmann::json &desc); | static size_t GetIOSizeImpl(const nlohmann::json &desc); | ||||
| @@ -76,6 +81,7 @@ class TbeKernelBuild { | |||||
| static bool IsDynamicInput(const CNodePtr &cnode); | static bool IsDynamicInput(const CNodePtr &cnode); | ||||
| static size_t GetOptionalInput(const CNodePtr &cnode, bool is_dynamic_input); | static size_t GetOptionalInput(const CNodePtr &cnode, bool is_dynamic_input); | ||||
| static std::string GetRealOpType(const std::string &origin_type); | static std::string GetRealOpType(const std::string &origin_type); | ||||
| static std::string GetNodeFusionType(const CNodePtr &cnode); | |||||
| }; | }; | ||||
| class TbeKernelJsonCreator { | class TbeKernelJsonCreator { | ||||
| @@ -84,14 +90,14 @@ class TbeKernelJsonCreator { | |||||
| ~TbeKernelJsonCreator() = default; | ~TbeKernelJsonCreator() = default; | ||||
| bool GenTbeSingleKernelJson(const std::shared_ptr<AnfNode> &anf_node, nlohmann::json *kernel_json); | bool GenTbeSingleKernelJson(const std::shared_ptr<AnfNode> &anf_node, nlohmann::json *kernel_json); | ||||
| std::string json_name() { return json_name_; } | std::string json_name() { return json_name_; } | ||||
| bool GenTbeAttrJson(const std::shared_ptr<AnfNode> &anf_node, const std::shared_ptr<OpInfo> &op_info, | |||||
| nlohmann::json *attrs_json); | |||||
| private: | private: | ||||
| bool GenTbeInputsJson(const std::shared_ptr<AnfNode> &anf_node, const std::shared_ptr<OpInfo> &op_info, | bool GenTbeInputsJson(const std::shared_ptr<AnfNode> &anf_node, const std::shared_ptr<OpInfo> &op_info, | ||||
| nlohmann::json *inputs_json); | nlohmann::json *inputs_json); | ||||
| bool GenTbeOutputsJson(const std::shared_ptr<AnfNode> &anf_node, const std::shared_ptr<OpInfo> &op_info, | bool GenTbeOutputsJson(const std::shared_ptr<AnfNode> &anf_node, const std::shared_ptr<OpInfo> &op_info, | ||||
| nlohmann::json *outputs_json); | nlohmann::json *outputs_json); | ||||
| bool GenTbeAttrJson(const std::shared_ptr<AnfNode> &anf_node, const std::shared_ptr<OpInfo> &op_info, | |||||
| nlohmann::json *attrs_json); | |||||
| static void ParseAttrValue(const std::string &type, const ValuePtr &value, nlohmann::json *attr_obj); | static void ParseAttrValue(const std::string &type, const ValuePtr &value, nlohmann::json *attr_obj); | ||||
| bool GenInputDescJson(const std::shared_ptr<AnfNode> &anf_node, size_t real_input_index, bool value, | bool GenInputDescJson(const std::shared_ptr<AnfNode> &anf_node, size_t real_input_index, bool value, | ||||
| const std::shared_ptr<OpIOInfo> &input_ptr, const string &op_input_name, size_t input_i, | const std::shared_ptr<OpIOInfo> &input_ptr, const string &op_input_name, size_t input_i, | ||||
| @@ -33,42 +33,6 @@ | |||||
| namespace mindspore { | namespace mindspore { | ||||
| namespace kernel { | namespace kernel { | ||||
| using mindspore::kernel::tbe::TbeUtils; | using mindspore::kernel::tbe::TbeUtils; | ||||
| bool TbeOpParallelPreBuild(const std::vector<AnfNodePtr> &anf_nodes) { | |||||
| auto build_manger = std::make_shared<ParallelBuildManager>(); | |||||
| MS_EXCEPTION_IF_NULL(build_manger); | |||||
| for (const auto &anf_node : anf_nodes) { | |||||
| // gen kernel json | |||||
| MS_EXCEPTION_IF_NULL(anf_node); | |||||
| nlohmann::json kernel_json; | |||||
| TbeKernelJsonCreator creator(OP_PRE_COMPILE); | |||||
| if (!creator.GenTbeSingleKernelJson(anf_node, &kernel_json)) { | |||||
| MS_LOG(ERROR) << "GenTbeSingleKernelJson failed"; | |||||
| return false; | |||||
| } | |||||
| kernel_json["compile_type"] = "pre_build"; | |||||
| // op build | |||||
| auto task_id = build_manger->StartCompileOp(kernel_json); | |||||
| build_manger->SavePreTaskInfo(task_id, anf_node); | |||||
| } | |||||
| while (!build_manger->IsAllPreTaskFinish()) { | |||||
| int task_id = -1; | |||||
| std::string task_result; | |||||
| std::string pre_build_result; | |||||
| auto ret = build_manger->WaitOne(&task_id, &task_result, &pre_build_result); | |||||
| if (!ret) { | |||||
| MS_EXCEPTION(ArgumentError) << "Pre Build Failed. wait one ret:" << ret << ", task id:" << task_id; | |||||
| } | |||||
| if (task_result != "Success") { | |||||
| MS_EXCEPTION(ArgumentError) << "task pre compile Failed, task id:" << task_id << ", cause:" << task_result; | |||||
| } | |||||
| build_manger->PreTaskFinishProcess(task_id, pre_build_result); | |||||
| } | |||||
| return true; | |||||
| } | |||||
| bool TbeOpParallelBuild(const std::vector<AnfNodePtr> &anf_nodes) { | bool TbeOpParallelBuild(const std::vector<AnfNodePtr> &anf_nodes) { | ||||
| auto build_manger = std::make_shared<ParallelBuildManager>(); | auto build_manger = std::make_shared<ParallelBuildManager>(); | ||||
| MS_EXCEPTION_IF_NULL(build_manger); | MS_EXCEPTION_IF_NULL(build_manger); | ||||
| @@ -122,15 +86,8 @@ bool TbeOpParallelBuild(const std::vector<AnfNodePtr> &anf_nodes) { | |||||
| return build_manger->GenSameOpKernelMod(); | return build_manger->GenSameOpKernelMod(); | ||||
| } | } | ||||
| ParallelBuildManager::ParallelBuildManager() {} | |||||
| ParallelBuildManager::~ParallelBuildManager() { ResetTaskInfo(); } | ParallelBuildManager::~ParallelBuildManager() { ResetTaskInfo(); } | ||||
| void ParallelBuildManager::SavePreTaskInfo(int32_t task_id, const mindspore::AnfNodePtr &anf_node) { | |||||
| MS_LOG(INFO) << "SavePreTaskInfo, task id: " << task_id; | |||||
| pre_task_map_[task_id] = anf_node; | |||||
| } | |||||
| void ParallelBuildManager::SaveTaskInfo(int32_t task_id, const mindspore::AnfNodePtr &anf_node, | void ParallelBuildManager::SaveTaskInfo(int32_t task_id, const mindspore::AnfNodePtr &anf_node, | ||||
| const std::string &json_name, const std::vector<size_t> &input_size_list, | const std::string &json_name, const std::vector<size_t> &input_size_list, | ||||
| const std::vector<size_t> &output_size_list, int32_t scope_id) { | const std::vector<size_t> &output_size_list, int32_t scope_id) { | ||||
| @@ -149,42 +106,11 @@ void ParallelBuildManager::SaveTaskInfo(int32_t task_id, const mindspore::AnfNod | |||||
| task_map_[task_id] = task_info; | task_map_[task_id] = task_info; | ||||
| } | } | ||||
| bool ParallelBuildManager::IsAllPreTaskFinish() const { | |||||
| MS_LOG(INFO) << "wait pre build process task_num: " << pre_task_map_.size(); | |||||
| return pre_task_map_.empty(); | |||||
| } | |||||
| bool ParallelBuildManager::IsAllTaskFinish() const { | bool ParallelBuildManager::IsAllTaskFinish() const { | ||||
| MS_LOG(INFO) << "wait process task_num: " << task_map_.size(); | MS_LOG(INFO) << "wait process task_num: " << task_map_.size(); | ||||
| return task_map_.empty(); | return task_map_.empty(); | ||||
| } | } | ||||
| void ParallelBuildManager::PreTaskFinishProcess(int32_t task_id, const std::string &pre_build_result) { | |||||
| auto task_iter = pre_task_map_.find(task_id); | |||||
| if (task_iter == pre_task_map_.end()) { | |||||
| MS_EXCEPTION(ArgumentError) << "can find pre task_id:" << task_id; | |||||
| } | |||||
| auto node = task_iter->second; | |||||
| auto builder = | |||||
| std::make_shared<kernel::KernelBuildInfo::KernelBuildInfoBuilder>(AnfAlgo::GetSelectKernelBuildInfo(node)); | |||||
| std::string start_flag = "fusion_pattern_start"; | |||||
| std::string end_flag = "fusion_pattern_end"; | |||||
| int start = pre_build_result.find(start_flag); | |||||
| int end = pre_build_result.find(end_flag); | |||||
| if (start != -1 && end != -1 && end >= start) { | |||||
| std::string result = pre_build_result.substr(start + start_flag.size(), end - start - start_flag.size()); | |||||
| if (result == "") { | |||||
| (void)pre_task_map_.erase(task_iter); | |||||
| return; | |||||
| } | |||||
| transform(result.begin(), result.end(), result.begin(), ::toupper); | |||||
| FusionType fusion_type = tbe::GetFusionType(result); | |||||
| builder->SetFusionType(fusion_type); | |||||
| AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), node.get()); | |||||
| } | |||||
| (void)pre_task_map_.erase(task_iter); | |||||
| } | |||||
| std::pair<int32_t, KernelModPtr> ParallelBuildManager::TaskFinishProcess(int32_t task_id, bool set_kernel_mod) { | std::pair<int32_t, KernelModPtr> ParallelBuildManager::TaskFinishProcess(int32_t task_id, bool set_kernel_mod) { | ||||
| auto task_iter = task_map_.find(task_id); | auto task_iter = task_map_.find(task_id); | ||||
| if (task_iter == task_map_.end()) { | if (task_iter == task_map_.end()) { | ||||
| @@ -28,7 +28,6 @@ | |||||
| namespace mindspore { | namespace mindspore { | ||||
| namespace kernel { | namespace kernel { | ||||
| bool TbeOpParallelPreBuild(const std::vector<AnfNodePtr> &anf_nodes); | |||||
| bool TbeOpParallelBuild(const std::vector<AnfNodePtr> &anf_nodes); | bool TbeOpParallelBuild(const std::vector<AnfNodePtr> &anf_nodes); | ||||
| struct KernelBuildTaskInfo { | struct KernelBuildTaskInfo { | ||||
| @@ -42,9 +41,8 @@ struct KernelBuildTaskInfo { | |||||
| class ParallelBuildManager { | class ParallelBuildManager { | ||||
| public: | public: | ||||
| ParallelBuildManager(); | |||||
| ParallelBuildManager() = default; | |||||
| ~ParallelBuildManager(); | ~ParallelBuildManager(); | ||||
| void SavePreTaskInfo(int32_t task_id, const AnfNodePtr &anf_node); | |||||
| void SaveTaskInfo(int32_t task_id, const AnfNodePtr &anf_node, const std::string &json_name, | void SaveTaskInfo(int32_t task_id, const AnfNodePtr &anf_node, const std::string &json_name, | ||||
| const std::vector<size_t> &input_size_list, const std::vector<size_t> &output_size_list, | const std::vector<size_t> &input_size_list, const std::vector<size_t> &output_size_list, | ||||
| int32_t scope_id = 0); | int32_t scope_id = 0); | ||||
| @@ -54,10 +52,7 @@ class ParallelBuildManager { | |||||
| bool SearchInCache(const std::string &json_name, const std::string &processor, | bool SearchInCache(const std::string &json_name, const std::string &processor, | ||||
| const std::vector<size_t> &input_size_list, const std::vector<size_t> &output_size_list, | const std::vector<size_t> &input_size_list, const std::vector<size_t> &output_size_list, | ||||
| AnfNode *node) const; | AnfNode *node) const; | ||||
| bool IsAllPreTaskFinish() const; | |||||
| bool IsAllTaskFinish() const; | bool IsAllTaskFinish() const; | ||||
| void PreTaskFinishProcess(int32_t task_id, const std::string &pre_build_result); | |||||
| std::pair<int32_t, KernelModPtr> TaskFinishProcess(int32_t task_id, bool set_kernel_mod = true); | std::pair<int32_t, KernelModPtr> TaskFinishProcess(int32_t task_id, bool set_kernel_mod = true); | ||||
| KernelModPtr GenKernelMod(const string &json_name, const string &processor, | KernelModPtr GenKernelMod(const string &json_name, const string &processor, | ||||
| const std::vector<size_t> &input_size_list, const std::vector<size_t> &output_size_list, | const std::vector<size_t> &input_size_list, const std::vector<size_t> &output_size_list, | ||||
| @@ -1187,6 +1187,19 @@ TypeId AnfRuntimeAlgorithm::GetPrevNodeOutputPrecision(const AnfNodePtr &node, s | |||||
| return GetCNodeOutputPrecision(kernel_with_index.first); | return GetCNodeOutputPrecision(kernel_with_index.first); | ||||
| } | } | ||||
| bool AnfRuntimeAlgorithm::IsDynamicShape(const AnfNodePtr &node) { | |||||
| if (!node->isa<CNode>()) { | |||||
| return false; | |||||
| } | |||||
| auto cnode = node->cast<CNodePtr>(); | |||||
| MS_EXCEPTION_IF_NULL(cnode); | |||||
| auto has_attr = AnfAlgo::HasNodeAttr(kAttrIsDynamicShape, cnode); | |||||
| if (!has_attr) { | |||||
| return false; | |||||
| } | |||||
| return AnfAlgo::GetNodeAttr<bool>(node, kAttrIsDynamicShape); | |||||
| } | |||||
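The new AnfRuntimeAlgorithm::IsDynamicShape reduces to: the node is a CNode, it carries the is_dynamic_shape attribute (kAttrIsDynamicShape, added below), and that attribute is true. A standalone model with a plain map standing in for node attributes:

```cpp
#include <string>
#include <unordered_map>

// Model of the check above: non-CNodes are never dynamic-shape; CNodes
// are dynamic-shape only when "is_dynamic_shape" exists and is true.
using AttrMap = std::unordered_map<std::string, bool>;

bool IsDynamicShape(bool is_cnode, const AttrMap &attrs) {
  if (!is_cnode) {
    return false;
  }
  auto it = attrs.find("is_dynamic_shape");
  return it != attrs.end() && it->second;
}
```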
| bool AnfRuntimeAlgorithm::IsCondControlKernel(const CNodePtr &node) { | bool AnfRuntimeAlgorithm::IsCondControlKernel(const CNodePtr &node) { | ||||
| MS_EXCEPTION_IF_NULL(node); | MS_EXCEPTION_IF_NULL(node); | ||||
| if (node->inputs().empty()) { | if (node->inputs().empty()) { | ||||
| @@ -217,6 +217,7 @@ class AnfRuntimeAlgorithm { | |||||
| static TypeId GetCNodeOutputPrecision(const AnfNodePtr &node); | static TypeId GetCNodeOutputPrecision(const AnfNodePtr &node); | ||||
| // get fix output precision from prev node, input_idx is the input index of current node related to prev node. | // get fix output precision from prev node, input_idx is the input index of current node related to prev node. | ||||
| static TypeId GetPrevNodeOutputPrecision(const AnfNodePtr &node, size_t input_idx); | static TypeId GetPrevNodeOutputPrecision(const AnfNodePtr &node, size_t input_idx); | ||||
| static bool IsDynamicShape(const AnfNodePtr &node); | |||||
| static bool IsCondControlKernel(const CNodePtr &node); | static bool IsCondControlKernel(const CNodePtr &node); | ||||
| static bool IsIndependentNode(const CNodePtr &node); | static bool IsIndependentNode(const CNodePtr &node); | ||||
| }; | }; | ||||
| @@ -445,7 +445,6 @@ void AscendSession::InitRuntimeResource() { | |||||
| } | } | ||||
| void AscendSession::HardwareOptimize(const std::shared_ptr<KernelGraph> &kernel_graph) const { | void AscendSession::HardwareOptimize(const std::shared_ptr<KernelGraph> &kernel_graph) const { | ||||
| device::ascend::KernelPreBuild(kernel_graph.get()); | |||||
| MS_LOG(INFO) << "HardwareOptimize start!"; | MS_LOG(INFO) << "HardwareOptimize start!"; | ||||
| opt::AscendBackendOptimization(kernel_graph); | opt::AscendBackendOptimization(kernel_graph); | ||||
| opt::AscendGraphKernelCommonProcess(kernel_graph); | opt::AscendGraphKernelCommonProcess(kernel_graph); | ||||
| @@ -19,7 +19,8 @@ | |||||
| #include <vector> | #include <vector> | ||||
| #include <string> | #include <string> | ||||
| #include <memory> | #include <memory> | ||||
| #include <set> | |||||
| #include <map> | |||||
| #include "runtime/device/ascend/kernel_select_ascend.h" | #include "runtime/device/ascend/kernel_select_ascend.h" | ||||
| #include "runtime/device/kernel_info.h" | #include "runtime/device/kernel_info.h" | ||||
| #include "backend/kernel_compiler/kernel.h" | #include "backend/kernel_compiler/kernel.h" | ||||
| @@ -61,32 +62,6 @@ static kernel::KernelModPtr SerialCompileImpl(const AnfNodePtr &anf_node) { | |||||
| return kernel_mod_ptr; | return kernel_mod_ptr; | ||||
| } | } | ||||
| static bool KernelPreBuildParallelCompile(const mindspore::session::KernelGraph *kernel_graph_ptr) { | |||||
| MS_EXCEPTION_IF_NULL(kernel_graph_ptr); | |||||
| std::vector<AnfNodePtr> tbe_nodes; | |||||
| for (const auto &anf_node : kernel_graph_ptr->execution_order()) { | |||||
| MS_EXCEPTION_IF_NULL(anf_node); | |||||
| if (!AnfAlgo::IsRealKernel(anf_node)) { | |||||
| continue; | |||||
| } | |||||
| KernelType kernel_type = AnfAlgo::GetKernelType(anf_node); | |||||
| switch (kernel_type) { | |||||
| case KernelType::TBE_KERNEL: { | |||||
| if (AnfAlgo::GetKernelMod(anf_node) == nullptr && | |||||
| AnfAlgo::GetFusionType(anf_node) == kernel::FusionType::DYNAMIC) { | |||||
| tbe_nodes.push_back(anf_node); | |||||
| } | |||||
| break; | |||||
| } | |||||
| default: { | |||||
| break; | |||||
| } | |||||
| } | |||||
| } | |||||
| bool ret = kernel::TbeOpParallelPreBuild(tbe_nodes); | |||||
| return ret; | |||||
| } | |||||
| static bool KernelBuildParallelCompile(const mindspore::session::KernelGraph *kernel_graph_ptr) { | static bool KernelBuildParallelCompile(const mindspore::session::KernelGraph *kernel_graph_ptr) { | ||||
| MS_EXCEPTION_IF_NULL(kernel_graph_ptr); | MS_EXCEPTION_IF_NULL(kernel_graph_ptr); | ||||
| std::vector<AnfNodePtr> tbe_nodes; | std::vector<AnfNodePtr> tbe_nodes; | ||||
| @@ -237,12 +212,6 @@ static bool IsAtomicNode(const CNodePtr &kernel_node) { | |||||
| return !(workspace_indexs.empty() && output_indexs.empty()); | return !(workspace_indexs.empty() && output_indexs.empty()); | ||||
| } | } | ||||
| bool KernelPreBuild(const mindspore::session::KernelGraph *kernel_graph_ptr) { | |||||
| MS_EXCEPTION_IF_NULL(kernel_graph_ptr); | |||||
| bool ret = device::ascend::KernelPreBuildParallelCompile(kernel_graph_ptr); | |||||
| return ret; | |||||
| } | |||||
| bool KernelBuild(const mindspore::session::KernelGraph *kernel_graph_ptr) { | bool KernelBuild(const mindspore::session::KernelGraph *kernel_graph_ptr) { | ||||
| MS_EXCEPTION_IF_NULL(kernel_graph_ptr); | MS_EXCEPTION_IF_NULL(kernel_graph_ptr); | ||||
| TbeUtils::LoadCache(); | TbeUtils::LoadCache(); | ||||
| @@ -22,10 +22,6 @@ | |||||
| namespace mindspore { | namespace mindspore { | ||||
| namespace device { | namespace device { | ||||
| namespace ascend { | namespace ascend { | ||||
| /** | |||||
| * @brief kernel pre build for ascend. | |||||
| */ | |||||
| bool KernelPreBuild(const mindspore::session::KernelGraph *kernel_graph_ptr); | |||||
| /** | /** | ||||
| * @brief kernel build for ascend. | * @brief kernel build for ascend. | ||||
| */ | */ | ||||
| @@ -32,6 +32,7 @@ namespace mindspore { | |||||
| // op name. Op which not exists in operator/ops.h, so define it's name here | // op name. Op which not exists in operator/ops.h, so define it's name here | ||||
| constexpr auto kFour2FiveOpName = "Four2Five"; | constexpr auto kFour2FiveOpName = "Four2Five"; | ||||
| constexpr auto kFive2FourOpName = "Five2Four"; | constexpr auto kFive2FourOpName = "Five2Four"; | ||||
| constexpr auto kConv2DOpName = "Conv2D"; | |||||
| constexpr auto kConvBN1OpName = "ConvBN1"; | constexpr auto kConvBN1OpName = "ConvBN1"; | ||||
| constexpr auto kBN2AddReluOpName = "BN2AddRelu"; | constexpr auto kBN2AddReluOpName = "BN2AddRelu"; | ||||
| constexpr auto kBN2ReLUOpName = "BN2Relu"; | constexpr auto kBN2ReLUOpName = "BN2Relu"; | ||||
| @@ -273,6 +274,7 @@ constexpr auto kAttrPadDimSize = "pad_dim_size"; | |||||
| constexpr auto kAttrNumSegments = "num_segments"; | constexpr auto kAttrNumSegments = "num_segments"; | ||||
| constexpr auto kAttrBegin = "begin"; | constexpr auto kAttrBegin = "begin"; | ||||
| constexpr auto kAttrSize = "size"; | constexpr auto kAttrSize = "size"; | ||||
| constexpr auto kAttrIsDynamicShape = "is_dynamic_shape"; | |||||
| // attr value | // attr value | ||||
| constexpr auto kValueTargetSwitch = "target_switch"; | constexpr auto kValueTargetSwitch = "target_switch"; | ||||
| @@ -13,6 +13,7 @@ | |||||
| # limitations under the License. | # limitations under the License. | ||||
| """aicpu ops""" | """aicpu ops""" | ||||
| from .unique import _unique_aicpu | |||||
| from .init_data_set_queue import _init_data_set_queue_aicpu | from .init_data_set_queue import _init_data_set_queue_aicpu | ||||
| from .embedding_lookup import _embedding_lookup_aicpu | from .embedding_lookup import _embedding_lookup_aicpu | ||||
| from .padding import _padding_aicpu | from .padding import _padding_aicpu | ||||
| @@ -0,0 +1,31 @@ | |||||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||||
| # | |||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| # you may not use this file except in compliance with the License. | |||||
| # You may obtain a copy of the License at | |||||
| # | |||||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||||
| # | |||||
| # Unless required by applicable law or agreed to in writing, software | |||||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| # See the License for the specific language governing permissions and | |||||
| # limitations under the License. | |||||
| # ============================================================================ | |||||
| """Unique op""" | |||||
| from mindspore.ops.op_info_register import op_info_register, AiCPURegOp, DataType | |||||
| unique_op_info = AiCPURegOp("Unique") \ | |||||
| .fusion_type("OPAQUE") \ | |||||
| .input(0, "x", "required") \ | |||||
| .output(0, "y", "required") \ | |||||
| .output(1, "idx", "required") \ | |||||
| .dtype_format(DataType.I32_Default, DataType.I32_Default, DataType.I32_Default) \ | |||||
| .dtype_format(DataType.I64_Default, DataType.I64_Default, DataType.I64_Default) \ | |||||
| .get_op_info() | |||||
| @op_info_register(unique_op_info) | |||||
| def _unique_aicpu(): | |||||
| """Unique AiCPU register""" | |||||
| return | |||||
| @@ -17,7 +17,7 @@ | |||||
| from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType | from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType | ||||
| matmul_op_info = TBERegOp("MatMul") \ | matmul_op_info = TBERegOp("MatMul") \ | ||||
| .fusion_type("ELEMWISE") \ | |||||
| .fusion_type("OPAQUE") \ | |||||
| .async_flag(False) \ | .async_flag(False) \ | ||||
| .binfile_name("matmul.so") \ | .binfile_name("matmul.so") \ | ||||
| .compute_cost(10) \ | .compute_cost(10) \ | ||||
| @@ -91,6 +91,7 @@ from ._thor_ops import (CusBatchMatMul, CusCholeskyTrsm, CusFusedAbsMax1, CusImg | |||||
| from .sparse_ops import SparseToDense | from .sparse_ops import SparseToDense | ||||
| __all__ = [ | __all__ = [ | ||||
| 'Unique', | |||||
| 'ReverseSequence', | 'ReverseSequence', | ||||
| 'EditDistance', | 'EditDistance', | ||||
| 'CropAndResize', | 'CropAndResize', | ||||
| @@ -597,9 +597,9 @@ class Unique(Primitive): | |||||
| containing indices of elements in the input corresponding to the output tensor. | containing indices of elements in the input corresponding to the output tensor. | ||||
| Examples: | Examples: | ||||
| >>> x = Tensor(np.array([1, 2, 5, 2]), mindspore.float32) | |||||
| >>> x = Tensor(np.array([1, 2, 5, 2]), mindspore.int32) | |||||
| >>> out = P.Unique()(x) | >>> out = P.Unique()(x) | ||||
| (Tensor([1, 2, 5], mindspore.int32), Tensor([0, 1, 2, 1], mindspore.float32)) | |||||
| (Tensor([1, 2, 5], mindspore.int32), Tensor([0, 1, 2, 1], mindspore.int32)) | |||||
| """ | """ | ||||
| @prim_attr_register | @prim_attr_register | ||||
| def __init__(self): | def __init__(self): | ||||
| @@ -35,39 +35,5 @@ StatusFactory::StatusFactory() {} | |||||
| std::mutex& StatusFactory::GetMutex() { return GetInstance()->rwMutex_; } | std::mutex& StatusFactory::GetMutex() { return GetInstance()->rwMutex_; } | ||||
| TsdClient* TsdClient::GetInstance() { | |||||
| static TsdClient instance; | |||||
| return &instance; | |||||
| } | |||||
| /** | |||||
| * @ingroup TsdClient | |||||
| * @brief Constructor | |||||
| */ | |||||
| TsdClient::TsdClient() { rankSize_ = 1; } | |||||
| /** | |||||
| * @ingroup TsdClient | |||||
| * @brief 析构函数 | |||||
| */ | |||||
| TsdClient::~TsdClient() = default; | |||||
| /** | |||||
| * @ingroup TsdClient | |||||
| * @brief The framework sends the command to launch the hccp and computer processes | |||||
| * @param [in] phyDeviceId : physical device ID passed in by FMK | |||||
| * @param [in] rankSize : rank size passed in by FMK | |||||
| * @return TDT_OK on success, or another error code | |||||
| */ | |||||
| TDT_StatusT TsdClient::Open(const uint32_t deviceId, const uint32_t rankSize) { return TDT_OK; } | |||||
| /** | |||||
| * @ingroup TsdClient | |||||
| * @brief Notify TsdClient to release related resources | |||||
| * @param None | |||||
| * @return TDT_OK:成功 或者其他错误码 | |||||
| */ | |||||
| TDT_StatusT TsdClient::Close() { return TDT_OK; } | |||||
| } // namespace tdt | } // namespace tdt | ||||
| #endif // TDT_MOCK_H | #endif // TDT_MOCK_H | ||||