Merge pull request !5685 from guozhijian/udpate_run_from_c75b100_to_c75b150_master
| @@ -1 +1 @@ | |||
| Subproject commit 622af6c1c50034bea5a08bd409c5a410782bfe53 | |||
| Subproject commit 2dbfefcdd0d4b958801403dbaf9efe46447dccd2 | |||
| @@ -17,8 +17,6 @@ import json | |||
| import os | |||
| import sys | |||
| from te.platform.cce_conf import te_set_version | |||
| from te.platform.fusion_manager import op_build_cfg_dis, op_build_cfg_en, set_current_op_name, \ | |||
| init_op_pattern, set_op_params, set_op_build_type, get_op_pattern, set_current_op_func_name | |||
| from te.platform.fusion_util import fusion_op | |||
| from common import check_kernel_info, get_args, get_build_in_impl_path, get_ddk_version | |||
| @@ -27,7 +25,6 @@ build_in_impl_path = get_build_in_impl_path() | |||
| # op function list | |||
| op_build = "compile" | |||
| op_pre_build = "pre_build" | |||
| fusion_pattern_start_flag = "fusion_pattern_start" | |||
| fusion_pattern_end_flag = "fusion_pattern_end" | |||
| @@ -83,19 +80,7 @@ def build_op(build_type, json_str): | |||
| else: | |||
| op_module = __import__("impl."+op_name, globals(), locals(), [op_name], 0) | |||
| # get function | |||
| if build_type == op_pre_build: | |||
| # set op parameter | |||
| op_build_cfg_dis() | |||
| set_current_op_func_name(op_name) | |||
| set_current_op_name(kernel_name) | |||
| init_op_pattern() | |||
| set_op_params(*outputs_args, *attrs_args, kernel_name=kernel_name) | |||
| set_op_build_type('prebuild') | |||
| if custom_flag: | |||
| py_fn_name = kernel_info['op_info']['name'] | |||
| else: | |||
| py_fn_name = op_name | |||
| elif build_type == op_build: | |||
| if build_type == op_build: | |||
| if custom_flag: | |||
| py_fn_name = kernel_info['op_info']['name'] | |||
| else: | |||
| @@ -106,13 +91,6 @@ def build_op(build_type, json_str): | |||
| if op_func is None: | |||
| raise ValueError("Op:{} function {} is not supported by Tbe.".format(op_name, build_type)) | |||
| # pre build | |||
| if build_type == op_pre_build: | |||
| op_func(*inputs_args, *outputs_args, *attrs_args, kernel_name=kernel_name) | |||
| # disable only pattern configuration | |||
| op_build_cfg_en() | |||
| return get_op_pattern() | |||
| # call function | |||
| if kernel_name[0:19] == "bounding_box_encode": | |||
| return op_func(*inputs_args, *outputs_args, *attrs_args, kernel_name_val=kernel_name) | |||
| @@ -120,8 +98,6 @@ def build_op(build_type, json_str): | |||
| return op_func(*inputs_args, *outputs_args, *attrs_args, kernel_name=kernel_name) | |||
| except Exception as e: | |||
| if build_type == op_pre_build: | |||
| op_build_cfg_en() | |||
| raise RuntimeError(e) | |||
| @@ -136,14 +112,9 @@ def compile_fusion_op(json_str): | |||
| Exception: If specific keyword is not found. | |||
| """ | |||
| args = json.loads(json_str) | |||
| te_set_version(ddk_version) | |||
| if 'fusion_op' not in args or not args['fusion_op']: | |||
| raise ValueError("Json string Errors, key:fusion_op not found.") | |||
| if 'prebuild_ops' not in args or not args['prebuild_ops']: | |||
| raise ValueError("Json string Errors, key:prebuild_ops not found.") | |||
| pre_build_op_list = args['prebuild_ops'] | |||
| for op in pre_build_op_list: | |||
| build_op(op_pre_build, json.dumps(op)) | |||
| fusion_op_arg = args['fusion_op'] | |||
| return fusion_op(json.dumps(fusion_op_arg)) | |||
| @@ -159,8 +130,6 @@ def compile_with_json(json_str): | |||
| json_info = json.loads(json_str) | |||
| if "fusion_op" in json_info: | |||
| ret = compile_fusion_op(json_str) | |||
| elif "compile_type" in json_info: | |||
| ret = build_op(op_pre_build, json_str) | |||
| else: | |||
| ret = build_op(op_build, json_str) | |||
| return ret | |||
| @@ -20,6 +20,8 @@ | |||
| #include <vector> | |||
| #include <memory> | |||
| #include <algorithm> | |||
| #include <map> | |||
| #include <climits> | |||
| #include "runtime/device/kernel_runtime.h" | |||
| #include "backend/kernel_compiler/aicpu/aicpu_kernel_mod.h" | |||
| #include "backend/kernel_compiler/akg/akg_kernel_build.h" | |||
| @@ -218,7 +220,7 @@ void SetNodeInputs(const std::shared_ptr<AnfNode> &anf_node, mindspore::NodeDef | |||
| mindspore::TensorShape_Dim *dim = tensorShape->add_dim(); | |||
| dim->set_size((::google::protobuf::int64)item); | |||
| } | |||
| node_inputs->set_tensor_type((mindspore::DataType)input_data_type); | |||
| node_inputs->set_tensor_type(input_data_type); | |||
| node_inputs->set_mem_device("HBM"); | |||
| } | |||
| } | |||
| @@ -245,7 +247,7 @@ void SetNodeOutputs(const std::shared_ptr<AnfNode> &anf_node, mindspore::NodeDef | |||
| } | |||
| TypeId output_type = AnfAlgo::GetOutputDeviceDataType(anf_node, output_index); | |||
| int32_t output_data_type = AicpuOpUtil::MsTypeToProtoType(output_type); | |||
| node_outputs->set_tensor_type((mindspore::DataType)output_data_type); | |||
| node_outputs->set_tensor_type(output_data_type); | |||
| node_outputs->set_mem_device("HBM"); | |||
| } | |||
| } | |||
| @@ -287,6 +289,109 @@ bool CreateNodeDefBytes(const std::shared_ptr<AnfNode> &anf_node, | |||
| return true; | |||
| } | |||
| bool CreateExtInfo(const std::shared_ptr<AnfNode> &anf_node, const std::shared_ptr<AicpuOpKernelMod> &kernel_mod_ptr) { | |||
| if (!anf_node->isa<CNode>()) { | |||
| return true; | |||
| } | |||
| if (!AnfAlgo::IsDynamicShape(anf_node)) { | |||
| return true; | |||
| } | |||
| MS_LOG(INFO) << "CreateExtInfo start, " << anf_node->fullname_with_scope(); | |||
| int32_t unknown_shape_type = UnknowShapeOpType::DEPEND_COMPUTE; | |||
| uint64_t ext_info_head_len = kExtInfoHeadSize; | |||
| std::string ext_info; | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(anf_node); | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(anf_node); | |||
| // 1.addr:unknown shape type | |||
| uint64_t ext_info_len = ext_info.size(); | |||
| ext_info_len += ext_info_head_len + sizeof(int32_t); | |||
| // 2.addr:input ShapeAndType | |||
| ext_info_len += ext_info_head_len + input_num * sizeof(ShapeAndType); | |||
| // 3.addr:output ShapeAndType | |||
| ext_info_len += ext_info_head_len + output_num * sizeof(ShapeAndType); | |||
| uint64_t ext_info_offset = ext_info.size(); | |||
| ext_info.resize(ext_info_len, 0); | |||
| char *ext_info_buf = ext_info.data(); | |||
| // deal1: unknown shape type | |||
| ExtInfo *info = reinterpret_cast<ExtInfo *>(ext_info_buf + ext_info_offset); | |||
| info->infoType = FWK_ADPT_EXT_SHAPE_TYPE; | |||
| info->infoLen = sizeof(int32_t); | |||
| ext_info_offset += ext_info_head_len; | |||
| int32_t *shape_type = reinterpret_cast<int32_t *>(ext_info_buf + ext_info_offset); | |||
| *shape_type = unknown_shape_type; | |||
| ext_info_offset += info->infoLen; | |||
| // deal2:input ShapeAndType | |||
| info = reinterpret_cast<ExtInfo *>(ext_info_buf + ext_info_offset); | |||
| info->infoType = FWK_ADPT_EXT_INPUT_SHAPE; | |||
| info->infoLen = input_num * sizeof(ShapeAndType); | |||
| ext_info_offset += ext_info_head_len; | |||
| ShapeAndType *inputs = reinterpret_cast<ShapeAndType *>(ext_info_buf + ext_info_offset); | |||
| for (size_t input_index = 0; input_index < input_num; input_index++) { | |||
| TypeId input_type = AnfAlgo::GetInputDeviceDataType(anf_node, input_index); | |||
| std::vector<size_t> input_shape; | |||
| int32_t input_data_type; | |||
| if (input_type == kObjectTypeString) { | |||
| auto cnode = anf_node->cast<CNodePtr>(); | |||
| MS_EXCEPTION_IF_NULL(cnode); | |||
| auto input_node = cnode->inputs()[input_index + 1]; | |||
| auto value_ptr = GetValueNode(input_node); | |||
| auto value = GetValue<std::string>(value_ptr); | |||
| input_shape.push_back(1); | |||
| input_shape.push_back(value.size()); | |||
| input_data_type = AicpuOpUtil::MsTypeToProtoType(kTypeUnknown); | |||
| } else { | |||
| input_shape = AnfAlgo::GetInputDeviceShape(anf_node, input_index); | |||
| input_data_type = AicpuOpUtil::MsTypeToProtoType(input_type); | |||
| } | |||
| inputs[input_index].type = input_data_type; | |||
| size_t input_shape_index = 0; | |||
| for (; input_shape_index < input_shape.size(); input_shape_index++) { | |||
| inputs[input_index].dims[input_shape_index] = SizeToLong(input_shape[input_shape_index]); | |||
| } | |||
| if (input_shape.size() < kMaxShapeDims) { | |||
| inputs[input_index].dims[input_shape_index] = LLONG_MIN; | |||
| } | |||
| } | |||
| ext_info_offset += info->infoLen; | |||
| // deal3:output ShapeAndType | |||
| info = reinterpret_cast<ExtInfo *>(ext_info_buf + ext_info_offset); | |||
| info->infoType = FWK_ADPT_EXT_OUTPUT_SHAPE; | |||
| info->infoLen = output_num * sizeof(ShapeAndType); | |||
| ext_info_offset += ext_info_head_len; | |||
| ShapeAndType *outputs = reinterpret_cast<ShapeAndType *>(ext_info_buf + ext_info_offset); | |||
| for (size_t output_index = 0; output_index < output_num; output_index++) { | |||
| std::vector<size_t> output_shape = AnfAlgo::GetOutputDeviceShape(anf_node, output_index); | |||
| TypeId output_type = AnfAlgo::GetOutputDeviceDataType(anf_node, output_index); | |||
| int32_t output_data_type = AicpuOpUtil::MsTypeToProtoType(output_type); | |||
| outputs[output_index].type = output_data_type; | |||
| size_t output_shape_index = 0; | |||
| for (; output_shape_index < output_shape.size(); output_shape_index++) { | |||
| outputs[output_index].dims[output_shape_index] = SizeToLong(output_shape[output_shape_index]); | |||
| } | |||
| if (output_shape_index < kMaxShapeDims) { | |||
| outputs[output_index].dims[output_shape_index] = LLONG_MIN; | |||
| } | |||
| } | |||
| // set ext info | |||
| kernel_mod_ptr->SetExtInfo(ext_info); | |||
| return true; | |||
| } | |||
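| // Editor's note: the three "deal" steps above emit a TLV-style buffer. As a minimal, | |||
| // hypothetical sketch (not part of this PR), a consumer could walk it as below, assuming | |||
| // only the packed ExtInfo struct and kExtInfoHeadSize declared in aicpu_util.h later in | |||
| // this diff: | |||
| inline void DumpExtInfo(const std::string &ext_info) { | |||
| uint64_t offset = 0; | |||
| while (offset + kExtInfoHeadSize <= ext_info.size()) { | |||
| auto head = reinterpret_cast<const ExtInfo *>(ext_info.data() + offset); | |||
| // each entry: an 8-byte head (infoType + infoLen) followed by infoLen payload bytes | |||
| MS_LOG(INFO) << "ext_info entry, type:" << head->infoType << " len:" << head->infoLen; | |||
| offset += kExtInfoHeadSize + head->infoLen; | |||
| } | |||
| } | |||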
| KernelModPtr AicpuOpBuild(const std::shared_ptr<AnfNode> &anf_node) { | |||
| MS_EXCEPTION_IF_NULL(anf_node); | |||
| std::string op_name = AnfAlgo::GetCNodeName(anf_node); | |||
| @@ -300,6 +405,11 @@ KernelModPtr AicpuOpBuild(const std::shared_ptr<AnfNode> &anf_node) { | |||
| if (!CreateNodeDefBytes(anf_node, kernel_mod_ptr)) { | |||
| MS_LOG(EXCEPTION) << "Create nodeDefBytes faild!"; | |||
| } | |||
| if (!CreateExtInfo(anf_node, kernel_mod_ptr)) { | |||
| MS_LOG(EXCEPTION) << "Create nodeDefBytes faild!"; | |||
| } | |||
| if (!SetIOSize(anf_node, kernel_mod_ptr)) { | |||
| MS_LOG(EXCEPTION) << "Set input output size list failed."; | |||
| } | |||
| @@ -43,6 +43,7 @@ AicpuOpKernelMod::~AicpuOpKernelMod() { | |||
| input_size_list_.clear(); | |||
| output_size_list_.clear(); | |||
| workspace_size_list_.clear(); | |||
| ext_info_.clear(); | |||
| } | |||
| void AicpuOpKernelMod::SetInputSizeList(const std::vector<size_t> &size_list) { input_size_list_ = size_list; } | |||
| @@ -54,6 +55,7 @@ const std::vector<size_t> &AicpuOpKernelMod::GetWorkspaceSizeList() const { retu | |||
| void AicpuOpKernelMod::SetInputList(const std::vector<int64_t> &inputList) { inputList_ = inputList; } | |||
| void AicpuOpKernelMod::SetOutputList(const std::vector<int64_t> &outputList) { outputList_ = outputList; } | |||
| void AicpuOpKernelMod::SetNodeDef(const std::string &nodeDef) { (void)node_def_str_.assign(nodeDef); } | |||
| void AicpuOpKernelMod::SetExtInfo(const std::string &ext_info) { ext_info_ = ext_info; } | |||
| void AicpuOpKernelMod::SetNodeName(const std::string &node_name) { node_name_ = node_name; } | |||
| void AicpuOpKernelMod::SetAnfNode(const mindspore::AnfNodePtr &anf_node) { | |||
| MS_EXCEPTION_IF_NULL(anf_node); | |||
| @@ -84,16 +86,30 @@ void AicpuOpKernelMod::CreateCpuKernelInfo(const std::vector<AddressPtr> &inputs | |||
| auto node_def_len = node_def_str_.length(); | |||
| param_len += node_def_len; | |||
| param_len += sizeof(uint32_t); | |||
| AicpuParamHead aicpu_param_head; | |||
| aicpu_param_head.length = param_len; | |||
| aicpu_param_head.ioAddrNum = io_addrs_num; | |||
| if (ext_info_.empty()) { | |||
| MS_LOG(INFO) << "Static Shape Kernel"; | |||
| aicpu_param_head.extInfoLength = 0; | |||
| aicpu_param_head.extInfoAddr = 0; | |||
| } else { | |||
| MS_LOG(INFO) << "Dynamic Kernel Ext Info size:" << ext_info_.size(); | |||
| } | |||
| // Create taskArgs: AicpuParamHead + ioAddrs + notifyId + customizedAttr | |||
| AicpuParamHead paramHead = {static_cast<uint32_t>(param_len), static_cast<uint32_t>(io_addrs_num)}; | |||
| args_.clear(); | |||
| (void)args_.append(reinterpret_cast<const char *>(¶mHead), sizeof(AicpuParamHead)); | |||
| (void)args_.append(reinterpret_cast<const char *>(&aicpu_param_head), sizeof(AicpuParamHead)); | |||
| // TaskArgs append ioAddrs | |||
| if (io_addrs_size != 0) { | |||
| (void)args_.append(reinterpret_cast<const char *>(io_addrs.data()), io_addrs_size); | |||
| } | |||
| // size for node_def | |||
| args_.append(reinterpret_cast<const char *>(&node_def_len), sizeof(uint32_t)); | |||
| // When it's aicpu customized ops, taskArgs should append customized attr | |||
| if (node_def_len != 0) { | |||
| (void)args_.append(reinterpret_cast<const char *>(node_def_str_.data()), node_def_len); | |||
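| // Editor's note: a small cross-check sketch (an assumption, not in this PR). After the | |||
| // appends above, args_ holds AicpuParamHead + ioAddrs + node_def_len (uint32) + node_def | |||
| // bytes, so its final length can be validated against the head: | |||
| const size_t expected_len = sizeof(AicpuParamHead) + io_addrs_size + sizeof(uint32_t) + node_def_len; | |||
| // expected_len should equal both args_.length() and the param_len stored in aicpu_param_head. | |||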
| @@ -145,8 +161,9 @@ std::vector<TaskInfoPtr> AicpuOpKernelMod::GenTask(const std::vector<AddressPtr> | |||
| node_name_ = kTopKV2; | |||
| } | |||
| AicpuTaskInfoPtr task_info_ptr = make_shared<ge::model_runner::AicpuTaskInfo>( | |||
| kernel_name_, stream_id, node_so_, node_name_, node_def_str_, input_data_addrs, output_data_addrs, NeedDump()); | |||
| AicpuTaskInfoPtr task_info_ptr = | |||
| make_shared<ge::model_runner::AicpuTaskInfo>(kernel_name_, stream_id, node_so_, node_name_, node_def_str_, | |||
| ext_info_, input_data_addrs, output_data_addrs, NeedDump()); | |||
| MS_LOG(INFO) << "AicpuOpKernelMod GenTask end"; | |||
| return {task_info_ptr}; | |||
| @@ -36,6 +36,7 @@ class AicpuOpKernelMod : public AscendKernelMod { | |||
| void SetOutputList(const std::vector<int64_t> &outputList); | |||
| void SetAnfNode(const AnfNodePtr &anf_node); | |||
| void SetNodeDef(const std::string &nodeDef); | |||
| void SetExtInfo(const std::string &ext_info); | |||
| void SetNodeName(const std::string &node_name); | |||
| /** | |||
| @@ -58,6 +59,7 @@ class AicpuOpKernelMod : public AscendKernelMod { | |||
| std::string node_def_str_; | |||
| std::string node_name_; | |||
| std::string node_so_; | |||
| std::string ext_info_; | |||
| std::vector<int64_t> inputList_; | |||
| std::vector<int64_t> outputList_; | |||
| AnfNodePtr anf_node_; | |||
| @@ -21,7 +21,6 @@ | |||
| #include <map> | |||
| #include <string> | |||
| #include "backend/kernel_compiler/kernel.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| constexpr auto kInitDataSetQueue = "InitDataSetQueue"; | |||
| @@ -50,6 +49,36 @@ struct AicpuParamHead { | |||
| uint64_t extInfoAddr; // extInfo address | |||
| } __attribute__((packed)); | |||
| const uint32_t kExtInfoHeadSize = 8; | |||
| struct ExtInfo { | |||
| int32_t infoType; // extend type | |||
| uint32_t infoLen; // length for infoMsg | |||
| char infoMsg[0]; // extend value | |||
| } __attribute__((packed)); | |||
| // Extend info ShapeAndType | |||
| const uint32_t kMaxShapeDims = 8; | |||
| struct ShapeAndType { | |||
| int32_t type; | |||
| int64_t dims[kMaxShapeDims]; | |||
| } __attribute__((packed)); | |||
| // Extend Info type for task | |||
| enum FWKTaskExtInfoType { | |||
| FWK_ADPT_EXT_SHAPE_TYPE = 0, | |||
| FWK_ADPT_EXT_INPUT_SHAPE, | |||
| FWK_ADPT_EXT_OUTPUT_SHAPE, | |||
| FWK_ADPT_EXT_INVALID | |||
| }; | |||
| // for unknown shape op type | |||
| enum UnknowShapeOpType { | |||
| DEPEND_IN_SHAPE = 1, // op output shape is determined by the input shapes | |||
| DEPEND_CONST_VALUE = 2, // op output shape is determined by const op values | |||
| DEPEND_SHAPE_RANGE = 3, // op output shape is determined by a shape range | |||
| DEPEND_COMPUTE = 4 // op output shape is known only after full computation | |||
| }; | |||
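| // Editor's note: a sanity-check sketch (not part of this PR) pinning the wire sizes the | |||
| // ext-info serializer relies on; both facts follow from the packed definitions above: | |||
| static_assert(kExtInfoHeadSize == sizeof(int32_t) + sizeof(uint32_t), "ExtInfo head is infoType + infoLen"); | |||
| static_assert(sizeof(ShapeAndType) == sizeof(int32_t) + kMaxShapeDims * sizeof(int64_t), "ShapeAndType must stay packed (68 bytes)"); | |||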
| class AicpuOpUtil { | |||
| public: | |||
| static int MsTypeToProtoType(TypeId ms_type); | |||
| @@ -26,7 +26,7 @@ message AttrValue { | |||
| repeated int64 i = 3 [ packed = true ]; //"array(int)" | |||
| repeated float f = 4 [ packed = true ]; //"array(float)" | |||
| repeated bool b = 5 [ packed = true ]; //"array(bool)" | |||
| repeated DataType type = 6 [ packed = true ]; //"array(type)" | |||
| repeated int32 type = 6 [ packed = true ]; //"array(type)" | |||
| repeated TensorShape shape = 7; //"array(shape)" | |||
| repeated Tensor tensor = 8; //"array(tensor)" | |||
| } | |||
| @@ -18,9 +18,16 @@ package mindspore; | |||
| import "attr.proto"; | |||
| import "tensor.proto"; | |||
| message DynamicIdx { | |||
| int32 idx = 1; | |||
| int32 num = 2; | |||
| } | |||
| message NodeDef { | |||
| string op = 2; | |||
| map<string, AttrValue> attrs = 3; | |||
| repeated Tensor inputs = 4; | |||
| repeated Tensor outputs = 5; | |||
| map<string, DynamicIdx> dym_inputs = 6; | |||
| map<string, DynamicIdx> dym_outputs = 7; | |||
| } | |||
| @@ -26,9 +26,12 @@ message Tensor { | |||
| TensorShape tensor_shape = 1; | |||
| // tensor content data type | |||
| DataType tensor_type = 2; | |||
| int32 tensor_type = 2; | |||
| // tensor memory device | |||
| // memory device where the data is located: "DDR", "HBM" or "NONE" | |||
| string mem_device = 3; | |||
| string name = 4; | |||
| uint64 data_ptr = 5; | |||
| uint64 data_size = 6; | |||
| } | |||
| @@ -31,5 +31,5 @@ message TensorShape { | |||
| bool unknown_rank = 3; | |||
| // data format: "NHWC", "NCHW", "NC1HWC0" or "NONE" | |||
| string data_format = 4; | |||
| int32 data_format = 4; | |||
| }; | |||
| @@ -19,17 +19,30 @@ option cc_enable_arenas = true; | |||
| package mindspore; | |||
| enum DataType { | |||
| MS_UNKNOWN = 0; | |||
| MS_BOOL = 1; | |||
| MS_FLOAT32 = 0; | |||
| MS_FLOAT16 = 1; | |||
| MS_INT8 = 2; | |||
| MS_UINT8 = 3; | |||
| MS_INT16 = 4; | |||
| MS_UINT16 = 5; | |||
| MS_INT32 = 6; | |||
| MS_UINT32 = 7; | |||
| MS_INT64 = 8; | |||
| MS_UINT64 = 9; | |||
| MS_FLOAT16 = 10; | |||
| MS_FLOAT32 = 11; | |||
| MS_FLOAT64 = 12; | |||
| MS_INT32 = 3; | |||
| MS_UINT8 = 4; | |||
| MS_INT16 = 6; | |||
| MS_UINT16 = 7; | |||
| MS_UINT32 = 8; | |||
| MS_INT64 = 9; | |||
| MS_UINT64 = 10; | |||
| MS_FLOAT64 = 11; | |||
| MS_BOOL = 12; | |||
| MS_STRING = 13; | |||
| MS_DUAL_SUB_INT8 = 14; | |||
| MS_DUAL_SUB_UINT8 = 15; | |||
| MS_COMPLEX64 = 16; | |||
| MS_COMPLEX128 = 17; | |||
| MS_QINT8 = 18; | |||
| MS_QINT16 = 19; | |||
| MS_QINT32 = 20; | |||
| MS_QUINT8 = 21; | |||
| MS_QUINT16 = 22; | |||
| MS_RESOURCE = 23; | |||
| MS_STRING_REF = 24; | |||
| MS_DUAL = 25; | |||
| MS_UNKNOWN = 26; | |||
| } | |||
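| // Editor's note: AicpuOpUtil::MsTypeToProtoType is not shown in this diff; the sketch below | |||
| // (hypothetical name, entries and fallback) only illustrates the mapping the re-numbered | |||
| // enum implies, from MindSpore TypeId to the proto values above (requires <map>): | |||
| int MsTypeToProtoTypeSketch(TypeId ms_type) { | |||
| static const std::map<TypeId, int> type_map = { | |||
| {kNumberTypeFloat32, MS_FLOAT32}, {kNumberTypeFloat16, MS_FLOAT16}, {kNumberTypeInt8, MS_INT8}, | |||
| {kNumberTypeInt32, MS_INT32}, {kNumberTypeBool, MS_BOOL}, | |||
| }; | |||
| auto iter = type_map.find(ms_type); | |||
| return iter == type_map.end() ? MS_UNKNOWN : iter->second; | |||
| } | |||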
| @@ -37,7 +37,6 @@ enum FusionType { | |||
| COMMREDUCE, | |||
| SEGMENT, | |||
| OPAQUE, | |||
| DYNAMIC, | |||
| UNKNOWN_FUSION_TYPE = -1, | |||
| }; | |||
| enum OpPattern { | |||
| @@ -80,8 +79,8 @@ class KernelPack { | |||
| bool LoadKernelMeta(const std::string &json_f, const std::string &processor); | |||
| bool ReadFromJsonFile(const std::string &json_f, const std::string &processor); | |||
| const std::string Serialize() const; | |||
| const FlexArray *const GetJson() const { return json_; } | |||
| const FlexArray *const GetKernel() const { return kernel_; } | |||
| const FlexArray *GetJson() const { return json_; } | |||
| const FlexArray *GetKernel() const { return kernel_; } | |||
| ~KernelPack() { | |||
| if (json_) { | |||
| delete[] json_; | |||
| @@ -19,53 +19,36 @@ | |||
| #include <map> | |||
| #include <string> | |||
| #include <memory> | |||
| #include <utility> | |||
| #include "backend/kernel_compiler/tbe/tbe_kernel_build.h" | |||
| #include "backend/kernel_compiler/tbe/tbe_kernel_parallel_build.h" | |||
| #include "backend/kernel_compiler/tbe/tbe_utils.h" | |||
| #include "backend/kernel_compiler/tbe/tbe_convert_utils.h" | |||
| #include "utils/ms_context.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| using mindspore::kernel::tbe::TbeUtils; | |||
| static bool GenPreBuildKernelJson(const std::vector<AnfNodePtr> &compute_nodes, | |||
| std::vector<nlohmann::json> *prebuild_op_list) { | |||
| MS_EXCEPTION_IF_NULL(prebuild_op_list); | |||
| TbeKernelJsonCreator creator(PREBUILD); | |||
| for (const auto &anf_node : compute_nodes) { | |||
| nlohmann::json prebuild; | |||
| if (!creator.GenTbeSingleKernelJson(anf_node, &prebuild)) { | |||
| MS_LOG(ERROR) << "GenTbeSingleKernelJson failed"; | |||
| return false; | |||
| } | |||
| (*prebuild_op_list).push_back(prebuild); | |||
| } | |||
| return true; | |||
| } | |||
| std::map<int32_t, KernelModPtr> KernelFusion(const std::vector<FusionScopeInfo> &fusion_scopes) { | |||
| MS_LOG(INFO) << "kernel fusion build start, scope size:" << fusion_scopes.size(); | |||
| std::map<int32_t, KernelModPtr> kernel_mod_ret; | |||
| auto build_manger = std::make_shared<ParallelBuildManager>(); | |||
| MS_EXCEPTION_IF_NULL(build_manger); | |||
| for (const auto &fusion_scope_iter : fusion_scopes) { | |||
| auto scope_id = fusion_scope_iter.scope_id; | |||
| string fusion_kernel_name; | |||
| nlohmann::json fusion_op; | |||
| string fusion_kernel = "te_fusion"; | |||
| if (!TbeKernelBuild::GenFusionScopeJson(fusion_scope_iter.input_nodes, fusion_scope_iter.compute_nodes, &fusion_op, | |||
| &fusion_kernel)) { | |||
| &fusion_kernel_name)) { | |||
| continue; | |||
| } | |||
| // gen kernel_name & check cache | |||
| std::string json_str = fusion_op.dump(); | |||
| size_t hash_id = std::hash<std::string>()(json_str); | |||
| auto json_name = fusion_kernel.append("_").append(std::to_string(hash_id)); | |||
| auto context_ptr = MsContext::GetInstance(); | |||
| MS_EXCEPTION_IF_NULL(context_ptr); | |||
| auto device_id = context_ptr->get_param<uint32_t>(MS_CTX_DEVICE_ID); | |||
| auto json_name = | |||
| fusion_kernel_name.append("_").append(std::to_string(hash_id)).append("_").append(std::to_string(device_id)); | |||
| fusion_op["fusion_op_name"] = json_name; | |||
| // gen json for prebuild | |||
| std::vector<nlohmann::json> prebuild_op_list; | |||
| if (!GenPreBuildKernelJson(fusion_scope_iter.compute_nodes, &prebuild_op_list)) { | |||
| continue; | |||
| } | |||
| // get io size | |||
| std::vector<size_t> input_size_list; | |||
| std::vector<size_t> output_size_list; | |||
| @@ -80,20 +63,20 @@ std::map<int32_t, KernelModPtr> KernelFusion(const std::vector<FusionScopeInfo> | |||
| auto kernel_mod = | |||
| build_manger->GenKernelMod(json_name, tbe::kProcessorAiCore, input_size_list, output_size_list, kernel_pack); | |||
| if (kernel_mod != nullptr) { | |||
| kernel_mod_ret[scope_id] = kernel_mod; | |||
| kernel_mod_ret[fusion_scope_iter.scope_id] = kernel_mod; | |||
| continue; | |||
| } | |||
| } | |||
| // fusion build | |||
| nlohmann::json fusion_json; | |||
| fusion_json["fusion_op"] = fusion_op; | |||
| fusion_json["prebuild_ops"] = prebuild_op_list; | |||
| auto task_id = build_manger->StartCompileOp(fusion_json); | |||
| TbeUtils::SaveJsonInfo(json_name, fusion_json.dump()); | |||
| if (task_id < 0) { | |||
| MS_EXCEPTION(ArgumentError) << "start compile failed."; | |||
| } | |||
| build_manger->SaveTaskInfo(task_id, nullptr, json_name, input_size_list, output_size_list, scope_id); | |||
| build_manger->SaveTaskInfo(task_id, nullptr, json_name, input_size_list, output_size_list, | |||
| fusion_scope_iter.scope_id); | |||
| } | |||
| int build_failed_num = 0; | |||
| @@ -16,6 +16,7 @@ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_KERNELFUSION_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_KERNELFUSION_H_ | |||
| #include <utility> | |||
| #include <vector> | |||
| #include <map> | |||
| #include "backend/kernel_compiler/kernel.h" | |||
| @@ -25,11 +26,9 @@ namespace kernel { | |||
| * @brief fuse op and return a callable mod | |||
| */ | |||
| struct FusionScopeInfo { | |||
| FusionScopeInfo() {} | |||
| FusionScopeInfo(int32_t id, const std::vector<AnfNodePtr> &in, const std::vector<AnfNodePtr> &comp, | |||
| const std::vector<AnfNodePtr> &out) | |||
| : scope_id(id), input_nodes(in), compute_nodes(comp), output_nodes(out) {} | |||
| int32_t scope_id; | |||
| FusionScopeInfo(int32_t id, std::vector<AnfNodePtr> in, std::vector<AnfNodePtr> comp, std::vector<AnfNodePtr> out) | |||
| : scope_id(id), input_nodes(std::move(in)), compute_nodes(std::move(comp)), output_nodes(std::move(out)) {} | |||
| int32_t scope_id{}; | |||
| std::vector<AnfNodePtr> input_nodes; | |||
| std::vector<AnfNodePtr> compute_nodes; | |||
| std::vector<AnfNodePtr> output_nodes; | |||
| @@ -40,14 +40,13 @@ class OpLib { | |||
| private: | |||
| static bool RegOpFromLocalInfo(); | |||
| static bool DecodeOpInfo(const nlohmann::json &obj, const OpImplyType imply_type, const std::string &impl_path); | |||
| static bool DecodeAttr(const nlohmann::json &obj, const OpImplyType imply_type, | |||
| const std::shared_ptr<OpInfo> &op_info); | |||
| static bool DecodeOpInfo(const nlohmann::json &obj, OpImplyType imply_type, const std::string &impl_path); | |||
| static bool DecodeAttr(const nlohmann::json &obj, OpImplyType imply_type, const std::shared_ptr<OpInfo> &op_info); | |||
| static bool DecodeDtypeFormat(const nlohmann::json &dtype_format, const std::shared_ptr<OpIOInfo> &op_io, | |||
| size_t index); | |||
| static void DecodeTBESpecificInfo(const nlohmann::json &obj, const std::shared_ptr<OpInfo> &op_info); | |||
| static void DecodeAKGSpecificInfo(const nlohmann::json &obj, const std::shared_ptr<OpInfo> &op_info); | |||
| static bool DecodeInputOutput(const nlohmann::json &obj, const OpImplyType imply_type, const OpIOType io_type, | |||
| static bool DecodeInputOutput(const nlohmann::json &obj, OpImplyType imply_type, OpIOType io_type, | |||
| const std::shared_ptr<OpInfo> &op_info, const nlohmann::json &dtype_format); | |||
| static bool GetRefInfo(const std::shared_ptr<OpInfo> &op_info); | |||
| static bool CheckRepetition(const std::shared_ptr<OpInfo> &op_info); | |||
| @@ -173,7 +173,7 @@ void TbeAdapter::NormalizeFuncName(std::string *func_name) { | |||
| *func_name = name_tmp; | |||
| auto iter = tbe_func_adapter_map.find(*func_name); | |||
| if (iter != tbe_func_adapter_map.end()) { | |||
| MS_LOG(INFO) << "map actual op from me " << *func_name << " to tbe op" << iter->second; | |||
| MS_LOG(INFO) << "Map actual op from me: " << *func_name << " to tbe op: " << iter->second; | |||
| *func_name = iter->second; | |||
| } | |||
| } | |||
| @@ -27,7 +27,7 @@ | |||
| // the TBE back-end operator implementation difference | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| enum kCreaterType : int { SINGLE_BUILD = 0, PREBUILD, OP_SELECT_FORMAT, CHECK_SUPPORTED, OP_PRE_COMPILE }; | |||
| enum kCreaterType : int { SINGLE_BUILD = 0, OP_SELECT_FORMAT, CHECK_SUPPORTED, OP_PRE_COMPILE }; | |||
| namespace tbe { | |||
| using FAttrsPass = void (*)(const AnfNodePtr &anf_node, const std::vector<std::shared_ptr<OpAttr>> &op_info_attrs, | |||
| nlohmann::json *attrs_json); | |||
| @@ -63,7 +63,7 @@ const std::unordered_map<std::string, size_t> type_nbyte_maps = { | |||
| const std::unordered_map<std::string, FusionType> fusion_type_maps = { | |||
| {"CONVLUTION", FusionType::CONVLUTION}, {"ELEMWISE", FusionType::ELEMWISE}, {"COMMREDUCE", FusionType::COMMREDUCE}, | |||
| {"SEGMENT", FusionType::SEGMENT}, {"DYNAMIC", FusionType::DYNAMIC}, {"OPAQUE", FusionType::OPAQUE}, | |||
| {"SEGMENT", FusionType::SEGMENT}, {"OPAQUE", FusionType::OPAQUE}, | |||
| }; | |||
| TypeId DtypeToTypeId(const std::string &dtypes) { | |||
| @@ -24,6 +24,7 @@ | |||
| #include "backend/kernel_compiler/tbe/tbe_adapter.h" | |||
| #include "backend/kernel_compiler/tbe/tbe_convert_utils.h" | |||
| #include "backend/kernel_compiler/tbe/tbe_utils.h" | |||
| #include "utils/ms_context.h" | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| @@ -71,14 +72,20 @@ constexpr auto kVTypeListListInt = "listListInt"; | |||
| constexpr auto kJValue = "value"; | |||
| constexpr auto kJDynIndex = "dyn_index"; | |||
| constexpr auto kJFuncName = "func_name"; | |||
| std::string NormalizeFullScopeName(const string &full_scope_name) { | |||
| // exp:Default/ReLU-op0 -->Default_ReLU_op0 | |||
| string normal_ret = full_scope_name; | |||
| std::replace(normal_ret.begin(), normal_ret.end(), '/', '_'); | |||
| std::replace(normal_ret.begin(), normal_ret.end(), '-', '_'); | |||
| return normal_ret; | |||
| } | |||
| constexpr auto kJL1AddrOffset = "L1_addr_offset"; | |||
| constexpr auto kJL1FusionType = "L1_fusion_type"; | |||
| constexpr auto kJL1WorkspaceSize = "L1_workspace_size"; | |||
| constexpr auto kJAddrType = "addr_type"; | |||
| constexpr auto kJSliceOffset = "slice_offset"; | |||
| constexpr auto kJSplitIndex = "split_index"; | |||
| constexpr auto kJTotalShape = "total_shape"; | |||
| constexpr auto kJValidShape = "valid_shape"; | |||
| constexpr auto kJModuleName = "module_name"; | |||
| constexpr auto kJPattern = "pattern"; | |||
| constexpr auto kJPyModulePath = "py_module_path"; | |||
| constexpr auto kJPreBuildOutsAttrs = "prebuild_outs_attrs"; | |||
| constexpr auto kJKwdArgs = "kwds_args"; | |||
| constexpr auto kJListArgs = "list_args"; | |||
| bool TbeKernelJsonCreator::GenTbeSingleKernelJson(const std::shared_ptr<mindspore::AnfNode> &anf_node, | |||
| nlohmann::json *kernel_json) { | |||
| @@ -117,13 +124,12 @@ bool TbeKernelJsonCreator::GenTbeSingleKernelJson(const std::shared_ptr<mindspor | |||
| op_info_json[kJAttrs] = attrs_json; | |||
| std::string json_str = op_info_json.dump(); | |||
| size_t hash_id = std::hash<std::string>()(json_str); | |||
| json_name_ = op_name + "_" + std::to_string(hash_id); | |||
| auto context_ptr = MsContext::GetInstance(); | |||
| MS_EXCEPTION_IF_NULL(context_ptr); | |||
| auto device_id = context_ptr->get_param<uint32_t>(MS_CTX_DEVICE_ID); | |||
| json_name_ = op_name + "_" + std::to_string(hash_id) + "_" + std::to_string(device_id); | |||
| json_info_ = json_str; | |||
| if (creater_type_ == PREBUILD) { | |||
| op_info_json[kJKernelName] = NormalizeFullScopeName(anf_node->fullname_with_scope()); | |||
| } else { | |||
| op_info_json[kJKernelName] = json_name_; | |||
| } | |||
| op_info_json[kJKernelName] = json_name_; | |||
| (*kernel_json)[kJOpInfo] = op_info_json; | |||
| (*kernel_json)[kJFullName] = anf_node->fullname_with_scope(); | |||
| if (creater_type_ == SINGLE_BUILD) { | |||
| @@ -581,25 +587,25 @@ bool TbeKernelBuild::GetIOSize(const nlohmann::json &kernel_json, std::vector<si | |||
| bool TbeKernelBuild::GenFusionScopeJson(const std::vector<mindspore::AnfNodePtr> &input_nodes, | |||
| const std::vector<mindspore::AnfNodePtr> &compute_nodes, | |||
| nlohmann::json *fusion_str, std::string *fusion_kernel) { | |||
| MS_EXCEPTION_IF_NULL(fusion_str); | |||
| MS_EXCEPTION_IF_NULL(fusion_kernel); | |||
| nlohmann::json *fusion_json, std::string *fusion_kernel_name) { | |||
| MS_EXCEPTION_IF_NULL(fusion_json); | |||
| MS_EXCEPTION_IF_NULL(fusion_kernel_name); | |||
| // get input layer info | |||
| std::vector<std::vector<mindspore::AnfNodePtr>> input_layers; | |||
| std::map<const AnfNodePtr, FusionDataType> spec_data_input; | |||
| if (!GetInputLayers(input_nodes, compute_nodes, &input_layers, &spec_data_input)) { | |||
| return false; | |||
| } | |||
| // gen fusion scopre_op jsom | |||
| // gen fusion scope_op json | |||
| std::vector<nlohmann::json> compute_list; | |||
| (*fusion_kernel) = kFusionKernelNamePrfix; | |||
| (*fusion_kernel_name) = kFusionKernelNamePrfix; | |||
| // index: fusion build optional-input record, counts up from 0 | |||
| static size_t index = 0; | |||
| auto layer_iter = input_layers.begin(); | |||
| auto compute_op_iter = compute_nodes.begin(); | |||
| for (; compute_op_iter != compute_nodes.end(); ++compute_op_iter, ++layer_iter) { | |||
| nlohmann::json compute_op_str; | |||
| (void)GenFusionComputeJson(*compute_op_iter, &layer_iter, &compute_op_str, fusion_kernel, &index); | |||
| (void)GenFusionComputeJson(*compute_op_iter, &layer_iter, &compute_op_str, fusion_kernel_name, &index); | |||
| compute_list.push_back(compute_op_str); | |||
| } | |||
| index = 0; | |||
| @@ -617,36 +623,122 @@ bool TbeKernelBuild::GenFusionScopeJson(const std::vector<mindspore::AnfNodePtr> | |||
| } | |||
| index = 0; | |||
| data_list.insert(data_list.end(), compute_list.begin(), compute_list.end()); | |||
| (*fusion_str)[kFusionOpList] = data_list; | |||
| (*fusion_json)[kFusionOpList] = data_list; | |||
| return true; | |||
| } | |||
| void TbeKernelBuild::GenPreDescJson(nlohmann::json *output_desc) { | |||
| MS_EXCEPTION_IF_NULL(output_desc); | |||
| (*output_desc)[kJL1AddrOffset] = 0; | |||
| (*output_desc)[kJL1FusionType] = -1; | |||
| (*output_desc)[kJL1WorkspaceSize] = -1; | |||
| (*output_desc)[kJAddrType] = 0; | |||
| } | |||
| void TbeKernelBuild::GenFusionComputeCommonJson(const mindspore::CNodePtr &cnode, nlohmann::json *compute_op_str, | |||
| std::string *fusion_kernel_name) { | |||
| MS_EXCEPTION_IF_NULL(compute_op_str); | |||
| MS_EXCEPTION_IF_NULL(fusion_kernel_name); | |||
| // gen others | |||
| auto origin_type = AnfAlgo::GetCNodeName(cnode); | |||
| // replace special op type for buffer fusion op | |||
| auto type = GetRealOpType(origin_type); | |||
| (*compute_op_str)[kJtype] = type; | |||
| tbe::TbeAdapter::NormalizeFuncName(&type); | |||
| (*compute_op_str)[kJFuncName] = type; | |||
| (*compute_op_str)[kJModuleName] = std::string("impl.") + type; | |||
| (*compute_op_str)[kJName] = cnode->fullname_with_scope(); | |||
| (*compute_op_str)[kJPattern] = GetNodeFusionType(cnode); | |||
| (*compute_op_str)[kJPyModulePath] = "/usr/local/Ascend/opp/op_impl/build_in/ai_core/tbe"; | |||
| (void)(*fusion_kernel_name).append("_"); | |||
| (void)(*fusion_kernel_name).append(type); | |||
| } | |||
| void TbeKernelBuild::GenFusionComputePreBuildJson(const mindspore::CNodePtr &cnode, nlohmann::json *compute_op_str) { | |||
| MS_EXCEPTION_IF_NULL(cnode); | |||
| MS_EXCEPTION_IF_NULL(compute_op_str); | |||
| // kwds args | |||
| nlohmann::json json_prebuild_args; | |||
| json_prebuild_args[kJKwdArgs] = nlohmann::json::object(); | |||
| // list_args | |||
| nlohmann::json json_list_args; | |||
| // list_args: output args | |||
| auto output_size = AnfAlgo::GetOutputTensorNum(cnode); | |||
| for (size_t i = 0; i < output_size; ++i) { | |||
| nlohmann::json output_desc; | |||
| GenDescJson(cnode, i, i, &output_desc); | |||
| output_desc[kJDtype] = output_desc[kJDataType]; | |||
| json_list_args.push_back(output_desc); | |||
| } | |||
| // list_args: attr args | |||
| auto op_name = AnfAlgo::GetCNodeName(cnode); | |||
| auto opinfo = OpLib::FindOp(op_name, OpImplyType::kTBE); | |||
| MS_EXCEPTION_IF_NULL(opinfo); | |||
| TbeKernelJsonCreator json_creater(SINGLE_BUILD); | |||
| nlohmann::json json_attr_args; | |||
| if (!json_creater.GenTbeAttrJson(cnode, opinfo, &json_attr_args)) { | |||
| MS_LOG(INFO) << "Fusion warning: get prebuild args of attr failed."; | |||
| } | |||
| for (const auto &attr : json_attr_args) { | |||
| if (attr[kJName] != "isRef" && attr[kJValid] == true) { | |||
| json_list_args.push_back(attr[kJValue]); | |||
| } | |||
| } | |||
| json_prebuild_args[kJListArgs] = json_list_args; | |||
| (*compute_op_str)[kJPreBuildOutsAttrs] = json_prebuild_args; | |||
| } | |||
| void TbeKernelBuild::GenSuffixDescJson(nlohmann::json *output_desc) { | |||
| MS_EXCEPTION_IF_NULL(output_desc); | |||
| (*output_desc)[kJSliceOffset] = nlohmann::json::array(); | |||
| (*output_desc)[kJSplitIndex] = 0; | |||
| (*output_desc)[kJTotalShape] = nlohmann::json::array(); | |||
| (*output_desc)[kJValidShape] = nlohmann::json::array(); | |||
| } | |||
| // anf_node: this node is used to get the output desc (type/format/shape ...) | |||
| // node_out_idx: node output index | |||
| // desc_output_idx: the index used when adding to the json | |||
| // nlohmann::json *output_desc: for return | |||
| // FusionDataType fusion_data_type: special processing of the json desc output shape [kFusionAddN, kFusionReLUGradV2] | |||
| void TbeKernelBuild::GenDescJson(const std::shared_ptr<mindspore::AnfNode> &anf_node, size_t node_out_idx, | |||
| size_t desc_output_idx, nlohmann::json *output_desc, FusionDataType fusion_data_type) { | |||
| GenPreDescJson(output_desc); | |||
| // data_type | |||
| auto type_id = AnfAlgo::GetOutputDeviceDataType(anf_node, node_out_idx); | |||
| (*output_desc)[kJDataType] = tbe::TypeIdToString(type_id); | |||
| // name | |||
| std::string output_desc_name = anf_node->fullname_with_scope(); | |||
| if (node_out_idx > 0) { | |||
| output_desc_name = output_desc_name + "_" + std::to_string(node_out_idx); | |||
| } | |||
| (*output_desc)[kJName] = NormalizeFullScopeName(output_desc_name); | |||
| auto type_id = AnfAlgo::GetOutputDeviceDataType(anf_node, node_out_idx); | |||
| (*output_desc)[kJDataType] = tbe::TypeIdToString(type_id); | |||
| (*output_desc)[kJName] = output_desc_name; | |||
| // ori_format | |||
| (*output_desc)[kJOriFormat] = kOpFormat_NCHW; | |||
| // ori_shape | |||
| auto ori_shape = AnfAlgo::GetOutputInferShape(anf_node, node_out_idx); | |||
| if (ori_shape.empty()) { | |||
| ori_shape.emplace_back(1); | |||
| } | |||
| (*output_desc)[kJOriShape] = ori_shape; | |||
| // !! Note: output_index is only used by a node's own outputs | |||
| (*output_desc)[kJOutputIndex] = desc_output_idx; | |||
| // shape | |||
| auto shape = AnfAlgo::GetOutputDeviceShape(anf_node, node_out_idx); | |||
| if (shape.empty()) { | |||
| shape.emplace_back(1); | |||
| } | |||
| (*output_desc)[kJShape] = shape; | |||
| // !! Note: format is only used by a data node's outputs | |||
| auto format = AnfAlgo::GetOutputFormat(anf_node, node_out_idx); | |||
| if (format == kOpFormat_DEFAULT) { | |||
| format = ori_shape.size() == 4 ? kOpFormat_NCHW : kOpFormat_ND; | |||
| } else if (format == kOpFormat_FRAC_Z) { | |||
| format = kOpFormat_FRACTAL_Z; | |||
| } | |||
| (*output_desc)[kJFormat] = format; | |||
| (*output_desc)[kJOriFormat] = kOpFormat_NCHW; | |||
| (*output_desc)[kJOutputIndex] = desc_output_idx; | |||
| // special node | |||
| if (fusion_data_type == kFusionAddN && format == kOpFormat_NC1HWC0) { | |||
| std::vector<size_t> spec_shape = {}; | |||
| spec_shape.emplace_back(shape[0]); | |||
| @@ -663,12 +755,13 @@ void TbeKernelBuild::GenDescJson(const std::shared_ptr<mindspore::AnfNode> &anf_ | |||
| (*output_desc)[kJShape] = spec_shape; | |||
| (*output_desc)[kJDataType] = kVTypeBool; | |||
| } | |||
| GenSuffixDescJson(output_desc); | |||
| } | |||
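| // Editor's note: a condensed sketch (distilled from GenDescJson above, hypothetical helper | |||
| // name) of the format normalization applied before kJFormat is emitted: | |||
| std::string NormalizeOutputFormatSketch(const std::string &format, size_t ori_rank) { | |||
| if (format == kOpFormat_DEFAULT) { | |||
| return ori_rank == 4 ? kOpFormat_NCHW : kOpFormat_ND; // default format maps by original rank | |||
| } | |||
| if (format == kOpFormat_FRAC_Z) { | |||
| return kOpFormat_FRACTAL_Z; // the TBE fusion json expects the FRACTAL_Z spelling | |||
| } | |||
| return format; | |||
| } | |||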
| void TbeKernelBuild::GenReusedOutputDesc(const std::shared_ptr<mindspore::AnfNode> &anf_node, size_t index, | |||
| size_t output_index, nlohmann::json *output_desc) { | |||
| std::string output_desc_name = anf_node->fullname_with_scope() + "_" + std::to_string(index); | |||
| (*output_desc)[kJName] = NormalizeFullScopeName(output_desc_name); | |||
| (*output_desc)[kJName] = output_desc_name; | |||
| (*output_desc)[kJOutputIndex] = output_index; | |||
| std::vector<size_t> shape; | |||
| (*output_desc)[kJShape] = shape; | |||
| @@ -692,6 +785,9 @@ bool TbeKernelBuild::GetSpecInputLayers(const std::string &op_name, | |||
| return true; | |||
| } | |||
| // <input_nodes> : contains parameter/data nodes; their order may not match the tbe input order; | |||
| // <compute_nodes> : contains cnodes; their input order may not match the tbe input order; | |||
| // Special-process node list: see tbe_adapter.cc [except: Conv2DBackpropInput] | |||
| bool TbeKernelBuild::GetInputLayers(const std::vector<mindspore::AnfNodePtr> &input_nodes, | |||
| const std::vector<mindspore::AnfNodePtr> &compute_nodes, | |||
| std::vector<std::vector<mindspore::AnfNodePtr>> *input_layers, | |||
| @@ -722,7 +818,7 @@ bool TbeKernelBuild::GetInputLayers(const std::vector<mindspore::AnfNodePtr> &in | |||
| MS_LOG(INFO) << "Fusion info: add compute node's [" << i << "] input: " << input->fullname_with_scope(); | |||
| layer.emplace_back((*find_iter)); | |||
| } else { | |||
| MS_LOG(INFO) << "Fusion warnig: this input [" << i << "] may be pre compute(" << input->fullname_with_scope() | |||
| MS_LOG(INFO) << "Fusion warning: this input [" << i << "] may be pre compute(" << input->fullname_with_scope() | |||
| << ") node's output."; | |||
| } | |||
| } | |||
| @@ -750,8 +846,9 @@ bool TbeKernelBuild::GenFusionDataInputJson(const std::shared_ptr<mindspore::Anf | |||
| MS_EXCEPTION_IF_NULL(data_str); | |||
| MS_EXCEPTION_IF_NULL(index); | |||
| std::vector<nlohmann::json> output_desc_list; | |||
| // if data_input is null, this is an optional input. | |||
| if (!data_input) { | |||
| MS_LOG(INFO) << "Data input is optional node"; | |||
| MS_LOG(INFO) << "Fusion info: data input is optional node"; | |||
| auto name = std::string(kOptional) + std::to_string(*index); | |||
| (*data_str)[kJName] = name; | |||
| nlohmann::json output_desc; | |||
| @@ -767,12 +864,16 @@ bool TbeKernelBuild::GenFusionDataInputJson(const std::shared_ptr<mindspore::Anf | |||
| auto kernel_idx = AnfAlgo::VisitKernel(data_input, 0); | |||
| auto real_node = kernel_idx.first; | |||
| size_t real_idx = kernel_idx.second; | |||
| MS_LOG(INFO) << "Real name " << real_node->fullname_with_scope() << " index:" << real_idx; | |||
| MS_LOG(INFO) << "Fusion info: Real name: " << real_node->fullname_with_scope() << ". index:" << real_idx; | |||
| // kJOutputDesc | |||
| nlohmann::json output_desc; | |||
| GenDescJson(real_node, real_idx, real_idx, &output_desc, fusion_data_type); | |||
| output_desc_list.push_back(output_desc); | |||
| (*data_str)[kJName] = NormalizeFullScopeName(real_node->fullname_with_scope()); | |||
| auto full_name = real_node->fullname_with_scope(); | |||
| if (real_idx > 0) { | |||
| full_name = full_name.append("_").append(std::to_string(real_idx)); | |||
| } | |||
| (*data_str)[kJName] = full_name; | |||
| } | |||
| (*data_str)[kJOutputDesc] = output_desc_list; | |||
| (*data_str)[kJtype] = "Data"; | |||
| @@ -808,6 +909,7 @@ bool TbeKernelBuild::IsDynamicInput(const mindspore::CNodePtr &cnode) { | |||
| size_t TbeKernelBuild::GetOptionalInput(const mindspore::CNodePtr &cnode, bool is_dynamic_input) { | |||
| MS_EXCEPTION_IF_NULL(cnode); | |||
| if (is_dynamic_input) { | |||
| // A node cannot have both optional and dynamic inputs. | |||
| return 0; | |||
| } | |||
| MS_EXCEPTION_IF_NULL(cnode); | |||
| @@ -831,22 +933,46 @@ std::string TbeKernelBuild::GetRealOpType(const std::string &origin_type) { | |||
| return result; | |||
| } | |||
| std::string TbeKernelBuild::GetNodeFusionType(const mindspore::CNodePtr &cnode) { | |||
| MS_EXCEPTION_IF_NULL(cnode); | |||
| auto node_type = AnfAlgo::GetCNodeName(cnode); | |||
| static std::map<std::string, std::string> fusion_type_map = {{kConv2DOpName, "Convolution"}, | |||
| {kBNTrainingReduceOpName, "bn_reduce"}, | |||
| {kBNTrainingUpdateOpName, "bn_update"}, | |||
| {kReluV2OpName, "ElemWise"}, | |||
| {kTensorAddOpName, "ElemWise"}, | |||
| {kConv2DBackpropInputOpName, "Conv2d_backprop_input"}, | |||
| {kAddNOpName, "ElemWise"}, | |||
| {kReluGradV2OpName, "ElemWise"}, | |||
| {kRealDivOpName, "ElemWise"}}; | |||
| auto find = fusion_type_map.find(node_type); | |||
| if (find == fusion_type_map.end()) { | |||
| MS_LOG(INFO) << "Fusion warning: get node fusion type failed, origin node type: " << node_type | |||
| << " return null string."; | |||
| return ""; | |||
| } else { | |||
| return find->second; | |||
| } | |||
| } | |||
| bool TbeKernelBuild::GenFusionComputeInputJson(const mindspore::CNodePtr &cnode, | |||
| std::vector<std::vector<mindspore::AnfNodePtr>>::iterator *layer_iter, | |||
| std::vector<nlohmann::json> *input_desc_list, size_t *index) { | |||
| MS_EXCEPTION_IF_NULL(cnode); | |||
| MS_EXCEPTION_IF_NULL(input_desc_list); | |||
| std::vector<nlohmann::json> input_desc_list_tmp = {}; | |||
| // 1. input json | |||
| bool is_dynamic_input = IsDynamicInput(cnode); | |||
| for (size_t i = 1; i < cnode->inputs().size(); ++i) { | |||
| auto input = cnode->input(i); | |||
| auto kernel_idx = AnfAlgo::VisitKernel(input, 0); | |||
| auto real_node = kernel_idx.first; | |||
| size_t real_idx = kernel_idx.second; | |||
| MS_LOG(INFO) << "Real name" << real_node->fullname_with_scope() << "index:" << real_idx; | |||
| MS_LOG(INFO) << "Fusion info: real name: " << real_node->fullname_with_scope() << ". index:" << real_idx; | |||
| nlohmann::json input_desc; | |||
| GenDescJson(real_node, real_idx, real_idx, &input_desc); | |||
| if (is_dynamic_input) { | |||
| // 2. dynamic input json | |||
| MS_LOG(INFO) << "Node has dynamic input."; | |||
| input_desc[kJDynIndex] = (i - 1); | |||
| } | |||
| @@ -854,7 +980,8 @@ bool TbeKernelBuild::GenFusionComputeInputJson(const mindspore::CNodePtr &cnode, | |||
| } | |||
| size_t optional_num = GetOptionalInput(cnode, is_dynamic_input); | |||
| if (optional_num > 0) { | |||
| MS_LOG(INFO) << "Node has optional input."; | |||
| // 3. optional input | |||
| MS_LOG(INFO) << "Fusion info: node has optional input."; | |||
| for (size_t i = 0; i < optional_num; ++i) { | |||
| nlohmann::json optional_input_desc; | |||
| optional_input_desc[kJName] = std::string(kOptional) + std::to_string(*index); | |||
| @@ -872,7 +999,7 @@ std::vector<size_t> TbeKernelBuild::GetDescOutputIndex(const std::vector<int> &o | |||
| std::vector<size_t> desc_output_index = {}; | |||
| for (size_t idx = 0; idx < output_used_nums.size(); ++idx) { | |||
| auto output_use_num_item = output_used_nums[idx]; | |||
| MS_LOG(INFO) << "Output used num[" << idx << "] = " << output_use_num_item; | |||
| MS_LOG(INFO) << "Fusion info: output used num[" << idx << "] = " << output_use_num_item; | |||
| desc_output_index.emplace_back(idx); | |||
| if (output_use_num_item > 1) { | |||
| desc_output_index.emplace_back(idx); | |||
| @@ -887,7 +1014,7 @@ bool TbeKernelBuild::GenFusionComputeOutputJson(const mindspore::CNodePtr &cnode | |||
| auto output_size = AnfAlgo::GetOutputTensorNum(cnode); | |||
| if (AnfAlgo::HasNodeAttr(kAttrOutputUsedNum, cnode)) { | |||
| auto output_used_nums = AnfAlgo::GetNodeAttr<std::vector<int>>(cnode, kAttrOutputUsedNum); | |||
| MS_LOG(INFO) << "This node's output has been reused, node name: " << cnode->fullname_with_scope(); | |||
| MS_LOG(INFO) << "Fusion info: this node's output has been reused, node name: " << cnode->fullname_with_scope(); | |||
| if (output_used_nums.size() != output_size) { | |||
| MS_LOG(INFO) << "Fusion error: output tenor num(" << output_size << ")" | |||
| << " is not match output used num(" << output_used_nums.size() << ")"; | |||
| @@ -930,20 +1057,14 @@ bool TbeKernelBuild::GenFusionComputeJson(const mindspore::AnfNodePtr &compute_n | |||
| // gen output desc | |||
| std::vector<nlohmann::json> output_desc_list; | |||
| if (!GenFusionComputeOutputJson(cnode, &output_desc_list)) { | |||
| MS_LOG(INFO) << "Fusion Error: gen fusion output desc faild, node full name: " << cnode->fullname_with_scope(); | |||
| MS_LOG(INFO) << "Fusion Error: gen fusion output desc failed, node full name: " << cnode->fullname_with_scope(); | |||
| return false; | |||
| } | |||
| (*compute_op_str)[kJOutputDesc] = output_desc_list; | |||
| // gen others | |||
| auto origin_type = AnfAlgo::GetCNodeName(cnode); | |||
| // replace special op type for buffer fusion op | |||
| auto type = GetRealOpType(origin_type); | |||
| (*compute_op_str)[kJtype] = type; | |||
| tbe::TbeAdapter::NormalizeFuncName(&type); | |||
| (*compute_op_str)[kJFuncName] = type; | |||
| (*compute_op_str)[kJName] = NormalizeFullScopeName(cnode->fullname_with_scope()); | |||
| (void)(*fusion_kernel_name).append("_"); | |||
| (void)(*fusion_kernel_name).append(type); | |||
| // gen common desc | |||
| GenFusionComputeCommonJson(cnode, compute_op_str, fusion_kernel_name); | |||
| // gen prebuild args | |||
| GenFusionComputePreBuildJson(cnode, compute_op_str); | |||
| return true; | |||
| } | |||
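| // Editor's note: an illustrative shape (hand-written; keys and values are examples, not | |||
| // captured output) of the compute-op json assembled by GenFusionComputeCommonJson and | |||
| // GenFusionComputePreBuildJson above: | |||
| // { | |||
| // "type": "Conv2D", "func_name": "conv2d", "module_name": "impl.conv2d", | |||
| // "name": "Default/Conv2D-op0", "pattern": "Convolution", | |||
| // "py_module_path": "/usr/local/Ascend/opp/op_impl/build_in/ai_core/tbe", | |||
| // "input_desc": [...], "output_desc": [...], | |||
| // "prebuild_outs_attrs": { "kwds_args": {}, "list_args": [...] } | |||
| // } | |||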
| @@ -965,7 +1086,7 @@ bool TbeKernelBuild::GetIOSize(const nlohmann::json &fusion_op_list, | |||
| MS_EXCEPTION_IF_NULL(output_size_list); | |||
| input_size_list->clear(); | |||
| output_size_list->clear(); | |||
| // calculate input size for malloc | |||
| for (const auto &op : fusion_op_list) { | |||
| if (op[kJtype] == "Data") { | |||
| const auto &data_output_desc = op[kJOutputDesc]; | |||
| @@ -975,23 +1096,23 @@ bool TbeKernelBuild::GetIOSize(const nlohmann::json &fusion_op_list, | |||
| } | |||
| auto ret = GetIOSizeImpl(data_output); | |||
| input_size_list->push_back(ret); | |||
| MS_LOG(INFO) << "Fusion info: scope input name: " << op[kJName] << ", size: " << ret; | |||
| MS_LOG(INFO) << "Fusion info: input node name: " << op[kJName] << ", size: " << ret; | |||
| } | |||
| } | |||
| } | |||
| // calculate output size for malloc | |||
| for (const auto &output_node : output_nodes) { | |||
| auto kernel_idx = AnfAlgo::VisitKernel(output_node, 0); | |||
| auto real_node = kernel_idx.first; | |||
| size_t real_idx = kernel_idx.second; | |||
| auto normal_name = NormalizeFullScopeName(real_node->fullname_with_scope()); | |||
| MS_LOG(INFO) << "Fusion info: real node name: " << normal_name << ", real output index: " << real_idx; | |||
| auto full_name = real_node->fullname_with_scope(); | |||
| MS_LOG(INFO) << "Fusion info: real output node name: " << full_name << ", real output index: " << real_idx; | |||
| for (const auto &op : fusion_op_list) { | |||
| if (op[kJName] == normal_name) { | |||
| if (op[kJName] == full_name) { | |||
| auto op_output_desces = op[kJOutputDesc]; | |||
| if (output_node != real_node) { | |||
| // tuple_get item | |||
| MS_LOG(INFO) << "Output is a tuple getitem node"; | |||
| MS_LOG(INFO) << "Fusion info: output is a tuple get_item node"; | |||
| auto output_desc = op_output_desces[real_idx]; | |||
| if (output_desc[kJShape].empty()) { | |||
| MS_LOG(INFO) << "Fusion error: output_desc's shape is empty. real_index " << real_idx; | |||
| @@ -1001,6 +1122,7 @@ bool TbeKernelBuild::GetIOSize(const nlohmann::json &fusion_op_list, | |||
| output_size_list->push_back(ret); | |||
| MS_LOG(INFO) << "Fusion info: scope output index: " << real_idx << ", size: " << ret; | |||
| } else { | |||
| MS_LOG(INFO) << "Fusion info: output is self."; | |||
| for (const auto &output_desc : op_output_desces) { | |||
| if (output_desc[kJShape].empty()) { | |||
| MS_LOG(INFO) << "Fusion info: output_desc's shape is empty, may be this node output"; | |||
| @@ -41,8 +41,8 @@ class TbeKernelBuild { | |||
| std::vector<size_t> *output_size_list); | |||
| // Ub Fuison | |||
| static bool GenFusionScopeJson(const std::vector<AnfNodePtr> &input_nodes, | |||
| const std::vector<AnfNodePtr> &compute_nodes, nlohmann::json *fusion_str, | |||
| std::string *fusion_kernel); | |||
| const std::vector<AnfNodePtr> &compute_nodes, nlohmann::json *fusion_json, | |||
| std::string *fusion_kernel_name); | |||
| static bool GetIOSize(const nlohmann::json &fusion_op_list, const std::vector<AnfNodePtr> &output_nodes, | |||
| std::vector<size_t> *input_size_list, std::vector<size_t> *output_size_list); | |||
| @@ -61,9 +61,14 @@ class TbeKernelBuild { | |||
| static std::vector<size_t> GetDescOutputIndex(const std::vector<int> &output_used_nums); | |||
| static bool GenFusionComputeOutputJson(const mindspore::CNodePtr &cnode, | |||
| std::vector<nlohmann::json> *output_desc_list); | |||
| static void GenPreDescJson(nlohmann::json *output_desc); | |||
| static void GenFusionComputeCommonJson(const mindspore::CNodePtr &cnode, nlohmann::json *compute_op_str, | |||
| std::string *fusion_kernel_name); | |||
| static void GenFusionComputePreBuildJson(const mindspore::CNodePtr &cnode, nlohmann::json *compute_op_str); | |||
| static void GenDescJson(const std::shared_ptr<mindspore::AnfNode> &anf_node, size_t node_out_idx, | |||
| size_t desc_output_idx, nlohmann::json *output_desc, | |||
| FusionDataType fusion_data_type = kFusionNormal); | |||
| static void GenSuffixDescJson(nlohmann::json *output_desc); | |||
| static void GenReusedOutputDesc(const std::shared_ptr<mindspore::AnfNode> &anf_node, size_t index, | |||
| size_t output_index, nlohmann::json *output_desc); | |||
| static size_t GetIOSizeImpl(const nlohmann::json &desc); | |||
| @@ -76,6 +81,7 @@ class TbeKernelBuild { | |||
| static bool IsDynamicInput(const CNodePtr &cnode); | |||
| static size_t GetOptionalInput(const CNodePtr &cnode, bool is_dynamic_input); | |||
| static std::string GetRealOpType(const std::string &origin_type); | |||
| static std::string GetNodeFusionType(const CNodePtr &cnode); | |||
| }; | |||
| class TbeKernelJsonCreator { | |||
| @@ -84,14 +90,14 @@ class TbeKernelJsonCreator { | |||
| ~TbeKernelJsonCreator() = default; | |||
| bool GenTbeSingleKernelJson(const std::shared_ptr<AnfNode> &anf_node, nlohmann::json *kernel_json); | |||
| std::string json_name() { return json_name_; } | |||
| bool GenTbeAttrJson(const std::shared_ptr<AnfNode> &anf_node, const std::shared_ptr<OpInfo> &op_info, | |||
| nlohmann::json *attrs_json); | |||
| private: | |||
| bool GenTbeInputsJson(const std::shared_ptr<AnfNode> &anf_node, const std::shared_ptr<OpInfo> &op_info, | |||
| nlohmann::json *inputs_json); | |||
| bool GenTbeOutputsJson(const std::shared_ptr<AnfNode> &anf_node, const std::shared_ptr<OpInfo> &op_info, | |||
| nlohmann::json *outputs_json); | |||
| bool GenTbeAttrJson(const std::shared_ptr<AnfNode> &anf_node, const std::shared_ptr<OpInfo> &op_info, | |||
| nlohmann::json *attrs_json); | |||
| static void ParseAttrValue(const std::string &type, const ValuePtr &value, nlohmann::json *attr_obj); | |||
| bool GenInputDescJson(const std::shared_ptr<AnfNode> &anf_node, size_t real_input_index, bool value, | |||
| const std::shared_ptr<OpIOInfo> &input_ptr, const string &op_input_name, size_t input_i, | |||
| @@ -33,42 +33,6 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| using mindspore::kernel::tbe::TbeUtils; | |||
| bool TbeOpParallelPreBuild(const std::vector<AnfNodePtr> &anf_nodes) { | |||
| auto build_manger = std::make_shared<ParallelBuildManager>(); | |||
| MS_EXCEPTION_IF_NULL(build_manger); | |||
| for (const auto &anf_node : anf_nodes) { | |||
| // gen kernel json | |||
| MS_EXCEPTION_IF_NULL(anf_node); | |||
| nlohmann::json kernel_json; | |||
| TbeKernelJsonCreator creator(OP_PRE_COMPILE); | |||
| if (!creator.GenTbeSingleKernelJson(anf_node, &kernel_json)) { | |||
| MS_LOG(ERROR) << "GenTbeSingleKernelJson failed"; | |||
| return false; | |||
| } | |||
| kernel_json["compile_type"] = "pre_build"; | |||
| // op build | |||
| auto task_id = build_manger->StartCompileOp(kernel_json); | |||
| build_manger->SavePreTaskInfo(task_id, anf_node); | |||
| } | |||
| while (!build_manger->IsAllPreTaskFinish()) { | |||
| int task_id = -1; | |||
| std::string task_result; | |||
| std::string pre_build_result; | |||
| auto ret = build_manger->WaitOne(&task_id, &task_result, &pre_build_result); | |||
| if (!ret) { | |||
| MS_EXCEPTION(ArgumentError) << "Pre Build Failed. wait one ret:" << ret << ", task id:" << task_id; | |||
| } | |||
| if (task_result != "Success") { | |||
| MS_EXCEPTION(ArgumentError) << "task pre compile Failed, task id:" << task_id << ", cause:" << task_result; | |||
| } | |||
| build_manger->PreTaskFinishProcess(task_id, pre_build_result); | |||
| } | |||
| return true; | |||
| } | |||
| bool TbeOpParallelBuild(const std::vector<AnfNodePtr> &anf_nodes) { | |||
| auto build_manger = std::make_shared<ParallelBuildManager>(); | |||
| MS_EXCEPTION_IF_NULL(build_manger); | |||
| @@ -122,15 +86,8 @@ bool TbeOpParallelBuild(const std::vector<AnfNodePtr> &anf_nodes) { | |||
| return build_manger->GenSameOpKernelMod(); | |||
| } | |||
| ParallelBuildManager::ParallelBuildManager() {} | |||
| ParallelBuildManager::~ParallelBuildManager() { ResetTaskInfo(); } | |||
| void ParallelBuildManager::SavePreTaskInfo(int32_t task_id, const mindspore::AnfNodePtr &anf_node) { | |||
| MS_LOG(INFO) << "SavePreTaskInfo, task id: " << task_id; | |||
| pre_task_map_[task_id] = anf_node; | |||
| } | |||
| void ParallelBuildManager::SaveTaskInfo(int32_t task_id, const mindspore::AnfNodePtr &anf_node, | |||
| const std::string &json_name, const std::vector<size_t> &input_size_list, | |||
| const std::vector<size_t> &output_size_list, int32_t scope_id) { | |||
| @@ -149,42 +106,11 @@ void ParallelBuildManager::SaveTaskInfo(int32_t task_id, const mindspore::AnfNod | |||
| task_map_[task_id] = task_info; | |||
| } | |||
| bool ParallelBuildManager::IsAllPreTaskFinish() const { | |||
| MS_LOG(INFO) << "wait pre build process task_num: " << pre_task_map_.size(); | |||
| return pre_task_map_.empty(); | |||
| } | |||
| bool ParallelBuildManager::IsAllTaskFinish() const { | |||
| MS_LOG(INFO) << "wait process task_num: " << task_map_.size(); | |||
| return task_map_.empty(); | |||
| } | |||
| void ParallelBuildManager::PreTaskFinishProcess(int32_t task_id, const std::string &pre_build_result) { | |||
| auto task_iter = pre_task_map_.find(task_id); | |||
| if (task_iter == pre_task_map_.end()) { | |||
| MS_EXCEPTION(ArgumentError) << "can find pre task_id:" << task_id; | |||
| } | |||
| auto node = task_iter->second; | |||
| auto builder = | |||
| std::make_shared<kernel::KernelBuildInfo::KernelBuildInfoBuilder>(AnfAlgo::GetSelectKernelBuildInfo(node)); | |||
| std::string start_flag = "fusion_pattern_start"; | |||
| std::string end_flag = "fusion_pattern_end"; | |||
| int start = pre_build_result.find(start_flag); | |||
| int end = pre_build_result.find(end_flag); | |||
| if (start != -1 && end != -1 && end >= start) { | |||
| std::string result = pre_build_result.substr(start + start_flag.size(), end - start - start_flag.size()); | |||
| if (result == "") { | |||
| (void)pre_task_map_.erase(task_iter); | |||
| return; | |||
| } | |||
| transform(result.begin(), result.end(), result.begin(), ::toupper); | |||
| FusionType fusion_type = tbe::GetFusionType(result); | |||
| builder->SetFusionType(fusion_type); | |||
| AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), node.get()); | |||
| } | |||
| (void)pre_task_map_.erase(task_iter); | |||
| } | |||
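For reference, the flag-delimited parsing done by PreTaskFinishProcess above can be reproduced standalone. The sketch below is illustrative only: the sample input string is invented, and the find() checks use std::string::npos, the idiomatic form of the int/-1 comparison in the removed code.

#include <algorithm>
#include <cctype>
#include <iostream>
#include <string>

int main() {
  const std::string start_flag = "fusion_pattern_start";
  const std::string end_flag = "fusion_pattern_end";
  // Invented sample: a pre-build result wrapping the pattern between the two flags.
  const std::string pre_build_result = "fusion_pattern_startelemwisefusion_pattern_end";

  const size_t start = pre_build_result.find(start_flag);
  const size_t end = pre_build_result.find(end_flag);
  if (start == std::string::npos || end == std::string::npos || end < start) {
    std::cout << "no fusion pattern found" << std::endl;
    return 0;
  }
  std::string result = pre_build_result.substr(start + start_flag.size(), end - start - start_flag.size());
  if (result.empty()) {
    std::cout << "empty fusion pattern" << std::endl;
    return 0;
  }
  // Same normalization as the removed code: uppercase before the fusion-type lookup.
  std::transform(result.begin(), result.end(), result.begin(), ::toupper);
  std::cout << result << std::endl;  // prints ELEMWISE
  return 0;
}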
| std::pair<int32_t, KernelModPtr> ParallelBuildManager::TaskFinishProcess(int32_t task_id, bool set_kernel_mod) { | |||
| auto task_iter = task_map_.find(task_id); | |||
| if (task_iter == task_map_.end()) { | |||
| @@ -28,7 +28,6 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| bool TbeOpParallelPreBuild(const std::vector<AnfNodePtr> &anf_nodes); | |||
| bool TbeOpParallelBuild(const std::vector<AnfNodePtr> &anf_nodes); | |||
| struct KernelBuildTaskInfo { | |||
| @@ -42,9 +41,8 @@ struct KernelBuildTaskInfo { | |||
| class ParallelBuildManager { | |||
| public: | |||
| ParallelBuildManager(); | |||
| ParallelBuildManager() = default; | |||
| ~ParallelBuildManager(); | |||
| void SavePreTaskInfo(int32_t task_id, const AnfNodePtr &anf_node); | |||
| void SaveTaskInfo(int32_t task_id, const AnfNodePtr &anf_node, const std::string &json_name, | |||
| const std::vector<size_t> &input_size_list, const std::vector<size_t> &output_size_list, | |||
| int32_t scope_id = 0); | |||
| @@ -54,10 +52,7 @@ class ParallelBuildManager { | |||
| bool SearchInCache(const std::string &json_name, const std::string &processor, | |||
| const std::vector<size_t> &input_size_list, const std::vector<size_t> &output_size_list, | |||
| AnfNode *node) const; | |||
| bool IsAllPreTaskFinish() const; | |||
| bool IsAllTaskFinish() const; | |||
| void PreTaskFinishProcess(int32_t task_id, const std::string &pre_build_result); | |||
| std::pair<int32_t, KernelModPtr> TaskFinishProcess(int32_t task_id, bool set_kernel_mod = true); | |||
| KernelModPtr GenKernelMod(const string &json_name, const string &processor, | |||
| const std::vector<size_t> &input_size_list, const std::vector<size_t> &output_size_list, | |||
| @@ -1187,6 +1187,19 @@ TypeId AnfRuntimeAlgorithm::GetPrevNodeOutputPrecision(const AnfNodePtr &node, s | |||
| return GetCNodeOutputPrecision(kernel_with_index.first); | |||
| } | |||
| bool AnfRuntimeAlgorithm::IsDynamicShape(const AnfNodePtr &node) { | |||
| if (!node->isa<CNode>()) { | |||
| return false; | |||
| } | |||
| auto cnode = node->cast<CNodePtr>(); | |||
| MS_EXCEPTION_IF_NULL(cnode); | |||
| auto has_attr = AnfAlgo::HasNodeAttr(kAttrIsDynamicShape, cnode); | |||
| if (!has_attr) { | |||
| return false; | |||
| } | |||
| return AnfAlgo::GetNodeAttr<bool>(node, kAttrIsDynamicShape); | |||
| } | |||
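A typical call site for the new IsDynamicShape helper would guard shape-dependent compile-time work. The sketch below is hypothetical: the traversal and variable names are assumed from similar loops elsewhere in this diff; only AnfAlgo::IsDynamicShape and the logging macros come from the codebase.

// Hypothetical usage sketch, not code from this PR.
for (const auto &anf_node : kernel_graph_ptr->execution_order()) {
  MS_EXCEPTION_IF_NULL(anf_node);
  if (AnfAlgo::IsDynamicShape(anf_node)) {
    // Shapes are only known at run time, so skip static-shape-only handling.
    MS_LOG(INFO) << "Skip static-shape optimization for dynamic-shape node.";
    continue;
  }
  // ... static-shape handling ...
}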
| bool AnfRuntimeAlgorithm::IsCondControlKernel(const CNodePtr &node) { | |||
| MS_EXCEPTION_IF_NULL(node); | |||
| if (node->inputs().empty()) { | |||
| @@ -217,6 +217,7 @@ class AnfRuntimeAlgorithm { | |||
| static TypeId GetCNodeOutputPrecision(const AnfNodePtr &node); | |||
| // get fix output precision from prev node, input_idx is the input index of current node related to prev node. | |||
| static TypeId GetPrevNodeOutputPrecision(const AnfNodePtr &node, size_t input_idx); | |||
| static bool IsDynamicShape(const AnfNodePtr &node); | |||
| static bool IsCondControlKernel(const CNodePtr &node); | |||
| static bool IsIndependentNode(const CNodePtr &node); | |||
| }; | |||
| @@ -445,7 +445,6 @@ void AscendSession::InitRuntimeResource() { | |||
| } | |||
| void AscendSession::HardwareOptimize(const std::shared_ptr<KernelGraph> &kernel_graph) const { | |||
| device::ascend::KernelPreBuild(kernel_graph.get()); | |||
| MS_LOG(INFO) << "HardwareOptimize start!"; | |||
| opt::AscendBackendOptimization(kernel_graph); | |||
| opt::AscendGraphKernelCommonProcess(kernel_graph); | |||
| @@ -19,7 +19,8 @@ | |||
| #include <vector> | |||
| #include <string> | |||
| #include <memory> | |||
| #include <set> | |||
| #include <map> | |||
| #include "runtime/device/ascend/kernel_select_ascend.h" | |||
| #include "runtime/device/kernel_info.h" | |||
| #include "backend/kernel_compiler/kernel.h" | |||
| @@ -61,32 +62,6 @@ static kernel::KernelModPtr SerialCompileImpl(const AnfNodePtr &anf_node) { | |||
| return kernel_mod_ptr; | |||
| } | |||
| static bool KernelPreBuildParallelCompile(const mindspore::session::KernelGraph *kernel_graph_ptr) { | |||
| MS_EXCEPTION_IF_NULL(kernel_graph_ptr); | |||
| std::vector<AnfNodePtr> tbe_nodes; | |||
| for (const auto &anf_node : kernel_graph_ptr->execution_order()) { | |||
| MS_EXCEPTION_IF_NULL(anf_node); | |||
| if (!AnfAlgo::IsRealKernel(anf_node)) { | |||
| continue; | |||
| } | |||
| KernelType kernel_type = AnfAlgo::GetKernelType(anf_node); | |||
| switch (kernel_type) { | |||
| case KernelType::TBE_KERNEL: { | |||
| if (AnfAlgo::GetKernelMod(anf_node) == nullptr && | |||
| AnfAlgo::GetFusionType(anf_node) == kernel::FusionType::DYNAMIC) { | |||
| tbe_nodes.push_back(anf_node); | |||
| } | |||
| break; | |||
| } | |||
| default: { | |||
| break; | |||
| } | |||
| } | |||
| } | |||
| bool ret = kernel::TbeOpParallelPreBuild(tbe_nodes); | |||
| return ret; | |||
| } | |||
| static bool KernelBuildParallelCompile(const mindspore::session::KernelGraph *kernel_graph_ptr) { | |||
| MS_EXCEPTION_IF_NULL(kernel_graph_ptr); | |||
| std::vector<AnfNodePtr> tbe_nodes; | |||
| @@ -237,12 +212,6 @@ static bool IsAtomicNode(const CNodePtr &kernel_node) { | |||
| return !(workspace_indexs.empty() && output_indexs.empty()); | |||
| } | |||
| bool KernelPreBuild(const mindspore::session::KernelGraph *kernel_graph_ptr) { | |||
| MS_EXCEPTION_IF_NULL(kernel_graph_ptr); | |||
| bool ret = device::ascend::KernelPreBuildParallelCompile(kernel_graph_ptr); | |||
| return ret; | |||
| } | |||
| bool KernelBuild(const mindspore::session::KernelGraph *kernel_graph_ptr) { | |||
| MS_EXCEPTION_IF_NULL(kernel_graph_ptr); | |||
| TbeUtils::LoadCache(); | |||
| @@ -22,10 +22,6 @@ | |||
| namespace mindspore { | |||
| namespace device { | |||
| namespace ascend { | |||
| /** | |||
| * @brief kernel pre build for ascend. | |||
| */ | |||
| bool KernelPreBuild(const mindspore::session::KernelGraph *kernel_graph_ptr); | |||
| /** | |||
| * @brief kernel build for ascend. | |||
| */ | |||
| @@ -32,6 +32,7 @@ namespace mindspore { | |||
| // op name. Ops that do not exist in operator/ops.h have their names defined here | |||
| constexpr auto kFour2FiveOpName = "Four2Five"; | |||
| constexpr auto kFive2FourOpName = "Five2Four"; | |||
| constexpr auto kConv2DOpName = "Conv2D"; | |||
| constexpr auto kConvBN1OpName = "ConvBN1"; | |||
| constexpr auto kBN2AddReluOpName = "BN2AddRelu"; | |||
| constexpr auto kBN2ReLUOpName = "BN2Relu"; | |||
| @@ -273,6 +274,7 @@ constexpr auto kAttrPadDimSize = "pad_dim_size"; | |||
| constexpr auto kAttrNumSegments = "num_segments"; | |||
| constexpr auto kAttrBegin = "begin"; | |||
| constexpr auto kAttrSize = "size"; | |||
| constexpr auto kAttrIsDynamicShape = "is_dynamic_shape"; | |||
| // attr value | |||
| constexpr auto kValueTargetSwitch = "target_switch"; | |||
| @@ -13,6 +13,7 @@ | |||
| # limitations under the License. | |||
| """aicpu ops""" | |||
| from .unique import _unique_aicpu | |||
| from .init_data_set_queue import _init_data_set_queue_aicpu | |||
| from .embedding_lookup import _embedding_lookup_aicpu | |||
| from .padding import _padding_aicpu | |||
| @@ -0,0 +1,31 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """Unique op""" | |||
| from mindspore.ops.op_info_register import op_info_register, AiCPURegOp, DataType | |||
| unique_op_info = AiCPURegOp("Unique") \ | |||
| .fusion_type("OPAQUE") \ | |||
| .input(0, "x", "required") \ | |||
| .output(0, "y", "required") \ | |||
| .output(1, "idx", "required") \ | |||
| .dtype_format(DataType.I32_Default, DataType.I32_Default, DataType.I32_Default) \ | |||
| .dtype_format(DataType.I64_Default, DataType.I64_Default, DataType.I64_Default) \ | |||
| .get_op_info() | |||
| @op_info_register(unique_op_info) | |||
| def _unique_aicpu(): | |||
| """Unique AiCPU register""" | |||
| return | |||
| @@ -17,7 +17,7 @@ | |||
| from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType | |||
| matmul_op_info = TBERegOp("MatMul") \ | |||
| .fusion_type("ELEMWISE") \ | |||
| .fusion_type("OPAQUE") \ | |||
| .async_flag(False) \ | |||
| .binfile_name("matmul.so") \ | |||
| .compute_cost(10) \ | |||
| @@ -91,6 +91,7 @@ from ._thor_ops import (CusBatchMatMul, CusCholeskyTrsm, CusFusedAbsMax1, CusImg | |||
| from .sparse_ops import SparseToDense | |||
| __all__ = [ | |||
| 'Unique', | |||
| 'ReverseSequence', | |||
| 'EditDistance', | |||
| 'CropAndResize', | |||
| @@ -597,9 +597,9 @@ class Unique(Primitive): | |||
| containing indices of elements in the input corresponding to the output tensor. | |||
| Examples: | |||
| >>> x = Tensor(np.array([1, 2, 5, 2]), mindspore.float32) | |||
| >>> x = Tensor(np.array([1, 2, 5, 2]), mindspore.int32) | |||
| >>> out = P.Unique()(x) | |||
| (Tensor([1, 2, 5], mindspore.int32), Tensor([0, 1, 2, 1], mindspore.float32)) | |||
| (Tensor([1, 2, 5], mindspore.int32), Tensor([0, 1, 2, 1], mindspore.int32)) | |||
| """ | |||
| @prim_attr_register | |||
| def __init__(self): | |||
| @@ -35,39 +35,5 @@ StatusFactory::StatusFactory() {} | |||
| std::mutex& StatusFactory::GetMutex() { return GetInstance()->rwMutex_; } | |||
| TsdClient* TsdClient::GetInstance() { | |||
| static TsdClient instance; | |||
| return &instance; | |||
| } | |||
| /** | |||
| * @ingroup TsdClient | |||
| * @brief constructor | |||
| */ | |||
| TsdClient::TsdClient() { rankSize_ = 1; } | |||
| /** | |||
| * @ingroup TsdClient | |||
| * @brief destructor | |||
| */ | |||
| TsdClient::~TsdClient() = default; | |||
| /** | |||
| * @ingroup TsdClient | |||
| * @brief command sent by the framework to start the hccp and compute processes | |||
| * @param [in] deviceId : physical device ID passed in by the FMK | |||
| * @param [in] rankSize : rank size passed in by the FMK | |||
| * @return TDT_OK on success, or another error code | |||
| */ | |||
| TDT_StatusT TsdClient::Open(const uint32_t deviceId, const uint32_t rankSize) { return TDT_OK; } | |||
| /** | |||
| * @ingroup TsdClient | |||
| * @brief notify TsdClient to release its resources | |||
| * @param none | |||
| * @return TDT_OK on success, or another error code | |||
| */ | |||
| TDT_StatusT TsdClient::Close() { return TDT_OK; } | |||
| } // namespace tdt | |||
| #endif // TDT_MOCK_H | |||
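Since the mock reduces TsdClient to a singleton whose Open and Close are no-ops returning TDT_OK, exercising it is trivial. A minimal sketch, assuming the mock above is reachable through a tdt_mock.h header and that TDT_OK is visible at the call site as in the stub:

#include "tdt_mock.h"  // assumed include path for the mock above

int main() {
  tdt::TsdClient *client = tdt::TsdClient::GetInstance();
  // With the mock, Open() always succeeds regardless of the arguments.
  if (client->Open(/*deviceId=*/0, /*rankSize=*/1) != TDT_OK) {
    return 1;
  }
  // ... framework work would happen here ...
  return client->Close() == TDT_OK ? 0 : 1;
}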