Merge pull request !23940 from zhaosida/code_clean_master tags/v1.6.0
| @@ -34,6 +34,8 @@ bool AssignKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vect | |||
| return false; | |||
| } | |||
| MS_EXCEPTION_IF_NULL(inputs[0]); | |||
| MS_EXCEPTION_IF_NULL(inputs[1]); | |||
| if (inputs[0]->addr == inputs[1]->addr) { | |||
| MS_LOG(INFO) << "first addr is same with second addr , no need assign"; | |||
| return true; | |||
| @@ -54,6 +56,8 @@ std::vector<TaskInfoPtr> AssignKernel::GenTask(const std::vector<AddressPtr> &in | |||
| } | |||
| stream_id_ = stream_id; | |||
| MS_EXCEPTION_IF_NULL(inputs[0]); | |||
| MS_EXCEPTION_IF_NULL(inputs[1]); | |||
| std::shared_ptr<MemcpyAsyncTaskInfo> task_info_ptr = | |||
| std::make_shared<MemcpyAsyncTaskInfo>(unique_name_, stream_id, inputs[0]->addr, inputs[0]->size, inputs[1]->addr, | |||
| inputs[1]->size, RT_MEMCPY_DEVICE_TO_DEVICE, false); | |||
| @@ -63,6 +63,10 @@ std::vector<TaskInfoPtr> LabelSwitchKernel::GenTask(const std::vector<AddressPtr | |||
| uint32_t stream_id) { | |||
| MS_LOG(INFO) << "LabelSwitchKernel GenTask label size:" << label_size_ << ", stream id:" << stream_id; | |||
| std::vector<TaskInfoPtr> task_info_list; | |||
| if (inputs.empty()) { | |||
| MS_LOG(EXCEPTION) << "LabelSwitchKernel is empty"; | |||
| } | |||
| MS_EXCEPTION_IF_NULL(inputs[0]); | |||
| cond_ = inputs[0]->addr; | |||
| auto task_info_ptr = std::make_shared<LabelSwitchTaskInfo>(unique_name_, stream_id, label_size_, label_list_, cond_); | |||
| MS_EXCEPTION_IF_NULL(task_info_ptr); | |||
| @@ -47,6 +47,8 @@ bool MemCpyAsyncKernel::Launch(const std::vector<AddressPtr> &inputs, const std: | |||
| return false; | |||
| } | |||
| MS_EXCEPTION_IF_NULL(inputs[0]); | |||
| MS_EXCEPTION_IF_NULL(outputs[0]); | |||
| if (inputs[0]->addr == outputs[0]->addr) { | |||
| MS_LOG(INFO) << "input addr is same with output addr , no need exe memcpy async"; | |||
| return true; | |||
| @@ -93,9 +95,9 @@ void MemCpyAsyncKernel::GetInputOutputTotalCount(const AnfNodePtr &anf_node) { | |||
| std::vector<size_t> shape_i = AnfAlgo::GetInputDeviceShape(anf_node, 0); | |||
| size_t total_size = 1; | |||
| for (size_t i = 0; i < shape_i.size(); i++) { | |||
| total_size = total_size * shape_i[i]; | |||
| total_size = SizetMulWithOverflowCheck(total_size, shape_i[i]); | |||
| } | |||
| total_size *= type_size; | |||
| total_size = SizetMulWithOverflowCheck(total_size, type_size); | |||
| MS_LOG(INFO) << "MemCpyAsync size[" << total_size << "]"; | |||
| input_size_list_.emplace_back(total_size); | |||
| output_size_list_.emplace_back(total_size); | |||
| @@ -112,6 +114,8 @@ std::vector<TaskInfoPtr> MemCpyAsyncKernel::GenTask(const std::vector<AddressPtr | |||
| MS_LOG(EXCEPTION) << "MemCpyAsync op output is not one"; | |||
| } | |||
| MS_EXCEPTION_IF_NULL(outputs[0]); | |||
| MS_EXCEPTION_IF_NULL(inputs[0]); | |||
| if (outputs[0]->size < inputs[0]->size) { | |||
| MS_LOG(EXCEPTION) << "rtMemcpyAsync destMax < src size"; | |||
| } | |||
| @@ -127,6 +131,7 @@ std::vector<TaskInfoPtr> MemCpyAsyncKernel::GenTask(const std::vector<AddressPtr | |||
| MS_EXCEPTION_IF_NULL(task_info_ptr); | |||
| return {task_info_ptr}; | |||
| } | |||
| device::DynamicKernelPtr MemCpyAsyncKernel::GenDynamicKernel(const CNodePtr &cnode_ptr, void *stream_ptr) { | |||
| AddressPtrList kernel_inputs; | |||
| AddressPtrList kernel_workspaces; | |||
| @@ -141,6 +146,8 @@ device::DynamicKernelPtr MemCpyAsyncKernel::GenDynamicKernel(const CNodePtr &cno | |||
| MS_LOG(EXCEPTION) << "MemCpyAsync op output is not one, got " << kernel_outputs.size(); | |||
| } | |||
| MS_EXCEPTION_IF_NULL(kernel_outputs[0]); | |||
| MS_EXCEPTION_IF_NULL(kernel_inputs[0]); | |||
| if (kernel_outputs[0]->size < kernel_inputs[0]->size) { | |||
| MS_LOG(EXCEPTION) << "rtMemcpyAsync destMax " << kernel_outputs[0]->size << " is less than src size " | |||
| << kernel_inputs[0]->size; | |||
| @@ -33,6 +33,7 @@ bool ProfilingKernelMod::Init(const AnfNodePtr &anf_node) { | |||
| MS_LOG(INFO) << "[profiling] init profiling kernel mod"; | |||
| auto primitive = AnfAlgo::GetCNodePrimitive(anf_node); | |||
| MS_EXCEPTION_IF_NULL(primitive); | |||
| ValuePtr notify_ptr = primitive->GetAttr(ProfilingUtils::kNotify); | |||
| MS_EXCEPTION_IF_NULL(notify_ptr); | |||
| @@ -68,6 +68,7 @@ void GetRtKelInfo(const CNodePtr &kernel_node, | |||
| auto kernel_build_info_builder = std::make_shared<kernel::KernelBuildInfo::KernelBuildInfoBuilder>(); | |||
| // set input infos | |||
| auto input_num = AnfAlgo::GetInputTensorNum(kernel_node); | |||
| MS_EXCEPTION_IF_NULL(kernel_build_info_builder); | |||
| kernel_build_info_builder->SetInputsFormat(std::vector<std::string>(input_num, kOpFormat_DEFAULT)); | |||
| std::vector<TypeId> input_types = {}; | |||
| for (size_t i = 0; i < input_num; i++) { | |||
| @@ -52,8 +52,13 @@ bool StreamActiveKernel::Launch(const std::vector<AddressPtr> &, const std::vect | |||
| rtStream_t act_stream; | |||
| rtError_t status; | |||
| MS_EXCEPTION_IF_NULL(kernel::TaskStream::GetInstance()); | |||
| auto stream_list = kernel::TaskStream::GetInstance()->gen_stream_list(); | |||
| for (auto index : active_streams_index_) { | |||
| act_stream = kernel::TaskStream::GetInstance()->gen_stream_list()[index]; | |||
| if (index >= stream_list.size()) { | |||
| MS_LOG(EXCEPTION) << "Invalid index: " << index << " stream_list size: " << stream_list.size(); | |||
| } | |||
| act_stream = stream_list[index]; | |||
| status = rtStreamActive(act_stream, stream_ptr); | |||
| if (status != RT_ERROR_NONE) { | |||
| MS_LOG(ERROR) << "Stream active failed!"; | |||
| @@ -65,9 +65,17 @@ bool StreamSwitchKernel::Launch(const std::vector<AddressPtr> &inputs, const std | |||
| MS_LOG(EXCEPTION) << "Stream switch inputs size is " << inputs.size() << ", only support 2"; | |||
| } | |||
| MS_EXCEPTION_IF_NULL(inputs[0]); | |||
| MS_EXCEPTION_IF_NULL(inputs[1]); | |||
| void *loop_cnt = inputs[0]->addr; | |||
| void *ites_per_loop = inputs[1]->addr; | |||
| rtStream_t true_stream_ = kernel::TaskStream::GetInstance()->gen_stream_list()[true_stream_index_]; | |||
| MS_EXCEPTION_IF_NULL(kernel::TaskStream::GetInstance()); | |||
| auto stream_list = kernel::TaskStream::GetInstance()->gen_stream_list(); | |||
| if (true_stream_index_ >= stream_list.size()) { | |||
| MS_LOG(EXCEPTION) << "Invalid true_stream_index_: " << true_stream_index_ | |||
| << " total stream size: " << stream_list.size(); | |||
| } | |||
| rtStream_t true_stream_ = stream_list[true_stream_index_]; | |||
| rtError_t status = rtStreamSwitchEx(loop_cnt, cond_, ites_per_loop, true_stream_, stream_ptr, data_type_); | |||
| if (status != RT_ERROR_NONE) { | |||
| MS_LOG(ERROR) << "Stream switch failed!"; | |||
| @@ -330,6 +330,7 @@ bool TbeDynamicShapeUtil::IsDynamicShapeNode(const AnfNodePtr &anf_node) { | |||
| } | |||
// Records on `cnode` whether it is a dynamic-shape node.
// The result of IsDynamicShapeNode(cnode) is wrapped with MakeValue and stored on the node under
// kAttrIsDynamicShape via AnfAlgo::SetNodeAttr. `cnode` is checked with MS_EXCEPTION_IF_NULL first.
// NOTE(review): the local name below is misspelled ("dyanmic"); it is purely local and kept as-is.
| void TbeDynamicShapeUtil::SetDynamicShapeAttr(const CNodePtr &cnode) { | |||
| MS_EXCEPTION_IF_NULL(cnode); | |||
| auto is_dyanmic_shape = IsDynamicShapeNode(cnode); | |||
| AnfAlgo::SetNodeAttr(kAttrIsDynamicShape, MakeValue(is_dyanmic_shape), cnode); | |||
| } | |||
| @@ -272,6 +272,8 @@ bool TbeKernelJsonCreator::GenTbeSingleKernelJson(const std::shared_ptr<mindspor | |||
| void GenNoneInputDescJson(const std::shared_ptr<OpIOInfo> &input_ptr, size_t input_i, | |||
| std::vector<nlohmann::json> *const input_list) { | |||
| MS_EXCEPTION_IF_NULL(input_ptr); | |||
| MS_EXCEPTION_IF_NULL(input_list); | |||
| nlohmann::json input_desc_json; | |||
| auto in_name = input_ptr->name(); | |||
| input_desc_json[kJName] = in_name + std::to_string(input_i); | |||
| @@ -283,6 +285,9 @@ void TbeKernelJsonCreator::GenValidInputDescJson(const std::shared_ptr<AnfNode> | |||
| bool value, const std::shared_ptr<OpIOInfo> &input_ptr, | |||
| const string &op_input_name, size_t input_i, | |||
| std::vector<nlohmann::json> *const input_list) { | |||
| MS_EXCEPTION_IF_NULL(anf_node); | |||
| MS_EXCEPTION_IF_NULL(input_ptr); | |||
| MS_EXCEPTION_IF_NULL(input_list); | |||
| auto def_format = kOpFormat_NCHW; | |||
| auto dtype = GetDeviceInputType(anf_node, real_input_index); | |||
| auto format = GetDeviceInputFormat(anf_node, real_input_index); | |||
| @@ -814,6 +819,7 @@ std::string TbeKernelJsonCreator::GetDeviceOutputFormat(const AnfNodePtr &anf_no | |||
| } | |||
| void GetInputSizeList(const nlohmann::json &input_json, std::vector<size_t> *input_size_list) { | |||
| MS_EXCEPTION_IF_NULL(input_size_list); | |||
| for (size_t i = 0; i < input_json.size(); i++) { | |||
| for (size_t m = 0; m < input_json[i].size(); m++) { | |||
| size_t size_i = 1; | |||
| @@ -841,6 +847,7 @@ void GetInputSizeList(const nlohmann::json &input_json, std::vector<size_t> *inp | |||
| } | |||
| void GetOutputSizeList(const nlohmann::json &output_json, std::vector<size_t> *output_size_list) { | |||
| MS_EXCEPTION_IF_NULL(output_size_list); | |||
| for (size_t i = 0; i < output_json.size(); i++) { | |||
| for (size_t m = 0; m < output_json[i].size(); m++) { | |||
| size_t size_i = 1; | |||
| @@ -871,6 +878,8 @@ void GetOutputSizeList(const nlohmann::json &output_json, std::vector<size_t> *o | |||
| bool TbeKernelBuild::GetIOSize(const nlohmann::json &kernel_json, std::vector<size_t> *input_size_list, | |||
| std::vector<size_t> *output_size_list, const AnfNodePtr &anf_node) { | |||
| MS_EXCEPTION_IF_NULL(output_size_list); | |||
| MS_EXCEPTION_IF_NULL(input_size_list); | |||
| if (input_size_list == nullptr || output_size_list == nullptr) { | |||
| MS_LOG(ERROR) << "Input size or output size is nullptr"; | |||
| return false; | |||
| @@ -1046,6 +1055,7 @@ void TbeKernelBuild::GenFusionComputeCommonJson(const mindspore::CNodePtr &cnode | |||
| std::string *fusion_kernel_name) { | |||
| MS_EXCEPTION_IF_NULL(compute_op_str); | |||
| MS_EXCEPTION_IF_NULL(fusion_kernel_name); | |||
| MS_EXCEPTION_IF_NULL(cnode); | |||
| // gen others | |||
| auto origin_type = AnfAlgo::GetCNodeName(cnode); | |||
| auto op_info_ptr = tbe::TbeDynamicShapeUtil::FindOp(origin_type, cnode); | |||
| @@ -1163,6 +1173,7 @@ void TbeKernelBuild::GenFusionOutputDescJson(const std::shared_ptr<mindspore::An | |||
| nlohmann::json *output_data_desc) { | |||
| MS_EXCEPTION_IF_NULL(output_desc); | |||
| MS_EXCEPTION_IF_NULL(output_data_desc); | |||
| MS_EXCEPTION_IF_NULL(anf_node); | |||
| GenDescJson(anf_node, node_out_idx, desc_output_idx, output_desc); | |||
| *output_data_desc = *output_desc; | |||
| (*output_data_desc)[kJDtype] = (*output_desc)[kJDataType]; | |||
| @@ -1186,6 +1197,7 @@ void TbeKernelBuild::GenReusedOutputDesc(const std::shared_ptr<mindspore::AnfNod | |||
| bool TbeKernelBuild::GetSpecInputLayers(const std::string &op_name, | |||
| const std::vector<mindspore::AnfNodePtr> &reorder_layer, | |||
| std::map<const AnfNodePtr, FusionDataType> *spec_data_input) { | |||
| MS_EXCEPTION_IF_NULL(spec_data_input); | |||
| if ((op_name == kReluGradV2OpName || op_name == kAddNOpName || op_name == kTensorAddOpName) && | |||
| reorder_layer.empty()) { | |||
| MS_LOG(INFO) << "Fusion error: node(" << op_name << " )'s input is null. "; | |||
| @@ -1381,6 +1393,8 @@ bool TbeKernelBuild::GenFusionComputeInputJson(const mindspore::CNodePtr &cnode, | |||
| std::vector<nlohmann::json> *input_desc_list, size_t *index) { | |||
| MS_EXCEPTION_IF_NULL(cnode); | |||
| MS_EXCEPTION_IF_NULL(input_desc_list); | |||
| MS_EXCEPTION_IF_NULL(layer_iter); | |||
| MS_EXCEPTION_IF_NULL(index); | |||
| std::vector<nlohmann::json> input_desc_list_tmp = {}; | |||
| // 1. input json | |||
| bool is_dynamic_input = IsDynamicInput(cnode); | |||
| @@ -253,6 +253,7 @@ bool ParallelBuildManager::GenSameOpKernelMod() const { | |||
| } | |||
| bool ParallelBuildManager::GenSameFusionOpKernelMod(std::map<int64_t, KernelModPtr> *kernel_mode_ret) const { | |||
| MS_EXCEPTION_IF_NULL(kernel_mode_ret); | |||
| bool ret = true; | |||
| for (const auto &task_info : same_op_list_) { | |||
| auto kernel_pack = TbeUtils::SearchCache(task_info.json_name); | |||
| @@ -87,10 +87,12 @@ bool TbeKernelReduceSelecter::IsReduceSupportNDC1HWC0(SupportFormat *support_for | |||
| } | |||
// Thin wrapper for the kOpFormat_FRAC_Z case: null-checks `support_format`, then returns whatever
// IsFracZAndC1HWNCoC0Common(kOpFormat_FRAC_Z, support_format) returns.
// `support_format` is forwarded unchanged to that helper.
| bool TbeKernelReduceSelecter::IsReduceSupportFracZ(SupportFormat *support_format) const { | |||
| MS_EXCEPTION_IF_NULL(support_format); | |||
| return IsFracZAndC1HWNCoC0Common(kOpFormat_FRAC_Z, support_format); | |||
| } | |||
// Thin wrapper for the kOpFormat_C1HWNCoC0 case: null-checks `support_format`, then returns whatever
// IsFracZAndC1HWNCoC0Common(kOpFormat_C1HWNCoC0, support_format) returns.
// `support_format` is forwarded unchanged to that helper.
| bool TbeKernelReduceSelecter::IsReduceSupportC1HWNCoC0(SupportFormat *support_format) const { | |||
| MS_EXCEPTION_IF_NULL(support_format); | |||
| return IsFracZAndC1HWNCoC0Common(kOpFormat_C1HWNCoC0, support_format); | |||
| } | |||
| @@ -132,7 +134,7 @@ bool TbeKernelReduceSelecter::IsFracZAndC1HWNCoC0Common(const std::string &forma | |||
| void TbeKernelReduceSelecter::GetReduceAttrKeepDim() { | |||
| if (!AnfAlgo::HasNodeAttr(kAttrKeepDims, cnode_ptr_)) { | |||
| MS_LOG(INFO) << "This node does't have keep_attr."; | |||
| MS_LOG(INFO) << "This node doesn't have keep_attr."; | |||
| keep_dims_ = false; | |||
| return; | |||
| } | |||
| @@ -352,7 +352,7 @@ bool TbeKernelSelect::GenBuilderItem(bool is_input, size_t kernel_build_info_ind | |||
| value_depends->emplace_back(value_depend); | |||
| } | |||
| dynamic_input_index++; | |||
| real_io_tensor_index += LongToSize(dynamic_input_size); | |||
| real_io_tensor_index = SizetAddWithOverflowCheck(real_io_tensor_index, LongToSize(dynamic_input_size)); | |||
| } else { | |||
| if (ios_info.size() != 1) { | |||
| MS_LOG(EXCEPTION) << "if output is dynamic, so output must has one output."; | |||
| @@ -363,7 +363,7 @@ bool TbeKernelSelect::GenBuilderItem(bool is_input, size_t kernel_build_info_ind | |||
| reshape_types->emplace_back(reshape_type); | |||
| value_depends->emplace_back(value_depend); | |||
| } | |||
| real_io_tensor_index += real_io_tensor_num; | |||
| real_io_tensor_index = SizetAddWithOverflowCheck(real_io_tensor_index, real_io_tensor_num); | |||
| } | |||
| } else if (io_param_type == kParamTypeRequre || io_param_type == kParamTypeOptional) { | |||
| // require or optional io | |||
| @@ -472,6 +472,11 @@ std::string TbeKernelSelect::OpSelectFormat() { | |||
| void TbeKernelSelect::CreateNewOpInfo(const mindspore::kernel::OpInfo &op_info, const SupportFormat &support_format, | |||
| mindspore::kernel::OpInfo *op_info_new) { | |||
| MS_EXCEPTION_IF_NULL(op_info_new); | |||
| if (support_format.input_format.empty() || support_format.output_format.empty()) { | |||
| MS_LOG(EXCEPTION) << "Support input format and output format size can not be empty, but the input format size is: " | |||
| << support_format.input_format.size() | |||
| << ", output format size is: " << support_format.output_format.size(); | |||
| } | |||
| if (op_info.inputs_ptr().size() != support_format.input_format[0].size() || | |||
| op_info.outputs_ptr().size() != support_format.output_format[0].size()) { | |||
| MS_LOG(EXCEPTION) << "BroadCast input/output size not match, op info input size:" << op_info.inputs_ptr().size() | |||
| @@ -45,8 +45,10 @@ static bool CheckStridedSlice(const CNodePtr &cnode) { | |||
| auto shrink_axis_mask = static_cast<int>(AnfAlgo::GetNodeAttr<int64_t>(cnode, kAttrShrinkAxisMask)); | |||
| AnfNodePtr input = cnode->input(1); | |||
| int input_dims = 0; | |||
| MS_EXCEPTION_IF_NULL(input); | |||
| if (input->isa<ValueNode>()) { | |||
| ValuePtr input_value = input->cast<ValueNodePtr>()->value(); | |||
| MS_EXCEPTION_IF_NULL(input_value); | |||
| if (!input_value->isa<Tensor>()) { | |||
| MS_LOG(EXCEPTION) << "For 'StrideSlice', the first input value should be a tensor, but got " | |||
| << input_value->ToString(); | |||
| @@ -54,6 +56,7 @@ static bool CheckStridedSlice(const CNodePtr &cnode) { | |||
| input_dims = SizeToInt(input_value->cast<TensorPtr>()->shape().size()); | |||
| } else if (input->isa<CNode>() || input->isa<Parameter>()) { | |||
| AbstractBasePtr input_abstract = input->abstract(); | |||
| MS_EXCEPTION_IF_NULL(input_abstract); | |||
| if (!input_abstract->isa<AbstractTensor>()) { | |||
| MS_LOG(EXCEPTION) << "For 'StrideSlice', the first input value should be a tensor, but got " | |||
| << input_abstract->ToString(); | |||
| @@ -277,6 +277,7 @@ int KernelManager::BinaryRegister(const mindspore::kernel::FlexArray &kernel_buf | |||
| uintptr_t KernelManager::GenFuncStub(const mindspore::kernel::KernelPack &kernel_pack, bool force_reload, | |||
| uint32_t *block_dim, const bool dynamic_flag, void **handle, | |||
| std::string *origin_key) { | |||
| MS_EXCEPTION_IF_NULL(block_dim); | |||
| auto kernel = kernel_pack.GetKernel(); | |||
| if (kernel == nullptr) { | |||
| MS_LOG(EXCEPTION) << "Invalid kernel pack, json or kernel is nullptr."; | |||
| @@ -45,6 +45,7 @@ AnfNodePtr CreateReshapeNode(const FuncGraphPtr &func_graph, const AnfNodePtr &i | |||
| trans_inputs.emplace_back(NewValueNode(prim)); | |||
| trans_inputs.emplace_back(input_node); | |||
| auto reshape = func_graph->NewCNode(trans_inputs); | |||
| MS_EXCEPTION_IF_NULL(reshape); | |||
| AnfAlgo::SetOutputInferTypeAndShape({AnfAlgo::GetOutputInferDataType(input_node, 0)}, {dst_shape}, reshape.get()); | |||
| AnfAlgo::SetNodeAttr(kAttrVisited, MakeValue(true), reshape); | |||
| AnfAlgo::SetNodeAttr(kAttrShape, MakeValue(dst_shape), reshape); | |||
| @@ -72,6 +73,7 @@ void SetTransNodeAttr(const CNodePtr &trans_node) { | |||
| void ReFreshInferShape(const AnfNodePtr &trans_node, const AnfNodePtr &node) { | |||
| MS_EXCEPTION_IF_NULL(trans_node); | |||
| MS_EXCEPTION_IF_NULL(node); | |||
| auto real_input_node = AnfAlgo::VisitKernelWithReturnType(node, 0).first; | |||
| if (!real_input_node->isa<CNode>()) { | |||
| return; | |||
| @@ -106,6 +108,7 @@ void SetGroupAttr(const ParameterPtr &param, const AnfNodePtr &out_trans, const | |||
| AnfNodePtr GetTransInputNodePtr(const FuncGraphPtr &func_graph, const CNodePtr &node, size_t index, | |||
| const KernelSelectPtr &kernel_select) { | |||
| MS_EXCEPTION_IF_NULL(node); | |||
| MS_EXCEPTION_IF_NULL(func_graph); | |||
| auto input_node = AnfAlgo::GetInputNode(node, index); | |||
| if (HasAbstractMonad(input_node)) { | |||
| // No transfer for monad inputs. | |||
| @@ -136,6 +139,7 @@ AnfNodePtr GetTransInputNodePtr(const FuncGraphPtr &func_graph, const CNodePtr & | |||
| AnfNodePtr InsertTransOpForSingleOutput(const FuncGraphPtr &func_graph, const AnfNodePtr &node, | |||
| const KernelSelectPtr &kernel_select) { | |||
| MS_EXCEPTION_IF_NULL(node); | |||
| MS_EXCEPTION_IF_NULL(func_graph); | |||
| std::string output_format = AnfAlgo::GetOutputFormat(node, 0); | |||
| std::vector<size_t> origin_shape = AnfAlgo::GetOutputInferShape(node, 0); | |||
| if (output_format == kOpFormat_NC1KHKWHWC0) { | |||
| @@ -242,6 +246,7 @@ void RefreshKernelBuildInfo(const std::string &input_format, const std::string & | |||
| auto ori_build_info = AnfAlgo::GetSelectKernelBuildInfo(trans_data); | |||
| MS_EXCEPTION_IF_NULL(ori_build_info); | |||
| auto builder = std::make_shared<kernel::KernelBuildInfo::KernelBuildInfoBuilder>(ori_build_info); | |||
| MS_EXCEPTION_IF_NULL(builder); | |||
| builder->SetInputsFormat({input_format}); | |||
| builder->SetInputsReshapeType({reshape_type}); | |||
| builder->SetOutputsReshapeType({reshape_type}); | |||
| @@ -406,10 +411,12 @@ AnfNodePtr InsertTransOpForInput(const FuncGraphPtr &func_graph, const AnfNodePt | |||
| CNodePtr InsertCastForInput(const FuncGraphPtr &func_graph, const CNodePtr &cnode) { | |||
| MS_EXCEPTION_IF_NULL(cnode); | |||
| MS_EXCEPTION_IF_NULL(func_graph); | |||
| std::vector<AnfNodePtr> new_inputs = {AnfAlgo::GetCNodePrimitiveNode(cnode)}; | |||
| size_t in_num = AnfAlgo::GetInputNum(cnode); // include monads. | |||
| for (size_t input_index = 0; input_index < in_num; ++input_index) { | |||
| auto cur_input = AnfAlgo::GetInputNode(cnode, input_index); | |||
| MS_EXCEPTION_IF_NULL(cur_input); | |||
| if (HasAbstractMonad(cur_input)) { | |||
| // No cast for monad inputs. | |||
| new_inputs.push_back(cur_input); | |||
| @@ -421,6 +428,7 @@ CNodePtr InsertCastForInput(const FuncGraphPtr &func_graph, const CNodePtr &cnod | |||
| auto kernel_with_index = AnfAlgo::VisitKernelWithReturnType(cur_input, 0); | |||
| auto real_input_node = kernel_with_index.first; | |||
| MS_EXCEPTION_IF_NULL(real_input_node); | |||
| if (kernel::IsWeightBoundary(real_input_node)) { | |||
| // weight | |||
| origin_type = AnfAlgo::GetPrevNodeOutputPrecision(cnode, input_index); | |||
| @@ -57,6 +57,7 @@ const AnfNodePtr InsertPlaceholderForDynamicGRUV2::Process(const FuncGraphPtr &f | |||
| if (item != none_index.end()) { | |||
| auto value = std::make_shared<None>(); | |||
| auto value_node = NewValueNode(value); | |||
| MS_EXCEPTION_IF_NULL(value_node); | |||
| value_node->set_abstract(std::make_shared<abstract::AbstractNone>()); | |||
| auto new_node = kernel_graph->NewValueNode(value_node); | |||
| kernel_graph->AddValueNodeToGraph(new_node); | |||
| @@ -56,6 +56,7 @@ const AnfNodePtr InsertPlaceholderForDynamicRNN::Process(const FuncGraphPtr &fun | |||
| if (in_idx == kInsertIdx) { | |||
| auto value = std::make_shared<None>(); | |||
| auto value_node = NewValueNode(value); | |||
| MS_EXCEPTION_IF_NULL(value_node); | |||
| value_node->set_abstract(std::make_shared<abstract::AbstractNone>()); | |||
| auto new_node = kernel_graph->NewValueNode(value_node); | |||
| new_inputs.push_back(new_node); | |||
| @@ -44,6 +44,7 @@ bool InsertDependForAllGather::Run(const FuncGraphPtr &graph) { | |||
| for (int64_t i = 0; i < SizeToInt(all_gather_node.size()) - 1; ++i) { | |||
| auto current_node = iter->second; | |||
| auto next_node = (++iter)->second; | |||
| MS_EXCEPTION_IF_NULL(next_node); | |||
| auto next_cnode = next_node->cast<CNodePtr>(); | |||
| std::vector<AnfNodePtr> inputs = {NewValueNode(std::make_shared<Primitive>(prim::kPrimDepend->name())), | |||
| AnfAlgo::GetInputNode(next_cnode, 0), current_node}; | |||
| @@ -42,7 +42,6 @@ std::vector<AnfNodePtr> SplitInputsForReduceScatter::InsertSplitForInput(const F | |||
| size_splits.push_back(output_node_shape[0]); | |||
| } | |||
| AnfAlgo::SetOutputInferTypeAndShape(dtypes, shapes, split.get()); | |||
| AnfAlgo::SetNodeAttr("split_dim", MakeValue(0L), split); | |||
| AnfAlgo::SetNodeAttr("num_split", MakeValue(rank_size), split); | |||
| AnfAlgo::SetNodeAttr("size_splits", MakeValue(size_splits), split); | |||
| @@ -73,7 +72,6 @@ AnfNodePtr SplitInputsForReduceScatter::RearrangeInputsForReduceScatter(const Fu | |||
| auto reduce_scatter = func_graph->NewCNode(reduce_scatter_inputs); | |||
| MS_EXCEPTION_IF_NULL(reduce_scatter); | |||
| reduce_scatter->set_abstract(node->abstract()); | |||
| AnfAlgo::CopyNodeAttrs(node, reduce_scatter); | |||
| AnfAlgo::SetNodeAttr(kAttrFusion, MakeValue(1L), reduce_scatter); | |||
| kernel_select_->SelectKernel(reduce_scatter); | |||
| @@ -95,6 +95,7 @@ bool InputCheck(const AnfNodePtr &node) { | |||
| auto in_nums = AnfAlgo::GetInputTensorNum(node); | |||
| for (size_t i = 0; i < in_nums; i++) { | |||
| auto in_node = VisitSplitKernel(AnfAlgo::GetInputNode(cnode, i), 0).first; | |||
| MS_EXCEPTION_IF_NULL(in_node); | |||
| if (in_node->isa<Parameter>() || in_node->isa<ValueNode>()) { | |||
| MS_LOG(INFO) << "Input is a Parameter or ValueNode, can not optimizer."; | |||
| return false; | |||
| @@ -104,6 +105,7 @@ bool InputCheck(const AnfNodePtr &node) { | |||
| MS_EXCEPTION_IF_NULL(in_cnode); | |||
| auto in_node_name = AnfAlgo::GetCNodeName(in_cnode); | |||
| auto trans_input = AnfAlgo::VisitKernel(in_node, 0).first; | |||
| MS_EXCEPTION_IF_NULL(trans_input); | |||
| if (in_node_name == kTransDataOpName && (trans_input->isa<Parameter>() || trans_input->isa<ValueNode>())) { | |||
| MS_LOG(INFO) << "Data->TransData->split, can not optimizer."; | |||
| return false; | |||
| @@ -38,6 +38,7 @@ const int64_t kAxisDim = 4; | |||
// Lookup table keyed by format name, yielding the ConvertFunction registered for that format.
// Both kOpFormat_FRAC_Z and kOpFormat_C1HWNCoC0 map to the same handler, ConvertReduceAttrFraczAnd6HD.
| const std::map<std::string, ConvertFunction> kReduceConvertMap = {{kOpFormat_FRAC_Z, ConvertReduceAttrFraczAnd6HD}, | |||
| {kOpFormat_C1HWNCoC0, ConvertReduceAttrFraczAnd6HD}}; | |||
| void SafeCheckFunction(const CNodePtr &cnode, const std::vector<int64_t> &reduce_axis) { | |||
| MS_EXCEPTION_IF_NULL(cnode); | |||
| if (reduce_axis.empty()) { | |||
| MS_LOG(EXCEPTION) << "The node " << cnode->DebugString() << "'s reduce axis got a empty vector"; | |||
| } | |||
| @@ -65,6 +66,7 @@ void DynamicAttrUpdate(const AnfNodePtr &node) { | |||
| } | |||
| void ConvertReduceAttrFraczAnd6HD(const CNodePtr &cnode) { | |||
| MS_EXCEPTION_IF_NULL(cnode); | |||
| auto axis = kernel::GetReduceAttrAxis(cnode); | |||
| std::vector<int64_t> convert_axis; | |||
| SafeCheckFunction(cnode, axis); | |||
| @@ -91,6 +91,7 @@ const AnfNodePtr CheckConsistency::Process(const FuncGraphPtr &, const AnfNodePt | |||
| } | |||
| CNodePtr cnode = node->cast<CNodePtr>(); | |||
| MS_EXCEPTION_IF_NULL(cnode); | |||
| size_t in_num = AnfAlgo::GetInputTensorNum(cnode); | |||
| for (size_t i = 0; i < in_num; ++i) { | |||
| if (!CheckFormatForConsistency(cnode, i) || !CheckDataTypeForConsistency(cnode, i)) { | |||
| @@ -50,14 +50,17 @@ const AnfNodePtr ConvertCastFormat::Process(const FuncGraphPtr &func_graph, cons | |||
| continue; | |||
| } | |||
| auto cast_node = input_node->cast<CNodePtr>(); | |||
| MS_EXCEPTION_IF_NULL(cast_node); | |||
| ChangeCastFormat(cast_node, func_graph); | |||
| } | |||
| return nullptr; | |||
| } | |||
| void ConvertCastFormat::SetCastFormat(const CNodePtr &cast_node, const string &format) const { | |||
| MS_EXCEPTION_IF_NULL(cast_node); | |||
| auto info_builder = | |||
| std::make_shared<kernel::KernelBuildInfo::KernelBuildInfoBuilder>(AnfAlgo::GetSelectKernelBuildInfo(cast_node)); | |||
| MS_EXCEPTION_IF_NULL(info_builder); | |||
| info_builder->SetInputsFormat({format}); | |||
| info_builder->SetOutputsFormat({format}); | |||
| AnfAlgo::SetSelectKernelBuildInfo(info_builder->Build(), cast_node.get()); | |||
| @@ -65,6 +68,7 @@ void ConvertCastFormat::SetCastFormat(const CNodePtr &cast_node, const string &f | |||
| void ConvertCastFormat::ChangeCastFormat(const CNodePtr &cast_node, const FuncGraphPtr &func_graph) const { | |||
| MS_EXCEPTION_IF_NULL(cast_node); | |||
| MS_EXCEPTION_IF_NULL(func_graph); | |||
| auto input_node_name = AnfAlgo::GetCNodeName(cast_node); | |||
| if (input_node_name != prim::kPrimCast->name()) { | |||
| return; | |||
| @@ -42,6 +42,7 @@ const AnfNodePtr ConvertUnSupportNodeToAICPU::Process(const mindspore::FuncGraph | |||
| return nullptr; | |||
| } else if (supported_checker_->CheckAICPUSupported(node, kernel_builder_info)) { | |||
| auto builder = std::make_shared<kernel::KernelBuildInfo::KernelBuildInfoBuilder>(kernel_builder_info); | |||
| MS_EXCEPTION_IF_NULL(builder); | |||
| builder->SetKernelType(AICPU_KERNEL); | |||
| AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), node.get()); | |||
| AnfAlgo::SetNodeAttr(kAttrIsAICPUKernel, MakeValue(true), node); | |||
| @@ -58,11 +58,13 @@ CNodePtr Insert(const FuncGraphPtr &func_graph, const CNodePtr &cnode) { | |||
| new_transpose_node = | |||
| NewTransOpNode(func_graph, AnfAlgo::GetInputNode(transdata_node->cast<CNodePtr>(), 0), kernel_select, false, | |||
| prim::kPrimTranspose->name(), std::vector<int64_t>{2, 3, 1, 0}); | |||
| MS_EXCEPTION_IF_NULL(new_transpose_node); | |||
| AnfAlgo::SetNodeAttr("nop_op", MakeValue(true), new_transpose_node); | |||
| RefreshKernelBuildInfo(input_format, kOpFormat_HWCN, new_transpose_node); | |||
| // trans hwcn to output_format | |||
| new_transdata_node = | |||
| NewTransOpNode(func_graph, new_transpose_node, kernel_select, false, prim::kPrimTransData->name()); | |||
| MS_EXCEPTION_IF_NULL(new_transdata_node); | |||
| RefreshKernelBuildInfo(kOpFormat_HWCN, output_format, new_transdata_node, padding_axis); | |||
| new_transdata_node->set_abstract(transdata_node->abstract()); | |||
| new_node = new_transdata_node; | |||
| @@ -120,6 +120,8 @@ bool CheckIndexOutput(const CNodePtr &node, const std::shared_ptr<kernel::Kernel | |||
| } | |||
| void ChangeNodeInferInfo(const CNodePtr &cnode, const CNodePtr &cast, const size_t cast_index) { | |||
| MS_EXCEPTION_IF_NULL(cnode); | |||
| MS_EXCEPTION_IF_NULL(cast); | |||
| using Shape = std::vector<size_t>; | |||
| auto cast_dtype = AnfAlgo::GetOutputInferDataType(cast, 0); | |||
| auto cast_shape = AnfAlgo::GetOutputInferShape(cast, 0); | |||
| @@ -172,6 +174,7 @@ AnfNodePtr MergeCastToNextOp(const FuncGraphPtr &graph, const CNodePtr &node, co | |||
| return nullptr; | |||
| } | |||
| auto ori_kernel_info = AnfAlgo::GetSelectKernelBuildInfo(next_node); | |||
| MS_EXCEPTION_IF_NULL(ori_kernel_info); | |||
| MS_LOG(INFO) << "Found alternative kernel info for current anf kernel " << next_cnode->DebugString() | |||
| << "ori kernel info" << ori_kernel_info->ToString() << "alternative kernel info" | |||
| << (*alternative_kernel_info)->ToString(); | |||
| @@ -244,6 +247,7 @@ AnfNodePtr MergeCastToPriorOp(const FuncGraphPtr &graph, const CNodePtr &cur_nod | |||
| return nullptr; | |||
| } | |||
| auto ori_kernel_info = AnfAlgo::GetSelectKernelBuildInfo(prior_op); | |||
| MS_EXCEPTION_IF_NULL(ori_kernel_info); | |||
| MS_LOG(INFO) << "Found alternative kernel info for current anf kernel " << prior_op->DebugString() | |||
| << "ori kernel info" << ori_kernel_info->ToString() << "alternative kernel info" | |||
| << (*kernel_info_it)->ToString(); | |||
| @@ -118,6 +118,7 @@ void RectifyDoMaskKernelInfo::RectifyDropOutDoMaskKernelInfo(const std::vector<C | |||
| if (AnfAlgo::GetInputFormat(do_mask, 0) != format) { | |||
| auto builder = | |||
| std::make_shared<kernel::KernelBuildInfo::KernelBuildInfoBuilder>(AnfAlgo::GetSelectKernelBuildInfo(do_mask)); | |||
| MS_EXCEPTION_IF_NULL(builder); | |||
| builder->SetInputFormat(format, 0); | |||
| builder->SetOutputFormat(format, 0); | |||
| AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), do_mask.get()); | |||
| @@ -139,6 +140,7 @@ AnfNodePtr RectifyDoMaskKernelInfo::RectifyKernelInfoInPynativeProcess(const Anf | |||
| if (do_mask_input_format != kOpFormat_DEFAULT) { | |||
| auto builder = | |||
| std::make_shared<kernel::KernelBuildInfo::KernelBuildInfoBuilder>(AnfAlgo::GetSelectKernelBuildInfo(node)); | |||
| MS_EXCEPTION_IF_NULL(builder); | |||
| builder->SetInputFormat(kOpFormat_DEFAULT, 0); | |||
| builder->SetOutputFormat(kOpFormat_DEFAULT, 0); | |||
| AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), node.get()); | |||
| @@ -40,6 +40,7 @@ const AnfNodePtr TransOpFormatRefine::Process(const FuncGraphPtr &func_graph, co | |||
| auto out_format = AnfAlgo::GetOutputFormat(node, 0); | |||
| auto builder = | |||
| std::make_shared<kernel::KernelBuildInfo::KernelBuildInfoBuilder>(AnfAlgo::GetSelectKernelBuildInfo(node)); | |||
| MS_EXCEPTION_IF_NULL(builder); | |||
| if (in_format == kOpFormat_DEFAULT && k3DFormatSet.find(out_format) != k3DFormatSet.end()) { | |||
| builder->SetInputsFormat({kOpFormat_NCDHW}); | |||
| builder->SetOutputsFormat({out_format}); | |||
| @@ -38,6 +38,7 @@ constexpr auto kSeed1 = "Seed1"; | |||
// NOTE(review): presumably the number of bits in a uint8 — use site not visible here; confirm.
| constexpr auto kUint8BitSize = 8; | |||
// NOTE(review): kMaskAlignNum / kMaskMultiNum look like sizing factors for the dropout mask —
// their use is not visible in this excerpt; confirm before relying on this.
| constexpr int64_t kMaskAlignNum = 128; | |||
| constexpr int64_t kMaskMultiNum = 16; | |||
// Input tensor count passed to CheckCNodeInputSize for the DropoutGrad node.
| constexpr size_t kDropoutGradInputTensorNum = 2; | |||
| constexpr size_t kFloat16Len = 2; // size of float16 | |||
| constexpr size_t kInt64Len = 8; // size of int64 | |||
| @@ -69,7 +70,9 @@ ValueNodePtr CreateKeepPorbValueNode(const FuncGraphPtr &func_graph, const AnfNo | |||
| if (!AnfAlgo::HasNodeAttr(kKeepProb, cnode)) { | |||
| MS_LOG(EXCEPTION) << "Dropout node does not have attr: keep_prob."; | |||
| } | |||
| if (AnfAlgo::GetCNodePrimitive(cnode)->ToString() == kDropoutOpName) { | |||
| auto prim = AnfAlgo::GetCNodePrimitive(cnode); | |||
| MS_EXCEPTION_IF_NULL(prim); | |||
| if (prim->ToString() == kDropoutOpName) { | |||
| if (!AnfAlgo::HasNodeAttr(kSeed0, cnode) || !AnfAlgo::HasNodeAttr(kSeed1, cnode)) { | |||
| MS_LOG(EXCEPTION) << "Dropout node does not have attr: seed0 or seed1."; | |||
| } | |||
| @@ -279,7 +282,7 @@ const AnfNodePtr DropoutUnifyMindIR0::Process(const FuncGraphPtr &func_graph, co | |||
| MS_EXCEPTION_IF_NULL(func_graph); | |||
| MS_EXCEPTION_IF_NULL(node); | |||
| auto tuple_cnode = node->cast<CNodePtr>(); | |||
| MS_EXCEPTION_IF_NULL(tuple_cnode); | |||
| CheckCNodeInputSize(tuple_cnode, kTupleGetItemInputTensorNum); | |||
| if (!NeedUpdate(tuple_cnode)) { | |||
| return nullptr; | |||
| } | |||
| @@ -332,6 +335,7 @@ const AnfNodePtr DropoutUnifyMindIR1::Process(const FuncGraphPtr &func_graph, co | |||
| auto inputx_type_id = GetInputXDataType(dropout_node); | |||
| auto keep_prob_value = CreateKeepPorbValueNode(func_graph, dropout_node, inputx_type_id); | |||
| CheckCNodeInputSize(dropout_node, kDropoutInputTensorNum); | |||
| auto dropout_input = dropout_node->input(kIndex1); | |||
| auto input_shape = GetDropoutInputShape(dropout_input); | |||
| // CreateDropoutGenMask | |||
| @@ -363,7 +367,7 @@ const AnfNodePtr DropoutGradUnifyMindIR::Process(const FuncGraphPtr &func_graph, | |||
| MS_EXCEPTION_IF_NULL(func_graph); | |||
| MS_EXCEPTION_IF_NULL(node); | |||
| auto dropout_grad_cnode = node->cast<CNodePtr>(); | |||
| MS_EXCEPTION_IF_NULL(dropout_grad_cnode); | |||
| CheckCNodeInputSize(dropout_grad_cnode, kDropoutGradInputTensorNum); | |||
| auto grad_input_type_id = GetInputXDataType(dropout_grad_cnode); | |||
| auto grad_input_shape = GetInputXShape(dropout_grad_cnode); | |||
| @@ -26,6 +26,7 @@ | |||
| namespace mindspore { | |||
| namespace opt { | |||
| namespace { | |||
| constexpr size_t kMaxPoolGradWithArgmaxInputTensorNum = 3; | |||
| constexpr size_t kMaxPoolGradWithArgmaxInputNum = 4; | |||
| constexpr size_t kMaxPoolWithArgmaxShape = 4; | |||
| constexpr size_t kAlignBytes = 16; | |||
| @@ -40,10 +41,7 @@ bool IsC(const BaseRef &n) { | |||
| } | |||
| CNodePtr GetMaxPoolWithArgmax(const CNodePtr &maxpool_grad_with_argmax) { | |||
| MS_EXCEPTION_IF_NULL(maxpool_grad_with_argmax); | |||
| if (maxpool_grad_with_argmax->inputs().size() != kMaxPoolGradWithArgmaxInputNum) { | |||
| MS_LOG(EXCEPTION) << "MaxPoolGradWithArgmax has wrong input size."; | |||
| } | |||
| CheckCNodeInputSize(maxpool_grad_with_argmax, kMaxPoolGradWithArgmaxInputTensorNum); | |||
| auto tuple_getitem0_anf = maxpool_grad_with_argmax->input(kIndex3); | |||
| MS_EXCEPTION_IF_NULL(tuple_getitem0_anf); | |||
| return tuple_getitem0_anf->cast<CNodePtr>(); | |||
| @@ -292,7 +292,7 @@ CNodePtr CreateTile(const FuncGraphPtr &graph, const CNodePtr &sparse_softmax_no | |||
| } | |||
| // feature map set | |||
| std::vector<size_t> feature_map_input_indexs; | |||
| feature_map_input_indexs.push_back(0); | |||
| feature_map_input_indexs.emplace_back(0); | |||
| AnfAlgo::SetNodeAttr(kIsFeatureMapInputList, MakeValue(feature_map_input_indexs), tile_node); | |||
| return tile_node; | |||
| } | |||
| @@ -37,6 +37,7 @@ std::unordered_map<std::string, std::unordered_set<std::string>> MarkOp{ | |||
| bool CheckOP(const FuncGraphManagerPtr &manager, const AnfNodePtr &cnode, const std::unordered_set<std::string> &set) { | |||
| for (const auto &node_index : manager->node_users()[cnode]) { | |||
| auto output = node_index.first; | |||
| MS_EXCEPTION_IF_NULL(output); | |||
| if (AnfAlgo::CheckPrimitiveType(output, prim::kPrimTupleGetItem)) { | |||
| if (CheckOP(manager, output, set)) { | |||
| return true; | |||
| @@ -102,7 +102,9 @@ bool BackendCSE::CheckReplace(const AnfNodePtr &main, const AnfNodePtr &node, bo | |||
| if (main->isa<ValueNode>() && node->isa<ValueNode>()) { | |||
| auto main_value = GetValueNode(main); | |||
| MS_EXCEPTION_IF_NULL(main_value); | |||
| auto node_value = GetValueNode(node); | |||
| MS_EXCEPTION_IF_NULL(node_value); | |||
| if (main_value->isa<Primitive>() && node_value->isa<Primitive>()) { | |||
| return false; | |||
| } else if (main_value->isa<tensor::Tensor>() && node_value->isa<tensor::Tensor>()) { | |||
| @@ -52,6 +52,9 @@ kernel::KernelBuildInfoPtr GenerateKernelBuildInfo(const CommunicationOpInfo &co | |||
| rank_size = AnfAlgo::GetNodeAttr<int64_t>(cnode, kAttrRankSize); | |||
| } | |||
| size_t rank_size_t = LongToSize(rank_size); | |||
| if (rank_size_t == 0) { | |||
| MS_LOG(EXCEPTION) << "Rank size should not be zero."; | |||
| } | |||
| MS_EXCEPTION_IF_NULL(cnode); | |||
| size_t input_num = AnfAlgo::GetInputTensorNum(cnode); | |||
| for (size_t input_index = 0; input_index < input_num; ++input_index) { | |||
| @@ -336,6 +339,9 @@ AnfNodePtr CommunicationOpFusion::CreateFusedCommunicationOp(const FuncGraphPtr | |||
| rank_size = AnfAlgo::GetNodeAttr<int64_t>(final_node, kAttrRankSize); | |||
| } | |||
| size_t rank_size_t = LongToSize(rank_size); | |||
| if (rank_size_t == 0) { | |||
| MS_LOG(EXCEPTION) << "Rank size should not be zero."; | |||
| } | |||
| size_t output_num = node_num * rank_size_t; | |||
| std::vector<TypeId> dtypes(output_num, AnfAlgo::GetOutputInferDataType(final_node, 0)); | |||
| std::vector<std::vector<size_t>> shapes; | |||
| @@ -31,6 +31,7 @@ const size_t strides_index = 5; | |||
| bool GetStridesValues(const CNodePtr &strided_slice_grad, ValuePtrList *strides_values) { | |||
| MS_EXCEPTION_IF_NULL(strided_slice_grad); | |||
| MS_EXCEPTION_IF_NULL(strides_values); | |||
| constexpr size_t kSizeChange = 6; | |||
| if (strided_slice_grad->size() < kSizeChange) { | |||
| MS_LOG(DEBUG) << "Op strided_slice_grad's inputs size less than 6, graph not changed"; | |||
| @@ -31,6 +31,7 @@ int64_t SplitTupleInputs(const FuncGraphPtr &graph, const AnfNodePtr &tuple_inpu | |||
| std::vector<AnfNodePtr> *plant_inputs) { | |||
| if (!AnfAlgo::IsTupleOutput(tuple_input)) { | |||
| auto abs = tuple_input->abstract(); | |||
| MS_EXCEPTION_IF_NULL(abs); | |||
| MS_LOG(WARNING) << "The Function only split the output type is tuple type but got" << abs->ToString(); | |||
| return -1; | |||
| } | |||
| @@ -109,6 +109,9 @@ const AnfNodePtr ProcessMatchedNodes(const FuncGraphPtr &func_graph, const CNode | |||
| (void)manager->Replace(prev_cnode, prev_cnode->input(1)); | |||
| return cnode->input(1); | |||
| } else { // rebuild the pass nodes | |||
| if (pass_size < kOffset) { | |||
| MS_LOG(ERROR) << "pass_size should >= 2"; | |||
| } | |||
| for (size_t idx = pass_size - kOffset; idx > 0; --idx) { | |||
| auto new_node = func_graph->NewCNode((*pass_vector)[idx].first->inputs()); | |||
| if (idx == pass_size - kOffset) { | |||
| @@ -68,6 +68,7 @@ bool IsRealKernelCNode(const CNodePtr &cnode) { | |||
| prim::kPrimReturn, prim::kPrimPartial, prim::kPrimDepend, | |||
| prim::kPrimUpdateState, prim::kPrimLoad}; | |||
| #endif | |||
| MS_EXCEPTION_IF_NULL(cnode); | |||
| if (cnode->inputs().empty()) { | |||
| MS_LOG(EXCEPTION) << "Illegal null input of cnode(%s)" << cnode->DebugString(); | |||
| } | |||
| @@ -1282,6 +1283,8 @@ void AnfRuntimeAlgorithm::SetOutputInferTypeAndShape(const std::vector<TypeId> & | |||
| } | |||
| // Transfer the abstract held by from_node onto to_node. | |||
| // Both pointers are validated with MS_EXCEPTION_IF_NULL before use. | |||
| void AnfRuntimeAlgorithm::CopyAbstract(const AnfNodePtr &from_node, AnfNode *to_node) { | |||
| MS_EXCEPTION_IF_NULL(from_node); | |||
| MS_EXCEPTION_IF_NULL(to_node); | |||
| const auto &source_abstract = from_node->abstract(); | |||
| to_node->set_abstract(source_abstract); | |||
| } | |||
| @@ -1555,6 +1558,7 @@ bool AnfRuntimeAlgorithm::IsFeatureMapOutput(const AnfNodePtr &node) { | |||
| } | |||
| bool AnfRuntimeAlgorithm::IsFeatureMapInput(const AnfNodePtr &node, size_t input_index) { | |||
| MS_EXCEPTION_IF_NULL(node); | |||
| if (!node->isa<CNode>()) { | |||
| MS_LOG(EXCEPTION) << "Cannot input a parameter or a valuenode to charge it's input if is a feature map" | |||
| << " trace: " << trace::DumpSourceLines(node); | |||
| @@ -1735,6 +1739,7 @@ bool AnfRuntimeAlgorithm::IsSwitchCall(const CNodePtr &call_node) { | |||
| << " trace: " << trace::DumpSourceLines(call_node); | |||
| } | |||
| auto input1 = call_node->input(1); | |||
| MS_EXCEPTION_IF_NULL(input1); | |||
| if (input1->isa<ValueNode>()) { | |||
| return false; | |||
| } else if (input1->isa<CNode>() && AnfAlgo::CheckPrimitiveType(input1, prim::kPrimSwitch)) { | |||
| @@ -1881,6 +1886,7 @@ TypeId AnfRuntimeAlgorithm::GetCNodeOutputPrecision(const AnfNodePtr &node) { | |||
| } | |||
| TypeId AnfRuntimeAlgorithm::GetPrevNodeOutputPrecision(const AnfNodePtr &node, size_t input_idx) { | |||
| MS_EXCEPTION_IF_NULL(node); | |||
| if (!node->isa<CNode>()) { | |||
| MS_LOG(EXCEPTION) << node->DebugString() << ", input node is not CNode." | |||
| << " trace: " << trace::DumpSourceLines(node); | |||
| @@ -419,6 +419,9 @@ void Executor::RunOp(const SessionPtr &session, OpRunInfo *op_run_info, const Gr | |||
| std::vector<tensor::TensorPtr> *input_tensors, VectorRef *outputs, | |||
| const std::vector<int64_t> &tensors_mask) { | |||
| MS_EXCEPTION_IF_NULL(session); | |||
| MS_EXCEPTION_IF_NULL(input_tensors); | |||
| MS_EXCEPTION_IF_NULL(outputs); | |||
| MS_EXCEPTION_IF_NULL(op_run_info); | |||
| auto ms_context = MsContext::GetInstance(); | |||
| auto target = ms_context->get_param<std::string>(MS_CTX_DEVICE_TARGET); | |||
| if (target == kGPUDevice) { | |||
| @@ -141,7 +141,7 @@ class KernelBuildClient { | |||
| std::shared_ptr<DuplexPipe> dp_; | |||
| }; | |||
| static std::string GetScriptFilePath(const std::string cmd_env, const std::string &cmd_script, | |||
| static std::string GetScriptFilePath(const std::string &cmd_env, const std::string &cmd_script, | |||
| const std::string &server_script) { | |||
| auto ms_context = MsContext::GetInstance(); | |||
| MS_EXCEPTION_IF_NULL(ms_context); | |||
| @@ -76,6 +76,7 @@ std::vector<AnfNodePtr> GetCallRealOutputs(const AnfNodePtr &call_node) { | |||
| std::vector<AnfNodePtr> real_inputs; | |||
| auto child_graphs = AnfAlgo::GetCallSwitchKernelGraph(node->cast<CNodePtr>()); | |||
| for (const auto &child_graph : child_graphs) { | |||
| MS_EXCEPTION_IF_NULL(child_graph); | |||
| auto real_input = child_graph->output(); | |||
| auto child_real_inputs = GetCallRealOutputs(real_input); | |||
| std::copy(child_real_inputs.begin(), child_real_inputs.end(), std::back_inserter(real_inputs)); | |||
| @@ -138,11 +139,13 @@ std::string GetNodeGroup(const AnfNodePtr &node) { | |||
| } // namespace | |||
| AnfNodePtr KernelGraph::MakeValueNode(const AnfNodePtr &node) const { | |||
| MS_EXCEPTION_IF_NULL(node); | |||
| auto value_node = node->cast<ValueNodePtr>(); | |||
| if (value_node == nullptr) { | |||
| return nullptr; | |||
| } | |||
| ValueNodePtr new_value_node = std::make_shared<ValueNode>(value_node->value()); | |||
| MS_EXCEPTION_IF_NULL(new_value_node); | |||
| new_value_node->set_abstract(value_node->abstract()); | |||
| this->SetKernelInfoForNode(new_value_node); | |||
| return new_value_node; | |||
| @@ -331,7 +334,7 @@ void KernelGraph::GetLoopNodesByDFS(const AnfNodePtr &node, uint32_t *loop_num) | |||
| return; | |||
| } | |||
| (void)visited_nodes_.insert(node); | |||
| for (auto input_edge : node_input_edges_[node]) { | |||
| for (auto &input_edge : node_input_edges_[node]) { | |||
| size_t input_num = node_input_num_[input_edge.first]; | |||
| if (input_num == 0) { | |||
| continue; | |||
| @@ -366,9 +369,9 @@ void KernelGraph::GetLoopNodesByDFS(const AnfNodePtr &node, uint32_t *loop_num) | |||
| } | |||
| } | |||
| uint32_t KernelGraph::GetLoopNum(std::map<AnfNodePtr, size_t> none_zero_nodes) { | |||
| uint32_t KernelGraph::GetLoopNum(const std::map<AnfNodePtr, size_t> &none_zero_nodes) { | |||
| uint32_t loop_num = 0; | |||
| for (auto iter : none_zero_nodes) { | |||
| for (auto &iter : none_zero_nodes) { | |||
| auto node = iter.first; | |||
| MS_EXCEPTION_IF_NULL(node); | |||
| if (node_input_num_[node] == 0) { | |||
| @@ -477,12 +480,14 @@ void KernelGraph::ResetAssignInputFeatureMapFlag(const CNodePtr &cnode) const { | |||
| << cnode->DebugString(); | |||
| } | |||
| auto input_node = AnfAlgo::GetInputNode(cnode, 0); | |||
| MS_EXCEPTION_IF_NULL(input_node); | |||
| auto assign_value_node = AnfAlgo::GetInputNode(cnode, 1); | |||
| if (AnfAlgo::IsFeatureMapOutput(input_node)) { | |||
| return; | |||
| } | |||
| if (!AnfAlgo::IsFeatureMapOutput(input_node) && AnfAlgo::IsFeatureMapOutput(assign_value_node)) { | |||
| auto kernel_info = dynamic_cast<device::KernelInfo *>(input_node->kernel_info()); | |||
| MS_EXCEPTION_IF_NULL(kernel_info); | |||
| kernel_info->set_feature_map_flag(true); | |||
| } | |||
| } | |||
| @@ -490,6 +495,7 @@ void KernelGraph::ResetAssignInputFeatureMapFlag(const CNodePtr &cnode) const { | |||
| void KernelGraph::SetKernelInfoForNode(const AnfNodePtr &node) const { | |||
| MS_EXCEPTION_IF_NULL(node); | |||
| auto kernel_info = std::make_shared<device::KernelInfo>(); | |||
| MS_EXCEPTION_IF_NULL(kernel_info); | |||
| node->set_kernel_info(kernel_info); | |||
| if (node->isa<CNode>()) { | |||
| if (kOpAssignKernelNameList.find(AnfAlgo::GetCNodeName(node)) != kOpAssignKernelNameList.end()) { | |||
| @@ -520,6 +526,7 @@ void KernelGraph::SetKernelInfoForNode(const AnfNodePtr &node) const { | |||
| return; | |||
| } | |||
| auto kernel_build_info_builder = std::make_shared<kernel::KernelBuildInfo::KernelBuildInfoBuilder>(); | |||
| MS_EXCEPTION_IF_NULL(kernel_build_info_builder); | |||
| // set the format of value_node to DEFAULT_FORMAT | |||
| std::vector<TypeId> types; | |||
| std::vector<std::string> formats = {kOpFormat_DEFAULT}; | |||
| @@ -589,6 +596,7 @@ ValueNodePtr KernelGraph::NewValueNode(const AbstractBasePtr &abstract, const Va | |||
| MS_EXCEPTION_IF_NULL(abstract); | |||
| MS_EXCEPTION_IF_NULL(value); | |||
| ValueNodePtr new_value_node = std::make_shared<ValueNode>(value); | |||
| MS_EXCEPTION_IF_NULL(new_value_node); | |||
| new_value_node->set_abstract(abstract); | |||
| SetKernelInfoForNode(new_value_node); | |||
| AnfAlgo::SetGraphId(graph_id(), new_value_node.get()); | |||
| @@ -610,7 +618,7 @@ ValueNodePtr KernelGraph::NewValueNode(const tensor::TensorPtr &input_tensor) { | |||
| return input_value_node; | |||
| } | |||
| AnfNodePtr KernelGraph::TransValueNodeTuple(const AbstractBasePtr abstract, const ValuePtr &value) { | |||
| AnfNodePtr KernelGraph::TransValueNodeTuple(const AbstractBasePtr &abstract, const ValuePtr &value) { | |||
| MS_EXCEPTION_IF_NULL(abstract); | |||
| MS_EXCEPTION_IF_NULL(value); | |||
| if (!abstract->isa<abstract::AbstractTuple>()) { | |||
| @@ -632,6 +640,7 @@ AnfNodePtr KernelGraph::TransValueNodeTuple(const AbstractBasePtr abstract, cons | |||
| make_tuple_inputs.push_back(TransValueNodeTuple((*tuple_abstract)[index], (*value_tuple)[index])); | |||
| } | |||
| auto make_tuple = NewCNode(make_tuple_inputs); | |||
| MS_EXCEPTION_IF_NULL(make_tuple); | |||
| make_tuple->set_abstract(tuple_abstract); | |||
| return make_tuple; | |||
| } | |||
| @@ -721,6 +730,7 @@ void KernelGraph::FrontBackendlMapAdd(const AnfNodePtr &front_anf, const AnfNode | |||
| auto front_node = front_anf->cast<CNodePtr>(); | |||
| MS_EXCEPTION_IF_NULL(front_node); | |||
| auto attr_input = front_node->input(kAnfPrimitiveIndex); | |||
| MS_EXCEPTION_IF_NULL(attr_input); | |||
| if (!attr_input->isa<CNode>()) { | |||
| MS_LOG(EXCEPTION) << "Kernel " << backend_anf->DebugString() << "has been exist in the backend_front_anf_map_"; | |||
| } | |||
| @@ -959,6 +969,7 @@ bool KernelGraph::IsLeafGraph() const { return child_graph_order_.empty(); } | |||
| std::vector<CNodePtr> KernelGraph::FindNodeByPrimitive(const PrimitivePtr &primitive) const { | |||
| std::vector<CNodePtr> result; | |||
| for (const auto &anf : execution_order_) { | |||
| MS_EXCEPTION_IF_NULL(anf); | |||
| if (AnfAlgo::CheckPrimitiveType(anf, primitive) && AnfAlgo::GetGraphId(anf.get()) == graph_id_) { | |||
| result.push_back(anf->cast<CNodePtr>()); | |||
| } | |||
| @@ -969,6 +980,7 @@ std::vector<CNodePtr> KernelGraph::FindNodeByPrimitive(const PrimitivePtr &primi | |||
| std::vector<CNodePtr> KernelGraph::FindNodeByPrimitive(const std::vector<PrimitivePtr> &primitive_list) const { | |||
| std::vector<CNodePtr> result; | |||
| for (const auto &anf : execution_order_) { | |||
| MS_EXCEPTION_IF_NULL(anf); | |||
| for (const auto &primitive : primitive_list) { | |||
| if (AnfAlgo::CheckPrimitiveType(anf, primitive) && AnfAlgo::GetGraphId(anf.get()) == graph_id_) { | |||
| result.push_back(anf->cast<CNodePtr>()); | |||
| @@ -1310,6 +1322,7 @@ void KernelGraph::UpdateChildGraphOrder() { | |||
| } | |||
| void KernelGraph::RemoveNodeFromGraph(const AnfNodePtr &node) { | |||
| MS_EXCEPTION_IF_NULL(node); | |||
| if (backend_front_anf_map_.find(node) != backend_front_anf_map_.end()) { | |||
| auto front_node = backend_front_anf_map_[node]; | |||
| (void)backend_front_anf_map_.erase(node); | |||
| @@ -389,14 +389,14 @@ class KernelGraph : public FuncGraph { | |||
| // add node depend edge by data edge | |||
| void AddDependEdge(const AnfNodePtr &node, const AnfNodePtr &input, size_t depend_edge_num); | |||
| std::vector<AnfNodePtr> GetOutputNodes(const AnfNodePtr &node); | |||
| AnfNodePtr TransValueNodeTuple(const AbstractBasePtr abstract, const ValuePtr &value); | |||
| AnfNodePtr TransValueNodeTuple(const AbstractBasePtr &abstract, const ValuePtr &value); | |||
| AnfNodePtr TransParameterTuple(const AbstractBasePtr &abstract); | |||
| AnfNodePtr TransCNodeTuple(const CNodePtr &node); | |||
| AnfNodePtr CreatTupleGetItemNode(const AnfNodePtr &node, size_t output_idx); | |||
| std::vector<CNodePtr> SortStartLabelAndEndGoto(); | |||
| // checkout whether loop exist in graph | |||
| void CheckLoop(); | |||
| uint32_t GetLoopNum(std::map<AnfNodePtr, size_t> none_zero_nodes); | |||
| uint32_t GetLoopNum(const std::map<AnfNodePtr, size_t> &none_zero_nodes); | |||
| void GetLoopNodesByDFS(const AnfNodePtr &node, uint32_t *loop_num); | |||
| // members | |||
| @@ -27,6 +27,7 @@ std::shared_ptr<session::KernelGraph> SingleKernelGraph::ConstructKernelGraphBas | |||
| const std::string &op_name, const std::vector<TypeId> &input_dtypes, const std::vector<ShapeVector> &input_shapes, | |||
| const std::vector<TypeId> &output_dtypes, const std::vector<std::vector<size_t>> &output_shapes) { | |||
| auto graph = std::make_shared<session::KernelGraph>(); | |||
| MS_EXCEPTION_IF_NULL(graph); | |||
| std::vector<AnfNodePtr> inputs; | |||
| // set input[0] | |||
| PrimitivePtr op_prim = std::make_shared<Primitive>(op_name); | |||
| @@ -599,6 +599,7 @@ void DumpJsonParser::UpdateNeedDumpKernels(const session::KernelGraph &kernel_gr | |||
| for (size_t i = 0; i < input_size; ++i) { | |||
| auto input_with_index = AnfAlgo::GetPrevNodeOutput(kernel, i); | |||
| auto input = input_with_index.first; | |||
| MS_EXCEPTION_IF_NULL(input); | |||
| if (input->isa<CNode>()) { | |||
| MS_LOG(INFO) << "[AsyncDump] Match Hccl Node:" << GetKernelNodeName(kernel) | |||
| << " Input:" << GetKernelNodeName(input); | |||
| @@ -60,10 +60,12 @@ void GetFileKernelName(NotNull<std::string *> kernel_name) { | |||
| } | |||
| void SetConstNodeId(const AnfNodePtr &node, std::map<std::string, size_t> *const_map) { | |||
| MS_EXCEPTION_IF_NULL(node); | |||
| if (!node->isa<ValueNode>()) { | |||
| return; | |||
| } | |||
| std::string node_name = GetKernelNodeName(node); | |||
| MS_EXCEPTION_IF_NULL(const_map); | |||
| auto iter = const_map->find(node_name); | |||
| if (iter == const_map->end()) { | |||
| auto const_idx = const_map->size() + 1; | |||
| @@ -72,6 +74,7 @@ void SetConstNodeId(const AnfNodePtr &node, std::map<std::string, size_t> *const | |||
| } | |||
| void GetCNodeConstantId(const CNodePtr &node, std::map<std::string, size_t> *const_map) { | |||
| MS_EXCEPTION_IF_NULL(node); | |||
| auto &inputs = node->inputs(); | |||
| if (inputs.empty()) { | |||
| MS_LOG(EXCEPTION) << "Inputs of apply node is empty"; | |||
| @@ -79,6 +82,7 @@ void GetCNodeConstantId(const CNodePtr &node, std::map<std::string, size_t> *con | |||
| AnfNodePtr op = inputs[0]; | |||
| // CNode/ConstGraph/Const/Parameter | |||
| MS_EXCEPTION_IF_NULL(op); | |||
| if (op->isa<CNode>() || IsValueNode<FuncGraph>(op) || op->isa<Parameter>()) { | |||
| MS_LOG(WARNING) << "Operator must be a primitive."; | |||
| } else { | |||
| @@ -90,6 +94,7 @@ void GetCNodeConstantId(const CNodePtr &node, std::map<std::string, size_t> *con | |||
| } | |||
| void GetConstantId(const session::KernelGraph *graph, std::map<std::string, size_t> *const_map) { | |||
| MS_EXCEPTION_IF_NULL(graph); | |||
| std::vector<AnfNodePtr> nodes = TopoSort(graph->get_return(), SuccIncoming, AlwaysInclude); | |||
| for (const AnfNodePtr &node : nodes) { | |||
| MS_EXCEPTION_IF_NULL(node); | |||
| @@ -97,6 +102,7 @@ void GetConstantId(const session::KernelGraph *graph, std::map<std::string, size | |||
| continue; | |||
| } | |||
| auto cnode = node->cast<CNodePtr>(); | |||
| MS_EXCEPTION_IF_NULL(cnode); | |||
| if (cnode != graph->get_return()) { | |||
| GetCNodeConstantId(cnode, const_map); | |||
| } else { | |||
| @@ -197,6 +197,7 @@ void E2eDump::DumpInputImpl(const CNodePtr &node, bool trans_flag, const std::st | |||
| uint32_t stream_id = 0; | |||
| std::string file_path = dump_path + '/' + op_type + '.' + op_name + '.' + std::to_string(task_id) + '.' + | |||
| std::to_string(stream_id) + '.' + std::to_string(timestamp) + ".input." + std::to_string(j); | |||
| MS_EXCEPTION_IF_NULL(addr); | |||
| if (IsDeviceTargetGPU()) { | |||
| DumpGPUMemToFile(file_path, tensor_name, *addr, int_shapes, type, device_type, trans_flag, slot, debugger); | |||
| } else { | |||
| @@ -215,6 +216,7 @@ void E2eDump::DumpSingleAnfNode(const AnfNodePtr &anf_node, const size_t output_ | |||
| std::string node_name = GetKernelNodeName(anf_node); | |||
| std::string dump_name = node_name; | |||
| if (anf_node->isa<ValueNode>()) { | |||
| MS_EXCEPTION_IF_NULL(const_map); | |||
| auto iter = const_map->find(node_name); | |||
| if (iter == const_map->end()) { | |||
| return; | |||
| @@ -63,6 +63,10 @@ void AscendBucket::AllocateAllReduceAddr() { | |||
| // generate memecpy output addr | |||
| uint8_t *memcpy_output = ar_input_addr_; | |||
| if (origin_size_list.size() < bucket_size_ || align_size_list_.size() < bucket_size_) { | |||
| MS_LOG(EXCEPTION) << "Invalid bucket_size_:" << bucket_size_ << " origin_size_list.size:" << origin_size_list.size() | |||
| << " align_size_list.size:" << align_size_list_.size(); | |||
| } | |||
| for (size_t i = 0; i < bucket_size_; ++i) { | |||
| memcpy_output_addrs_.emplace_back(std::make_shared<kernel::Address>(memcpy_output, origin_size_list[i])); | |||
| memcpy_output += align_size_list_[i]; | |||
| @@ -95,6 +99,11 @@ void AscendBucket::FreeAllDeviceMem() { | |||
| void AscendBucket::CopyTensorToContiguousMemory() { | |||
| // clear allreduce input addr | |||
| CleanAllReduceInputAddr(); | |||
| if (memcpy_input_addrs_.size() < bucket_size_ || memcpy_output_addrs_.size() < bucket_size_) { | |||
| MS_LOG(EXCEPTION) << "Invalid bucket_size_:" << bucket_size_ | |||
| << " memcpy_input_addr_.size:" << memcpy_input_addrs_.size() | |||
| << " memcpy_output_addr_.size:" << memcpy_output_addrs_.size(); | |||
| } | |||
| for (size_t i = 0; i < bucket_size_; ++i) { | |||
| MS_EXCEPTION_IF_NULL(memcpy_input_addrs_[i]); | |||
| MS_EXCEPTION_IF_NULL(memcpy_output_addrs_[i]); | |||
| @@ -239,6 +239,7 @@ bool AscendDeviceAddress::SyncDeviceToHost(const ShapeVector &shape, size_t size | |||
| } | |||
| std::vector<size_t> AscendDeviceAddress::GetDeviceShape(std::vector<size_t> *host_shape) const { | |||
| MS_EXCEPTION_IF_NULL(host_shape); | |||
| std::vector<size_t> device_shape; | |||
| auto node_index = GetNodeIndex(); | |||
| if (format_ == kOpFormat_FRAC_NZ || format_ == kOpFormat_NCDHW) { | |||
| @@ -504,6 +505,7 @@ bool AscendDeviceAddress::DumpMemToFile(const std::string &filepath, const std:: | |||
| std::string path = filepath + '.' + host_fmt; | |||
| MS_LOG(INFO) << "E2E Dump path is " << path; | |||
| mindspore::tensor::TensorPtr out_tensor = std::make_shared<tensor::Tensor>(host_type, host_shape); | |||
| MS_EXCEPTION_IF_NULL(out_tensor); | |||
| size_t host_size = out_tensor->data().nbytes(); | |||
| ret = SyncDeviceToHost(host_shape, host_size, host_type, out_tensor->data_c()); | |||
| if (!ret) { | |||
| @@ -531,17 +533,21 @@ bool AscendDeviceAddress::LoadMemToHost(const std::string &tensor_name, int exec | |||
| const ShapeVector &host_shape, TypeId host_type, size_t slot, | |||
| bool keep_prev) const { | |||
| bool ret = false; | |||
| if (Debugger::GetInstance()->TensorExistsInCurrent(tensor_name)) { | |||
| auto debugger = Debugger::GetInstance(); | |||
| MS_EXCEPTION_IF_NULL(debugger); | |||
| if (debugger->TensorExistsInCurrent(tensor_name)) { | |||
| MS_LOG(INFO) << tensor_name << " already loaded for this step so not loading it again."; | |||
| return true; | |||
| } | |||
| // TensorData is freed up in AscendSession class | |||
| auto tensor_data = std::make_shared<mindspore::TensorData>(); | |||
| MS_EXCEPTION_IF_NULL(tensor_data); | |||
| tensor_data->SetName(tensor_name); | |||
| tensor_data->SetExecutionOrder(execution_order); | |||
| tensor_data->SetSlot(slot); | |||
| mindspore::tensor::TensorPtr out_tensor = std::make_shared<tensor::Tensor>(host_type, host_shape); | |||
| MS_EXCEPTION_IF_NULL(out_tensor); | |||
| size_t host_size = out_tensor->data().nbytes(); | |||
| auto ret_sync = SyncDeviceToHost(host_shape, host_size, host_type, out_tensor->data_c()); | |||
| if (!ret_sync) { | |||
| @@ -554,7 +560,7 @@ bool AscendDeviceAddress::LoadMemToHost(const std::string &tensor_name, int exec | |||
| tensor_data->SetByteSize(LongToSize(out_tensor->data().nbytes())); | |||
| tensor_data->SetType((unsigned int)host_type); | |||
| tensor_data->SetShape(out_tensor->shape()); | |||
| ret = Debugger::GetInstance()->LoadNewTensor(tensor_data, keep_prev); | |||
| ret = debugger->LoadNewTensor(tensor_data, keep_prev); | |||
| return ret; | |||
| } | |||
| #endif | |||
| @@ -255,6 +255,7 @@ void AscendKernelRuntime::ReportProfilingData() { | |||
| void AscendKernelRuntime::ReleaseDeviceRes() { | |||
| MS_LOG(INFO) << "Ascend finalize start"; | |||
| #ifdef ENABLE_DEBUGGER | |||
| MS_EXCEPTION_IF_NULL(debugger_); | |||
| if (debugger_ && debugger_->debugger_enabled()) { | |||
| debugger_->SetTrainingDone(true); | |||
| bool ret = debugger_->SendMetadata(false); | |||
| @@ -373,6 +374,7 @@ bool AscendKernelRuntime::Init() { | |||
| bool AscendKernelRuntime::LoadData(const session::KernelGraph &graph) { | |||
| #ifdef ENABLE_DEBUGGER | |||
| MS_LOG(INFO) << "Start load step"; | |||
| MS_EXCEPTION_IF_NULL(debugger_); | |||
| for (const auto &graph_ptr : debugger_->GetGraphPtrList()) { | |||
| debugger_->SetGraphPtr(graph_ptr); | |||
| // load output | |||
| @@ -594,6 +596,7 @@ void AscendKernelRuntime::LaunchDataDump(GraphId graph_id) { | |||
| void AscendKernelRuntime::TaskFailCallback(rtExceptionInfo *task_fail_info) { | |||
| MS_EXCEPTION_IF_NULL(task_fail_info); | |||
| MS_EXCEPTION_IF_NULL(current_graph_); | |||
| static std::mutex exception_mutex; | |||
| constexpr uint32_t kOverflowThreshold = 5; | |||
| std::lock_guard<std::mutex> lock(exception_mutex); | |||
| @@ -628,12 +631,15 @@ CNodePtr AscendKernelRuntime::GetErrorNodeName(uint32_t streamid, uint32_t taski | |||
| } | |||
| auto runtime_info_map = ModelRunner::Instance().GetRuntimeInfoMap(current_graph_->graph_id()); | |||
| for (const auto &iter : runtime_info_map) { | |||
| MS_EXCEPTION_IF_NULL(iter.second); | |||
| auto task_id = std::get<kTupleTaskId>(*iter.second); | |||
| auto stream_id = std::get<kTupleStreamId>(*iter.second); | |||
| if (task_id == taskid && stream_id == streamid) { | |||
| auto &execute_node = current_graph_->execution_order(); | |||
| auto node = std::find_if(execute_node.begin(), execute_node.end(), | |||
| [&iter](const auto &node) { return node->UniqueName() == iter.first; }); | |||
| auto node = std::find_if(execute_node.begin(), execute_node.end(), [&iter](const auto &node) { | |||
| MS_EXCEPTION_IF_NULL(node); | |||
| return node->UniqueName() == iter.first; | |||
| }); | |||
| if (node != execute_node.end()) { | |||
| return *node; | |||
| } | |||
| @@ -1214,6 +1220,7 @@ int AscendKernelRuntime::DeleteDumpFile(std::string path) { | |||
| if (path[path.size() - 1] != '/') { | |||
| path = path + "/"; | |||
| } | |||
| MS_EXCEPTION_IF_NULL(dirinfo); | |||
| filepath = path + dirinfo->d_name; | |||
| if (strcmp(dirinfo->d_name, ".") == 0 || strcmp(dirinfo->d_name, "..") == 0) continue; | |||
| result = DeleteDumpFile(filepath); | |||
| @@ -51,6 +51,7 @@ static void UpdateLabelSwitch(NotNull<CNodePtr> node) { | |||
| std::vector<uint32_t> label_list; | |||
| for (size_t i = kLabelSwitchLabelId; i < node->size(); ++i) { | |||
| auto input = node->input(i); | |||
| MS_EXCEPTION_IF_NULL(input); | |||
| if (!input->isa<CNode>() || AnfAlgo::GetCNodeName(input) != kLabelSetOpName) { | |||
| break; | |||
| } | |||
| @@ -74,6 +75,7 @@ static void AssignLabelForLabelSet(NotNull<std::shared_ptr<session::KernelGraph> | |||
| const auto &nodes = graph->execution_order(); | |||
| for (auto &node : nodes) { | |||
| MS_EXCEPTION_IF_NULL(node); | |||
| if (!node->isa<CNode>()) { | |||
| continue; | |||
| } | |||
| @@ -104,6 +106,7 @@ static void AssignLabelForGotoSwitch(NotNull<std::shared_ptr<session::KernelGrap | |||
| const auto &nodes = graph->execution_order(); | |||
| for (auto &node : nodes) { | |||
| MS_EXCEPTION_IF_NULL(node); | |||
| if (!node->isa<CNode>()) { | |||
| continue; | |||
| } | |||
| @@ -201,6 +201,7 @@ void AscendMemoryManager::MallocSomasDynamicMem(const session::KernelGraph &grap | |||
| MemoryManager::MallocSomasDynamicMem(graph); | |||
| #ifndef ENABLE_SECURITY | |||
| if (MemoryProfiling::GetInstance().IsMemoryProfilingEnable()) { | |||
| MS_EXCEPTION_IF_NULL(somas_reuse_util_ptr_); | |||
| somas_reuse_util_ptr_->ConvertToProfilingNode(graph.graph_id()); | |||
| } | |||
| #endif | |||
| @@ -106,6 +106,7 @@ bool AscendMemoryPool::FreeDeviceMem(const DeviceMemPtr &addr) { | |||
| // Zero-fill every buffer in the global idle memory-buffer map. | |||
| // Each entry's key is passed to rtMemset as both the destination capacity and the byte count. | |||
| void AscendMemoryPool::ResetIdleMemBuf() { | |||
| auto idle_bufs = DynamicMemPoolBestFit::global_idle_mem_buf_map(); | |||
| for (auto &entry : idle_bufs) { | |||
| const auto &mem_buf = entry.second; | |||
| MS_EXCEPTION_IF_NULL(mem_buf); | |||
| const auto buf_size = entry.first; | |||
| // The rtMemset status is intentionally discarded, as in the original code. | |||
| (void)rtMemset(mem_buf->device_addr_, buf_size, 0, buf_size); | |||
| } | |||
| } | |||
| @@ -152,6 +152,7 @@ uint32_t GetHcomTaskNum(const CNodePtr &cnode) { | |||
| } | |||
| CNodePtr GetHcomAndOverflowMarker(const NotNull<KernelGraphPtr> &graph_ptr, vector<CNodePtr> *hcom_nodes) { | |||
| MS_EXCEPTION_IF_NULL(hcom_nodes); | |||
| auto cnode_ptr_list = graph_ptr->execution_order(); | |||
| CNodePtr overflow_marker = nullptr; | |||
| std::string kNPUGetFloatStatusOpName = "NPUGetFloatStatus"; | |||
| @@ -322,6 +323,7 @@ void AscendStreamAssign::ReorderIndependentOrders(const NotNull<KernelGraphPtr> | |||
| void AscendStreamAssign::CheckScenario(const NotNull<KernelGraphPtr> &graph_ptr, | |||
| vector<CNodePtr> *last_grad_and_status) { | |||
| MS_EXCEPTION_IF_NULL(last_grad_and_status); | |||
| auto cnode_ptr_list = graph_ptr->execution_order(); | |||
| vector<CNodePtr> hcom_nodes; | |||
| auto overflow_marker = GetHcomAndOverflowMarker(graph_ptr, &hcom_nodes); | |||
| @@ -376,6 +378,8 @@ CNodePtr AscendStreamAssign::GetCNodesNeededMoved(vector<CNodePtr> *moved_backwa | |||
| vector<CNodePtr> *moved_forward_cnodes, | |||
| const vector<CNodePtr> &last_grad_and_status, | |||
| const NotNull<KernelGraphPtr> &graph_ptr) { | |||
| MS_EXCEPTION_IF_NULL(moved_backward_cnodes); | |||
| MS_EXCEPTION_IF_NULL(moved_forward_cnodes); | |||
| auto cnode_ptr_list = graph_ptr->execution_order(); | |||
| if (last_grad_and_status.size() != kLastGradAndStatusNum) { | |||
| return nullptr; | |||
| @@ -618,7 +622,8 @@ void AscendStreamAssign::AssignAllNodesStream(const NotNull<KernelGraphPtr> &gra | |||
| AssignIndependent(graph_ptr); | |||
| } | |||
| auto independent_stream_num = resource_manager.get_cur_stream_num() - common_stream_num - hcom_stream_num; | |||
| auto total_stream_num = resource_manager.get_cur_stream_num() + hcom_stream_num * kHcomSecondaryStreamNum; | |||
| auto total_stream_num = | |||
| resource_manager.get_cur_stream_num() + Uint32tMulWithOverflowCheck(hcom_stream_num, kHcomSecondaryStreamNum); | |||
| MS_LOG(INFO) << "Total stream number: " << total_stream_num << ", common stream number: " << common_stream_num | |||
| << ", hcom stream number: " << hcom_stream_num << "*" << (kHcomSecondaryStreamNum + 1) | |||
| << ", independent stream number: " << independent_stream_num << "."; | |||
| @@ -728,7 +733,7 @@ uint32_t AscendStreamAssign::AssignHcomStreamId(const CNodePtr &cur_cnode_ptr, b | |||
| } else { | |||
| if (it->second <= kMaxTaskNumPerStream - task_num) { | |||
| AnfAlgo::SetStreamId(it->first, cur_cnode_ptr.get()); | |||
| it->second += task_num; | |||
| it->second = Uint32tAddWithOverflowCheck(it->second, task_num); | |||
| } else { | |||
| cur_hcom_stream_id = resource_manager.ApplyNewStream(); | |||
| AnfAlgo::SetStreamId(cur_hcom_stream_id, cur_cnode_ptr.get()); | |||
| @@ -743,6 +748,7 @@ void AscendStreamAssign::AssignIndependent(const NotNull<KernelGraphPtr> &graph_ | |||
| std::map<uint32_t, std::vector<CNodePtr>> graph_nodes_map; | |||
| for (size_t i = 0; i < cnode_ptr_list.size(); ++i) { | |||
| CNodePtr cur_cnode_ptr = cnode_ptr_list[i]; | |||
| MS_EXCEPTION_IF_NULL(cur_cnode_ptr); | |||
| if (AnfAlgo::GetStreamId(cur_cnode_ptr) != kInvalidStreamId) { | |||
| continue; | |||
| } | |||
| @@ -1176,6 +1182,8 @@ bool AscendStreamAssign::IsProcessedStream(uint32_t stream_id) { | |||
| } | |||
| bool AscendStreamAssign::IsAllOutGraphOut(const KernelGraphPtr &graph, const CNodePtr &cnode) { | |||
| MS_EXCEPTION_IF_NULL(graph); | |||
| MS_EXCEPTION_IF_NULL(cnode); | |||
| auto cnode_out_num = AnfAlgo::GetOutputTensorNum(cnode); | |||
| auto nodes = AnfAlgo::GetAllOutput(graph->output(), {prim::kPrimTupleGetItem}); | |||
| std::set<int> output_index_set; | |||
| @@ -1238,6 +1246,7 @@ void AscendStreamAssign::InsertEventCommonDependHcom(const NotNull<KernelGraphPt | |||
| } | |||
| if (target == cnodes.end()) { | |||
| MS_EXCEPTION_IF_NULL(*(it - 1)); | |||
| MS_LOG(WARNING) << "Hcom node:" << (*(it - 1))->fullname_with_scope() | |||
| << ", can't find target for insert recv op, no insert send/recv"; | |||
| it = cnodes.erase(it); | |||
| @@ -1361,12 +1370,14 @@ vector<CNodePtr> AscendStreamAssign::GetLastInputCnode(const NotNull<KernelGraph | |||
| } | |||
| vector<CNodePtr> AscendStreamAssign::GetInputKernels(const CNodePtr &cnode) { | |||
| MS_EXCEPTION_IF_NULL(cnode); | |||
| vector<CNodePtr> input_cnodes; | |||
| queue<CNodePtr> nop_nodes; | |||
| auto inputs = cnode->inputs(); | |||
| for (size_t i = 1; i < inputs.size(); i++) { | |||
| auto real_input = AnfAlgo::VisitKernel(inputs[i], 0); | |||
| auto node = real_input.first; | |||
| MS_EXCEPTION_IF_NULL(node); | |||
| if (opt::IsNopNode(node)) { | |||
| nop_nodes.push(node->cast<CNodePtr>()); | |||
| while (!nop_nodes.empty()) { | |||
| @@ -1376,6 +1387,7 @@ vector<CNodePtr> AscendStreamAssign::GetInputKernels(const CNodePtr &cnode) { | |||
| for (size_t j = 1; j < new_inputs.size(); j++) { | |||
| auto new_real_input = AnfAlgo::VisitKernel(new_inputs[j], 0); | |||
| auto new_node = new_real_input.first; | |||
| MS_EXCEPTION_IF_NULL(new_node); | |||
| if (opt::IsNopNode(new_node)) { | |||
| nop_nodes.push(new_node->cast<CNodePtr>()); | |||
| } else if (new_node->isa<CNode>()) { | |||
| @@ -1494,6 +1506,9 @@ void AscendStreamAssign::InsertEventBetweenHcom(const NotNull<KernelGraphPtr> &g | |||
| AscendResourceMng &resource_manager = AscendResourceMng::GetInstance(); | |||
| auto cnode_ptr_list = graph_ptr->execution_order(); | |||
| uint32_t cur_event_id = resource_manager.ApplyNewEvent(); | |||
| if (hcom_index.empty()) { | |||
| MS_LOG(EXCEPTION) << "Hcom stream number is empty"; | |||
| } | |||
| size_t first_stream_last_index = hcom_index[0].second.back(); | |||
| size_t last_stream_first_index = hcom_index.back().second.front(); | |||
| MS_LOG(INFO) << "First stream last index:" << first_stream_last_index | |||
| @@ -1652,8 +1667,9 @@ void AscendStreamAssign::GetIndependentMaxTarget(const NotNull<KernelGraphPtr> & | |||
| auto inputs = target_node->inputs(); | |||
| for (size_t m = 1; m < inputs.size(); m++) { | |||
| auto input = inputs[m]; | |||
| MS_EXCEPTION_IF_NULL(input); | |||
| if (opt::IsNopNode(input)) { | |||
| CNodePtr cnode = input->cast<CNodePtr>(); | |||
| auto cnode = input->cast<CNodePtr>(); | |||
| auto new_inputs = cnode->inputs(); | |||
| for (size_t k = 1; k < new_inputs.size(); k++) { | |||
| auto new_real_input = AnfAlgo::VisitKernel(new_inputs[k], 0); | |||
| @@ -1974,6 +1990,7 @@ vector<CNodePtr>::iterator AscendStreamAssign::FindTargetOp(vector<CNodePtr>::it | |||
| auto inputs = (*begin)->inputs(); | |||
| for (size_t i = 1; i < inputs.size(); i++) { | |||
| auto input = inputs[i]; | |||
| MS_EXCEPTION_IF_NULL(input); | |||
| if (opt::IsNopNode(input)) { | |||
| if (IsNopNodeTarget(input, node, *begin, exclude_hcom)) { | |||
| return begin; | |||
| @@ -2084,6 +2101,7 @@ bool AscendStreamAssign::IsVecExist(const std::vector<uint32_t> &group) { | |||
| } | |||
| void AscendStreamAssign::DFS(uint32_t start, std::vector<uint32_t> *group) { | |||
| MS_EXCEPTION_IF_NULL(group); | |||
| auto it = stream_relations_.find(start); | |||
| if (it == stream_relations_.end()) { | |||
| if (!IsVecExist(*group)) { | |||
| @@ -2169,6 +2187,9 @@ void AscendStreamAssign::GetStreamActiveStreamRelation(const NotNull<KernelGraph | |||
| } | |||
| auto orders = graph_ptr->execution_order(); | |||
| if (index >= orders.size()) { | |||
| MS_LOG(EXCEPTION) << "Invalid index."; | |||
| } | |||
| auto cur_cnode = orders[index]; | |||
| auto cur_stream_id = AnfAlgo::GetStreamId(cur_cnode); | |||
| auto active_list = AnfAlgo::GetNodeAttr<vector<uint32_t>>(cur_cnode, kAttrActiveStreamList); | |||
| @@ -22,6 +22,7 @@ | |||
| #include <limits> | |||
| #include "utility" | |||
| #include "backend/session/anf_runtime_algorithm.h" | |||
| #include "utils/convert_utils_base.h" | |||
| #include "runtime/mem.h" | |||
| #include "runtime/kernel.h" | |||
| #include "runtime/rt_model.h" | |||
| @@ -71,22 +72,31 @@ DataDumper::~DataDumper() { | |||
| #ifndef ENABLE_SECURITY | |||
| // Collects the kernels of kernel_graph_ that should be dumped into kernel_map, keyed by fullname_with_scope(). | |||
| // - For an HCCL kernel that DumpJsonParser says needs dumping, the CNode producers of its inputs are recorded. | |||
| // - For any other kernel, the kernel itself is recorded when KernelNeedDump(kernel) is true. | |||
| // kernel_map is a std::map, so try_emplace leaves an existing entry untouched when the name is already present. | |||
| void DataDumper::GetNeedDumpKernelList(NotNull<std::map<std::string, CNodePtr> *> kernel_map) const { | |||
| MS_EXCEPTION_IF_NULL(kernel_graph_); | |||
| for (const auto &kernel : kernel_graph_->execution_order()) { | |||
| MS_EXCEPTION_IF_NULL(kernel); | |||
| if (AnfAlgo::GetKernelType(kernel) == HCCL_KERNEL && | |||
| DumpJsonParser::GetInstance().NeedDump(kernel->fullname_with_scope())) { | |||
| auto input_size = AnfAlgo::GetInputTensorNum(kernel); | |||
| for (size_t i = 0; i < input_size; ++i) { | |||
| auto input_with_index = AnfAlgo::GetPrevNodeOutput(kernel, i); | |||
| auto input = input_with_index.first; | |||
| MS_EXCEPTION_IF_NULL(input); | |||
| // Only inputs that are CNodes are recorded; other input node kinds are ignored here. | |||
| if (input->isa<CNode>()) { | |||
| MS_LOG(INFO) << "[AsyncDump] Match Hccl Node:" << kernel->fullname_with_scope() | |||
| << " Input:" << input->fullname_with_scope(); | |||
| // NOTE(review): this text is a rendered diff; the bare try_emplace below and the checked `auto it = ...` | |||
| // form after it look like the pre- and post-change versions of one statement. Confirm that only one of | |||
| // them exists in the real file, otherwise the second call always reports "already exist". | |||
| kernel_map->try_emplace(input->fullname_with_scope(), input->cast<CNodePtr>()); | |||
| auto it = kernel_map->try_emplace(input->fullname_with_scope(), input->cast<CNodePtr>()); | |||
| // it.second is false when the key was already in the map, i.e. nothing was inserted. | |||
| if (!it.second) { | |||
| MS_LOG(INFO) << "Node name already exist: " << input->fullname_with_scope(); | |||
| } | |||
| } | |||
| } | |||
| } else if (KernelNeedDump(kernel)) { | |||
| MS_LOG(INFO) << "[AsyncDump] Match Node:" << kernel->fullname_with_scope(); | |||
| // NOTE(review): same diff artifact as above; the two try_emplace lines are presumably old/new variants. | |||
| kernel_map->try_emplace(kernel->fullname_with_scope(), kernel); | |||
| auto it = kernel_map->try_emplace(kernel->fullname_with_scope(), kernel); | |||
| // A duplicate name is only logged; the entry already stored under that name is kept. | |||
| if (!it.second) { | |||
| MS_LOG(INFO) << "Node name already exist: " << kernel->fullname_with_scope(); | |||
| } | |||
| } | |||
| } | |||
| } | |||
| @@ -276,6 +286,7 @@ void DataDumper::SetOpDebugMappingInfo(const NotNull<aicpu::dump::OpMappingInfo | |||
| task.set_end_graph(false); | |||
| task.set_task_id(debug_task_id_); | |||
| task.set_stream_id(debug_stream_id_); | |||
| MS_EXCEPTION_IF_NULL(task.mutable_op()); | |||
| task.mutable_op()->set_op_name(kNodeNameOpDebug); | |||
| task.mutable_op()->set_op_type(kOpTypeOpDebug); | |||
| @@ -283,6 +294,7 @@ void DataDumper::SetOpDebugMappingInfo(const NotNull<aicpu::dump::OpMappingInfo | |||
| output.set_data_type(ge::proto::DataType::DT_UINT8); | |||
| output.set_format(ge::Format::FORMAT_ND); | |||
| MS_EXCEPTION_IF_NULL(output.mutable_shape()); | |||
| output.mutable_shape()->add_dim(kOpDebugShape); | |||
| output.set_original_name(kNodeNameOpDebug); | |||
| @@ -293,7 +305,9 @@ void DataDumper::SetOpDebugMappingInfo(const NotNull<aicpu::dump::OpMappingInfo | |||
| output.set_address(static_cast<uint64_t>(reinterpret_cast<uintptr_t>(op_debug_dump_args_))); | |||
| output.set_size(kOpDebugHostMemSize); | |||
| MS_EXCEPTION_IF_NULL(task.mutable_output()); | |||
| task.mutable_output()->Add(std::move(output)); | |||
| MS_EXCEPTION_IF_NULL(dump_info->mutable_task()); | |||
| dump_info->mutable_task()->Add(std::move(task)); | |||
| } | |||
| @@ -419,7 +433,7 @@ void DataDumper::DumpKernelOutput(const CNodePtr &kernel, void *args, NotNull<ai | |||
| MS_LOG(INFO) << "[DataDump] output " << i << " address size:" << output.size(); | |||
| MS_EXCEPTION_IF_NULL(task->mutable_output()); | |||
| task->mutable_output()->Add(std::move(output)); | |||
| offset += sizeof(void *); | |||
| offset = SizetAddWithOverflowCheck(offset, sizeof(void *)); | |||
| } | |||
| } | |||
| @@ -428,6 +442,7 @@ void DataDumper::DumpKernelInput(const CNodePtr &kernel, void *args, NotNull<aic | |||
| MS_LOG(INFO) << "Skip dump input"; | |||
| return; | |||
| } | |||
| MS_EXCEPTION_IF_NULL(kernel); | |||
| if (AnfAlgo::IsNodeInputContainMonad(kernel)) { | |||
| MS_LOG(WARNING) << "Skip Monad node:" << kernel->fullname_with_scope(); | |||
| return; | |||
| @@ -462,7 +477,7 @@ void DataDumper::DumpKernelInput(const CNodePtr &kernel, void *args, NotNull<aic | |||
| MS_LOG(INFO) << "[DataDump] input " << i << " address size:" << input.size(); | |||
| MS_EXCEPTION_IF_NULL(task->mutable_input()); | |||
| task->mutable_input()->Add(std::move(input)); | |||
| offset += sizeof(void *); | |||
| offset = SizetAddWithOverflowCheck(offset, sizeof(void *)); | |||
| } | |||
| } | |||
| #endif | |||
| @@ -81,6 +81,7 @@ void AiCpuDynamicKernel::Initialize() { | |||
| if (is_dynamic_shape_) { | |||
| ext_info_handler_ = | |||
| std::make_shared<AicpuExtInfoHandler>(cnode->fullname_with_scope(), input_num_, output_num_, shape_type); | |||
| MS_EXCEPTION_IF_NULL(ext_info_handler_); | |||
| ext_info_handler_->Parse(ext_info_data_); | |||
| } | |||
| @@ -103,6 +104,7 @@ void AiCpuDynamicKernel::Initialize() { | |||
| } | |||
| auto aicpu_param_head = reinterpret_cast<kernel::AicpuParamHead *>(args_.data()); | |||
| MS_EXCEPTION_IF_NULL(aicpu_param_head); | |||
| aicpu_param_head->extInfoLength = SizeToUint(ext_info_size_); | |||
| aicpu_param_head->extInfoAddr = reinterpret_cast<uint64_t>(ext_info_addr_dev_); | |||
| } | |||
| @@ -114,11 +116,13 @@ bool AiCpuDynamicKernel::UpdateInputOutputAddr() { | |||
| MS_EXCEPTION_IF_NULL(cnode); | |||
| for (size_t i = 0; i < input_num_; ++i) { | |||
| auto input_addr = AnfAlgo::GetPrevNodeOutputAddr(cnode, i); | |||
| MS_EXCEPTION_IF_NULL(input_addr); | |||
| io_addrs.emplace_back(reinterpret_cast<uintptr_t>(input_addr->GetMutablePtr())); | |||
| } | |||
| for (size_t i = 0; i < output_num_; ++i) { | |||
| auto output_addr = AnfAlgo::GetOutputAddr(cnode, i); | |||
| MS_EXCEPTION_IF_NULL(output_addr); | |||
| io_addrs.emplace_back(reinterpret_cast<uintptr_t>(output_addr->GetMutablePtr())); | |||
| } | |||
| @@ -128,6 +132,10 @@ bool AiCpuDynamicKernel::UpdateInputOutputAddr() { | |||
| } | |||
| auto io_ptr = args_.data() + sizeof(kernel::AicpuParamHead); | |||
| if (io_addrs.empty()) { | |||
| MS_LOG(ERROR) << "The io_addrs is empty"; | |||
| return false; | |||
| } | |||
| auto ret = | |||
| memcpy_s(io_ptr, args_.size() - sizeof(kernel::AicpuParamHead), &io_addrs[0], sizeof(uint64_t) * io_addrs.size()); | |||
| if (ret != 0) { | |||
| @@ -146,6 +154,7 @@ bool AiCpuDynamicKernel::UpdateExtInfo() { | |||
| return true; | |||
| } | |||
| MS_EXCEPTION_IF_NULL(ext_info_handler_); | |||
| for (size_t i = 0; i < input_num_; ++i) { | |||
| ext_info_handler_->UpdateInputShapeAndType(i, NOT_NULL(cnode)); | |||
| } | |||
| @@ -171,6 +180,7 @@ bool AiCpuDynamicKernel::UpdateOutputShapeFromExtInfo() { | |||
| auto cnode = cnode_ptr_.lock(); | |||
| MS_EXCEPTION_IF_NULL(cnode); | |||
| MS_LOG(INFO) << "UpdateOutputShapeFromExtInfo start. Op name " << cnode->fullname_with_scope(); | |||
| MS_EXCEPTION_IF_NULL(ext_info_handler_); | |||
| auto ret = rtMemcpy(ext_info_handler_->GetExtInfo(), ext_info_handler_->GetExtInfoLen(), ext_info_addr_dev_, | |||
| ext_info_size_, RT_MEMCPY_DEVICE_TO_HOST); | |||
| if (ret != RT_ERROR_NONE) { | |||
| @@ -71,8 +71,8 @@ bool AicpuExtInfoHandler::Parse(const std::string &ext_info) { | |||
| << " infoLen:" << aicpu_ext_info->infoLen; | |||
| break; | |||
| } | |||
| offset += sizeof(AicpuExtInfo); | |||
| offset += aicpu_ext_info->infoLen; | |||
| offset = SizetAddWithOverflowCheck(offset, sizeof(AicpuExtInfo)); | |||
| offset = SizetAddWithOverflowCheck(offset, aicpu_ext_info->infoLen); | |||
| } | |||
| if (offset != ext_info_len_) { | |||
| @@ -84,6 +84,7 @@ bool AicpuExtInfoHandler::Parse(const std::string &ext_info) { | |||
| } | |||
| bool AicpuExtInfoHandler::ParseExtShapeType(AicpuExtInfo *aicpu_ext_info) { | |||
| MS_EXCEPTION_IF_NULL(aicpu_ext_info); | |||
| if (aicpu_ext_info->infoLen != sizeof(int32_t)) { | |||
| MS_LOG(ERROR) << "Node:" << node_name_ << " parse ext shape type failed as infoLen must be " << sizeof(int32_t) | |||
| << " but got:" << aicpu_ext_info->infoLen; | |||
| @@ -120,6 +121,7 @@ bool AicpuExtInfoHandler::ParseExtInputShape(AicpuExtInfo *aicpu_ext_info) { | |||
| bool AicpuExtInfoHandler::ParseExtOutputShape(AicpuExtInfo *aicpu_ext_info) { | |||
| auto need_len = output_num_ * sizeof(AicpuShapeAndType); | |||
| MS_EXCEPTION_IF_NULL(aicpu_ext_info); | |||
| if (aicpu_ext_info->infoLen != need_len) { | |||
| MS_LOG(INFO) << "Node:" << node_name_ | |||
| << " parse ext output shape failed, aicpu_ext_info->infoLen:" << aicpu_ext_info->infoLen | |||
| @@ -144,6 +146,10 @@ bool AicpuExtInfoHandler::UpdateInputShapeAndType(uint32_t input_index, const No | |||
| auto input_shape = AnfAlgo::GetInputDeviceShape(anf_node, input_index); | |||
| std::vector<int64_t> tmp_shape; | |||
| std::transform(input_shape.begin(), input_shape.end(), std::back_inserter(tmp_shape), SizeToLong); | |||
| if (input_index >= input_shape_and_type_.size()) { | |||
| MS_LOG(EXCEPTION) << "Invalid input_index: " << input_index | |||
| << " the size of input_shape_and_type_ is: " << input_shape_and_type_.size(); | |||
| } | |||
| return UpdateShapeAndType(tmp_shape, NOT_NULL(input_shape_and_type_[input_index])); | |||
| } | |||
| @@ -170,12 +176,20 @@ bool AicpuExtInfoHandler::UpdateOutputShapeAndType(uint32_t output_index, const | |||
| std::vector<int64_t> tmp_shape; | |||
| std::transform(shape.begin(), shape.end(), std::back_inserter(tmp_shape), SizeToLong); | |||
| if (output_index >= output_shape_and_type_.size()) { | |||
| MS_LOG(EXCEPTION) << "Invalid output_index: " << output_index | |||
| << " the size of output_shape_and_type_ is: " << output_shape_and_type_.size(); | |||
| } | |||
| return UpdateShapeAndType(tmp_shape, NOT_NULL(output_shape_and_type_[output_index])); | |||
| } | |||
| // Looks up the entry for output `output_index` in output_shape_and_type_ and hands it, together with the | |||
| // caller's `shape` and `data_type` pointers, to GetShapeAndType (presumably filling both; confirm in that helper). | |||
| // Returns true on every path that returns normally. | |||
| bool AicpuExtInfoHandler::GetOutputShapeAndType(uint32_t output_index, NotNull<std::vector<int64_t> *> shape, | |||
| NotNull<TypeId *> data_type) { | |||
| MS_LOG(INFO) << "Get " << node_name_ << " Output:" << output_index << " Shape And Type"; | |||
| // Reject an out-of-range index before it is used to subscript output_shape_and_type_ below. | |||
| // MS_LOG(EXCEPTION) is expected to raise rather than fall through; verify against the logging macro. | |||
| if (output_index >= output_shape_and_type_.size()) { | |||
| MS_LOG(EXCEPTION) << "Invalid output_index: " << output_index | |||
| << " the size of output_shape_and_type_ is: " << output_shape_and_type_.size(); | |||
| } | |||
| GetShapeAndType(NOT_NULL(output_shape_and_type_[output_index]), shape, data_type); | |||
| return true; | |||
| } | |||
| @@ -183,7 +197,7 @@ bool AicpuExtInfoHandler::GetOutputShapeAndType(uint32_t output_index, NotNull<s | |||
| bool AicpuExtInfoHandler::UpdateShapeAndType(const std::vector<int64_t> &shape, | |||
| NotNull<AicpuShapeAndType *> shape_and_type) { | |||
| if (shape.empty() || shape.size() > kernel::kMaxShapeDims) { | |||
| MS_LOG(ERROR) << "Invalid shape:" << shape.size(); | |||
| MS_LOG(ERROR) << "Invalid shape:" << shape.size() << " Only support 0-8"; | |||
| return false; | |||
| } | |||
| @@ -64,6 +64,7 @@ static kernel::KernelModPtr SerialCompileImpl(const AnfNodePtr &anf_node) { | |||
| break; | |||
| } | |||
| default: { | |||
| MS_EXCEPTION_IF_NULL(anf_node); | |||
| MS_LOG(EXCEPTION) << "node [" << anf_node->DebugString() << "] Unsupported kernel_type:" << kernel_type; | |||
| } | |||
| } | |||
| @@ -100,6 +101,7 @@ static bool KernelBuildParallelCompile(const std::vector<CNodePtr> &kernels) { | |||
| bool tbe_ret = true; | |||
| bool akg_ret = true; | |||
| auto bin_map = kernel::tbe::KernelMeta::GetInstance(); | |||
| MS_EXCEPTION_IF_NULL(bin_map); | |||
| if (!tbe_nodes.empty()) { | |||
| std::string old_build = common::GetEnv("MS_OLD_BUILD_PROCESS"); | |||
| if (!old_build.empty()) { | |||
| @@ -171,6 +173,7 @@ static void AddTbeClearZeroNode(mindspore::session::KernelGraph *const kernel_gr | |||
| MS_EXCEPTION_IF_NULL(abstract); | |||
| clear_zero->set_abstract(abstract); | |||
| auto builder = std::make_shared<kernel::KernelBuildInfo::KernelBuildInfoBuilder>(); | |||
| MS_EXCEPTION_IF_NULL(builder); | |||
| builder->SetKernelType(KernelType::TBE_KERNEL); | |||
| AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), clear_zero.get()); | |||
| auto clean_size = CalCleanZerosSize(pre_node); | |||
| @@ -197,6 +200,7 @@ static void AddFusionTbeClearZeroNode(mindspore::session::KernelGraph *const ker | |||
| MS_EXCEPTION_IF_NULL(abstract); | |||
| clear_zero->set_abstract(abstract); | |||
| auto builder = std::make_shared<kernel::KernelBuildInfo::KernelBuildInfoBuilder>(); | |||
| MS_EXCEPTION_IF_NULL(builder); | |||
| builder->SetKernelType(KernelType::TBE_KERNEL); | |||
| AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), clear_zero.get()); | |||
| AnfAlgo::SetNodeAttr(kAttrAtomicAddMemSize, MakeValue(clean_size_list), clear_zero); | |||
| @@ -286,17 +290,21 @@ bool KernelBuild(const std::vector<CNodePtr> &kernels) { | |||
| std::map<AnfNodePtr, std::vector<size_t>> GetCommunicationOpInputInfo( | |||
| const mindspore::session::KernelGraph *kernel_graph) { | |||
| MS_EXCEPTION_IF_NULL(kernel_graph); | |||
| std::map<AnfNodePtr, std::vector<size_t>> comm_input_info_map; | |||
| for (auto &kernel : kernel_graph->execution_order()) { | |||
| MS_EXCEPTION_IF_NULL(kernel); | |||
| auto input_num = AnfAlgo::GetInputTensorNum(kernel); | |||
| if (mindspore::session::AnfRuntimeAlgorithm::IsCommunicationOp(kernel)) { | |||
| for (size_t i = 0; i < input_num; i++) { | |||
| auto input_node = kernel->input(i + 1); | |||
| auto kernel_input = AnfAlgo::VisitKernelWithReturnType(input_node, 0, true); | |||
| MS_EXCEPTION_IF_NULL(kernel_input.first); | |||
| if (!kernel_input.first->isa<CNode>()) { | |||
| continue; | |||
| } | |||
| auto cnode = kernel_input.first->cast<CNodePtr>(); | |||
| MS_EXCEPTION_IF_NULL(cnode); | |||
| if (AnfAlgo::IsCommunicationOp(cnode) || AnfAlgo::IsIndependentNode(cnode) || | |||
| AnfAlgo::GetCNodeName(cnode) == kGetNextOpName) { | |||
| // no need to add atomic for communication/independent/getnext op 's output | |||
| @@ -289,6 +289,7 @@ bool TagRaiseReduce(const std::shared_ptr<kernel::KernelBuildInfo> &kernel_build | |||
| std::vector<std::shared_ptr<kernel::KernelBuildInfo>> FilterRaisedOrReducePrecisionMatchedKernelInfo( | |||
| const CNodePtr &cnode, const std::vector<std::shared_ptr<kernel::KernelBuildInfo>> &kernel_info_list, | |||
| bool *precision_reduce) { | |||
| MS_EXCEPTION_IF_NULL(precision_reduce); | |||
| std::vector<std::shared_ptr<kernel::KernelBuildInfo>> filtered_kernel_info_list; | |||
| const std::map<TypeId, TypeId> raise_map = {{kNumberTypeFloat16, kNumberTypeFloat32}}; | |||
| const std::map<TypeId, TypeId> reduce_map = {{kNumberTypeInt64, kNumberTypeInt32}, | |||
| @@ -350,6 +351,7 @@ void SetCastAndWeightFormat(const CNodePtr &kernel_node) { | |||
| auto format = iter->second[next_index]; | |||
| auto info_builder = | |||
| std::make_shared<kernel::KernelBuildInfo::KernelBuildInfoBuilder>(AnfAlgo::GetSelectKernelBuildInfo(kernel_node)); | |||
| MS_EXCEPTION_IF_NULL(info_builder); | |||
| info_builder->SetInputsFormat({format}); | |||
| info_builder->SetOutputsFormat({format}); | |||
| AnfAlgo::SetSelectKernelBuildInfo(info_builder->Build(), kernel_node.get()); | |||
| @@ -372,12 +374,14 @@ void SetWeightFormat(const AnfNodePtr &real_input_node, std::vector<string> outp | |||
| output_format = {AnfAlgo::GetOutputFormat(real_input_node, 0)}; | |||
| } | |||
| auto builder = std::make_shared<kernel::KernelBuildInfo::KernelBuildInfoBuilder>(); | |||
| MS_EXCEPTION_IF_NULL(builder); | |||
| // we set special device info of a input tensor. | |||
| auto op_info = kernel::tbe::TbeDynamicShapeUtil::FindOp(AnfAlgo::GetCNodeName(kernel_node), kernel_node); | |||
| if (op_info != nullptr) { | |||
| force_fresh = op_info->is_ref() || force_fresh; | |||
| } | |||
| auto selected_kernel_info = AnfAlgo::GetSelectKernelBuildInfo(kernel_node); | |||
| MS_EXCEPTION_IF_NULL(selected_kernel_info); | |||
| if (IsValueNode<tensor::Tensor>(real_input_node) && | |||
| AnfAlgo::GetOutputDeviceDataType(real_input_node, 0) == kTypeUnknown) { | |||
| builder->SetOutputsFormat(output_format); | |||
| @@ -403,6 +407,7 @@ bool RefreshCastAndParamWeightFormat(const AnfNodePtr &input_node, const string | |||
| return false; | |||
| } | |||
| auto cast_node = input_node->cast<CNodePtr>(); | |||
| MS_EXCEPTION_IF_NULL(cast_node); | |||
| if (AnfAlgo::GetCNodeName(cast_node) != prim::kPrimCast->name()) { | |||
| return true; | |||
| } | |||
| @@ -414,6 +419,7 @@ bool RefreshCastAndParamWeightFormat(const AnfNodePtr &input_node, const string | |||
| } | |||
| auto info_builder = | |||
| std::make_shared<kernel::KernelBuildInfo::KernelBuildInfoBuilder>(AnfAlgo::GetSelectKernelBuildInfo(input_node)); | |||
| MS_EXCEPTION_IF_NULL(info_builder); | |||
| info_builder->SetInputsFormat({format}); | |||
| info_builder->SetOutputsFormat({format}); | |||
| AnfAlgo::SetSelectKernelBuildInfo(info_builder->Build(), cast_node.get()); | |||
| @@ -433,6 +439,7 @@ void SetTensorDeviceInfo(const CNodePtr &kernel_node) { | |||
| auto input_with_index = AnfAlgo::VisitKernelWithReturnType(input_kernel_node, 0); | |||
| MS_EXCEPTION_IF_NULL(input_with_index.first); | |||
| auto real_input_node = input_with_index.first; | |||
| MS_EXCEPTION_IF_NULL(real_input_node); | |||
| if (RefreshCastAndParamWeightFormat(real_input_node, selected_kernel_info->GetInputFormat(input_index))) { | |||
| continue; | |||
| } | |||
| @@ -534,6 +541,7 @@ KernelSelectStatus SelectKernelInfo(const CNodePtr &kernel_node, KernelType kern | |||
| } | |||
| void SetKernelInfo(const CNodePtr &kernel_node, KernelType kernel_type) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| auto kernel_info = dynamic_cast<device::KernelInfo *>(kernel_node->kernel_info()); | |||
| MS_EXCEPTION_IF_NULL(kernel_info); | |||
| auto kernel_build_info = kernel_info->select_kernel_build_info(); | |||
| @@ -544,6 +552,7 @@ void SetKernelInfo(const CNodePtr &kernel_node, KernelType kernel_type) { | |||
| } | |||
| auto builder = std::make_shared<kernel::KernelBuildInfo::KernelBuildInfoBuilder>(); | |||
| MS_EXCEPTION_IF_NULL(builder); | |||
| builder->SetOriginDataFormat(kernel_build_info->GetOriginDataFormat()); | |||
| builder->SetInputsFormat(kernel_build_info->GetAllInputFormats()); | |||
| builder->SetInputsDeviceType(kernel_build_info->GetAllInputDeviceTypes()); | |||
| @@ -134,6 +134,7 @@ void ProfilingUtils::GetTraceBegin(const session::KernelGraph &kernel_graph, con | |||
| fp_start_str = first_node->fullname_with_scope(); | |||
| } else { | |||
| for (auto &cnode : execution_orders) { | |||
| MS_EXCEPTION_IF_NULL(cnode); | |||
| if (getnext_outputs.count(cnode->fullname_with_scope()) != 0) { | |||
| fp_start_str = cnode->fullname_with_scope(); | |||
| break; | |||
| @@ -149,6 +150,7 @@ void ProfilingUtils::GetCNodeOutputRealNode(const std::string &node_name, const | |||
| MS_EXCEPTION_IF_NULL(cnode); | |||
| for (const auto &input : cnode->inputs()) { | |||
| auto prev_cnode = AnfAlgo::VisitKernel(input, 0); | |||
| MS_EXCEPTION_IF_NULL(prev_cnode.first); | |||
| if (!prev_cnode.first->isa<CNode>()) { | |||
| continue; | |||
| } | |||
| @@ -190,12 +192,14 @@ void ProfilingUtils::GetTraceBpEnd(const session::KernelGraph &kernel_graph, con | |||
| for (size_t i = 0; i < input_num; ++i) { | |||
| auto input_node_with_index = AnfAlgo::GetPrevNodeOutput(*iter, i); | |||
| auto input_node = input_node_with_index.first; | |||
| MS_EXCEPTION_IF_NULL(input_node); | |||
| ar_input_node_names.insert(input_node->fullname_with_scope()); | |||
| } | |||
| // start from previous node | |||
| ++iter; | |||
| // find input names in previous node | |||
| while (iter != execution_orders.rend()) { | |||
| MS_EXCEPTION_IF_NULL(*iter); | |||
| if (ar_input_node_names.find((*iter)->fullname_with_scope()) != ar_input_node_names.end()) { | |||
| bp_end_str = (*iter)->fullname_with_scope(); | |||
| break; | |||
| @@ -219,6 +223,7 @@ std::string ProfilingUtils::GetGraphLastKernelName(const session::KernelGraph &k | |||
| auto &execution_order = kernel_graph.execution_order(); | |||
| // find last tbe_kernel | |||
| for (auto iter = execution_order.rbegin(); iter != execution_order.rend(); ++iter) { | |||
| MS_EXCEPTION_IF_NULL(*iter); | |||
| if (AnfAlgo::GetKernelType(*iter) == TBE_KERNEL || AnfAlgo::GetKernelType(*iter) == AKG_KERNEL || | |||
| AnfAlgo::IsCommunicationOp(*iter)) { | |||
| last_tbe_kernel_name = (*iter)->fullname_with_scope(); | |||
| @@ -297,6 +302,7 @@ void ProfilingUtils::InsertProfilingTraceFp(const mindspore::AnfNodePtr &anf_nod | |||
| const ProfilingTraceInfo &profiling_trace_info, | |||
| NotNull<session::KernelGraph *> graph_ptr, | |||
| NotNull<std::vector<mindspore::CNodePtr> *> kernel_list) { | |||
| MS_EXCEPTION_IF_NULL(anf_node); | |||
| if (profiling_trace_info.trace_begin == anf_node->fullname_with_scope()) { | |||
| MS_LOG(INFO) << "Profiling graph:" << graph_ptr->graph_id() | |||
| << " Match FpStart:" << profiling_trace_info.trace_begin; | |||
| @@ -55,6 +55,7 @@ void DescReporter::ReportByLine(const std::string &data, const std::string &file | |||
| void DescReporter::ReportAllLine() { | |||
| for (const auto &desc : prof_desc_list_) { | |||
| MS_EXCEPTION_IF_NULL(desc); | |||
| auto data = desc->ToString(); | |||
| ReportByLine(data, file_name_); | |||
| } | |||
| @@ -24,13 +24,13 @@ namespace device { | |||
| namespace ascend { | |||
| void GraphDescReporter::ReportData() { | |||
| for (const auto &node : cnode_list_) { | |||
| MS_EXCEPTION_IF_NULL(node); | |||
| if (AnfAlgo::GetKernelType(node) != TBE_KERNEL && AnfAlgo::GetKernelType(node) != AKG_KERNEL) { | |||
| MS_LOG(INFO) << "Skip non tbe kernel:" << node->fullname_with_scope(); | |||
| continue; | |||
| } | |||
| std::vector<DataElement> input_data_list; | |||
| std::vector<DataElement> output_data_list; | |||
| MS_EXCEPTION_IF_NULL(node); | |||
| auto op_name = node->fullname_with_scope(); | |||
| auto op_type = AnfAlgo::GetCNodeName(node); | |||
| auto input_size = AnfAlgo::GetInputTensorNum(node); | |||
| @@ -31,6 +31,7 @@ void TaskDescReporter::ReportData() { | |||
| size_t task_index = 0; | |||
| for (const auto &node : cnode_list_) { | |||
| MS_EXCEPTION_IF_NULL(node); | |||
| if (AnfAlgo::GetKernelType(node) != TBE_KERNEL && AnfAlgo::GetKernelType(node) != AKG_KERNEL) { | |||
| MS_LOG(INFO) << "Skip non tbe kernel:" << node->fullname_with_scope(); | |||
| ++task_index; | |||
| @@ -38,7 +39,6 @@ void TaskDescReporter::ReportData() { | |||
| } | |||
| auto kernel_mod = AnfAlgo::GetKernelMod(node); | |||
| auto ascend_kernel_mod = dynamic_cast<kernel::AscendKernelMod *>(kernel_mod); | |||
| MS_EXCEPTION_IF_NULL(node); | |||
| MS_EXCEPTION_IF_NULL(ascend_kernel_mod); | |||
| // Check task_id and stream_id valid | |||
| CheckStreamTaskValid(task_index, task_index); | |||
| @@ -84,6 +84,7 @@ void TaskGenerator::LaunchAddrCleanAkgKernel(const CNodePtr &anf_node_ptr, Addre | |||
| auto device_address = AnfAlgo::GetOutputAddr(post_node, index); | |||
| kernel::AddressPtr input = std::make_shared<kernel::Address>(); | |||
| MS_EXCEPTION_IF_NULL(input); | |||
| MS_EXCEPTION_IF_NULL(device_address); | |||
| input->addr = device_address->ptr_; | |||
| input->size = device_address->size_; | |||
| kernel_inputs->push_back(input); | |||
| @@ -112,6 +113,7 @@ void TaskGenerator::LaunchAddrCleanKernel(const CNodePtr &anf_node_ptr, AddressP | |||
| auto device_address = AnfAlgo::GetOutputAddr(pre_node, index); | |||
| kernel::AddressPtr input = std::make_shared<kernel::Address>(); | |||
| MS_EXCEPTION_IF_NULL(input); | |||
| MS_EXCEPTION_IF_NULL(device_address); | |||
| input->addr = device_address->ptr_; | |||
| MS_EXCEPTION_IF_NULL(input->addr); | |||
| input->size = device_address->size_; | |||
| @@ -126,6 +128,7 @@ void TaskGenerator::LaunchAddrCleanKernel(const CNodePtr &anf_node_ptr, AddressP | |||
| auto device_address = AnfAlgo::GetWorkspaceAddr(pre_node, index); | |||
| kernel::AddressPtr workspace = std::make_shared<kernel::Address>(); | |||
| MS_EXCEPTION_IF_NULL(workspace); | |||
| MS_EXCEPTION_IF_NULL(device_address); | |||
| workspace->addr = device_address->ptr_; | |||
| MS_EXCEPTION_IF_NULL(workspace->addr); | |||
| workspace->size = device_address->size_; | |||
| @@ -158,6 +161,7 @@ bool TaskGenerator::LaunchKernel(const CNodePtr &anf_node_ptr, uint32_t stream_i | |||
| if ((op_name == kSplitOpName || op_name == kSplitVOpName) && AnfAlgo::HasNodeAttr(kAttrNonTask, anf_node_ptr)) { | |||
| MS_LOG(INFO) << "Skip task generation for NonTask op " << anf_node_ptr->fullname_with_scope(); | |||
| auto debug_info = std::make_shared<TaskDebugInfo>(); | |||
| MS_EXCEPTION_IF_NULL(debug_info); | |||
| debug_info->op_name_ = anf_node_ptr->fullname_with_scope() + "-NonTask"; | |||
| debug_info->task_num_ = 0; | |||
| task_debug_info_list_.push_back(debug_info); | |||
| @@ -180,10 +184,12 @@ bool TaskGenerator::LaunchKernel(const CNodePtr &anf_node_ptr, uint32_t stream_i | |||
| auto real_input_index = AnfAlgo::GetRealInputIndex(anf_node_ptr, i); | |||
| auto device_address = AnfAlgo::GetPrevNodeOutputAddr(anf_node_ptr, real_input_index); | |||
| AddressPtr input = std::make_shared<Address>(); | |||
| MS_EXCEPTION_IF_NULL(input); | |||
| input->addr = device_address->ptr_; | |||
| input->size = device_address->size_; | |||
| auto prenode_with_index = AnfAlgo::GetPrevNodeOutput(anf_node_ptr, i); | |||
| MS_EXCEPTION_IF_NULL(prenode_with_index.first); | |||
| if (AnfAlgo::IsRealCNodeKernel(prenode_with_index.first)) { | |||
| if ((AnfAlgo::GetCNodeName(prenode_with_index.first) == kSplitOpName || | |||
| AnfAlgo::GetCNodeName(prenode_with_index.first) == kSplitVOpName) && | |||
| @@ -192,6 +198,7 @@ bool TaskGenerator::LaunchKernel(const CNodePtr &anf_node_ptr, uint32_t stream_i | |||
| // when op A -> split(NonTask) -> op B, op B's input addr is split's input0's addr + offset | |||
| // offset is split's output index * split's output size | |||
| auto split_input0_device_address = AnfAlgo::GetPrevNodeOutputAddr(prenode_with_index.first, 0); | |||
| MS_EXCEPTION_IF_NULL(split_input0_device_address); | |||
| input->addr = | |||
| static_cast<uint8_t *>(split_input0_device_address->ptr_) + (prenode_with_index.second * input->size); | |||
| MS_LOG(INFO) << "Change " << anf_node_ptr->fullname_with_scope() << "'s input " << i << " address to " | |||
| @@ -231,6 +238,11 @@ bool TaskGenerator::LaunchKernel(const CNodePtr &anf_node_ptr, uint32_t stream_i | |||
| ascend_kernel_mod->GenTask(kernel_inputs, kernel_workspaces, kernel_outputs, stream_id); | |||
| task_info_list->insert(task_info_list->end(), task_info_ptrs.begin(), task_info_ptrs.end()); | |||
| auto debug_info = std::make_shared<TaskDebugInfo>(); | |||
| MS_EXCEPTION_IF_NULL(debug_info); | |||
| if (task_info_ptrs.empty()) { | |||
| MS_LOG(ERROR) << "Empty task_info_ptrs."; | |||
| return false; | |||
| } | |||
| debug_info->op_name_ = anf_node_ptr->fullname_with_scope(); | |||
| debug_info->task_num_ = task_info_ptrs.size(); | |||
| debug_info->stream_id_ = task_info_ptrs[0]->stream_id(); | |||
| @@ -338,6 +350,7 @@ void TaskGenerator::SaveTaskDebugInfoToFile(const std::string &real_filename, | |||
| size_t index = 0; | |||
| for (auto &task_debug_info : task_debug_info_list) { | |||
| MS_EXCEPTION_IF_NULL(task_debug_info); | |||
| fout << "op_name:" << task_debug_info->op_name_ << "\n" | |||
| << "task_index:" << index << "\t" | |||
| << "task_num:" << task_debug_info->task_num_ << "\t" | |||
| @@ -345,25 +358,28 @@ void TaskGenerator::SaveTaskDebugInfoToFile(const std::string &real_filename, | |||
| << "task0_type:" << task_debug_info->type_ << "\t" | |||
| << "task0_dump_flag:" << task_debug_info->dump_flag_ << "\n"; | |||
| index++; | |||
| if (task_debug_info->input_addrs_.size()) { | |||
| if (!task_debug_info->input_addrs_.empty()) { | |||
| fout << "input address:"; | |||
| for (auto &input : task_debug_info->input_addrs_) { | |||
| MS_EXCEPTION_IF_NULL(input); | |||
| fout << input->addr << "(" << input->size << ")\t"; | |||
| } | |||
| fout << "\n"; | |||
| } | |||
| if (task_debug_info->output_addrs_.size()) { | |||
| if (!task_debug_info->output_addrs_.empty()) { | |||
| fout << "output address:"; | |||
| for (auto &output : task_debug_info->output_addrs_) { | |||
| MS_EXCEPTION_IF_NULL(output); | |||
| fout << output->addr << "(" << output->size << ")\t"; | |||
| } | |||
| fout << "\n"; | |||
| } | |||
| if (task_debug_info->workspace_addrs_.size()) { | |||
| if (!task_debug_info->workspace_addrs_.empty()) { | |||
| fout << "workspace address:"; | |||
| for (auto &workspace : task_debug_info->workspace_addrs_) { | |||
| MS_EXCEPTION_IF_NULL(workspace); | |||
| fout << workspace->addr << "(" << workspace->size << ")\t"; | |||
| } | |||
| fout << "\n"; | |||
| @@ -382,6 +382,7 @@ void CPUKernelRuntime::BindOutputTensorAddressPtr(const VectorRef *outputs) { | |||
| // Binds the given input tensors and the output container to the kernel graph's addresses. | |||
| // Both pointer arguments are null-checked before any binding step runs. | |||
| void CPUKernelRuntime::BindInputOutput(session::KernelGraph *kernel_graph, const std::vector<tensor::TensorPtr> &inputs, | |||
| VectorRef *outputs) { | |||
| MS_EXCEPTION_IF_NULL(kernel_graph); | |||
| MS_EXCEPTION_IF_NULL(outputs); | |||
| // Inputs are bound first, then outputs. | |||
| auto &graph = *kernel_graph; | |||
| BindInputTensorAddressPtr(graph, inputs); | |||
| BindOutputTensorAddressPtr(outputs); | |||
| } | |||
| @@ -279,7 +279,7 @@ bool SelectKernel(const CNodePtr &kernel_node, KernelAttr *selected_kernel_attr, | |||
| } | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (kernel_attr.GetOutputSize() != output_num) { | |||
| MS_LOG(DEBUG) << "Output num is not equal!"; | |||
| MS_LOG(EXCEPTION) << "Output num is not equal!"; | |||
| continue; | |||
| } | |||
| int input_dtype_matched_num = | |||
| @@ -299,6 +299,7 @@ bool SelectKernel(const CNodePtr &kernel_node, KernelAttr *selected_kernel_attr, | |||
| } | |||
| void SetKernelInfo(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| // Select for dynamic kernel(both the number and data type are undetermined). | |||
| const std::string &op_name = AnfAlgo::GetCNodeName(kernel_node); | |||
| if (IsDynamicParamKernel(op_name)) { | |||
| @@ -1168,7 +1168,8 @@ void KernelRuntime::GenAddrCleanLaunchArgs(const CNodePtr &cnode, AddressPtrList | |||
| const std::shared_ptr<MemScheduler> &mem_scheduler) { | |||
| MS_EXCEPTION_IF_NULL(cnode); | |||
| MS_EXCEPTION_IF_NULL(kernel_inputs); | |||
| if (cnode->inputs().size() != 2) { | |||
| const size_t kNodeInputSize = 2; | |||
| if (cnode->inputs().size() != kNodeInputSize) { | |||
| MS_LOG(EXCEPTION) << "Atomic Addr clean Node Input nodes not equal 2."; | |||
| } | |||
| MS_EXCEPTION_IF_NULL(cnode->inputs()[1]); | |||
| @@ -144,6 +144,7 @@ bool MemoryManager::MallocMemFromMemPool(const DeviceAddressPtr address, size_t | |||
| if (!device_ptr) { | |||
| return false; | |||
| } | |||
| MS_EXCEPTION_IF_NULL(address); | |||
| address->ptr_ = device_ptr; | |||
| address->size_ = size; | |||
| address->from_mem_pool_ = true; | |||
| @@ -169,6 +169,32 @@ inline size_t SizetMulWithOverflowCheck(size_t a, size_t b) { | |||
| return out; | |||
| } | |||
| // Multiplies two uint32_t values and reports through MS_LOG(EXCEPTION) when the product wrapped around. | |||
| inline uint32_t Uint32tMulWithOverflowCheck(uint32_t a, uint32_t b) { | |||
| const uint32_t product = a * b; | |||
| // For a non-zero `a`, dividing the product back by `a` must recover `b`; otherwise the multiplication wrapped. | |||
| const bool wrapped = (a != 0) && ((product / a) != b); | |||
| if (wrapped) { | |||
| MS_LOG(EXCEPTION) << "Mul: a(" << a << ") * b(" << b << ") result is overflow"; | |||
| } | |||
| return product; | |||
| } | |||
| // Adds two size_t values and reports through MS_LOG(EXCEPTION) when the sum wrapped around. | |||
| inline size_t SizetAddWithOverflowCheck(size_t x, size_t y) { | |||
| const size_t total = x + y; | |||
| // A sum that did not wrap is never smaller than either operand. | |||
| const bool in_range = (total >= x) && (total >= y); | |||
| if (!in_range) { | |||
| MS_LOG(EXCEPTION) << "Add: a(" << x << ") + b(" << y << ") result is overflow"; | |||
| } | |||
| return total; | |||
| } | |||
| // Adds two uint32_t values and reports through MS_LOG(EXCEPTION) when the sum wrapped around. | |||
| inline uint32_t Uint32tAddWithOverflowCheck(uint32_t x, uint32_t y) { | |||
| const uint32_t total = x + y; | |||
| // A wrapped sum ends up below at least one of the operands. | |||
| const bool below_x = total < x; | |||
| const bool below_y = total < y; | |||
| if (below_x || below_y) { | |||
| MS_LOG(EXCEPTION) << "Add: a(" << x << ") + b(" << y << ") result is overflow"; | |||
| } | |||
| return total; | |||
| } | |||
| inline uint8_t *AddressOffset(void *address, size_t offset) { | |||
| MS_EXCEPTION_IF_NULL(address); | |||
| return static_cast<uint8_t *>(address) + offset; | |||