| @@ -89,8 +89,8 @@ void AddAscendBackendOptionalIRFusion(PassManager *ir_fusion_pm) { | |||||
| ir_fusion_pm->AddPass(std::make_shared<ClipByNormNoDivSquareSumFusion>()); | ir_fusion_pm->AddPass(std::make_shared<ClipByNormNoDivSquareSumFusion>()); | ||||
| ir_fusion_pm->AddPass(std::make_shared<LambUpdateWithLRRuleFusion>()); | ir_fusion_pm->AddPass(std::make_shared<LambUpdateWithLRRuleFusion>()); | ||||
| ir_fusion_pm->AddPass(std::make_shared<ConfusionSoftmaxGradRule>()); | ir_fusion_pm->AddPass(std::make_shared<ConfusionSoftmaxGradRule>()); | ||||
| ir_fusion_pm->AddPass(std::make_shared<LambNextMVRule>()); | |||||
| ir_fusion_pm->AddPass(std::make_shared<LambNextMVWithDecayRule>()); | ir_fusion_pm->AddPass(std::make_shared<LambNextMVWithDecayRule>()); | ||||
| ir_fusion_pm->AddPass(std::make_shared<LambNextMVRule>()); | |||||
| ir_fusion_pm->AddPass(std::make_shared<LambNextRightRule>()); | ir_fusion_pm->AddPass(std::make_shared<LambNextRightRule>()); | ||||
| ir_fusion_pm->AddPass(std::make_shared<LambUpdateWithLrV2>()); | ir_fusion_pm->AddPass(std::make_shared<LambUpdateWithLrV2>()); | ||||
| ir_fusion_pm->AddPass(std::make_shared<ReshapeTransposeFusion>()); | ir_fusion_pm->AddPass(std::make_shared<ReshapeTransposeFusion>()); | ||||
| @@ -201,7 +201,7 @@ void AscendBackendIRFusionOptimization(const std::shared_ptr<session::KernelGrap | |||||
| std::string file_path = save_graphs_path + "/" + "hwopt_d_ir_fusion_before" + "_graph_" + | std::string file_path = save_graphs_path + "/" + "hwopt_d_ir_fusion_before" + "_graph_" + | ||||
| std::to_string(kernel_graph->graph_id()) + ".ir"; | std::to_string(kernel_graph->graph_id()) + ".ir"; | ||||
| DumpIR(file_path, kernel_graph); | DumpIR(file_path, kernel_graph); | ||||
| DumpIRProto(kernel_graph, "before_hwopt"); | |||||
| DumpIRProto(kernel_graph, "before_hwopt_" + std::to_string(kernel_graph->graph_id())); | |||||
| } | } | ||||
| auto optimizer = std::make_shared<GraphOptimizer>(); | auto optimizer = std::make_shared<GraphOptimizer>(); | ||||
| auto ir_fusion_pm = std::make_shared<PassManager>("ir_fusion_pm"); | auto ir_fusion_pm = std::make_shared<PassManager>("ir_fusion_pm"); | ||||
| @@ -305,7 +305,7 @@ void AscendBackendOptimization(const std::shared_ptr<session::KernelGraph> &kern | |||||
| std::string file_path = | std::string file_path = | ||||
| save_graphs_path + "/" + "hwopt_d_end" + "_graph_" + std::to_string(kernel_graph->graph_id()) + ".ir"; | save_graphs_path + "/" + "hwopt_d_end" + "_graph_" + std::to_string(kernel_graph->graph_id()) + ".ir"; | ||||
| DumpIR(file_path, kernel_graph, true); | DumpIR(file_path, kernel_graph, true); | ||||
| DumpIRProto(kernel_graph, "after_hwopt"); | |||||
| DumpIRProto(kernel_graph, "after_hwopt_" + std::to_string(kernel_graph->graph_id())); | |||||
| } | } | ||||
| } | } | ||||
| @@ -263,6 +263,7 @@ void AscendSession::BuildGraph(GraphId graph_id) { | |||||
| } | } | ||||
| // sync the initial const tensor to device | // sync the initial const tensor to device | ||||
| SyncInitialTenosrToDevice(); | SyncInitialTenosrToDevice(); | ||||
| ExportChildGraphs(graph_id); | |||||
| MS_LOG(INFO) << "end"; | MS_LOG(INFO) << "end"; | ||||
| } | } | ||||
| @@ -558,6 +559,36 @@ void AscendSession::Dump(const std::shared_ptr<KernelGraph> &kernel_graph) const | |||||
| MS_LOG(INFO) << "Finish!"; | MS_LOG(INFO) << "Finish!"; | ||||
| } | } | ||||
| void AscendSession::ExportChildGraphs(const GraphId graph_id) { | |||||
| #ifdef ENABLE_DUMP_IR | |||||
| auto context_ptr = MsContext::GetInstance(); | |||||
| MS_EXCEPTION_IF_NULL(context_ptr); | |||||
| bool save_graphs = context_ptr->save_graphs_flag(); | |||||
| if (!save_graphs) { | |||||
| return; | |||||
| } | |||||
| auto save_graphs_path = context_ptr->save_graphs_path(); | |||||
| if (save_graphs_path.empty()) { | |||||
| save_graphs_path = "."; | |||||
| } | |||||
| if (graph_id == final_graph_id_) { | |||||
| auto &graph_order = GetGraphOrder(final_graph_id_); | |||||
| auto &graph_type = GetGraphOrderType(final_graph_id_); | |||||
| for (size_t i = 0; i < graph_order.size(); i++) { | |||||
| if (graph_type[i] == BRANCH_END || graph_type[i] == BRANCH_START) { | |||||
| continue; | |||||
| } | |||||
| auto child_graph = GetGraph(graph_order[i]); | |||||
| MS_LOG(DEBUG) << "Start export child graph " << graph_order[i]; | |||||
| std::string file_path = save_graphs_path + "/graph_build_" + std::to_string(child_graph->graph_id()) + ".ir"; | |||||
| DumpIR(file_path, child_graph, true); | |||||
| DumpIRProto(child_graph, "vm_build_" + std::to_string(child_graph->graph_id())); | |||||
| MS_LOG(DEBUG) << "End export child graph " << graph_order[i]; | |||||
| } | |||||
| } | |||||
| #endif | |||||
| } | |||||
| GraphId AscendSession::SetFinalGraphInput(const std::vector<AnfNodePtr> &args) { | GraphId AscendSession::SetFinalGraphInput(const std::vector<AnfNodePtr> &args) { | ||||
| MS_LOG(INFO) << "Start! Args size " << args.size(); | MS_LOG(INFO) << "Start! Args size " << args.size(); | ||||
| auto final_graph = NewKernelGraph(); | auto final_graph = NewKernelGraph(); | ||||
| @@ -82,6 +82,7 @@ class AscendSession : public SessionBasic { | |||||
| void LoadTask(const std::shared_ptr<KernelGraph> &kernel_graph) const; | void LoadTask(const std::shared_ptr<KernelGraph> &kernel_graph) const; | ||||
| void ExecTask(const std::shared_ptr<KernelGraph> &kernel_graph) const; | void ExecTask(const std::shared_ptr<KernelGraph> &kernel_graph) const; | ||||
| void Dump(const std::shared_ptr<KernelGraph> &kernel_graph) const; | void Dump(const std::shared_ptr<KernelGraph> &kernel_graph) const; | ||||
| void ExportChildGraphs(const GraphId graph_id); | |||||
| // below functions are used for run op | // below functions are used for run op | ||||
| void RunOpHardwareOptimize(const std::shared_ptr<session::KernelGraph> &kernel_graph) const; | void RunOpHardwareOptimize(const std::shared_ptr<session::KernelGraph> &kernel_graph) const; | ||||
| void RunOpExecTask(const std::shared_ptr<KernelGraph> &kernel_graph) const; | void RunOpExecTask(const std::shared_ptr<KernelGraph> &kernel_graph) const; | ||||