You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

trace.cc 18 kB

4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526
  1. /**
  2. * Copyright 2019-2021 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "debug/trace.h"
  17. #include <iostream>
  18. #include <fstream>
  19. #include <map>
  20. #include <unordered_map>
  21. #include <unordered_set>
  22. #include <vector>
  23. #include <string>
  24. #include <sstream>
  25. #include <utility>
  26. #include <stack>
  27. #include <algorithm>
  28. #include "ir/meta_func_graph.h"
  29. #include "ir/graph_utils.h"
  30. #include "frontend/operator/composite/composite.h"
  31. #include "ir/tensor.h"
  32. #include "debug/anf_ir_utils.h"
  33. #include "debug/common.h"
  34. #include "pipeline/jit/static_analysis/evaluator.h"
  35. #include "pipeline/jit/static_analysis/async_eval_result.h"
  36. #include "utils/log_adapter.h"
  37. #include "abstract/abstract_value.h"
  38. namespace mindspore {
  39. // namespace to support debug trace information
  40. namespace trace {
  41. using abstract::AbstractBasePtr;
  42. using abstract::AnalysisContextPtr;
  43. using abstract::AnalysisEnginePtr;
  44. using abstract::AnfNodeConfigPtr;
  45. std::string GetAbstractStr(const abstract::AbstractBasePtr &abs) {
  46. if (abs == nullptr) {
  47. return "NullAbstract";
  48. }
  49. auto shape = abs->BuildShape()->cast<abstract::ShapePtr>();
  50. TypePtr type = abs->BuildType();
  51. std::ostringstream oss;
  52. if ((shape != nullptr) && (type != nullptr)) {
  53. oss << type->DumpText() << shape->DumpText();
  54. } else if (type != nullptr) {
  55. oss << type->DumpText();
  56. } else {
  57. oss << "Undefined";
  58. }
  59. return oss.str();
  60. }
  61. std::string GetGraphParamString(const FuncGraphPtr &graph, const abstract::AbstractBasePtrList &args_spec_list) {
  62. MS_EXCEPTION_IF_NULL(graph);
  63. std::ostringstream oss;
  64. oss << "graph:" << graph->ToString() << " with args[";
  65. auto params = graph->parameters();
  66. if (params.size() < args_spec_list.size()) {
  67. MS_EXCEPTION(TypeError) << "The size of parameters less than args_spec_list's size.";
  68. }
  69. for (size_t i = 0; i < args_spec_list.size(); i++) {
  70. auto parameter = params[i];
  71. MS_EXCEPTION_IF_NULL(parameter);
  72. oss << parameter->ToString() << ":<" << GetAbstractStr(args_spec_list[i]) << ">,";
  73. }
  74. oss << "]";
  75. oss << GetDebugInfo(graph->debug_info(), kSourceLineTipDiscard);
  76. return oss.str();
  77. }
  78. void DumpInferStack(std::ostringstream &oss) {
  79. auto &graph_stack = GetCurrenGraphEvalStack();
  80. if (graph_stack.empty()) {
  81. return;
  82. }
  83. std::vector<std::pair<abstract::AnalysisContextPtr, abstract::AnfNodeConfigPtr>> infer_vec;
  84. while (!graph_stack.empty()) {
  85. auto top = graph_stack.back();
  86. infer_vec.push_back(top);
  87. graph_stack.pop_back();
  88. }
  89. std::reverse(infer_vec.begin(), infer_vec.end());
  90. int index = 0;
  91. for (const auto &item : infer_vec) {
  92. auto context = item.first;
  93. if (context == nullptr) {
  94. MS_LOG(EXCEPTION) << "DumpInferStack failed, got null graph context";
  95. }
  96. auto graph = context->func_graph();
  97. if (graph == nullptr) { // Top context.
  98. continue;
  99. }
  100. auto args_spec_list = context->args_spec_list();
  101. if (graph->parameters().size() < args_spec_list.size()) {
  102. continue;
  103. }
  104. oss << " #" << index++ << " " << GetGraphParamString(graph, args_spec_list) << "\n";
  105. }
  106. }
  107. void TraceGraphEval() {
  108. auto &graph_stack = GetCurrenGraphEvalStack();
  109. if (graph_stack.empty()) {
  110. MS_LOG(INFO) << "Length of analysis graph stack is empty.";
  111. return;
  112. }
  113. std::ostringstream oss;
  114. oss << "\n*******************************graph evaluate stack**********************************";
  115. oss << std::endl;
  116. DumpInferStack(oss);
  117. oss << "\n*************************************************************************************";
  118. MS_LOG(ERROR) << oss.str();
  119. }
  120. class AnalyzeFailExporter : public AnfExporter {
  121. public:
  122. AnalyzeFailExporter() : AnfExporter(true, false) {}
  123. ~AnalyzeFailExporter() override = default;
  124. bool ExportFuncGraph(const std::string &filename, const TraceCNodeEvalStack &node_config_stack);
  125. private:
  126. void OutputCNode(std::ofstream &ofs, const CNodePtr &cnode, const FuncGraphPtr &func_graph, int *idx,
  127. std::map<AnfNodePtr, int> *const apply_map) override;
  128. std::string GetNodeType(const AnfNodePtr &nd) override;
  129. AbstractBasePtr GetNodeAbstract(const AnfNodePtr &nd);
  130. AnfNodeConfigPtr GetFordwardConfig(const AnfNodeConfigPtr &cfg);
  131. void ProcessFuncGraphCall(const CNodePtr &node, std::string *const op_comment);
  132. void OutputStatementComment(std::ofstream &ofs, const CNodePtr &node);
  133. std::unordered_map<FuncGraphPtr, TaggedNodeMap> CreateTaggedNodeMap(
  134. const std::vector<abstract::AnfNodeConfigPtr> &node_config_stack);
  135. AnalysisContextPtr current_context_ = nullptr;
  136. AnalysisEnginePtr engine_ = nullptr;
  137. };
  138. std::unordered_map<FuncGraphPtr, TaggedNodeMap> AnalyzeFailExporter::CreateTaggedNodeMap(
  139. const std::vector<abstract::AnfNodeConfigPtr> &node_config_stack) {
  140. std::unordered_set<abstract::AnfNodeConfigPtr> forwarded_configs; // Check if config. is forwarded.
  141. std::unordered_map<FuncGraphPtr, TaggedNodeMap> tagged_func_graphs;
  142. size_t index = 0;
  143. for (auto &node_config : node_config_stack) {
  144. MS_EXCEPTION_IF_NULL(node_config);
  145. // Record new config in set.
  146. auto new_config = GetFordwardConfig(node_config);
  147. if (new_config != node_config) {
  148. MS_LOG(DEBUG) << "The node_config is forwarded, old config: " << node_config->ToString()
  149. << ", new_config: " << new_config->ToString();
  150. forwarded_configs.emplace(new_config);
  151. }
  152. // Ignore the new config.
  153. if (forwarded_configs.find(node_config) != forwarded_configs.end()) {
  154. continue;
  155. }
  156. auto fg = node_config->func_graph();
  157. MS_EXCEPTION_IF_NULL(fg);
  158. auto node = node_config->node();
  159. tagged_func_graphs[fg][node] = index;
  160. index++;
  161. }
  162. return tagged_func_graphs;
  163. }
  164. bool OutputAnalyzedGraphWithType(const string &file_path) {
  165. AnalyzeFailExporter exporter;
  166. return exporter.ExportFuncGraph(file_path, GetCNodeDebugStack());
  167. }
  168. std::string AnalyzeFailExporter::GetNodeType(const AnfNodePtr &node) {
  169. if (current_context_ == nullptr) {
  170. return AnfExporter::GetNodeType(node);
  171. }
  172. MS_EXCEPTION_IF_NULL(engine_);
  173. try {
  174. FuncGraphPtr dummy_call_func_graph = nullptr;
  175. auto cfg = engine_->MakeConfig(node, current_context_, dummy_call_func_graph);
  176. auto res = abstract::AnalysisResultCacheMgr::GetInstance().GetValue(cfg);
  177. if (res != nullptr) {
  178. return GetAbstractStr(res->abstract());
  179. }
  180. } catch (const std::exception &e) {
  181. MS_LOG(INFO) << "Exception: " << e.what();
  182. }
  183. return "Undefined";
  184. }
  185. AbstractBasePtr AnalyzeFailExporter::GetNodeAbstract(const AnfNodePtr &node) {
  186. if (current_context_ == nullptr) {
  187. return nullptr;
  188. }
  189. MS_EXCEPTION_IF_NULL(engine_);
  190. try {
  191. FuncGraphPtr dummy_call_func_graph = nullptr;
  192. auto cfg = engine_->MakeConfig(node, current_context_, dummy_call_func_graph);
  193. auto res = abstract::AnalysisResultCacheMgr::GetInstance().GetValue(cfg);
  194. return res == nullptr ? nullptr : res->abstract();
  195. } catch (const std::exception &e) {
  196. MS_LOG(INFO) << "Exception: " << e.what();
  197. }
  198. return nullptr;
  199. }
  200. AnfNodeConfigPtr AnalyzeFailExporter::GetFordwardConfig(const AnfNodeConfigPtr &cfg) {
  201. MS_EXCEPTION_IF_NULL(cfg);
  202. MS_EXCEPTION_IF_NULL(engine_);
  203. AnfNodeConfigPtr cur_cfg = cfg;
  204. auto iter = engine_->anfnode_config_map().find(cur_cfg);
  205. while (iter != engine_->anfnode_config_map().end()) {
  206. auto node = cur_cfg->node();
  207. cur_cfg = iter->second;
  208. MS_LOG(DEBUG) << "Get forword node: " << node << "[" << node->DebugString() << "] --> " << cur_cfg->node() << "["
  209. << cur_cfg->node()->DebugString() << "]";
  210. iter = engine_->anfnode_config_map().find(cur_cfg);
  211. }
  212. return cur_cfg;
  213. }
  214. void AnalyzeFailExporter::ProcessFuncGraphCall(const CNodePtr &node, std::string *const op_comment) {
  215. if (node == nullptr) {
  216. MS_LOG(ERROR) << "Node is nullptr";
  217. return;
  218. }
  219. CNodePtr cnode = nullptr;
  220. try {
  221. FuncGraphPtr dummy_call_func_graph = nullptr;
  222. auto cfg = engine_->MakeConfig(node, current_context_, dummy_call_func_graph);
  223. cfg = GetFordwardConfig(cfg);
  224. cnode = dyn_cast<CNode>(cfg->node());
  225. } catch (const std::exception &e) {
  226. MS_LOG(INFO) << "Exception: " << e.what();
  227. }
  228. if (cnode == nullptr) {
  229. MS_LOG(INFO) << "CNode is nullptr";
  230. return;
  231. }
  232. const auto &inputs = cnode->inputs();
  233. for (size_t i = 0; i < inputs.size(); ++i) {
  234. auto op_abs = GetNodeAbstract(inputs[i]);
  235. if (op_abs == nullptr) {
  236. MS_LOG(DEBUG) << "Abstract of inputs[" << i << "] of cnode " << cnode->ToString() << " is nullptr";
  237. continue;
  238. }
  239. if (!op_abs->isa<abstract::FuncGraphAbstractClosure>() && !op_abs->isa<abstract::MetaFuncGraphAbstractClosure>()) {
  240. MS_LOG(DEBUG) << "Inputs[" << i << "] of cnode " << cnode->ToString() << " is of type " << op_abs->type_name()
  241. << ", not function, ignore it";
  242. // Get prototype of VirtualEvaluator for printing
  243. if (i == 0 && op_abs->isa<abstract::VirtualAbstractClosure>()) {
  244. auto func = dyn_cast<abstract::VirtualAbstractClosure>(op_abs);
  245. std::ostringstream oss;
  246. oss << "(";
  247. bool first_flag = false;
  248. for (const auto &arg : func->args_spec_list()) {
  249. if (!first_flag) {
  250. first_flag = true;
  251. } else {
  252. oss << ", ";
  253. }
  254. oss << GetAbstractStr(arg);
  255. }
  256. oss << ") -> " << GetAbstractStr(func->output()) << " ";
  257. *op_comment = oss.str();
  258. }
  259. }
  260. }
  261. }
  262. void AnalyzeFailExporter::OutputStatementComment(std::ofstream &ofs, const CNodePtr &node) {
  263. if (node == nullptr) {
  264. return;
  265. }
  266. // Output type of each input argument
  267. auto &inputs = node->inputs();
  268. if (inputs.size() > 1) {
  269. ofs << " #(";
  270. for (size_t i = 1; i < inputs.size(); ++i) {
  271. if (i != 1) {
  272. ofs << ", ";
  273. }
  274. AnfNodePtr arg = inputs[i];
  275. ofs << GetNodeType(arg);
  276. }
  277. ofs << ")";
  278. }
  279. // Output other comment, map the graph name to original representation(containing unicode character)
  280. std::ostringstream comment;
  281. comment << " #";
  282. bool has_comment = false;
  283. for (size_t i = 0; i < inputs.size(); ++i) {
  284. AnfNodePtr arg = inputs[i];
  285. if (!IsValueNode<FuncGraph>(arg)) {
  286. continue;
  287. }
  288. if (!has_comment) {
  289. has_comment = true;
  290. } else {
  291. comment << ",";
  292. }
  293. FuncGraphPtr fg = GetValueNode<FuncGraphPtr>(arg);
  294. std::string func_graph_id = fg->debug_info()->get_id();
  295. comment << " fg_" << func_graph_id << "=" << fg->ToString();
  296. }
  297. if (has_comment) {
  298. ofs << comment.str();
  299. }
  300. ofs << " #scope: " << node->scope()->name();
  301. }
  302. void AnalyzeFailExporter::OutputCNode(std::ofstream &ofs, const CNodePtr &cnode, const FuncGraphPtr &func_graph,
  303. int *idx, std::map<AnfNodePtr, int> *const apply_map) {
  304. OutputCNodeText(ofs, cnode, func_graph, idx, apply_map);
  305. // Process function graph call
  306. std::string op_comment;
  307. ProcessFuncGraphCall(cnode, &op_comment);
  308. if (!op_comment.empty()) {
  309. auto &inputs = cnode->inputs();
  310. ofs << " #" << GetAnfNodeText(func_graph, inputs[0], *apply_map) << ".prototype = " << op_comment;
  311. }
  312. // Output comment
  313. OutputStatementComment(ofs, cnode);
  314. ofs << "\n";
  315. }
  316. bool AnalyzeFailExporter::ExportFuncGraph(const std::string &filename, const TraceCNodeEvalStack &node_config_stack) {
  317. if (node_config_stack.empty()) {
  318. MS_LOG(DEBUG) << "Node configs is empty";
  319. return false;
  320. }
  321. std::ofstream ofs(filename);
  322. if (!ofs.is_open()) {
  323. MS_LOG(ERROR) << "Open file '" << filename << "' failed!"
  324. << " Errno:" << errno << " ErrInfo:" << strerror(errno);
  325. return false;
  326. }
  327. if (engine_ == nullptr) {
  328. engine_ = node_config_stack.front()->engine();
  329. }
  330. auto tagged_func_graphs = CreateTaggedNodeMap(node_config_stack);
  331. std::unordered_set<FuncGraphPtr> printed_func_graphs; // Check if func graph has been printed.
  332. // Output graph on the analysis stack
  333. for (const auto &node_config : node_config_stack) {
  334. auto fg = node_config->func_graph();
  335. MS_LOG(INFO) << "Node: " << node_config->node()->DebugString()
  336. << ", FV: " << (node_config->func_graph() != node_config->context()->func_graph())
  337. << ", calling func graph: " << node_config->func_graph()->ToString()
  338. << ", context func graph: " << node_config->context()->func_graph()->ToString();
  339. if (fg == nullptr) {
  340. MS_LOG(ERROR) << "FuncGraph is null, context: " << node_config->ToString();
  341. continue;
  342. }
  343. if (printed_func_graphs.find(fg) != printed_func_graphs.end()) {
  344. continue;
  345. }
  346. (void)printed_func_graphs.emplace(fg);
  347. current_context_ = node_config->context(); // Set current context.
  348. ExportOneFuncGraph(ofs, fg, tagged_func_graphs[fg]);
  349. ofs << "\n\n";
  350. }
  351. ofs << "#===============================================================================\n";
  352. ofs << "# num of function graphs in stack: ";
  353. auto ignored_num = (node_config_stack.size() - printed_func_graphs.size());
  354. if (ignored_num == 0) {
  355. ofs << node_config_stack.size() << "\n";
  356. } else {
  357. ofs << printed_func_graphs.size() << "/" << node_config_stack.size() << " (Ignored " << ignored_num
  358. << " internal frames).\n";
  359. }
  360. ofs.close();
  361. return true;
  362. }
  363. void GetEvalStackInfo(std::ostringstream &oss) {
  364. MS_LOG(INFO) << "Get graph analysis information begin";
  365. auto stack = GetCNodeDebugStack();
  366. if (stack.empty()) {
  367. MS_LOG(INFO) << "Length of analysis information stack is empty.";
  368. return;
  369. }
  370. string file_name = "analyze_fail.dat";
  371. auto ms_om_path = common::GetEnv("MS_OM_PATH");
  372. if (!ms_om_path.empty()) {
  373. auto path = ms_om_path + "/" + file_name;
  374. auto realpath = Common::GetRealPath(path);
  375. if (!realpath.has_value()) {
  376. MS_EXCEPTION(ValueError) << "Get real path failed. path=" << path;
  377. }
  378. file_name = realpath.value();
  379. }
  380. auto ret = OutputAnalyzedGraphWithType(file_name);
  381. oss << "\nThe function call stack";
  382. if (ret) {
  383. oss << " (See file '" << file_name << "' for more details)";
  384. }
  385. oss << ":\n";
  386. int index = 0;
  387. std::string last_location_info = "";
  388. for (size_t i = 0; i < stack.size(); ++i) {
  389. auto node_config = stack[i];
  390. MS_EXCEPTION_IF_NULL(node_config);
  391. auto cnode = dyn_cast<CNode>(node_config->node());
  392. if (cnode == nullptr) {
  393. MS_LOG(DEBUG) << "CNode of elements[" << i << "] is nullptr.";
  394. continue;
  395. }
  396. auto debug_info = cnode->debug_info();
  397. auto this_location_info = trace::GetDebugInfo(debug_info, std::string(""));
  398. if (this_location_info.empty() || this_location_info == last_location_info) {
  399. continue;
  400. }
  401. last_location_info = this_location_info;
  402. oss << "# " << index++ << " " << this_location_info;
  403. }
  404. stack.clear();
  405. MS_LOG(INFO) << "Get graph analysis information *end*";
  406. }
  407. // Trace the graph evaluator stack
  408. thread_local TraceGraphEvalStack graph_infer_stack;
  409. // Trace the cnode infer debug info
  410. thread_local TraceCNodeEvalStack cnode_debug_stack{};
  411. void TraceGraphEvalEnter(const abstract::AnalysisContextPtr &context, const abstract::AnfNodeConfigPtr &node) {
  412. if (context == nullptr) {
  413. MS_LOG(EXCEPTION) << "GraphInferEnter got null context";
  414. }
  415. (void)graph_infer_stack.push_back(std::pair<abstract::AnalysisContextPtr, abstract::AnfNodeConfigPtr>(context, node));
  416. }
  417. void TraceGraphEvalLeave(const abstract::AnalysisContextPtr &context) {
  418. if (context == nullptr || graph_infer_stack.empty()) {
  419. MS_LOG(EXCEPTION) << "The context is null, or call stack is empty.";
  420. }
  421. if (context != graph_infer_stack.back().first) {
  422. MS_LOG(EXCEPTION) << "Different context: " << context->func_graph()->ToString() << ", "
  423. << graph_infer_stack.back().first->func_graph()->ToString();
  424. }
  425. graph_infer_stack.pop_back();
  426. }
  427. void TraceGraphEvalStackPrepare(const TraceGraphEvalStack &graphEvals) {
  428. graph_infer_stack.insert(graph_infer_stack.end(), graphEvals.begin(), graphEvals.end());
  429. }
  430. void TraceEvalCNodeStackPrepare(const TraceCNodeEvalStack &cnodeEvals) {
  431. cnode_debug_stack.insert(cnode_debug_stack.end(), cnodeEvals.begin(), cnodeEvals.end());
  432. }
  433. void TraceEvalCNodeEnter(const abstract::AnfNodeConfigPtr &node_config) { cnode_debug_stack.push_back(node_config); }
  434. void TraceEvalCNodeLeave() { cnode_debug_stack.pop_back(); }
  435. TraceCNodeEvalStack &GetCNodeDebugStack() { return cnode_debug_stack; }
  436. TraceGraphEvalStack &GetCurrenGraphEvalStack() { return graph_infer_stack; }
  437. void ClearTraceStack() {
  438. while (!graph_infer_stack.empty()) {
  439. graph_infer_stack.pop_back();
  440. }
  441. cnode_debug_stack.clear();
  442. }
  443. void GetTraceStackInfo(std::ostringstream &oss) {
  444. TraceGraphEval();
  445. std::ostringstream trace_info;
  446. GetEvalStackInfo(trace_info);
  447. if (trace_info.str().empty()) {
  448. DebugInfoPtr debug_info = TraceManager::GetParseOrResolveDebugInfo();
  449. if (debug_info != nullptr) {
  450. oss << "\n\n# " << trace::GetDebugInfo(debug_info);
  451. }
  452. } else {
  453. oss << trace_info.str();
  454. }
  455. }
  456. // Register trace provider to LogWriter.
  457. struct TraceProviderRegister {
  458. TraceProviderRegister() { LogWriter::set_trace_provider(GetTraceStackInfo); }
  459. ~TraceProviderRegister() = default;
  460. } trace_provider_regsiter;
  461. // Register trace cnode provider to AbstractBase.
  462. struct TraceNodeProviderRegister {
  463. TraceNodeProviderRegister() {
  464. abstract::AbstractBase::set_trace_node_provider([](AnfNodePtr *node) {
  465. auto stack = GetCNodeDebugStack();
  466. if (!stack.empty()) {
  467. auto conf = stack.back();
  468. *node = conf->node();
  469. }
  470. });
  471. }
  472. ~TraceNodeProviderRegister() = default;
  473. } trace_node_provider_regsiter;
  474. } // namespace trace
  475. } // namespace mindspore