You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

trace.cc 18 kB

4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506
  1. /**
  2. * Copyright 2019-2021 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "debug/trace.h"
  17. #include <iostream>
  18. #include <fstream>
  19. #include <map>
  20. #include <vector>
  21. #include <string>
  22. #include <sstream>
  23. #include <utility>
  24. #include <stack>
  25. #include <algorithm>
  26. #include "utils/hash_map.h"
  27. #include "utils/hash_set.h"
  28. #include "ir/meta_func_graph.h"
  29. #include "ir/graph_utils.h"
  30. #include "frontend/operator/composite/composite.h"
  31. #include "ir/tensor.h"
  32. #include "debug/anf_ir_utils.h"
  33. #include "debug/common.h"
  34. #include "pipeline/jit/static_analysis/evaluator.h"
  35. #include "pipeline/jit/static_analysis/async_eval_result.h"
  36. #include "pipeline/jit/base.h"
  37. #include "utils/log_adapter.h"
  38. #include "utils/comm_manager.h"
  39. #include "abstract/abstract_value.h"
  40. #include "utils/file_utils.h"
  41. namespace mindspore {
  42. // namespace to support debug trace information
  43. namespace trace {
  44. using abstract::AbstractBasePtr;
  45. using abstract::AnalysisContextPtr;
  46. using abstract::AnalysisEnginePtr;
  47. using abstract::AnfNodeConfigPtr;
  48. std::string GetAbstractStr(const abstract::AbstractBasePtr &abs) {
  49. if (abs == nullptr) {
  50. return "NullAbstract";
  51. }
  52. auto shape = abs->BuildShape()->cast<abstract::ShapePtr>();
  53. TypePtr type = abs->BuildType();
  54. std::ostringstream oss;
  55. if ((shape != nullptr) && (type != nullptr)) {
  56. oss << type->DumpText() << shape->DumpText();
  57. } else if (type != nullptr) {
  58. oss << type->DumpText();
  59. } else {
  60. oss << "Undefined";
  61. }
  62. return oss.str();
  63. }
  64. std::string GetGraphParamString(const FuncGraphPtr &graph, const abstract::AbstractBasePtrList &args_spec_list) {
  65. MS_EXCEPTION_IF_NULL(graph);
  66. std::ostringstream oss;
  67. oss << "graph:" << graph->ToString() << " with args[";
  68. auto params = graph->parameters();
  69. if (params.size() < args_spec_list.size()) {
  70. MS_EXCEPTION(TypeError) << "The size of parameters less than args_spec_list's size.";
  71. }
  72. for (size_t i = 0; i < args_spec_list.size(); i++) {
  73. auto parameter = params[i];
  74. MS_EXCEPTION_IF_NULL(parameter);
  75. oss << parameter->ToString() << ":<" << GetAbstractStr(args_spec_list[i]) << ">,";
  76. }
  77. oss << "]";
  78. oss << GetDebugInfo(graph->debug_info(), kSourceLineTipDiscard);
  79. return oss.str();
  80. }
  81. void DumpInferStack(std::ostringstream &oss) {
  82. auto &graph_stack = GetCurrentGraphEvalStack();
  83. if (graph_stack.empty()) {
  84. return;
  85. }
  86. std::vector<std::pair<abstract::AnalysisContextPtr, abstract::AnfNodeConfigPtr>> infer_vec;
  87. while (!graph_stack.empty()) {
  88. auto top = graph_stack.back();
  89. infer_vec.push_back(top);
  90. graph_stack.pop_back();
  91. }
  92. std::reverse(infer_vec.begin(), infer_vec.end());
  93. int index = 0;
  94. for (const auto &item : infer_vec) {
  95. auto context = item.first;
  96. if (context == nullptr) {
  97. MS_LOG(EXCEPTION) << "DumpInferStack failed, got null graph context";
  98. }
  99. auto graph = context->func_graph();
  100. if (graph == nullptr) { // Top context.
  101. continue;
  102. }
  103. auto args_spec_list = context->args_spec_list();
  104. if (graph->parameters().size() < args_spec_list.size()) {
  105. continue;
  106. }
  107. oss << " #" << index++ << " " << GetGraphParamString(graph, args_spec_list) << "\n";
  108. }
  109. }
  110. void TraceGraphEval() {
  111. auto &graph_stack = GetCurrentGraphEvalStack();
  112. if (graph_stack.empty()) {
  113. MS_LOG(INFO) << "Length of analysis graph stack is empty.";
  114. return;
  115. }
  116. std::ostringstream oss;
  117. oss << "\n*******************************graph evaluate stack**********************************";
  118. oss << std::endl;
  119. DumpInferStack(oss);
  120. oss << "\n*************************************************************************************";
  121. MS_LOG(INFO) << oss.str();
  122. }
  123. class AnalyzeFailExporter : public AnfExporter {
  124. public:
  125. AnalyzeFailExporter() : AnfExporter(true, false) {}
  126. ~AnalyzeFailExporter() override = default;
  127. bool ExportFuncGraph(const std::string &filename, const TraceCNodeEvalStack &node_config_stack);
  128. protected:
  129. void OutputCNode(std::ofstream &ofs, const CNodePtr &cnode, const FuncGraphPtr &func_graph, int *idx,
  130. std::map<AnfNodePtr, int> *const apply_map) override;
  131. protected:
  132. std::string GetNodeType(const AnfNodePtr &nd) override;
  133. AbstractBasePtr GetNodeAbstract(const AnfNodePtr &nd);
  134. AnfNodeConfigPtr GetForwardConfig(const AnfNodeConfigPtr &cfg);
  135. void ProcessFuncGraphCall(const CNodePtr &node, std::string *const op_comment);
  136. mindspore::HashMap<FuncGraphPtr, TaggedNodeMap> CreateTaggedNodeMap(
  137. const std::vector<abstract::AnfNodeConfigPtr> &node_config_stack);
  138. private:
  139. AnalysisContextPtr current_context_ = nullptr;
  140. AnalysisEnginePtr engine_ = nullptr;
  141. };
  142. mindspore::HashMap<FuncGraphPtr, TaggedNodeMap> AnalyzeFailExporter::CreateTaggedNodeMap(
  143. const std::vector<abstract::AnfNodeConfigPtr> &node_config_stack) {
  144. mindspore::HashSet<abstract::AnfNodeConfigPtr> forwarded_configs; // Check if config. is forwarded.
  145. mindspore::HashMap<FuncGraphPtr, TaggedNodeMap> tagged_func_graphs;
  146. size_t index = 0;
  147. for (auto &node_config : node_config_stack) {
  148. MS_EXCEPTION_IF_NULL(node_config);
  149. // Record new config in set.
  150. auto new_config = GetForwardConfig(node_config);
  151. if (new_config != node_config) {
  152. MS_LOG(DEBUG) << "The node_config is forwarded, old config: " << node_config->ToString()
  153. << ", new_config: " << new_config->ToString();
  154. forwarded_configs.emplace(new_config);
  155. }
  156. // Ignore the new config.
  157. if (forwarded_configs.find(node_config) != forwarded_configs.end()) {
  158. continue;
  159. }
  160. auto fg = node_config->func_graph();
  161. MS_EXCEPTION_IF_NULL(fg);
  162. auto node = node_config->node();
  163. tagged_func_graphs[fg][node] = index;
  164. index++;
  165. }
  166. return tagged_func_graphs;
  167. }
  168. bool OutputAnalyzedGraphWithType(const string &file_path) {
  169. AnalyzeFailExporter exporter;
  170. return exporter.ExportFuncGraph(file_path, GetCNodeDebugStack());
  171. }
  172. std::string AnalyzeFailExporter::GetNodeType(const AnfNodePtr &node) {
  173. if (current_context_ == nullptr) {
  174. return AnfExporter::GetNodeType(node);
  175. }
  176. MS_EXCEPTION_IF_NULL(engine_);
  177. try {
  178. FuncGraphPtr dummy_call_func_graph = nullptr;
  179. auto cfg = engine_->MakeConfig(node, current_context_, dummy_call_func_graph);
  180. auto res = abstract::AnalysisResultCacheMgr::GetInstance().GetValue(cfg);
  181. if (res != nullptr) {
  182. return GetAbstractStr(res->abstract());
  183. }
  184. } catch (const std::exception &e) {
  185. MS_LOG(INFO) << "Exception: " << e.what();
  186. }
  187. return "Undefined";
  188. }
  189. AbstractBasePtr AnalyzeFailExporter::GetNodeAbstract(const AnfNodePtr &node) {
  190. if (current_context_ == nullptr) {
  191. return nullptr;
  192. }
  193. MS_EXCEPTION_IF_NULL(engine_);
  194. try {
  195. FuncGraphPtr dummy_call_func_graph = nullptr;
  196. auto cfg = engine_->MakeConfig(node, current_context_, dummy_call_func_graph);
  197. auto res = abstract::AnalysisResultCacheMgr::GetInstance().GetValue(cfg);
  198. return res == nullptr ? nullptr : res->abstract();
  199. } catch (const std::exception &e) {
  200. MS_LOG(INFO) << "Exception: " << e.what();
  201. }
  202. return nullptr;
  203. }
  204. AnfNodeConfigPtr AnalyzeFailExporter::GetForwardConfig(const AnfNodeConfigPtr &cfg) {
  205. MS_EXCEPTION_IF_NULL(cfg);
  206. MS_EXCEPTION_IF_NULL(engine_);
  207. AnfNodeConfigPtr cur_cfg = cfg;
  208. auto iter = engine_->anfnode_config_map().find(cur_cfg);
  209. while (iter != engine_->anfnode_config_map().end()) {
  210. auto node = cur_cfg->node();
  211. cur_cfg = iter->second;
  212. MS_LOG(DEBUG) << "Get forward node: " << node << "[" << node->DebugString() << "] --> " << cur_cfg->node() << "["
  213. << cur_cfg->node()->DebugString() << "]";
  214. iter = engine_->anfnode_config_map().find(cur_cfg);
  215. }
  216. return cur_cfg;
  217. }
  218. void AnalyzeFailExporter::ProcessFuncGraphCall(const CNodePtr &node, std::string *const op_comment) {
  219. if (node == nullptr) {
  220. MS_LOG(ERROR) << "Node is nullptr";
  221. return;
  222. }
  223. CNodePtr cnode = nullptr;
  224. try {
  225. FuncGraphPtr dummy_call_func_graph = nullptr;
  226. auto cfg = engine_->MakeConfig(node, current_context_, dummy_call_func_graph);
  227. cfg = GetForwardConfig(cfg);
  228. MS_EXCEPTION_IF_NULL(cfg);
  229. cnode = dyn_cast<CNode>(cfg->node());
  230. } catch (const std::exception &e) {
  231. MS_LOG(INFO) << "Exception: " << e.what();
  232. }
  233. if (cnode == nullptr) {
  234. MS_LOG(INFO) << "CNode is nullptr";
  235. return;
  236. }
  237. const auto &inputs = cnode->inputs();
  238. for (size_t i = 0; i < inputs.size(); ++i) {
  239. auto op_abs = GetNodeAbstract(inputs[i]);
  240. if (op_abs == nullptr) {
  241. MS_LOG(DEBUG) << "Abstract of inputs[" << i << "] of cnode " << cnode->ToString() << " is nullptr";
  242. continue;
  243. }
  244. if (!op_abs->isa<abstract::FuncGraphAbstractClosure>() && !op_abs->isa<abstract::MetaFuncGraphAbstractClosure>()) {
  245. MS_LOG(DEBUG) << "Inputs[" << i << "] of cnode " << cnode->ToString() << " is of type " << op_abs->type_name()
  246. << ", not function, ignore it";
  247. // Get prototype of VirtualEvaluator for printing
  248. if (i == 0 && op_abs->isa<abstract::VirtualAbstractClosure>()) {
  249. auto func = dyn_cast<abstract::VirtualAbstractClosure>(op_abs);
  250. std::ostringstream oss;
  251. oss << "(";
  252. bool first_flag = false;
  253. for (const auto &arg : func->args_spec_list()) {
  254. if (!first_flag) {
  255. first_flag = true;
  256. } else {
  257. oss << ", ";
  258. }
  259. oss << GetAbstractStr(arg);
  260. }
  261. oss << ") -> " << GetAbstractStr(func->output()) << " ";
  262. *op_comment = oss.str();
  263. }
  264. }
  265. }
  266. }
  267. void AnalyzeFailExporter::OutputCNode(std::ofstream &ofs, const CNodePtr &cnode, const FuncGraphPtr &func_graph,
  268. int *idx, std::map<AnfNodePtr, int> *const apply_map) {
  269. OutputCNodeText(ofs, cnode, func_graph, idx, apply_map);
  270. // Process function graph call
  271. std::string op_comment;
  272. ProcessFuncGraphCall(cnode, &op_comment);
  273. if (!op_comment.empty()) {
  274. auto &inputs = cnode->inputs();
  275. ofs << " #" << GetAnfNodeText(func_graph, inputs[0], *apply_map) << ".prototype = " << op_comment;
  276. }
  277. // Output comment
  278. OutputStatementComment(ofs, cnode);
  279. ofs << "\n";
  280. }
  281. bool AnalyzeFailExporter::ExportFuncGraph(const std::string &filename, const TraceCNodeEvalStack &node_config_stack) {
  282. if (node_config_stack.empty()) {
  283. MS_LOG(DEBUG) << "Node configs is empty";
  284. return false;
  285. }
  286. auto real_filepath = Common::CreatePrefixPath(filename);
  287. if (!real_filepath.has_value()) {
  288. MS_LOG(ERROR) << "The export ir path: " << filename << " is not illegal.";
  289. return false;
  290. }
  291. ChangeFileMode(real_filepath.value(), S_IWUSR);
  292. std::ofstream ofs(real_filepath.value());
  293. if (!ofs.is_open()) {
  294. MS_LOG(ERROR) << "Open file '" << real_filepath.value() << "' failed!" << ErrnoToString(errno);
  295. return false;
  296. }
  297. if (engine_ == nullptr) {
  298. engine_ = node_config_stack.front()->engine();
  299. }
  300. auto tagged_func_graphs = CreateTaggedNodeMap(node_config_stack);
  301. mindspore::HashSet<FuncGraphPtr> printed_func_graphs; // Check if func graph has been printed.
  302. // Output graph on the analysis stack
  303. for (const auto &node_config : node_config_stack) {
  304. MS_EXCEPTION_IF_NULL(node_config);
  305. auto fg = node_config->func_graph();
  306. MS_LOG(INFO) << "Node: " << node_config->node()->DebugString()
  307. << ", FV: " << (node_config->func_graph() != node_config->context()->func_graph())
  308. << ", calling func graph: " << node_config->func_graph()->ToString()
  309. << ", context func graph: " << node_config->context()->func_graph()->ToString();
  310. if (fg == nullptr) {
  311. MS_LOG(ERROR) << "FuncGraph is null, context: " << node_config->ToString();
  312. continue;
  313. }
  314. if (printed_func_graphs.find(fg) != printed_func_graphs.end()) {
  315. continue;
  316. }
  317. (void)printed_func_graphs.emplace(fg);
  318. current_context_ = node_config->context(); // Set current context.
  319. ExportOneFuncGraph(ofs, fg, tagged_func_graphs[fg]);
  320. ofs << "\n\n";
  321. }
  322. ofs << "#===============================================================================\n";
  323. ofs << "# num of function graphs in stack: ";
  324. auto ignored_num = (node_config_stack.size() - printed_func_graphs.size());
  325. if (ignored_num == 0) {
  326. ofs << node_config_stack.size() << "\n";
  327. } else {
  328. ofs << printed_func_graphs.size() << "/" << node_config_stack.size() << " (Ignored " << ignored_num
  329. << " internal frames).\n";
  330. }
  331. ofs.close();
  332. ChangeFileMode(real_filepath.value(), S_IRUSR);
  333. return true;
  334. }
  335. std::string GetEvalFailDatPath() {
  336. std::string path;
  337. auto ms_om_path = common::GetEnv("MS_OM_PATH");
  338. if (!ms_om_path.empty()) {
  339. path = ms_om_path;
  340. } else {
  341. path = ".";
  342. }
  343. path += "/rank_" + std::to_string(GetRank()) + "/om/analyze_fail.dat";
  344. // Support "../" in path.
  345. auto realpath = Common::CreatePrefixPath(path, true);
  346. if (!realpath.has_value()) {
  347. MS_EXCEPTION(ValueError) << "Get real path failed. path=" << path;
  348. }
  349. return realpath.value();
  350. }
  351. void GetEvalStackInfo(std::ostringstream &oss) {
  352. MS_LOG(INFO) << "Get graph analysis information begin";
  353. auto stack = GetCNodeDebugStack();
  354. if (stack.empty()) {
  355. MS_LOG(INFO) << "Length of analysis information stack is empty.";
  356. return;
  357. }
  358. oss << "\nThe function call stack";
  359. #ifndef ENABLE_SECURITY
  360. std::string file_name = GetEvalFailDatPath();
  361. auto ret = OutputAnalyzedGraphWithType(file_name);
  362. if (ret) {
  363. oss << " (See file '" << file_name << "' for more details)";
  364. }
  365. #endif
  366. oss << ":\n";
  367. int index = 0;
  368. std::string last_location_info = "";
  369. for (size_t i = 0; i < stack.size(); ++i) {
  370. auto node_config = stack[i];
  371. MS_EXCEPTION_IF_NULL(node_config);
  372. auto cnode = dyn_cast<CNode>(node_config->node());
  373. if (cnode == nullptr) {
  374. MS_LOG(DEBUG) << "CNode of elements[" << i << "] is nullptr.";
  375. continue;
  376. }
  377. auto debug_info = cnode->debug_info();
  378. auto this_location_info = trace::GetDebugInfo(debug_info, std::string(""));
  379. if (this_location_info.empty() || this_location_info == last_location_info) {
  380. continue;
  381. }
  382. last_location_info = this_location_info;
  383. oss << "# " << index++ << " " << this_location_info;
  384. }
  385. stack.clear();
  386. MS_LOG(INFO) << "Get graph analysis information *end*";
  387. }
  388. // Trace the graph evaluator stack
  389. thread_local TraceGraphEvalStack graph_infer_stack;
  390. // Trace the cnode infer debug info
  391. thread_local TraceCNodeEvalStack cnode_debug_stack{};
  392. void TraceGraphEvalEnter(const abstract::AnalysisContextPtr &context, const abstract::AnfNodeConfigPtr &node) {
  393. if (context == nullptr) {
  394. MS_LOG(EXCEPTION) << "GraphInferEnter got null context";
  395. }
  396. (void)graph_infer_stack.push_back(std::pair<abstract::AnalysisContextPtr, abstract::AnfNodeConfigPtr>(context, node));
  397. }
  398. void TraceGraphEvalLeave(const abstract::AnalysisContextPtr &context) {
  399. if (context == nullptr || graph_infer_stack.empty()) {
  400. MS_LOG(EXCEPTION) << "The context is null, or call stack is empty.";
  401. }
  402. if (context != graph_infer_stack.back().first) {
  403. MS_LOG(EXCEPTION) << "Different context: " << context->func_graph()->ToString() << ", "
  404. << graph_infer_stack.back().first->func_graph()->ToString();
  405. }
  406. graph_infer_stack.pop_back();
  407. }
  408. void TraceGraphEvalStackPrepare(const TraceGraphEvalStack &graphEvals) {
  409. graph_infer_stack.insert(graph_infer_stack.end(), graphEvals.begin(), graphEvals.end());
  410. }
  411. void TraceEvalCNodeStackPrepare(const TraceCNodeEvalStack &cnodeEvals) {
  412. cnode_debug_stack.insert(cnode_debug_stack.end(), cnodeEvals.begin(), cnodeEvals.end());
  413. }
  414. void TraceEvalCNodeEnter(const abstract::AnfNodeConfigPtr &node_config) { cnode_debug_stack.push_back(node_config); }
  415. void TraceEvalCNodeLeave() { cnode_debug_stack.pop_back(); }
  416. TraceCNodeEvalStack &GetCNodeDebugStack() { return cnode_debug_stack; }
  417. TraceGraphEvalStack &GetCurrentGraphEvalStack() { return graph_infer_stack; }
  418. void ClearTraceStack() {
  419. while (!graph_infer_stack.empty()) {
  420. graph_infer_stack.pop_back();
  421. }
  422. cnode_debug_stack.clear();
  423. }
  424. void GetTraceStackInfo(std::ostringstream &oss) {
  425. TraceGraphEval();
  426. std::ostringstream trace_info;
  427. GetEvalStackInfo(trace_info);
  428. if (trace_info.str().empty()) {
  429. DebugInfoPtr debug_info = TraceManager::GetParseOrResolveDebugInfo();
  430. if (debug_info != nullptr) {
  431. oss << "\n\n# " << trace::GetDebugInfo(debug_info);
  432. }
  433. } else {
  434. oss << trace_info.str();
  435. }
  436. }
  437. // Register trace provider to LogWriter.
  438. struct TraceProviderRegister {
  439. TraceProviderRegister() { LogWriter::set_trace_provider(GetTraceStackInfo); }
  440. ~TraceProviderRegister() = default;
  441. } trace_provider_register;
  442. // Register trace cnode provider to AbstractBase.
  443. struct TraceNodeProviderRegister {
  444. TraceNodeProviderRegister() {
  445. abstract::AbstractBase::set_trace_node_provider([](AnfNodePtr *node) {
  446. auto stack = GetCNodeDebugStack();
  447. if (!stack.empty()) {
  448. auto conf = stack.back();
  449. *node = conf->node();
  450. }
  451. });
  452. }
  453. ~TraceNodeProviderRegister() = default;
  454. } trace_node_provider_register;
  455. } // namespace trace
  456. } // namespace mindspore