You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

trace.cc 18 kB

4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508
  1. /**
  2. * Copyright 2019-2021 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "debug/trace.h"
  17. #include <iostream>
  18. #include <fstream>
  19. #include <map>
  20. #include <vector>
  21. #include <string>
  22. #include <sstream>
  23. #include <utility>
  24. #include <stack>
  25. #include <algorithm>
  26. #include "utils/hash_map.h"
  27. #include "utils/hash_set.h"
  28. #include "ir/meta_func_graph.h"
  29. #include "ir/graph_utils.h"
  30. #include "frontend/operator/composite/composite.h"
  31. #include "ir/tensor.h"
  32. #include "debug/anf_ir_utils.h"
  33. #include "debug/common.h"
  34. #include "pipeline/jit/static_analysis/evaluator.h"
  35. #include "pipeline/jit/static_analysis/async_eval_result.h"
  36. #include "pipeline/jit/base.h"
  37. #include "utils/log_adapter.h"
  38. #include "include/common/utils/comm_manager.h"
  39. #include "abstract/abstract_value.h"
  40. #include "utils/file_utils.h"
  41. namespace mindspore {
  42. // namespace to support debug trace information
  43. namespace trace {
  44. using abstract::AbstractBasePtr;
  45. using abstract::AnalysisContextPtr;
  46. using abstract::AnalysisEnginePtr;
  47. using abstract::AnfNodeConfigPtr;
  48. std::string GetAbstractStr(const abstract::AbstractBasePtr &abs) {
  49. if (abs == nullptr) {
  50. return "NullAbstract";
  51. }
  52. auto shape = abs->BuildShape()->cast<abstract::ShapePtr>();
  53. TypePtr type = abs->BuildType();
  54. std::ostringstream oss;
  55. if ((shape != nullptr) && (type != nullptr)) {
  56. oss << type->DumpText() << shape->DumpText();
  57. } else if (type != nullptr) {
  58. oss << type->DumpText();
  59. } else {
  60. oss << "Undefined";
  61. }
  62. return oss.str();
  63. }
  64. std::string GetGraphParamString(const FuncGraphPtr &graph, const abstract::AbstractBasePtrList &args_spec_list) {
  65. MS_EXCEPTION_IF_NULL(graph);
  66. std::ostringstream oss;
  67. oss << "graph:" << graph->ToString() << " with args[";
  68. auto params = graph->parameters();
  69. if (params.size() < args_spec_list.size()) {
  70. MS_EXCEPTION(TypeError) << "The size of parameters less than args_spec_list's size.";
  71. }
  72. for (size_t i = 0; i < args_spec_list.size(); i++) {
  73. auto parameter = params[i];
  74. MS_EXCEPTION_IF_NULL(parameter);
  75. oss << parameter->ToString() << ":<" << GetAbstractStr(args_spec_list[i]) << ">,";
  76. }
  77. oss << "]";
  78. oss << GetDebugInfo(graph->debug_info(), kSourceLineTipDiscard);
  79. return oss.str();
  80. }
  81. void DumpInferStack(std::ostringstream &oss) {
  82. auto &graph_stack = GetCurrentGraphEvalStack();
  83. if (graph_stack.empty()) {
  84. return;
  85. }
  86. std::vector<std::pair<abstract::AnalysisContextPtr, abstract::AnfNodeConfigPtr>> infer_vec;
  87. while (!graph_stack.empty()) {
  88. auto top = graph_stack.back();
  89. infer_vec.push_back(top);
  90. graph_stack.pop_back();
  91. }
  92. std::reverse(infer_vec.begin(), infer_vec.end());
  93. int index = 0;
  94. for (const auto &item : infer_vec) {
  95. auto context = item.first;
  96. if (context == nullptr) {
  97. MS_LOG(EXCEPTION) << "DumpInferStack failed, got null graph context";
  98. }
  99. auto graph = context->func_graph();
  100. if (graph == nullptr) { // Top context.
  101. continue;
  102. }
  103. auto args_spec_list = context->args_spec_list();
  104. if (graph->parameters().size() < args_spec_list.size()) {
  105. continue;
  106. }
  107. oss << " #" << index++ << " " << GetGraphParamString(graph, args_spec_list) << "\n";
  108. }
  109. }
  110. void TraceGraphEval() {
  111. auto &graph_stack = GetCurrentGraphEvalStack();
  112. if (graph_stack.empty()) {
  113. MS_LOG(INFO) << "Length of analysis graph stack is empty.";
  114. return;
  115. }
  116. std::ostringstream oss;
  117. oss << "\n*******************************graph evaluate stack**********************************";
  118. oss << std::endl;
  119. DumpInferStack(oss);
  120. oss << "\n*************************************************************************************";
  121. MS_LOG(INFO) << oss.str();
  122. }
  123. class AnalyzeFailExporter : public AnfExporter {
  124. public:
  125. AnalyzeFailExporter() : AnfExporter(true, false) {}
  126. ~AnalyzeFailExporter() override = default;
  127. bool ExportFuncGraph(const std::string &filename, const TraceCNodeEvalStack &node_config_stack);
  128. protected:
  129. void OutputCNode(std::ostringstream &oss, const CNodePtr &cnode, const FuncGraphPtr &func_graph, int *idx,
  130. std::map<AnfNodePtr, int> *const apply_map) override;
  131. std::string GetNodeType(const AnfNodePtr &nd) override;
  132. AbstractBasePtr GetNodeAbstract(const AnfNodePtr &nd);
  133. AnfNodeConfigPtr GetForwardConfig(const AnfNodeConfigPtr &cfg);
  134. void ProcessFuncGraphCall(const CNodePtr &node, std::string *const op_comment);
  135. mindspore::HashMap<FuncGraphPtr, TaggedNodeMap> CreateTaggedNodeMap(
  136. const std::vector<abstract::AnfNodeConfigPtr> &node_config_stack);
  137. private:
  138. AnalysisContextPtr current_context_ = nullptr;
  139. AnalysisEnginePtr engine_ = nullptr;
  140. };
  141. mindspore::HashMap<FuncGraphPtr, TaggedNodeMap> AnalyzeFailExporter::CreateTaggedNodeMap(
  142. const std::vector<abstract::AnfNodeConfigPtr> &node_config_stack) {
  143. mindspore::HashSet<abstract::AnfNodeConfigPtr> forwarded_configs; // Check if config. is forwarded.
  144. mindspore::HashMap<FuncGraphPtr, TaggedNodeMap> tagged_func_graphs;
  145. size_t index = 0;
  146. for (auto &node_config : node_config_stack) {
  147. MS_EXCEPTION_IF_NULL(node_config);
  148. // Record new config in set.
  149. auto new_config = GetForwardConfig(node_config);
  150. if (new_config != node_config) {
  151. MS_LOG(DEBUG) << "The node_config is forwarded, old config: " << node_config->ToString()
  152. << ", new_config: " << new_config->ToString();
  153. forwarded_configs.emplace(new_config);
  154. }
  155. // Ignore the new config.
  156. if (forwarded_configs.find(node_config) != forwarded_configs.end()) {
  157. continue;
  158. }
  159. auto fg = node_config->func_graph();
  160. MS_EXCEPTION_IF_NULL(fg);
  161. auto node = node_config->node();
  162. tagged_func_graphs[fg][node] = index;
  163. index++;
  164. }
  165. return tagged_func_graphs;
  166. }
  167. bool OutputAnalyzedGraphWithType(const string &file_path) {
  168. AnalyzeFailExporter exporter;
  169. return exporter.ExportFuncGraph(file_path, GetCNodeDebugStack());
  170. }
  171. std::string AnalyzeFailExporter::GetNodeType(const AnfNodePtr &node) {
  172. if (current_context_ == nullptr) {
  173. return AnfExporter::GetNodeType(node);
  174. }
  175. MS_EXCEPTION_IF_NULL(engine_);
  176. try {
  177. FuncGraphPtr dummy_call_func_graph = nullptr;
  178. auto cfg = engine_->MakeConfig(node, current_context_, dummy_call_func_graph);
  179. auto res = abstract::AnalysisResultCacheMgr::GetInstance().GetValue(cfg);
  180. if (res != nullptr) {
  181. return GetAbstractStr(res->abstract());
  182. }
  183. } catch (const std::exception &e) {
  184. MS_LOG(INFO) << "Exception: " << e.what();
  185. }
  186. return "Undefined";
  187. }
  188. AbstractBasePtr AnalyzeFailExporter::GetNodeAbstract(const AnfNodePtr &node) {
  189. if (current_context_ == nullptr) {
  190. return nullptr;
  191. }
  192. MS_EXCEPTION_IF_NULL(engine_);
  193. try {
  194. FuncGraphPtr dummy_call_func_graph = nullptr;
  195. auto cfg = engine_->MakeConfig(node, current_context_, dummy_call_func_graph);
  196. auto res = abstract::AnalysisResultCacheMgr::GetInstance().GetValue(cfg);
  197. return res == nullptr ? nullptr : res->abstract();
  198. } catch (const std::exception &e) {
  199. MS_LOG(INFO) << "Exception: " << e.what();
  200. }
  201. return nullptr;
  202. }
  203. AnfNodeConfigPtr AnalyzeFailExporter::GetForwardConfig(const AnfNodeConfigPtr &cfg) {
  204. MS_EXCEPTION_IF_NULL(cfg);
  205. MS_EXCEPTION_IF_NULL(engine_);
  206. AnfNodeConfigPtr cur_cfg = cfg;
  207. auto iter = engine_->anfnode_config_map().find(cur_cfg);
  208. while (iter != engine_->anfnode_config_map().end()) {
  209. auto node = cur_cfg->node();
  210. cur_cfg = iter->second;
  211. MS_LOG(DEBUG) << "Get forward node: " << node << "[" << node->DebugString() << "] --> " << cur_cfg->node() << "["
  212. << cur_cfg->node()->DebugString() << "]";
  213. iter = engine_->anfnode_config_map().find(cur_cfg);
  214. }
  215. return cur_cfg;
  216. }
  217. void AnalyzeFailExporter::ProcessFuncGraphCall(const CNodePtr &node, std::string *const op_comment) {
  218. if (node == nullptr) {
  219. MS_LOG(ERROR) << "Node is nullptr";
  220. return;
  221. }
  222. CNodePtr cnode = nullptr;
  223. try {
  224. FuncGraphPtr dummy_call_func_graph = nullptr;
  225. auto cfg = engine_->MakeConfig(node, current_context_, dummy_call_func_graph);
  226. cfg = GetForwardConfig(cfg);
  227. MS_EXCEPTION_IF_NULL(cfg);
  228. cnode = dyn_cast<CNode>(cfg->node());
  229. } catch (const std::exception &e) {
  230. MS_LOG(INFO) << "Exception: " << e.what();
  231. }
  232. if (cnode == nullptr) {
  233. MS_LOG(INFO) << "CNode is nullptr";
  234. return;
  235. }
  236. const auto &inputs = cnode->inputs();
  237. for (size_t i = 0; i < inputs.size(); ++i) {
  238. auto op_abs = GetNodeAbstract(inputs[i]);
  239. if (op_abs == nullptr) {
  240. MS_LOG(DEBUG) << "Abstract of inputs[" << i << "] of cnode " << cnode->ToString() << " is nullptr";
  241. continue;
  242. }
  243. if (!op_abs->isa<abstract::FuncGraphAbstractClosure>() && !op_abs->isa<abstract::MetaFuncGraphAbstractClosure>()) {
  244. MS_LOG(DEBUG) << "Inputs[" << i << "] of cnode " << cnode->ToString() << " is of type " << op_abs->type_name()
  245. << ", not function, ignore it";
  246. // Get prototype of VirtualEvaluator for printing
  247. if (i == 0 && op_abs->isa<abstract::VirtualAbstractClosure>()) {
  248. auto func = dyn_cast<abstract::VirtualAbstractClosure>(op_abs);
  249. std::ostringstream oss;
  250. oss << "(";
  251. bool first_flag = false;
  252. for (const auto &arg : func->args_spec_list()) {
  253. if (!first_flag) {
  254. first_flag = true;
  255. } else {
  256. oss << ", ";
  257. }
  258. oss << GetAbstractStr(arg);
  259. }
  260. oss << ") -> " << GetAbstractStr(func->output()) << " ";
  261. *op_comment = oss.str();
  262. }
  263. }
  264. }
  265. }
  266. void AnalyzeFailExporter::OutputCNode(std::ostringstream &oss, const CNodePtr &cnode, const FuncGraphPtr &func_graph,
  267. int *idx, std::map<AnfNodePtr, int> *const apply_map) {
  268. OutputCNodeText(oss, cnode, func_graph, idx, apply_map);
  269. // Process function graph call
  270. std::string op_comment;
  271. ProcessFuncGraphCall(cnode, &op_comment);
  272. if (!op_comment.empty()) {
  273. auto &inputs = cnode->inputs();
  274. oss << " #" << GetAnfNodeText(func_graph, inputs[0], *apply_map) << ".prototype = " << op_comment;
  275. }
  276. // Output comment
  277. OutputStatementComment(oss, cnode);
  278. oss << "\n";
  279. }
  280. bool AnalyzeFailExporter::ExportFuncGraph(const std::string &filename, const TraceCNodeEvalStack &node_config_stack) {
  281. if (node_config_stack.empty()) {
  282. MS_LOG(DEBUG) << "Node configs is empty";
  283. return false;
  284. }
  285. auto real_filepath = Common::CreatePrefixPath(filename);
  286. if (!real_filepath.has_value()) {
  287. MS_LOG(ERROR) << "The export ir path: " << filename << " is not illegal.";
  288. return false;
  289. }
  290. ChangeFileMode(real_filepath.value(), S_IWUSR);
  291. std::ofstream ofs(real_filepath.value());
  292. if (!ofs.is_open()) {
  293. MS_LOG(ERROR) << "Open file '" << real_filepath.value() << "' failed!" << ErrnoToString(errno);
  294. return false;
  295. }
  296. if (engine_ == nullptr) {
  297. engine_ = node_config_stack.front()->engine();
  298. }
  299. auto tagged_func_graphs = CreateTaggedNodeMap(node_config_stack);
  300. mindspore::HashSet<FuncGraphPtr> printed_func_graphs; // Check if func graph has been printed.
  301. // Output graph on the analysis stack
  302. for (const auto &node_config : node_config_stack) {
  303. MS_EXCEPTION_IF_NULL(node_config);
  304. auto fg = node_config->func_graph();
  305. MS_LOG(INFO) << "Node: " << node_config->node()->DebugString()
  306. << ", FV: " << (node_config->func_graph() != node_config->context()->func_graph())
  307. << ", calling func graph: " << node_config->func_graph()->ToString()
  308. << ", context func graph: " << node_config->context()->func_graph()->ToString();
  309. if (fg == nullptr) {
  310. MS_LOG(ERROR) << "FuncGraph is null, context: " << node_config->ToString();
  311. continue;
  312. }
  313. if (printed_func_graphs.find(fg) != printed_func_graphs.end()) {
  314. continue;
  315. }
  316. (void)printed_func_graphs.emplace(fg);
  317. current_context_ = node_config->context(); // Set current context.
  318. std::ostringstream buffer;
  319. ExportOneFuncGraph(buffer, fg, tagged_func_graphs[fg]);
  320. ofs << buffer.str() << "\n\n";
  321. }
  322. ofs << "#===============================================================================\n";
  323. ofs << "# num of function graphs in stack: ";
  324. auto ignored_num = (node_config_stack.size() - printed_func_graphs.size());
  325. if (ignored_num == 0) {
  326. ofs << node_config_stack.size() << "\n";
  327. } else {
  328. ofs << printed_func_graphs.size() << "/" << node_config_stack.size() << " (Ignored " << ignored_num
  329. << " internal frames).\n";
  330. }
  331. ofs.close();
  332. ChangeFileMode(real_filepath.value(), S_IRUSR);
  333. return true;
  334. }
  335. std::string GetEvalFailDatPath() {
  336. std::string path;
  337. auto ms_om_path = common::GetEnv("MS_OM_PATH");
  338. if (!ms_om_path.empty()) {
  339. path = ms_om_path;
  340. } else {
  341. path = ".";
  342. }
  343. path += "/rank_" + std::to_string(GetRank()) + "/om/analyze_fail.dat";
  344. // Support "../" in path.
  345. auto realpath = Common::CreatePrefixPath(path, true);
  346. if (!realpath.has_value()) {
  347. MS_EXCEPTION(ValueError) << "Get real path failed. path=" << path;
  348. }
  349. return realpath.value();
  350. }
  351. void GetEvalStackInfo(std::ostringstream &oss) {
  352. MS_LOG(INFO) << "Get graph analysis information begin";
  353. auto stack = GetCNodeDebugStack();
  354. if (stack.empty()) {
  355. MS_LOG(INFO) << "Length of analysis information stack is empty.";
  356. return;
  357. }
  358. oss << "\nThe function call stack";
  359. #ifndef ENABLE_SECURITY
  360. std::string file_name = GetEvalFailDatPath();
  361. auto ret = OutputAnalyzedGraphWithType(file_name);
  362. if (ret) {
  363. oss << " (See file '" << file_name << "' for more details)";
  364. }
  365. #endif
  366. oss << ":\n";
  367. int index = 0;
  368. std::string last_location_info = "";
  369. for (size_t i = 0; i < stack.size(); ++i) {
  370. auto node_config = stack[i];
  371. MS_EXCEPTION_IF_NULL(node_config);
  372. auto cnode = dyn_cast<CNode>(node_config->node());
  373. if (cnode == nullptr) {
  374. MS_LOG(DEBUG) << "CNode of elements[" << i << "] is nullptr.";
  375. continue;
  376. }
  377. auto debug_info = cnode->debug_info();
  378. auto this_location_info = trace::GetDebugInfo(debug_info, std::string(""));
  379. if (this_location_info.empty() || this_location_info == last_location_info) {
  380. continue;
  381. }
  382. last_location_info = this_location_info;
  383. oss << "# " << index++ << " " << this_location_info;
  384. }
  385. stack.clear();
  386. MS_LOG(INFO) << "Get graph analysis information *end*";
  387. }
  388. // Trace the graph evaluator stack
  389. thread_local TraceGraphEvalStack graph_infer_stack;
  390. // Trace the cnode infer debug info
  391. thread_local TraceCNodeEvalStack cnode_debug_stack{};
  392. void TraceGraphEvalEnter(const abstract::AnalysisContextPtr &context, const abstract::AnfNodeConfigPtr &node) {
  393. if (context == nullptr) {
  394. MS_LOG(EXCEPTION) << "GraphInferEnter got null context";
  395. }
  396. (void)graph_infer_stack.push_back(std::pair<abstract::AnalysisContextPtr, abstract::AnfNodeConfigPtr>(context, node));
  397. }
  398. void TraceGraphEvalLeave(const abstract::AnalysisContextPtr &context) {
  399. if (context == nullptr || graph_infer_stack.empty()) {
  400. MS_LOG(EXCEPTION) << "The context is null, or call stack is empty.";
  401. }
  402. if (context != graph_infer_stack.back().first) {
  403. MS_LOG(EXCEPTION) << "Different context: " << context->func_graph()->ToString() << ", "
  404. << graph_infer_stack.back().first->func_graph()->ToString();
  405. }
  406. graph_infer_stack.pop_back();
  407. }
  408. void TraceGraphEvalStackPrepare(const TraceGraphEvalStack &graph_evals) {
  409. graph_infer_stack.insert(graph_infer_stack.end(), graph_evals.begin(), graph_evals.end());
  410. }
  411. void TraceEvalCNodeStackPrepare(const TraceCNodeEvalStack &cnode_evals) {
  412. cnode_debug_stack.insert(cnode_debug_stack.end(), cnode_evals.begin(), cnode_evals.end());
  413. }
  414. void TraceEvalCNodeEnter(const abstract::AnfNodeConfigPtr &node_config) { cnode_debug_stack.push_back(node_config); }
  415. void TraceEvalCNodeLeave() { cnode_debug_stack.pop_back(); }
  416. TraceCNodeEvalStack &GetCNodeDebugStack() { return cnode_debug_stack; }
  417. TraceGraphEvalStack &GetCurrentGraphEvalStack() { return graph_infer_stack; }
  418. void ClearTraceStack() {
  419. while (!graph_infer_stack.empty()) {
  420. graph_infer_stack.pop_back();
  421. }
  422. cnode_debug_stack.clear();
  423. }
  424. void GetTraceStackInfo(std::ostringstream &oss) {
  425. TraceGraphEval();
  426. std::ostringstream trace_info;
  427. GetEvalStackInfo(trace_info);
  428. if (trace_info.str().empty()) {
  429. DebugInfoPtr debug_info = TraceManager::record_debug_info();
  430. if (debug_info != nullptr && TraceManager::record_debug_info_flag() == true) {
  431. auto debug_str = trace::GetDebugInfo(debug_info);
  432. if (!debug_str.empty()) {
  433. oss << "\n\n# " << debug_str;
  434. }
  435. }
  436. } else {
  437. oss << trace_info.str();
  438. }
  439. }
  440. // Register trace provider to LogWriter.
  441. struct TraceProviderRegister {
  442. TraceProviderRegister() { LogWriter::set_trace_provider(GetTraceStackInfo); }
  443. ~TraceProviderRegister() = default;
  444. } trace_provider_register;
  445. // Register trace cnode provider to AbstractBase.
  446. struct TraceNodeProviderRegister {
  447. TraceNodeProviderRegister() {
  448. abstract::AbstractBase::set_trace_node_provider([](AnfNodePtr *node) {
  449. auto stack = GetCNodeDebugStack();
  450. if (!stack.empty()) {
  451. auto conf = stack.back();
  452. *node = conf->node();
  453. }
  454. });
  455. }
  456. ~TraceNodeProviderRegister() = default;
  457. } trace_node_provider_register;
  458. } // namespace trace
  459. } // namespace mindspore