You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

profiling_utils.h 6.0 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122
  1. /**
  2. * Copyright 2019 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_PROFILING_PROFILING_UTILS_H_
  17. #define MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_PROFILING_PROFILING_UTILS_H_
  18. #include <memory>
  19. #include <string>
  20. #include <vector>
  21. #include <set>
  22. #include <unordered_map>
  23. #include "session/kernel_graph.h"
  24. #include "utils/contract.h"
  25. namespace mindspore {
  26. namespace device {
  27. namespace ascend {
  // Identifies the ops that are used as profiling trace points.
  struct ProfilingTraceInfo {
    // First op in the execute order (e.g. Cast or Four2Five ...), excluding tdt ops (GetNext ...).
    std::string trace_begin;
    // First net_output (apply kernel) taken from the graph outputs: fp -> net_output <- bp.
    std::string trace_bp_end;
    // Last op in the execute order (e.g. Conv2DBackpropFilter).
    std::string trace_netoutput;
    // Specific ops to profile, such as AllReduce.
    std::set<std::string> trace_custom_node;

    // Trace point insertion steps:
    //   1. insert profiling_trace_begin if profiling_trace_bp_end is not empty.
    //   2. op launch gets task info with a callback func.
    //   3. insert profiling_trace_bp_end.
    //   4. insert profiling_trace_net_output if profiling_trace_bp_end is not empty.
    //
    // Returns true only when trace_begin, trace_bp_end and trace_netoutput are all non-empty.
    // trace_custom_node is not taken into account.
    bool IsValid() const {
      const bool has_begin = !trace_begin.empty();
      const bool has_bp_end = !trace_bp_end.empty();
      const bool has_netoutput = !trace_netoutput.empty();
      return has_begin && has_bp_end && has_netoutput;
    }
  };
  // Settings handed to ProfilingUtils::CreateProfilingCNode when a profiling node is created.
  // Every member has a default so that a default-initialized instance never holds
  // indeterminate values; the struct is still an aggregate and can be brace-initialized.
  struct ProfilingContent {
    // true - send data from device to host and finish profiling.
    bool notify{false};
    // Value stored as the profiling node's trace id.
    uint64_t profiler_trace_id{0};
    // Value stored as the profiling node's flags.
    uint32_t flags{0};
  };
  49. class ProfilingUtils {
  50. public:
  51. ProfilingUtils() = default;
  52. ~ProfilingUtils() = default;
  53. // Insert job_id profiling node and fp_start profiling node.
  54. // Job_id is got from envs, which shound be a number greater than 255
  55. // Fp_start node should been inserted in the start of a network, and the log_id is hard code to 1.
  56. static void ProfilingTraceFpStart(const AnfNodePtr &anf_node, const ProfilingTraceInfo &profiling_trace_info,
  57. NotNull<session::KernelGraph *> graph_ptr,
  58. NotNull<std::vector<CNodePtr> *> kernel_list);
  59. // Insert net output profiling node, which tells the device to stop profiling.
  60. // The notify in struct ProfilingContent should be 'true', which tells the device to send data to host.
  61. static void ProfilingTraceEnd(const AnfNodePtr &anf_node, const ProfilingTraceInfo &profiling_trace_info,
  62. NotNull<session::KernelGraph *> graph_ptr,
  63. NotNull<std::vector<CNodePtr> *> kernel_list);
  64. // Insert bp_end profiling node, which should been inserted after the last backpropagation CNode in the network.
  65. static void ProfilingTraceBpEnd(const mindspore::AnfNodePtr &anf_node, const ProfilingTraceInfo &profiling_trace_info,
  66. NotNull<session::KernelGraph *> graph_ptr,
  67. NotNull<std::vector<mindspore::CNodePtr> *> kernel_list);
  68. // Mapping graph id and the kernels' name in the graph
  69. static void SetGraphKernelName(uint32_t graph_id, const std::vector<std::string> &kernel_names);
  70. // Mapping task_id and kernel name for device to generate the time cost of specific kernel.
  71. // Device calculate the time cost of the task which is marked by task id.
  72. // But we need data of (kernel name , time cost)
  73. static void ReportProfilingData(uint32_t graph_id, const std::vector<uint32_t> &task_ids);
  74. // Get profiling trace point from envs.
  75. // export PROFILING_FP_START='full name of the first cnode to execute'
  76. // export PROFILING_BP_END='full name of the last backpropagation cnode to execute'
  77. // export PROFILING_ITER_END='full name of last cnode in graph to execute'
  78. // And other cnode, like AllReduce, export PROFILING_CUSTOM_1='full name of AllReduce cnode'
  79. // GetNext, export PROFIFLING_CUSTOM_2='full name fo GetNext cnode'
  80. // The variable i in PROFILING_CUSTOM_i should start from 1 without interruption.
  81. static ProfilingTraceInfo GetProfilingTraceFromEnv(NotNull<session::KernelGraph *> graph_ptr);
  82. // Insert two profiling trace points, one in front and one behind
  83. static void ProfilingCustomOp(const mindspore::AnfNodePtr &anf_node, const ProfilingTraceInfo &profiling_trace_info,
  84. NotNull<session::KernelGraph *> graph_ptr,
  85. NotNull<std::vector<mindspore::CNodePtr> *> kernel_list);
  86. inline static constexpr char kProfiling[] = "Profiling";
  87. inline static constexpr char kNotify[] = "notify";
  88. inline static constexpr char kProfilerTraceId[] = "profiler_trace_id";
  89. inline static constexpr char kFlags[] = "flags";
  90. private:
  91. static NotNull<CNodePtr> CreateProfilingCNode(const ProfilingContent &profiling_content,
  92. NotNull<session::KernelGraph *> graph_ptr);
  93. static CNodePtr CreateProfilingCNodeWithStream(const AnfNodePtr &anf_node, const ProfilingContent &profiling_content,
  94. NotNull<session::KernelGraph *> graph_ptr);
  95. static std::string GetTraceBegin(const std::vector<CNodePtr> &cnode_exec_order);
  96. static std::string GetTraceBpEnd();
  97. static std::string GetTraceNetoutput(const std::vector<CNodePtr> &cnode_exec_order);
  98. // graph id --> (kernel name list)
  99. static std::unordered_map<uint32_t, std::vector<std::string>> graph_kernel_name_;
  100. static uint32_t custom_node_index_;
  101. };
  102. } // namespace ascend
  103. } // namespace device
  104. } // namespace mindspore
  105. #endif // MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_PROFILING_PROFILING_UTILS_H_