From: @gzhcv Reviewed-by: @wangyue01,@jonyguo Signed-off-by: @jonyguotags/v1.1.0
| @@ -275,7 +275,6 @@ void DataSaver::WriteStepTrace(const std::string &saver_base_dir) { | |||
| // write step trace time info into file | |||
| uint32_t factor = 10; | |||
| std::vector<std::string> op_name_arr; | |||
| op_name_arr.push_back(step_trace_op_name.trace_iter_start); | |||
| op_name_arr.push_back(step_trace_op_name.trace_fp_start); | |||
| op_name_arr.push_back(step_trace_op_name.trace_bp_end); | |||
| op_name_arr.push_back(step_trace_op_name.trace_iter_end); | |||
| @@ -27,11 +27,10 @@ namespace gpu { | |||
| constexpr char kFpStartNode[] = "PROFILING_FP_START"; | |||
| constexpr char kBpEndNode[] = "PROFILING_BP_END"; | |||
| constexpr char kIterEndNode[] = "PROFILING_ITER_END"; | |||
| constexpr int fpExistGraphId = 2; | |||
| constexpr auto kInitDatasetQueueOpName = "InitDataSetQueue"; | |||
| uint32_t ProfilingUtils::last_graph_id = 0; | |||
| bool ProfilingUtils::have_communication_op = false; | |||
| ProfilingTraceInfo ProfilingUtils::profiling_trace = {"", "", "", "", false}; | |||
| ProfilingTraceInfo ProfilingUtils::profiling_trace = {"", "", ""}; | |||
| ProfilingTraceInfo ProfilingUtils::GetProfilingTraceFromEnv(NotNull<const session::KernelGraph *> graph_ptr) { | |||
| MS_LOG(INFO) << "get current subgraph op name start."; | |||
| @@ -39,30 +38,21 @@ ProfilingTraceInfo ProfilingUtils::GetProfilingTraceFromEnv(NotNull<const sessio | |||
| if (cnode_exec_order.empty()) { | |||
| return profiling_trace; | |||
| } | |||
| uint32_t current_graph_id = graph_ptr->graph_id(); | |||
| // current graph id less than last graph id indicates all subgraph have called. | |||
| if (current_graph_id < last_graph_id) { | |||
| profiling_trace.IsFirstStepEnd = true; | |||
| OutputStepTraceOpNameStatus(); | |||
| return profiling_trace; | |||
| } | |||
| SetTraceIterStart(cnode_exec_order); | |||
| SetTraceIterEnd(cnode_exec_order); | |||
| SetTraceFpStart(cnode_exec_order, current_graph_id); | |||
| SetTraceFpStart(cnode_exec_order); | |||
| SetTraceBpEnd(cnode_exec_order); | |||
| GetTraceHccl(cnode_exec_order); | |||
| last_graph_id = current_graph_id; | |||
| OutputStepTraceOpNameStatus(); | |||
| return profiling_trace; | |||
| } | |||
| void ProfilingUtils::OutputStepTraceOpNameStatus() { | |||
| if (!profiling_trace.IsValid()) { | |||
| MS_LOG(ERROR) << "Did not get all the step_trace op name."; | |||
| if (profiling_trace.IsValid()) { | |||
| MS_LOG(INFO) << "Get all the step_trace op name."; | |||
| } | |||
| MS_LOG(INFO) << "[profiling]trace_iter_start: " << profiling_trace.trace_iter_start | |||
| << "trace_fp_start: " << profiling_trace.trace_fp_start | |||
| MS_LOG(INFO) << "[profiling]trace_fp_start: " << profiling_trace.trace_fp_start | |||
| << "trace_bp_end: " << profiling_trace.trace_bp_end | |||
| << "trace_iter_end: " << profiling_trace.trace_iter_end; | |||
| MS_LOG(INFO) << "get step_trace op name end."; | |||
| @@ -81,19 +71,7 @@ void ProfilingUtils::GetTraceHccl(const std::vector<CNodePtr> &cnode_exec_order) | |||
| } | |||
| } | |||
| void ProfilingUtils::SetTraceIterStart(const std::vector<CNodePtr> &cnode_exec_order) { | |||
| if (!profiling_trace.trace_iter_start.empty()) { | |||
| return; | |||
| } | |||
| auto first_node = cnode_exec_order.front(); | |||
| MS_EXCEPTION_IF_NULL(first_node); | |||
| if (AnfAlgo::GetCNodeName(first_node) == kGetNextOpName) { | |||
| profiling_trace.trace_iter_start = first_node->fullname_with_scope(); | |||
| } | |||
| } | |||
| void ProfilingUtils::SetTraceFpStart(const std::vector<CNodePtr> &cnode_exec_order, uint32_t graph_id) { | |||
| void ProfilingUtils::SetTraceFpStart(const std::vector<CNodePtr> &cnode_exec_order) { | |||
| if (!profiling_trace.trace_fp_start.empty()) { | |||
| return; | |||
| } | |||
| @@ -105,9 +83,20 @@ void ProfilingUtils::SetTraceFpStart(const std::vector<CNodePtr> &cnode_exec_ord | |||
| return; | |||
| } | |||
| if (graph_id == fpExistGraphId) { | |||
| auto first_node = cnode_exec_order.front(); | |||
| MS_EXCEPTION_IF_NULL(first_node); | |||
| auto first_node = cnode_exec_order.front(); | |||
| MS_EXCEPTION_IF_NULL(first_node); | |||
| auto node_name = AnfAlgo::GetCNodeName(first_node); | |||
| if (node_name == kInitDatasetQueueOpName) { | |||
| return; | |||
| } | |||
| if (node_name == kGetNextOpName) { | |||
| if (cnode_exec_order.size() > 1) { | |||
| profiling_trace.trace_fp_start = cnode_exec_order.at(1)->fullname_with_scope(); | |||
| } else { | |||
| MS_LOG(ERROR) << "No Op Behind the GetNext Op" << std::endl; | |||
| } | |||
| } else { | |||
| profiling_trace.trace_fp_start = first_node->fullname_with_scope(); | |||
| } | |||
| } | |||
| @@ -30,23 +30,17 @@ namespace profiler { | |||
| namespace gpu { | |||
| struct ProfilingTraceInfo { | |||
| // support get all the op name from environment variable | |||
| // iteration start op is GetNext | |||
| std::string trace_iter_start; | |||
| // fp start op is the first op of Graph that ID is 2 | |||
| // fp start op is the first op in all subgraph except data related op | |||
| std::string trace_fp_start; | |||
| // bp end op is the input node op of the last communication op (if exist) | |||
| std::string trace_bp_end; | |||
| // iteration end op is the last executed op | |||
| std::string trace_iter_end; | |||
| bool IsFirstStepEnd; | |||
| // profiling specific op, such as AllReduce; | |||
| std::vector<std::string> trace_custom_node; | |||
| bool IsValid() const { | |||
| return !(trace_iter_start.empty() || trace_fp_start.empty() || trace_bp_end.empty() || trace_iter_end.empty()); | |||
| } | |||
| bool IsValid() const { return !(trace_fp_start.empty() || trace_bp_end.empty() || trace_iter_end.empty()); } | |||
| }; | |||
| class ProfilingUtils { | |||
| @@ -61,13 +55,11 @@ class ProfilingUtils { | |||
| static ProfilingTraceInfo GetProfilingTraceFromEnv(NotNull<const session::KernelGraph *> graph_ptr); | |||
| static void OutputStepTraceOpNameStatus(); | |||
| static uint32_t last_graph_id; | |||
| static bool have_communication_op; | |||
| static ProfilingTraceInfo profiling_trace; | |||
| private: | |||
| static void SetTraceIterStart(const std::vector<CNodePtr> &cnode_exec_order); | |||
| static void SetTraceFpStart(const std::vector<CNodePtr> &cnode_exec_order, uint32_t graph_id); | |||
| static void SetTraceFpStart(const std::vector<CNodePtr> &cnode_exec_order); | |||
| static void SetTraceBpEnd(const std::vector<CNodePtr> &cnode_exec_order); | |||
| static void SetTraceIterEnd(const std::vector<CNodePtr> &cnode_exec_order); | |||
| static std::string GetGraphSecondLastKernelName(const std::vector<CNodePtr> &cnode_exec_order); | |||
| @@ -587,14 +587,10 @@ bool GPUKernelRuntime::LaunchKernelDynamic(const session::KernelGraph *graph, bo | |||
| auto profiler_inst = profiler::gpu::GPUProfiler::GetInstance(); | |||
| MS_EXCEPTION_IF_NULL(profiler_inst); | |||
| static bool FlagGetStepTraceOpName = false; | |||
| if (!FlagGetStepTraceOpName) { | |||
| if (is_first_step_map_[graph->graph_id()]) { | |||
| profiler::gpu::ProfilingTraceInfo profiling_trace = | |||
| profiler::gpu::ProfilingUtils::GetProfilingTraceFromEnv(NOT_NULL(graph)); | |||
| if (profiling_trace.IsFirstStepEnd) { | |||
| FlagGetStepTraceOpName = true; | |||
| profiler_inst->SetStepTraceOpName(profiling_trace); | |||
| } | |||
| profiler_inst->SetStepTraceOpName(profiling_trace); | |||
| } | |||
| for (const auto &kernel : kernels) { | |||
| @@ -43,6 +43,7 @@ class BaseStepTraceParser: | |||
| job_id (int): The job id used to define the start of new step. Default: 0. | |||
| skip_first_step (bool): Whether skip the first step or not. | |||
| """ | |||
| def __init__(self, input_dir, output_file_path, job_id=0, skip_first_step=False): | |||
| self._input_dir = input_dir | |||
| self._output_path = output_file_path | |||
| @@ -333,6 +334,7 @@ class BaseStepTraceParser: | |||
| class GpuStepTraceParser(BaseStepTraceParser): | |||
| """The parser for gpu step trace data.""" | |||
| def record_point_info(self, source_file, output_path): | |||
| """ | |||
| Record point info into json. | |||
| @@ -344,7 +346,7 @@ class GpuStepTraceParser(BaseStepTraceParser): | |||
| Returns: | |||
| dict, parsed point info. | |||
| """ | |||
| fp_start, bp_end = 1, 2 | |||
| fp_start, bp_end = 0, 1 | |||
| try: | |||
| with open(source_file, 'r') as f: | |||
| lines = f.readlines() | |||
| @@ -374,7 +376,7 @@ class GpuStepTraceParser(BaseStepTraceParser): | |||
| def _parse(self, source_file): | |||
| """Parse source step trace files.""" | |||
| log.info("Start to parse step trace file.") | |||
| iter_start, fp_start, bp_end, iter_end = 0, 1, 2, 3 | |||
| fp_start, bp_end, iter_end, iter_start = 0, 1, 2, 3 | |||
| reduce_start = 4 | |||
| start_time, end_time = 0, 1 | |||
| @@ -382,15 +384,16 @@ class GpuStepTraceParser(BaseStepTraceParser): | |||
| try: | |||
| with open(source_file, 'r') as f: | |||
| lines = f.readlines() | |||
| step_trace_info_all = [line.strip().split() for line in lines] | |||
| step_trace_info_all = [line.strip().split()[1:] for line in lines] | |||
| num_of_step = len(step_trace_info_all[0]) | |||
| # in callback mode that set the profiling step range, each op count is not equal | |||
| step_trace_info_all = [line[-num_of_step:] for line in step_trace_info_all] | |||
| iter_start_info = [step_trace_info_all[fp_start][0]] + \ | |||
| step_trace_info_all[iter_end][:num_of_step] | |||
| step_trace_info_all.insert(iter_start, iter_start_info) | |||
| except (IOError, OSError) as err: | |||
| log.warning(f'Failed to read {source_file}', err) | |||
| raise ProfilerIOException | |||
| for step_num in range(1, num_of_step): | |||
| for step_num in range(num_of_step): | |||
| step_trace = { | |||
| 'start': int(step_trace_info_all[iter_start][step_num].split(',')[start_time]), | |||
| 'fp': int(step_trace_info_all[fp_start][step_num].split(',')[start_time]), | |||