From: @gzhcv Reviewed-by: @wangyue01,@jonyguo Signed-off-by: @jonyguotags/v1.1.0
| @@ -275,7 +275,6 @@ void DataSaver::WriteStepTrace(const std::string &saver_base_dir) { | |||||
| // write step trace time info into file | // write step trace time info into file | ||||
| uint32_t factor = 10; | uint32_t factor = 10; | ||||
| std::vector<std::string> op_name_arr; | std::vector<std::string> op_name_arr; | ||||
| op_name_arr.push_back(step_trace_op_name.trace_iter_start); | |||||
| op_name_arr.push_back(step_trace_op_name.trace_fp_start); | op_name_arr.push_back(step_trace_op_name.trace_fp_start); | ||||
| op_name_arr.push_back(step_trace_op_name.trace_bp_end); | op_name_arr.push_back(step_trace_op_name.trace_bp_end); | ||||
| op_name_arr.push_back(step_trace_op_name.trace_iter_end); | op_name_arr.push_back(step_trace_op_name.trace_iter_end); | ||||
| @@ -27,11 +27,10 @@ namespace gpu { | |||||
| constexpr char kFpStartNode[] = "PROFILING_FP_START"; | constexpr char kFpStartNode[] = "PROFILING_FP_START"; | ||||
| constexpr char kBpEndNode[] = "PROFILING_BP_END"; | constexpr char kBpEndNode[] = "PROFILING_BP_END"; | ||||
| constexpr char kIterEndNode[] = "PROFILING_ITER_END"; | constexpr char kIterEndNode[] = "PROFILING_ITER_END"; | ||||
| constexpr int fpExistGraphId = 2; | |||||
| constexpr auto kInitDatasetQueueOpName = "InitDataSetQueue"; | |||||
| uint32_t ProfilingUtils::last_graph_id = 0; | |||||
| bool ProfilingUtils::have_communication_op = false; | bool ProfilingUtils::have_communication_op = false; | ||||
| ProfilingTraceInfo ProfilingUtils::profiling_trace = {"", "", "", "", false}; | |||||
| ProfilingTraceInfo ProfilingUtils::profiling_trace = {"", "", ""}; | |||||
| ProfilingTraceInfo ProfilingUtils::GetProfilingTraceFromEnv(NotNull<const session::KernelGraph *> graph_ptr) { | ProfilingTraceInfo ProfilingUtils::GetProfilingTraceFromEnv(NotNull<const session::KernelGraph *> graph_ptr) { | ||||
| MS_LOG(INFO) << "get current subgraph op name start."; | MS_LOG(INFO) << "get current subgraph op name start."; | ||||
| @@ -39,30 +38,21 @@ ProfilingTraceInfo ProfilingUtils::GetProfilingTraceFromEnv(NotNull<const sessio | |||||
| if (cnode_exec_order.empty()) { | if (cnode_exec_order.empty()) { | ||||
| return profiling_trace; | return profiling_trace; | ||||
| } | } | ||||
| uint32_t current_graph_id = graph_ptr->graph_id(); | |||||
| // current graph id less than last graph id indicates all subgraph have called. | |||||
| if (current_graph_id < last_graph_id) { | |||||
| profiling_trace.IsFirstStepEnd = true; | |||||
| OutputStepTraceOpNameStatus(); | |||||
| return profiling_trace; | |||||
| } | |||||
| SetTraceIterStart(cnode_exec_order); | |||||
| SetTraceIterEnd(cnode_exec_order); | SetTraceIterEnd(cnode_exec_order); | ||||
| SetTraceFpStart(cnode_exec_order, current_graph_id); | |||||
| SetTraceFpStart(cnode_exec_order); | |||||
| SetTraceBpEnd(cnode_exec_order); | SetTraceBpEnd(cnode_exec_order); | ||||
| GetTraceHccl(cnode_exec_order); | GetTraceHccl(cnode_exec_order); | ||||
| last_graph_id = current_graph_id; | |||||
| OutputStepTraceOpNameStatus(); | |||||
| return profiling_trace; | return profiling_trace; | ||||
| } | } | ||||
| void ProfilingUtils::OutputStepTraceOpNameStatus() { | void ProfilingUtils::OutputStepTraceOpNameStatus() { | ||||
| if (!profiling_trace.IsValid()) { | |||||
| MS_LOG(ERROR) << "Did not get all the step_trace op name."; | |||||
| if (profiling_trace.IsValid()) { | |||||
| MS_LOG(INFO) << "Get all the step_trace op name."; | |||||
| } | } | ||||
| MS_LOG(INFO) << "[profiling]trace_iter_start: " << profiling_trace.trace_iter_start | |||||
| << "trace_fp_start: " << profiling_trace.trace_fp_start | |||||
| MS_LOG(INFO) << "[profiling]trace_fp_start: " << profiling_trace.trace_fp_start | |||||
| << "trace_bp_end: " << profiling_trace.trace_bp_end | << "trace_bp_end: " << profiling_trace.trace_bp_end | ||||
| << "trace_iter_end: " << profiling_trace.trace_iter_end; | << "trace_iter_end: " << profiling_trace.trace_iter_end; | ||||
| MS_LOG(INFO) << "get step_trace op name end."; | MS_LOG(INFO) << "get step_trace op name end."; | ||||
| @@ -81,19 +71,7 @@ void ProfilingUtils::GetTraceHccl(const std::vector<CNodePtr> &cnode_exec_order) | |||||
| } | } | ||||
| } | } | ||||
| void ProfilingUtils::SetTraceIterStart(const std::vector<CNodePtr> &cnode_exec_order) { | |||||
| if (!profiling_trace.trace_iter_start.empty()) { | |||||
| return; | |||||
| } | |||||
| auto first_node = cnode_exec_order.front(); | |||||
| MS_EXCEPTION_IF_NULL(first_node); | |||||
| if (AnfAlgo::GetCNodeName(first_node) == kGetNextOpName) { | |||||
| profiling_trace.trace_iter_start = first_node->fullname_with_scope(); | |||||
| } | |||||
| } | |||||
| void ProfilingUtils::SetTraceFpStart(const std::vector<CNodePtr> &cnode_exec_order, uint32_t graph_id) { | |||||
| void ProfilingUtils::SetTraceFpStart(const std::vector<CNodePtr> &cnode_exec_order) { | |||||
| if (!profiling_trace.trace_fp_start.empty()) { | if (!profiling_trace.trace_fp_start.empty()) { | ||||
| return; | return; | ||||
| } | } | ||||
| @@ -105,9 +83,20 @@ void ProfilingUtils::SetTraceFpStart(const std::vector<CNodePtr> &cnode_exec_ord | |||||
| return; | return; | ||||
| } | } | ||||
| if (graph_id == fpExistGraphId) { | |||||
| auto first_node = cnode_exec_order.front(); | |||||
| MS_EXCEPTION_IF_NULL(first_node); | |||||
| auto first_node = cnode_exec_order.front(); | |||||
| MS_EXCEPTION_IF_NULL(first_node); | |||||
| auto node_name = AnfAlgo::GetCNodeName(first_node); | |||||
| if (node_name == kInitDatasetQueueOpName) { | |||||
| return; | |||||
| } | |||||
| if (node_name == kGetNextOpName) { | |||||
| if (cnode_exec_order.size() > 1) { | |||||
| profiling_trace.trace_fp_start = cnode_exec_order.at(1)->fullname_with_scope(); | |||||
| } else { | |||||
| MS_LOG(ERROR) << "No Op Behind the GetNext Op" << std::endl; | |||||
| } | |||||
| } else { | |||||
| profiling_trace.trace_fp_start = first_node->fullname_with_scope(); | profiling_trace.trace_fp_start = first_node->fullname_with_scope(); | ||||
| } | } | ||||
| } | } | ||||
| @@ -30,23 +30,17 @@ namespace profiler { | |||||
| namespace gpu { | namespace gpu { | ||||
| struct ProfilingTraceInfo { | struct ProfilingTraceInfo { | ||||
| // support get all the op name from environment variable | // support get all the op name from environment variable | ||||
| // iteration start op is GetNext | |||||
| std::string trace_iter_start; | |||||
| // fp start op is the first op of Graph that ID is 2 | |||||
| // fp start op is the first op in all subgraph except data related op | |||||
| std::string trace_fp_start; | std::string trace_fp_start; | ||||
| // bp end op is the input node op of the last communication op (if exist) | // bp end op is the input node op of the last communication op (if exist) | ||||
| std::string trace_bp_end; | std::string trace_bp_end; | ||||
| // iteration end op is the last executed op | // iteration end op is the last executed op | ||||
| std::string trace_iter_end; | std::string trace_iter_end; | ||||
| bool IsFirstStepEnd; | |||||
| // profiling specific op, such as AllReduce; | // profiling specific op, such as AllReduce; | ||||
| std::vector<std::string> trace_custom_node; | std::vector<std::string> trace_custom_node; | ||||
| bool IsValid() const { | |||||
| return !(trace_iter_start.empty() || trace_fp_start.empty() || trace_bp_end.empty() || trace_iter_end.empty()); | |||||
| } | |||||
| bool IsValid() const { return !(trace_fp_start.empty() || trace_bp_end.empty() || trace_iter_end.empty()); } | |||||
| }; | }; | ||||
| class ProfilingUtils { | class ProfilingUtils { | ||||
| @@ -61,13 +55,11 @@ class ProfilingUtils { | |||||
| static ProfilingTraceInfo GetProfilingTraceFromEnv(NotNull<const session::KernelGraph *> graph_ptr); | static ProfilingTraceInfo GetProfilingTraceFromEnv(NotNull<const session::KernelGraph *> graph_ptr); | ||||
| static void OutputStepTraceOpNameStatus(); | static void OutputStepTraceOpNameStatus(); | ||||
| static uint32_t last_graph_id; | |||||
| static bool have_communication_op; | static bool have_communication_op; | ||||
| static ProfilingTraceInfo profiling_trace; | static ProfilingTraceInfo profiling_trace; | ||||
| private: | private: | ||||
| static void SetTraceIterStart(const std::vector<CNodePtr> &cnode_exec_order); | |||||
| static void SetTraceFpStart(const std::vector<CNodePtr> &cnode_exec_order, uint32_t graph_id); | |||||
| static void SetTraceFpStart(const std::vector<CNodePtr> &cnode_exec_order); | |||||
| static void SetTraceBpEnd(const std::vector<CNodePtr> &cnode_exec_order); | static void SetTraceBpEnd(const std::vector<CNodePtr> &cnode_exec_order); | ||||
| static void SetTraceIterEnd(const std::vector<CNodePtr> &cnode_exec_order); | static void SetTraceIterEnd(const std::vector<CNodePtr> &cnode_exec_order); | ||||
| static std::string GetGraphSecondLastKernelName(const std::vector<CNodePtr> &cnode_exec_order); | static std::string GetGraphSecondLastKernelName(const std::vector<CNodePtr> &cnode_exec_order); | ||||
| @@ -587,14 +587,10 @@ bool GPUKernelRuntime::LaunchKernelDynamic(const session::KernelGraph *graph, bo | |||||
| auto profiler_inst = profiler::gpu::GPUProfiler::GetInstance(); | auto profiler_inst = profiler::gpu::GPUProfiler::GetInstance(); | ||||
| MS_EXCEPTION_IF_NULL(profiler_inst); | MS_EXCEPTION_IF_NULL(profiler_inst); | ||||
| static bool FlagGetStepTraceOpName = false; | |||||
| if (!FlagGetStepTraceOpName) { | |||||
| if (is_first_step_map_[graph->graph_id()]) { | |||||
| profiler::gpu::ProfilingTraceInfo profiling_trace = | profiler::gpu::ProfilingTraceInfo profiling_trace = | ||||
| profiler::gpu::ProfilingUtils::GetProfilingTraceFromEnv(NOT_NULL(graph)); | profiler::gpu::ProfilingUtils::GetProfilingTraceFromEnv(NOT_NULL(graph)); | ||||
| if (profiling_trace.IsFirstStepEnd) { | |||||
| FlagGetStepTraceOpName = true; | |||||
| profiler_inst->SetStepTraceOpName(profiling_trace); | |||||
| } | |||||
| profiler_inst->SetStepTraceOpName(profiling_trace); | |||||
| } | } | ||||
| for (const auto &kernel : kernels) { | for (const auto &kernel : kernels) { | ||||
| @@ -43,6 +43,7 @@ class BaseStepTraceParser: | |||||
| job_id (int): The job id used to define the start of new step. Default: 0. | job_id (int): The job id used to define the start of new step. Default: 0. | ||||
| skip_first_step (bool): Whether skip the first step or not. | skip_first_step (bool): Whether skip the first step or not. | ||||
| """ | """ | ||||
| def __init__(self, input_dir, output_file_path, job_id=0, skip_first_step=False): | def __init__(self, input_dir, output_file_path, job_id=0, skip_first_step=False): | ||||
| self._input_dir = input_dir | self._input_dir = input_dir | ||||
| self._output_path = output_file_path | self._output_path = output_file_path | ||||
| @@ -333,6 +334,7 @@ class BaseStepTraceParser: | |||||
| class GpuStepTraceParser(BaseStepTraceParser): | class GpuStepTraceParser(BaseStepTraceParser): | ||||
| """The parser for gpu step trace data.""" | """The parser for gpu step trace data.""" | ||||
| def record_point_info(self, source_file, output_path): | def record_point_info(self, source_file, output_path): | ||||
| """ | """ | ||||
| Record point info into json. | Record point info into json. | ||||
| @@ -344,7 +346,7 @@ class GpuStepTraceParser(BaseStepTraceParser): | |||||
| Returns: | Returns: | ||||
| dict, parsed point info. | dict, parsed point info. | ||||
| """ | """ | ||||
| fp_start, bp_end = 1, 2 | |||||
| fp_start, bp_end = 0, 1 | |||||
| try: | try: | ||||
| with open(source_file, 'r') as f: | with open(source_file, 'r') as f: | ||||
| lines = f.readlines() | lines = f.readlines() | ||||
| @@ -374,7 +376,7 @@ class GpuStepTraceParser(BaseStepTraceParser): | |||||
| def _parse(self, source_file): | def _parse(self, source_file): | ||||
| """Parse source step trace files.""" | """Parse source step trace files.""" | ||||
| log.info("Start to parse step trace file.") | log.info("Start to parse step trace file.") | ||||
| iter_start, fp_start, bp_end, iter_end = 0, 1, 2, 3 | |||||
| fp_start, bp_end, iter_end, iter_start = 0, 1, 2, 3 | |||||
| reduce_start = 4 | reduce_start = 4 | ||||
| start_time, end_time = 0, 1 | start_time, end_time = 0, 1 | ||||
| @@ -382,15 +384,16 @@ class GpuStepTraceParser(BaseStepTraceParser): | |||||
| try: | try: | ||||
| with open(source_file, 'r') as f: | with open(source_file, 'r') as f: | ||||
| lines = f.readlines() | lines = f.readlines() | ||||
| step_trace_info_all = [line.strip().split() for line in lines] | |||||
| step_trace_info_all = [line.strip().split()[1:] for line in lines] | |||||
| num_of_step = len(step_trace_info_all[0]) | num_of_step = len(step_trace_info_all[0]) | ||||
| # in callback mode that set the profiling step range, each op count is not equal | |||||
| step_trace_info_all = [line[-num_of_step:] for line in step_trace_info_all] | |||||
| iter_start_info = [step_trace_info_all[fp_start][0]] + \ | |||||
| step_trace_info_all[iter_end][:num_of_step] | |||||
| step_trace_info_all.insert(iter_start, iter_start_info) | |||||
| except (IOError, OSError) as err: | except (IOError, OSError) as err: | ||||
| log.warning(f'Failed to read {source_file}', err) | log.warning(f'Failed to read {source_file}', err) | ||||
| raise ProfilerIOException | raise ProfilerIOException | ||||
| for step_num in range(1, num_of_step): | |||||
| for step_num in range(num_of_step): | |||||
| step_trace = { | step_trace = { | ||||
| 'start': int(step_trace_info_all[iter_start][step_num].split(',')[start_time]), | 'start': int(step_trace_info_all[iter_start][step_num].split(',')[start_time]), | ||||
| 'fp': int(step_trace_info_all[fp_start][step_num].split(',')[start_time]), | 'fp': int(step_trace_info_all[fp_start][step_num].split(',')[start_time]), | ||||