Merge pull request !18982 from gzhcv/FitLoopCount (tags/v1.4.0)
| @@ -21,4 +21,12 @@ if(ENABLE_CPU AND NOT (ENABLE_D OR ENABLE_GPU)) | |||
| set_property(SOURCE ${PROFILER_SRC_LIST} PROPERTY COMPILE_DEFINITIONS | |||
| SUBMODULE_ID=mindspore::SubModuleId::SM_PROFILER) | |||
| add_library(_mindspore_profiler_obj OBJECT ${PROFILER_SRC_LIST}) | |||
| endif() | |||
| # When ENABLE_TESTCASES is set, build the profiler object library from device/profiling.cc only. | |||
| if(ENABLE_TESTCASES) | |||
| # Collect the source path relative to the current source directory. | |||
| file(GLOB_RECURSE PROFILER_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} | |||
| "device/profiling.cc") | |||
| # Define SUBMODULE_ID for the collected sources so they are compiled as the SM_PROFILER submodule. | |||
| set_property(SOURCE ${PROFILER_SRC_LIST} PROPERTY COMPILE_DEFINITIONS | |||
| SUBMODULE_ID=mindspore::SubModuleId::SM_PROFILER) | |||
| add_library(_mindspore_profiler_obj OBJECT ${PROFILER_SRC_LIST}) | |||
| endif() | |||
| @@ -24,6 +24,8 @@ | |||
| namespace mindspore { | |||
| namespace profiler { | |||
| namespace cpu { | |||
| std::shared_ptr<CpuDataSaver> CpuDataSaver::cpu_data_saver_inst_ = std::make_shared<CpuDataSaver>(); | |||
| void CpuDataSaver::WriteFile(const std::string out_path_dir) { | |||
| if (op_detail_infos_.empty() || op_type_infos_.empty()) { | |||
| MS_LOG(INFO) << "No cpu operation detail infos to write."; | |||
| @@ -38,6 +40,10 @@ void CpuDataSaver::WriteFile(const std::string out_path_dir) { | |||
| WriteOpType(out_path_dir); | |||
| WriteOpTimestamp(out_path_dir); | |||
| } | |||
| // Returns a mutable reference to the op timestamp map held by this saver. | |||
| OpTimestampInfo &CpuDataSaver::GetOpTimeStampInfo() { | |||
|   return op_timestamps_map_; | |||
| } | |||
| // Returns a reference to the static shared CpuDataSaver pointer (cpu_data_saver_inst_). | |||
| std::shared_ptr<CpuDataSaver> &CpuDataSaver::GetInstance() { | |||
|   return cpu_data_saver_inst_; | |||
| } | |||
| } // namespace cpu | |||
| } // namespace profiler | |||
| } // namespace mindspore | |||
| @@ -29,6 +29,8 @@ namespace profiler { | |||
| namespace cpu { | |||
| class CpuDataSaver : public DataSaver { | |||
| public: | |||
| static std::shared_ptr<CpuDataSaver> &GetInstance(); | |||
| CpuDataSaver() = default; | |||
| ~CpuDataSaver() = default; | |||
| @@ -37,7 +39,12 @@ class CpuDataSaver : public DataSaver { | |||
| CpuDataSaver &operator=(const CpuDataSaver &) = delete; | |||
| OpTimestampInfo &GetOpTimeStampInfo(); | |||
| void WriteFile(const std::string out_path); | |||
| private: | |||
| static std::shared_ptr<CpuDataSaver> cpu_data_saver_inst_; | |||
| }; | |||
| } // namespace cpu | |||
| } // namespace profiler | |||
| @@ -23,6 +23,7 @@ | |||
| #include "pybind_api/api_register.h" | |||
| #include "utils/log_adapter.h" | |||
| #include "utils/utils.h" | |||
| #include "utils/ms_context.h" | |||
| namespace mindspore { | |||
| namespace profiler { | |||
| @@ -62,13 +63,24 @@ void CPUProfiler::OpDataProducerBegin(const std::string op_name, const uint32_t | |||
| op_time_start_ = GetHostMonoTimeStamp(); | |||
| op_time_mono_start_ = GetHostMonoTimeStamp(); | |||
| SetRunTimeData(op_name, pid); | |||
| #if ENABLE_GPU | |||
| if (MsContext::GetInstance()->get_param<bool>(MS_CTX_ENABLE_MINDRT)) { | |||
| // For heterogeneous scene, record op name to gpu_profiler_inst. | |||
| auto gpu_profiler_inst = profiler::gpu::GPUProfiler::GetInstance(); | |||
| // For a CPU-only network there is no GPU profiler, so do not raise an exception. | |||
| if (gpu_profiler_inst && gpu_profiler_inst->GetEnableFlag()) { | |||
| gpu_profiler_inst->RecordOneStepStartEndInfo(op_name); | |||
| } | |||
| } | |||
| #endif | |||
| } | |||
| void CPUProfiler::OpDataProducerEnd() { | |||
| float op_time_elapsed = 0; | |||
| op_time_stop_ = GetHostMonoTimeStamp(); | |||
| op_time_elapsed = (op_time_stop_ - op_time_start_) / kNanosecondToMillisecond; | |||
| MS_LOG(DEBUG) << "Host Time Elapsed(us)," << op_name_ << "," << op_time_elapsed; | |||
| MS_LOG(DEBUG) << "Host Time Elapsed(ms)," << op_name_ << "," << op_time_elapsed; | |||
| Profiler::SetRunTimeData(op_name_, op_time_elapsed); | |||
| Profiler::SetRunTimeData(op_name_, op_time_mono_start_, op_time_elapsed); | |||
| } | |||
| @@ -83,9 +95,10 @@ void CPUProfiler::SaveProfileData() { | |||
| if (profile_data_path_.empty()) { | |||
| MS_LOG(WARNING) << "Profile data path is empty, skip save profile data."; | |||
| } else { | |||
| CpuDataSaver dataSaver; | |||
| dataSaver.ParseOpInfo(op_info_map_); | |||
| dataSaver.WriteFile(profile_data_path_); | |||
| auto cpu_data_saver_inst = profiler::cpu::CpuDataSaver::GetInstance(); | |||
| MS_EXCEPTION_IF_NULL(cpu_data_saver_inst); | |||
| cpu_data_saver_inst->ParseOpInfo(op_info_map_); | |||
| cpu_data_saver_inst->WriteFile(profile_data_path_); | |||
| } | |||
| } | |||
| @@ -24,6 +24,9 @@ | |||
| #include <unordered_map> | |||
| #include <utility> | |||
| #include "profiler/device/profiling.h" | |||
| #if ENABLE_GPU | |||
| #include "profiler/device/gpu/gpu_profiling.h" | |||
| #endif | |||
| namespace mindspore { | |||
| namespace profiler { | |||
| @@ -105,6 +105,22 @@ void GpuDataSaver::AddKernelEventToDevice(const Event &event, DeviceActivityInfo | |||
| } | |||
| } | |||
| // Rewrites every CPU op timestamp record held by the shared CpuDataSaver: | |||
| // the start timestamp has start_time.host_start_monotonic_raw_time subtracted and | |||
| // start_time.gpu_start_time added, and the duration is multiplied by kTimeUnit. | |||
| // Raises (via MS_EXCEPTION_IF_NULL) when the CpuDataSaver instance is null. | |||
| void GpuDataSaver::CpuProfilingTimeSynchronizedToGpu(const BaseTime &start_time) { | |||
|   auto cpu_saver = profiler::cpu::CpuDataSaver::GetInstance(); | |||
|   MS_EXCEPTION_IF_NULL(cpu_saver); | |||
|   // The map is taken by reference, so the records are updated in place. | |||
|   for (auto &op_entry : cpu_saver->GetOpTimeStampInfo()) { | |||
|     for (auto &record : op_entry.second) { | |||
|       record.start_timestamp = | |||
|         record.start_timestamp - start_time.host_start_monotonic_raw_time + start_time.gpu_start_time; | |||
|       // time unit from ms to us. | |||
|       record.duration *= kTimeUnit; | |||
|     } | |||
|   } | |||
| } | |||
| void GpuDataSaver::WriteFile(std::string out_path_dir, const BaseTime &start_time) { | |||
| if (out_path_dir.empty()) { | |||
| MS_LOG(WARNING) << "Output directory. Ignore the writing data."; | |||
| @@ -122,6 +138,7 @@ void GpuDataSaver::WriteFile(std::string out_path_dir, const BaseTime &start_tim | |||
| WriteActivity(out_path_dir); | |||
| WriteOpTimestamp(out_path_dir); | |||
| WriteStartTime(out_path_dir, start_time); | |||
| CpuProfilingTimeSynchronizedToGpu(start_time); | |||
| if (MsContext::GetInstance()->get_param<bool>(MS_CTX_ENABLE_MINDRT)) { | |||
| WriteStepTraceAsyncLaunchKernel(out_path_dir); | |||
| } else { | |||
| @@ -176,6 +193,11 @@ void GpuDataSaver::WriteStepTraceAsyncLaunchKernel(const std::string &saver_base | |||
| return; | |||
| } | |||
| // cpu profiler information. | |||
| auto cpu_data_saver_inst = profiler::cpu::CpuDataSaver::GetInstance(); | |||
| MS_EXCEPTION_IF_NULL(cpu_data_saver_inst); | |||
| auto &cpu_op_timestamps_map = cpu_data_saver_inst->GetOpTimeStampInfo(); | |||
| // write step trace time info into file | |||
| uint32_t step = 0; | |||
| uint64_t duration; | |||
| @@ -188,17 +210,32 @@ void GpuDataSaver::WriteStepTraceAsyncLaunchKernel(const std::string &saver_base | |||
| auto bp_end_op_timestamp = op_timestamps_map_.find(step_trace_op_name_.trace_bp_end); | |||
| auto iter_end_op_timestamp = op_timestamps_map_.find(iter_end_op_name); | |||
| if (iter_end_op_name == "Default/InitDataSetQueue-op0") continue; | |||
| // if iter_start/fp_start/iter_end op is executed on cpu, update it. | |||
| if (iter_start_op_timestamp == op_timestamps_map_.end()) { | |||
| iter_start_op_timestamp = cpu_op_timestamps_map.find(iter_start_op_name); | |||
| } | |||
| if (fp_op_timestamp == op_timestamps_map_.end()) { | |||
| fp_op_timestamp = cpu_op_timestamps_map.find(fp_op_name); | |||
| } | |||
| if (iter_end_op_timestamp == op_timestamps_map_.end()) { | |||
| iter_end_op_timestamp = cpu_op_timestamps_map.find(iter_end_op_name); | |||
| } | |||
| if (iter_end_op_name == "Default/InitDataSetQueue-op0") { | |||
| continue; | |||
| } | |||
| if (iter_start_op_timestamp == op_timestamps_map_.end() || fp_op_timestamp == op_timestamps_map_.end() || | |||
| iter_end_op_timestamp == op_timestamps_map_.end() || bp_end_op_timestamp == op_timestamps_map_.end()) { | |||
| MS_LOG(ERROR) << "[profiling step trace] failed, do not find " << fp_op_name << " or " << iter_end_op_name << "or" | |||
| << step_trace_op_name_.trace_bp_end; | |||
| MS_LOG(ERROR) << "[profiling step trace] failed, do not find \"" << fp_op_name << "\" or \"" << iter_end_op_name | |||
| << "\" or \"" << step_trace_op_name_.trace_bp_end << "\""; | |||
| ofs.close(); | |||
| return; | |||
| } | |||
| if (iter_start_op_timestamp->second.size() <= step || fp_op_timestamp->second.size() <= step || | |||
| iter_end_op_timestamp->second.size() <= step || bp_end_op_timestamp->second.size() <= step) { | |||
| MS_LOG(ERROR) << "[profiling step trace] the number of fp/bp/iter_end timestamp not enough"; | |||
| ofs.close(); | |||
| return; | |||
| } | |||
| @@ -215,13 +252,15 @@ void GpuDataSaver::WriteStepTraceAsyncLaunchKernel(const std::string &saver_base | |||
| // convert the time unit from 1ns to 10ns (keep the same with ascend) | |||
| auto iter_op_timestamp = op_timestamps_map_.find(op_name); | |||
| if (iter_op_timestamp == op_timestamps_map_.end()) { | |||
| MS_LOG(ERROR) << "[profiling step trace] failed, do not find " << fp_op_name << " or " << iter_end_op_name | |||
| << "or" << step_trace_op_name_.trace_bp_end; | |||
| MS_LOG(ERROR) << "[profiling step trace] failed, do not find \"" << fp_op_name << "\" or " << iter_end_op_name | |||
| << "\" or \"" << step_trace_op_name_.trace_bp_end << "\""; | |||
| ofs.close(); | |||
| return; | |||
| } | |||
| if (iter_op_timestamp->second.size() <= step) { | |||
| MS_LOG(ERROR) << "[profiling step trace] the number of communication op timestamp not enough"; | |||
| ofs.close(); | |||
| return; | |||
| } | |||
| @@ -278,6 +317,7 @@ void GpuDataSaver::WriteStepTrace(const std::string &saver_base_dir) { | |||
| ofs << std::endl; | |||
| } catch (const std::exception &e) { | |||
| MS_LOG(ERROR) << "Write " << file_path << "failed:" << e.what(); | |||
| ofs.close(); | |||
| } | |||
| } | |||
| } | |||
| @@ -302,6 +342,7 @@ void GpuDataSaver::WriteStartTime(const std::string &saver_base_dir, const BaseT | |||
| ofs << "gpu_start_time(ns): " << start_time.gpu_start_time << std::endl; | |||
| } catch (const std::exception &e) { | |||
| MS_LOG(ERROR) << "Write " << file_path << "failed:" << e.what(); | |||
| ofs.close(); | |||
| } | |||
| ofs.close(); | |||
| @@ -23,6 +23,7 @@ | |||
| #include <string> | |||
| #include <memory> | |||
| #include "profiler/device/gpu/gpu_profiling.h" | |||
| #include "profiler/device/cpu/cpu_data_saver.h" | |||
| #include "profiler/device/data_saver.h" | |||
| namespace mindspore { | |||
| namespace profiler { | |||
| @@ -93,6 +94,8 @@ class GpuDataSaver : public DataSaver { | |||
| void WriteStartTime(const std::string &saver_base_dir, const BaseTime &start_time); | |||
| void CpuProfilingTimeSynchronizedToGpu(const BaseTime &start_time); | |||
| AllActivityInfos activity_infos_; | |||
| ProfilingTraceInfo step_trace_op_name_from_graph_; | |||
| ProfilingTraceInfo step_trace_op_name_; | |||
| @@ -211,7 +211,10 @@ std::string GetKernelFuncName(std::string kernel_name) { | |||
| return kernel_name.substr(func_name_begin_iter); | |||
| } | |||
| std::shared_ptr<GPUProfiler> &GPUProfiler::GetInstance() { return profiler_inst_; } | |||
| std::shared_ptr<GPUProfiler> &GPUProfiler::GetInstance() { | |||
| MS_EXCEPTION_IF_NULL(profiler_inst_); | |||
| return profiler_inst_; | |||
| } | |||
| void GPUProfiler::SyncEnable(const bool enable_flag) { | |||
| MS_LOG(INFO) << "GPU Profiler synchronous enable flag:" << enable_flag; | |||
| @@ -434,7 +437,9 @@ void GPUProfiler::OpDataProducerBegin(const std::string op_name, void *stream) { | |||
| } | |||
| SetRunTimeData(op_name, stream); | |||
| if (MsContext::GetInstance()->get_param<bool>(MS_CTX_ENABLE_MINDRT)) RecordOneStepStartEndInfo(op_name); | |||
| if (MsContext::GetInstance()->get_param<bool>(MS_CTX_ENABLE_MINDRT)) { | |||
| RecordOneStepStartEndInfo(op_name); | |||
| } | |||
| } | |||
| void GPUProfiler::OpDataProducerEnd() { | |||
| @@ -490,7 +495,7 @@ void GPUProfiler::SaveProfileData() { | |||
| if (profile_data_path_.empty()) { | |||
| MS_LOG(WARNING) << "Profile data path is empty, skip save profile data."; | |||
| } else { | |||
| GpuDataSaver dataSaver(step_trace_op_name, all_step_start_end_info_); | |||
| GpuDataSaver dataSaver(step_trace_op_name_, all_step_start_end_info_); | |||
| dataSaver.ParseOpInfo(op_info_map_); | |||
| dataSaver.ParseEvent(events_); | |||
| dataSaver.WriteFile(profile_data_path_, base_time_); | |||
| @@ -498,30 +503,6 @@ void GPUProfiler::SaveProfileData() { | |||
| } | |||
| } | |||
| void GPUProfiler::RecordOneStepStartEndInfo() { | |||
| std::lock_guard<std::mutex> locker(record_mutex_); | |||
| step_start_end_info_.iter_end_timestamp = GetCUPTITimeStamp(); | |||
| all_step_start_end_info_.push_back(step_start_end_info_); | |||
| step_start_end_info_.iter_start_op_name = ""; | |||
| step_start_end_info_.fp_start_op_name = ""; | |||
| } | |||
| void GPUProfiler::RecordOneStepStartEndInfo(const std::string op_name) { | |||
| std::lock_guard<std::mutex> locker(record_mutex_); | |||
| if (step_start_end_info_.iter_start_op_name.empty()) { | |||
| step_start_end_info_.iter_start_op_name = op_name; | |||
| step_start_end_info_.fp_start_op_name = op_name; | |||
| } | |||
| std::string fp_start_op_name = step_start_end_info_.fp_start_op_name; | |||
| auto op_type_begin_iter = fp_start_op_name.rfind('/') + 1; | |||
| auto op_type_end_iter = fp_start_op_name.rfind('-'); | |||
| auto op_type = fp_start_op_name.substr(op_type_begin_iter, op_type_end_iter - op_type_begin_iter); | |||
| if (op_type == "InitDataSetQueue" || op_type == "GetNext") step_start_end_info_.fp_start_op_name = op_name; | |||
| step_start_end_info_.iter_end_op_name = op_name; | |||
| } | |||
| void GPUProfiler::ClearInst() { | |||
| op_info_map_.clear(); | |||
| op_name_map_.clear(); | |||
| @@ -701,7 +682,7 @@ void GPUProfiler::HandleActivityRecord(CUpti_Activity *record) { | |||
| AddEvent(std::move(profilingData)); | |||
| } | |||
| void GPUProfiler::SetStepTraceOpName(ProfilingTraceInfo trace_op_name) { step_trace_op_name = trace_op_name; } | |||
| void GPUProfiler::SetStepTraceOpName(ProfilingTraceInfo trace_op_name) { step_trace_op_name_ = trace_op_name; } | |||
| void GPUProfiler::RegisterProfilingOp(std::shared_ptr<ProfilingOp> node) { | |||
| PROFILER_ERROR_IF_NULLPTR(node); | |||
| @@ -132,7 +132,6 @@ class GPUProfiler : public Profiler { | |||
| void ProcessEvents(); | |||
| void RegisterProfilingOp(std::shared_ptr<ProfilingOp> node); | |||
| void SetStepTraceOpName(ProfilingTraceInfo trace_op_name); | |||
| void RecordOneStepStartEndInfo(); | |||
| std::string ProfileDataPath() const { return profile_data_path_; } | |||
| private: | |||
| @@ -143,7 +142,6 @@ class GPUProfiler : public Profiler { | |||
| void AddEvent(Event &&event); | |||
| void SetRunTimeData(const std::string &op_name, void *stream); | |||
| void FixOpNameByCorrelationId(Event *event); | |||
| void RecordOneStepStartEndInfo(std::string op_name); | |||
| static std::shared_ptr<GPUProfiler> profiler_inst_; | |||
| bool enable_flag_ = false; | |||
| @@ -175,8 +173,7 @@ class GPUProfiler : public Profiler { | |||
| uint64_t op_cupti_time_start_; | |||
| std::string profile_data_path_; | |||
| std::map<std::string, std::shared_ptr<ProfilingOp>> profiling_op_; | |||
| ProfilingTraceInfo step_trace_op_name; | |||
| std::mutex record_mutex_; | |||
| ProfilingTraceInfo step_trace_op_name_; | |||
| }; | |||
| } // namespace gpu | |||
| } // namespace profiler | |||
| @@ -41,6 +41,7 @@ ProfilingTraceInfo ProfilingUtils::GetProfilingTraceFromEnv(NotNull<const sessio | |||
| } | |||
| ProfilingTraceInfo empty_info; | |||
| ProfilingTraceInfo last_graph_profiling_trace = profiling_trace; | |||
| profiling_trace = empty_info; | |||
| SetTraceIterEnd(cnode_exec_order); | |||
| SetTraceFpStart(cnode_exec_order); | |||
| @@ -49,7 +50,13 @@ ProfilingTraceInfo ProfilingUtils::GetProfilingTraceFromEnv(NotNull<const sessio | |||
| OutputStepTraceOpNameStatus(); | |||
| is_first_step_map_[graph_ptr->graph_id()] = false; | |||
| return profiling_trace; | |||
| // If the current graph has only one node, bp_end will be empty, so fall back to the previous graph's trace info. | |||
| if (profiling_trace.trace_bp_end != "") { | |||
| return profiling_trace; | |||
| } else { | |||
| return last_graph_profiling_trace; | |||
| } | |||
| } | |||
| void ProfilingUtils::OutputStepTraceOpNameStatus() { | |||
| @@ -19,13 +19,17 @@ | |||
| #include <cxxabi.h> | |||
| #include <cmath> | |||
| #include <ctime> | |||
| #include "profiler/device/cpu/cpu_data_saver.h" | |||
| #include "pybind_api/api_register.h" | |||
| #include "utils/log_adapter.h" | |||
| #include "utils/utils.h" | |||
| #if ENABLE_GPU | |||
| #include "profiler/device/gpu/gpu_profiling.h" | |||
| #endif | |||
| namespace mindspore { | |||
| namespace profiler { | |||
| std::shared_ptr<ProfilerManager> ProfilerManager::profiler_manager_inst_ = std::make_shared<ProfilerManager>(); | |||
| uint64_t Profiler::GetHostMonoTimeStamp() const { | |||
| struct timespec ts; | |||
| #if defined(_WIN32) || defined(_WIN64) | |||
| @@ -57,5 +61,51 @@ void Profiler::SetRunTimeData(const std::string &op_name, const uint64_t start, | |||
| iter->second.start_duration.emplace_back(StartDuration({start, duration})); | |||
| } | |||
| } | |||
| // Closes the current step: stores a copy of the accumulated step info in | |||
| // all_step_start_end_info_, then clears the iter_start and fp_start op names. | |||
| // iter_end_op_name is not cleared here. Guarded by record_mutex_. | |||
| void Profiler::RecordOneStepStartEndInfo() { | |||
|   std::lock_guard<std::mutex> guard(record_mutex_); | |||
|   all_step_start_end_info_.push_back(step_start_end_info_); | |||
|   step_start_end_info_.iter_start_op_name.clear(); | |||
|   step_start_end_info_.fp_start_op_name.clear(); | |||
| } | |||
| // Updates the current step info with one executed op name. Guarded by record_mutex_. | |||
| //  - The first op seen in a step becomes both iter_start and fp_start. | |||
| //  - While the recorded fp_start op type (the text between the last '/' and the last '-') | |||
| //    is "InitDataSetQueue" or "GetNext", fp_start is replaced by the current op. | |||
| //  - iter_end always tracks the most recent op. | |||
| void Profiler::RecordOneStepStartEndInfo(const std::string op_name) { | |||
|   std::lock_guard<std::mutex> guard(record_mutex_); | |||
|   const bool is_first_op_of_step = step_start_end_info_.iter_start_op_name.empty(); | |||
|   if (is_first_op_of_step) { | |||
|     step_start_end_info_.iter_start_op_name = op_name; | |||
|     step_start_end_info_.fp_start_op_name = op_name; | |||
|   } | |||
|   // Extract the op type from the currently recorded fp_start name. | |||
|   const std::string &fp_name = step_start_end_info_.fp_start_op_name; | |||
|   const auto type_begin = fp_name.rfind('/') + 1; | |||
|   const auto type_end = fp_name.rfind('-'); | |||
|   const auto fp_op_type = fp_name.substr(type_begin, type_end - type_begin); | |||
|   const bool fp_is_data_op = (fp_op_type == "InitDataSetQueue" || fp_op_type == "GetNext"); | |||
|   if (fp_is_data_op) { | |||
|     step_start_end_info_.fp_start_op_name = op_name; | |||
|   } | |||
|   step_start_end_info_.iter_end_op_name = op_name; | |||
| } | |||
| // Returns a reference to the static shared ProfilerManager pointer. | |||
| // MS_EXCEPTION_IF_NULL checks that the pointer is non-null before it is returned. | |||
| std::shared_ptr<ProfilerManager> &ProfilerManager::GetInstance() { | |||
| MS_EXCEPTION_IF_NULL(profiler_manager_inst_); | |||
| return profiler_manager_inst_; | |||
| } | |||
| // Reports whether a recorder actor is needed for profiling. | |||
| // With ENABLE_GPU this is the GPU profiler's enable flag; otherwise it is always false. | |||
| // The two cases are separated with #else so no unreachable return follows the GPU branch. | |||
| bool ProfilerManager::GetEnableRecorderActorFlag() { | |||
| #if ENABLE_GPU | |||
|   return profiler::gpu::GPUProfiler::GetInstance()->GetEnableFlag(); | |||
| #else | |||
|   return false; | |||
| #endif | |||
| } | |||
| // Forwards the step-end notification to the GPU profiler when it is enabled. | |||
| // Without ENABLE_GPU this function does nothing. | |||
| void ProfilerManager::RecordOneStepStartEndInfo() { | |||
| #if ENABLE_GPU | |||
|   auto gpu_profiler = profiler::gpu::GPUProfiler::GetInstance(); | |||
|   if (!gpu_profiler->GetEnableFlag()) { | |||
|     return; | |||
|   } | |||
|   gpu_profiler->RecordOneStepStartEndInfo(); | |||
| #endif | |||
| } | |||
| } // namespace profiler | |||
| } // namespace mindspore | |||
| @@ -37,8 +37,6 @@ struct OneStepStartEndInfo { | |||
| std::string iter_start_op_name; | |||
| std::string fp_start_op_name; | |||
| std::string iter_end_op_name; | |||
| uint64_t fp_start_timestamp = 0l; | |||
| uint64_t iter_end_timestamp = 0l; | |||
| }; | |||
| struct OpInfo { | |||
| @@ -54,6 +52,20 @@ struct OpInfo { | |||
| uint32_t pid; | |||
| }; | |||
| // Device-independent facade over the profilers, reached through one static shared instance. | |||
| // Copy construction and copy assignment are deleted. | |||
| class ProfilerManager { | |||
| public: | |||
| // Returns a reference to the static shared instance pointer. | |||
| static std::shared_ptr<ProfilerManager> &GetInstance(); | |||
| ProfilerManager() = default; | |||
| ~ProfilerManager() = default; | |||
| ProfilerManager(const ProfilerManager &) = delete; | |||
| ProfilerManager &operator=(const ProfilerManager &) = delete; | |||
| // Returns the GPU profiler's enable flag when built with ENABLE_GPU, otherwise false. | |||
| bool GetEnableRecorderActorFlag(); | |||
| // Forwards the step-end record request to the GPU profiler when it is enabled (ENABLE_GPU builds only). | |||
| void RecordOneStepStartEndInfo(); | |||
| private: | |||
| // The shared instance returned by GetInstance(). | |||
| static std::shared_ptr<ProfilerManager> profiler_manager_inst_; | |||
| }; | |||
| class Profiler { | |||
| public: | |||
| Profiler() = default; | |||
| @@ -63,8 +75,10 @@ class Profiler { | |||
| virtual void Stop() = 0; | |||
| virtual void StepProfilingEnable(const bool enable_flag) = 0; | |||
| virtual void OpDataProducerEnd() = 0; | |||
| void RecordOneStepStartEndInfo(); | |||
| bool GetEnableFlag() const { return enable_flag_; } | |||
| std::string ProfileDataPath() const { return profile_data_path_; } | |||
| void RecordOneStepStartEndInfo(std::string op_name); | |||
| protected: | |||
| void SetRunTimeData(const std::string &op_name, const float time_elapsed); | |||
| @@ -77,6 +91,7 @@ class Profiler { | |||
| std::unordered_map<std::string, OpInfo> op_info_map_; | |||
| OneStepStartEndInfo step_start_end_info_; | |||
| std::vector<OneStepStartEndInfo> all_step_start_end_info_; | |||
| std::mutex record_mutex_; | |||
| }; | |||
| } // namespace profiler | |||
| } // namespace mindspore | |||
| @@ -65,12 +65,8 @@ void RecorderActor::RecordInfo(const std::string op_name, const KernelLaunchInfo | |||
| void RecorderActor::RecordOnStepEnd(OpContext<DeviceTensor> *op_context) { | |||
| MS_EXCEPTION_IF_NULL(op_context); | |||
| // todo clear | |||
| #if ENABLE_GPU | |||
| // Record fp_start and iter_end op name and timestamp at the step end. (GPU) | |||
| auto profiler_inst = profiler::gpu::GPUProfiler::GetInstance(); | |||
| MS_EXCEPTION_IF_NULL(profiler_inst); | |||
| if (profiler_inst->GetEnableFlag()) profiler_inst->RecordOneStepStartEndInfo(); | |||
| #endif | |||
| // Record the iter_start, fp_start and iter_end op names at the step end. (GPU) | |||
| profiler::ProfilerManager::GetInstance()->RecordOneStepStartEndInfo(); | |||
| } | |||
| } // namespace runtime | |||
| @@ -22,9 +22,7 @@ | |||
| #include "runtime/framework/actor/actor_common.h" | |||
| #include "runtime/framework/device_tensor_store.h" | |||
| #include "runtime/hardware/device_context.h" | |||
| #if ENABLE_GPU | |||
| #include "profiler/device/gpu/gpu_profiling.h" | |||
| #endif | |||
| #include "profiler/device/profiling.h" | |||
| namespace mindspore { | |||
| namespace runtime { | |||
| @@ -35,6 +35,7 @@ | |||
| #ifdef ENABLE_DEBUGGER | |||
| #include "debug/debugger/debugger.h" | |||
| #endif | |||
| #include "profiler/device/profiling.h" | |||
| namespace mindspore { | |||
| namespace runtime { | |||
| @@ -456,6 +457,13 @@ void GraphScheduler::Initialize() { | |||
| << ", the computed OMP thread number : " << OMP_thread_num | |||
| << ", the used OMP thread number : " << stoi(OMP_thread_num_used); | |||
| BuildAndScheduleGlobalActor(); | |||
| } | |||
| void GraphScheduler::BuildAndScheduleGlobalActor() { | |||
| auto actorMgr = ActorMgr::GetActorMgrRef(); | |||
| MS_EXCEPTION_IF_NULL(actorMgr); | |||
| // Create and schedule memory manager actor. | |||
| auto memory_manager_actor = std::make_shared<MemoryManagerActor>(); | |||
| MS_EXCEPTION_IF_NULL(memory_manager_actor); | |||
| @@ -465,9 +473,17 @@ void GraphScheduler::Initialize() { | |||
| // Bind single thread to response to memory alloc and free quickly. | |||
| (void)actorMgr->Spawn(base_actor, false); | |||
| // Create and schedule recorder actor. | |||
| // Create and schedule recorder actor. | |||
| bool recorder_actor_need = false; | |||
| if (profiler::ProfilerManager::GetInstance()->GetEnableRecorderActorFlag()) { | |||
| recorder_actor_need = true; | |||
| } | |||
| #ifdef ENABLE_DUMP_IR | |||
| if (mindspore::RecorderManager::Instance().RdrEnable()) { | |||
| recorder_actor_need = true; | |||
| } | |||
| #endif | |||
| if (recorder_actor_need) { | |||
| auto recorder_actor = std::make_shared<RecorderActor>(); | |||
| MS_EXCEPTION_IF_NULL(recorder_actor); | |||
| recorder_aid_ = &(recorder_actor->GetAID()); | |||
| @@ -475,7 +491,7 @@ void GraphScheduler::Initialize() { | |||
| base_recorder_actor->set_thread_pool(thread_pool_); | |||
| (void)actorMgr->Spawn(base_recorder_actor, true); | |||
| } | |||
| #endif | |||
| // Create and schedule debug actor. | |||
| bool debugger_actor_need = DumpJsonParser::GetInstance().e2e_dump_enabled(); | |||
| #ifdef ENABLE_DEBUGGER | |||
| @@ -170,6 +170,9 @@ class GraphScheduler { | |||
| ~GraphScheduler() = default; | |||
| DISABLE_COPY_AND_ASSIGN(GraphScheduler); | |||
| // The Global actors contain memory manager actor, recorder actor and debug actor. | |||
| void BuildAndScheduleGlobalActor(); | |||
| // Transform the nodes of graph to actors. | |||
| ActorSetPtr Build(const GraphCompilerInfo &graph_compiler_info); | |||
| // Link actors to DAG through the edge connection of graph and graph execution strategy. | |||
| @@ -561,7 +561,7 @@ class BaseTimelineGenerator: | |||
| os.chmod(display_file_path, stat.S_IREAD | stat.S_IWRITE) | |||
| except (IOError, OSError) as err: | |||
| logger.error('Error occurred when write timeline display file: %s', err) | |||
| raise ProfilerIOException | |||
| raise ProfilerIOException() | |||
| def write_timeline_summary(self): | |||
| """Write timeline summary to json.""" | |||
| @@ -578,7 +578,7 @@ class BaseTimelineGenerator: | |||
| os.chmod(timeline_summary_file_path, stat.S_IREAD | stat.S_IWRITE) | |||
| except (IOError, OSError) as err: | |||
| logger.error('Error occurred when write timeline summary file: %s', err) | |||
| raise ProfilerIOException | |||
| raise ProfilerIOException() | |||
| @staticmethod | |||
| def _update_num_of_streams(timeline, stream_count_dict): | |||
| @@ -852,7 +852,7 @@ class GpuTimelineGenerator(BaseTimelineGenerator): | |||
| gpu_start_time = int(lines[1].strip().split(':')[-1]) | |||
| except (IOError, OSError) as err: | |||
| logger.error(f'Error occurred when read {start_time_file_path}: {err}') | |||
| raise ProfilerIOException | |||
| raise ProfilerIOException() | |||
| time_diff = gpu_start_time - host_monotonic_start_time | |||
| for idx, time_item in enumerate(timeline_list): | |||
| @@ -874,7 +874,7 @@ class GpuTimelineGenerator(BaseTimelineGenerator): | |||
| op_timeline_list.append(line_list) | |||
| except (IOError, OSError) as err: | |||
| logger.error('Error occurred when load operator timeline data intermediate file: %s', err) | |||
| raise ProfilerIOException | |||
| raise ProfilerIOException() | |||
| return op_timeline_list | |||
| @@ -898,7 +898,7 @@ class GpuTimelineGenerator(BaseTimelineGenerator): | |||
| activity_timeline_list.append(line_list) | |||
| except (IOError, OSError) as err: | |||
| logger.error('Error occurred when load activity timeline data intermediate file: %s', err) | |||
| raise ProfilerIOException | |||
| raise ProfilerIOException() | |||
| return activity_timeline_list | |||
| @@ -967,7 +967,7 @@ class GpuTimelineGenerator(BaseTimelineGenerator): | |||
| step_num += 1 | |||
| except (IOError, OSError) as err: | |||
| logger.error(f'Error occurred when read {step_trace_profiling_path}: {err}') | |||
| raise ProfilerIOException | |||
| raise ProfilerIOException() | |||
| return step_time_list | |||
| @@ -985,7 +985,7 @@ class GpuTimelineGenerator(BaseTimelineGenerator): | |||
| return bool(len(first_string.split(',')) == 2) | |||
| except (IOError, OSError) as err: | |||
| logger.error(f'Error occurred when read {step_trace_profiling_path}: {err}') | |||
| raise ProfilerIOException | |||
| raise ProfilerIOException() | |||
| class AscendTimelineGenerator(BaseTimelineGenerator): | |||
| @@ -1018,7 +1018,7 @@ class AscendTimelineGenerator(BaseTimelineGenerator): | |||
| timeline_list.append(line_list) | |||
| except (IOError, OSError) as err: | |||
| logger.error('Error occurred when read timeline intermediate file: %s', err) | |||
| raise ProfilerIOException | |||
| raise ProfilerIOException() | |||
| return timeline_list | |||
| @@ -1139,7 +1139,7 @@ class AscendTimelineGenerator(BaseTimelineGenerator): | |||
| host_monotonic = int(lines[2].strip().split(':')[1]) | |||
| except (IOError, OSError) as err: | |||
| logger.error('Error occurred when read host_start.log: %s', err) | |||
| raise ProfilerIOException | |||
| raise ProfilerIOException() | |||
| try: | |||
| with open(dev_start_file_path) as f_obj: | |||
| lines = f_obj.readlines() | |||
| @@ -1147,7 +1147,7 @@ class AscendTimelineGenerator(BaseTimelineGenerator): | |||
| dev_cntvct = int(lines[2].strip().split(':')[1]) | |||
| except (IOError, OSError) as err: | |||
| logger.error('Error occurred when read dev_start.log: %s', err) | |||
| raise ProfilerIOException | |||
| raise ProfilerIOException() | |||
| factor_ns_to_ms = 1e-6 | |||
| factor_ten_ns_to_ns = 10 | |||