| @@ -50,6 +50,7 @@ ActivityData::ActivityData(std::shared_ptr<Event> data) : basic_info_(data) { | |||||
| avg_duration_ = total_duration_; | avg_duration_ = total_duration_; | ||||
| max_duration_ = total_duration_; | max_duration_ = total_duration_; | ||||
| min_duration_ = total_duration_; | min_duration_ = total_duration_; | ||||
| start_duration.emplace_back(StartDuration({basic_info_->start_time_stamp, total_duration_})); | |||||
| } | } | ||||
| ActivityData &ActivityData::operator+=(const ActivityData &other) { | ActivityData &ActivityData::operator+=(const ActivityData &other) { | ||||
| @@ -68,6 +69,7 @@ void DataSaver::ParseOpInfo(const OpInfoMap &op_info_maps) { | |||||
| op_detail_infos_.reserve(op_info_maps.size()); | op_detail_infos_.reserve(op_info_maps.size()); | ||||
| float total_time_sum = GetTotalOpTime(op_info_maps); | float total_time_sum = GetTotalOpTime(op_info_maps); | ||||
| for (auto item : op_info_maps) { | for (auto item : op_info_maps) { | ||||
| op_timestamps_map_[item.first] = item.second.start_duration; | |||||
| float proportion = item.second.op_host_cost_time / total_time_sum; | float proportion = item.second.op_host_cost_time / total_time_sum; | ||||
| auto op_info = std::make_shared<OpInfo>(item.second); | auto op_info = std::make_shared<OpInfo>(item.second); | ||||
| OpDetailInfo op_detail_info = OpDetailInfo(op_info, proportion); | OpDetailInfo op_detail_info = OpDetailInfo(op_info, proportion); | ||||
| @@ -147,6 +149,7 @@ void DataSaver::AddKernelEventToDevice(const Event &event, DeviceActivityInfos * | |||||
| device_activity_infos->emplace(kernel_name, activity_data); | device_activity_infos->emplace(kernel_name, activity_data); | ||||
| } else { | } else { | ||||
| iter->second += activity_data; | iter->second += activity_data; | ||||
| iter->second.start_duration.emplace_back(StartDuration({event.start_time_stamp, activity_data.total_duration_})); | |||||
| } | } | ||||
| } | } | ||||
| @@ -164,6 +167,7 @@ void DataSaver::WriteFile(std::string out_path_dir) { | |||||
| WriteOpDetail(out_path_dir); | WriteOpDetail(out_path_dir); | ||||
| WriteOpType(out_path_dir); | WriteOpType(out_path_dir); | ||||
| WriteActivity(out_path_dir); | WriteActivity(out_path_dir); | ||||
| WriteOpTimestamp(out_path_dir); | |||||
| } | } | ||||
| void DataSaver::WriteOpType(const std::string &saver_base_dir) { | void DataSaver::WriteOpType(const std::string &saver_base_dir) { | ||||
| @@ -201,23 +205,58 @@ void DataSaver::WriteOpDetail(const std::string &saver_base_dir) { | |||||
| void DataSaver::WriteActivity(const std::string &saver_base_dir) { | void DataSaver::WriteActivity(const std::string &saver_base_dir) { | ||||
| std::string file_path_base = saver_base_dir + "/gpu_activity_data_"; | std::string file_path_base = saver_base_dir + "/gpu_activity_data_"; | ||||
| std::string timestamp_file_path_base = saver_base_dir + "/activity_execute_timestamp_"; | |||||
| for (auto device_info : activity_infos_) { | for (auto device_info : activity_infos_) { | ||||
| // write activity result csv | |||||
| std::string file_path = file_path_base + std::to_string(device_info.first) + ".csv"; | std::string file_path = file_path_base + std::to_string(device_info.first) + ".csv"; | ||||
| std::ofstream ofs(file_path); | std::ofstream ofs(file_path); | ||||
| if (!ofs.is_open()) { | if (!ofs.is_open()) { | ||||
| MS_LOG(WARNING) << "Open file '" << file_path << "' failed!"; | MS_LOG(WARNING) << "Open file '" << file_path << "' failed!"; | ||||
| return; | return; | ||||
| } | } | ||||
| // write activity timestamp txt | |||||
| std::string timestamp_file_path = timestamp_file_path_base + std::to_string(device_info.first) + ".txt"; | |||||
| std::ofstream activity_timestamp_ofs(timestamp_file_path); | |||||
| if (!activity_timestamp_ofs.is_open()) { | |||||
| MS_LOG(WARNING) << "Open file '" << timestamp_file_path << "' failed!"; | |||||
| return; | |||||
| } | |||||
| // write activity data into file | // write activity data into file | ||||
| ofs << ActivityData().GetHeader() << std::endl; | ofs << ActivityData().GetHeader() << std::endl; | ||||
| for (auto activity_data : device_info.second) { | for (auto activity_data : device_info.second) { | ||||
| ofs << activity_data.second << std::endl; | ofs << activity_data.second << std::endl; | ||||
| for (auto start_duration : activity_data.second.start_duration) { | |||||
| activity_timestamp_ofs << activity_data.second.basic_info_->kernel_name << ";"; | |||||
| activity_timestamp_ofs << activity_data.second.basic_info_->stream_id << ";"; | |||||
| activity_timestamp_ofs << start_duration.start_timestamp << ";"; | |||||
| activity_timestamp_ofs << start_duration.duration << std::endl; | |||||
| } | |||||
| } | } | ||||
| ofs.close(); | ofs.close(); | ||||
| activity_timestamp_ofs.close(); | |||||
| MS_LOG(INFO) << "Write " << device_info.second.size() << " activity infos into file: " << file_path; | MS_LOG(INFO) << "Write " << device_info.second.size() << " activity infos into file: " << file_path; | ||||
| } | } | ||||
| } | } | ||||
| void DataSaver::WriteOpTimestamp(const std::string &saver_base_dir) { | |||||
| std::string file_path = saver_base_dir + "/op_execute_timestamp_" + device_id_ + ".txt"; | |||||
| std::ofstream ofs(file_path); | |||||
| // check if the file is writable | |||||
| if (!ofs.is_open()) { | |||||
| MS_LOG(WARNING) << "Open file '" << file_path << "' failed!"; | |||||
| return; | |||||
| } | |||||
| // write op timestamp info into file | |||||
| for (const auto &op_timestamp_info : op_timestamps_map_) { | |||||
| ofs << op_timestamp_info.first << ";Ops;"; | |||||
| for (auto start_end : op_timestamp_info.second) { | |||||
| ofs << start_end.start_timestamp << "," << start_end.duration << " "; | |||||
| } | |||||
| ofs << std::endl; | |||||
| } | |||||
| ofs.close(); | |||||
| } | |||||
| } // namespace gpu | } // namespace gpu | ||||
| } // namespace profiler | } // namespace profiler | ||||
| } // namespace mindspore | } // namespace mindspore | ||||
| @@ -83,6 +83,7 @@ struct ActivityData { | |||||
| float avg_duration_{0}; | float avg_duration_{0}; | ||||
| float max_duration_{0}; | float max_duration_{0}; | ||||
| float min_duration_{0}; | float min_duration_{0}; | ||||
| std::vector<StartDuration> start_duration; | |||||
| ActivityData() = default; | ActivityData() = default; | ||||
| @@ -109,6 +110,8 @@ using DeviceActivityInfos = std::unordered_map<std::string, ActivityData>; // | |||||
| using AllActivityInfos = std::unordered_map<uint32_t, DeviceActivityInfos>; // <device_id, ActivityData> | using AllActivityInfos = std::unordered_map<uint32_t, DeviceActivityInfos>; // <device_id, ActivityData> | ||||
| using OpTypeInfos = std::unordered_map<std::string, OpType>; // <op_full_name, Optype> | using OpTypeInfos = std::unordered_map<std::string, OpType>; // <op_full_name, Optype> | ||||
| using OpDetailInfos = std::vector<OpDetailInfo>; | using OpDetailInfos = std::vector<OpDetailInfo>; | ||||
| // <op_full_name, list of StartDuration records for that op> | |||||
| using OpTimestampInfo = std::unordered_map<std::string, std::vector<StartDuration>>; | |||||
| class DataSaver { | class DataSaver { | ||||
| public: | public: | ||||
| @@ -141,10 +144,13 @@ class DataSaver { | |||||
| void WriteActivity(const std::string &saver_base_dir); | void WriteActivity(const std::string &saver_base_dir); | ||||
| void WriteOpTimestamp(const std::string &saver_base_dir); | |||||
| std::string device_id_; | std::string device_id_; | ||||
| AllActivityInfos activity_infos_; | AllActivityInfos activity_infos_; | ||||
| OpTypeInfos op_type_infos_; | OpTypeInfos op_type_infos_; | ||||
| OpDetailInfos op_detail_infos_; | OpDetailInfos op_detail_infos_; | ||||
| OpTimestampInfo op_timestamps_map_; | |||||
| }; | }; | ||||
| } // namespace gpu | } // namespace gpu | ||||
| } // namespace profiler | } // namespace profiler | ||||
| @@ -390,12 +390,20 @@ void GPUProfiler::SetRunTimeData(const std::string &op_name, const float time_el | |||||
| } | } | ||||
| } | } | ||||
| void GPUProfiler::SetRunTimeData(const std::string &op_name, const uint64_t start, const float duration) { | |||||
| auto iter = op_info_map_.find(op_name); | |||||
| if (iter != op_info_map_.end()) { | |||||
| iter->second.start_duration.emplace_back(StartDuration({start, duration})); | |||||
| } | |||||
| } | |||||
| void GPUProfiler::OpDataProducerBegin(const std::string op_name, void *stream) { | void GPUProfiler::OpDataProducerBegin(const std::string op_name, void *stream) { | ||||
| if (sync_enable_flag_) { | if (sync_enable_flag_) { | ||||
| CHECK_CUDA_RET_WITH_ERROR(cudaEventCreate(&op_event_start_), "cudaEventCreate op event start failed"); | CHECK_CUDA_RET_WITH_ERROR(cudaEventCreate(&op_event_start_), "cudaEventCreate op event start failed"); | ||||
| CHECK_CUDA_RET_WITH_ERROR(cudaEventCreate(&op_event_stop_), "cudaEventCreate op event stop failed"); | CHECK_CUDA_RET_WITH_ERROR(cudaEventCreate(&op_event_stop_), "cudaEventCreate op event stop failed"); | ||||
| CHECK_CUDA_RET_WITH_ERROR(cudaEventRecord(op_event_start_, (CUstream)stream_), | CHECK_CUDA_RET_WITH_ERROR(cudaEventRecord(op_event_start_, (CUstream)stream_), | ||||
| "cudaEventRecord op event start failed"); | "cudaEventRecord op event start failed"); | ||||
| op_host_time_start_ = GetHostTimeStamp(); | |||||
| } else { | } else { | ||||
| op_host_time_start_ = GetHostTimeStamp(); | op_host_time_start_ = GetHostTimeStamp(); | ||||
| } | } | ||||
| @@ -414,12 +422,14 @@ void GPUProfiler::OpDataProducerEnd() { | |||||
| CHECK_CUDA_RET_WITH_ERROR(cudaEventDestroy(op_event_start_), "cudaEventDestroy op event start failed"); | CHECK_CUDA_RET_WITH_ERROR(cudaEventDestroy(op_event_start_), "cudaEventDestroy op event start failed"); | ||||
| CHECK_CUDA_RET_WITH_ERROR(cudaEventDestroy(op_event_stop_), "cudaEventDestroy op event stop failed"); | CHECK_CUDA_RET_WITH_ERROR(cudaEventDestroy(op_event_stop_), "cudaEventDestroy op event stop failed"); | ||||
| op_time_elapsed = op_time_elapsed * kTimeUnit; | op_time_elapsed = op_time_elapsed * kTimeUnit; | ||||
| op_host_time_stop_ = GetHostTimeStamp(); | |||||
| } else { | } else { | ||||
| op_host_time_stop_ = GetHostTimeStamp(); | op_host_time_stop_ = GetHostTimeStamp(); | ||||
| op_time_elapsed = (op_host_time_stop_ - op_host_time_start_) / kTimeUnit; | op_time_elapsed = (op_host_time_stop_ - op_host_time_start_) / kTimeUnit; | ||||
| } | } | ||||
| MS_LOG(DEBUG) << "Host Time Elapsed(us)," << op_name_ << "," << op_time_elapsed; | MS_LOG(DEBUG) << "Host Time Elapsed(us)," << op_name_ << "," << op_time_elapsed; | ||||
| SetRunTimeData(op_name_, op_time_elapsed); | SetRunTimeData(op_name_, op_time_elapsed); | ||||
| SetRunTimeData(op_name_, op_host_time_start_, op_time_elapsed); | |||||
| } | } | ||||
| void GPUProfiler::StopCUPTI() { | void GPUProfiler::StopCUPTI() { | ||||
| @@ -84,6 +84,11 @@ struct Event { | |||||
| }; | }; | ||||
| }; | }; | ||||
// One execution record: the timestamp at which it started and how long it lasted.
// Kept as a plain aggregate so it can be brace-initialized as {start_timestamp, duration}.
struct StartDuration {
  uint64_t start_timestamp = 0;
  float duration = 0.0f;
};
| struct OpInfo { | struct OpInfo { | ||||
| std::string op_name; | std::string op_name; | ||||
| float cupti_api_call_time = 0l; | float cupti_api_call_time = 0l; | ||||
| @@ -92,6 +97,7 @@ struct OpInfo { | |||||
| int op_kernel_api_count = 0; | int op_kernel_api_count = 0; | ||||
| int op_kernel_count = 0; | int op_kernel_count = 0; | ||||
| int op_count = 0; | int op_count = 0; | ||||
| std::vector<StartDuration> start_duration; | |||||
| void *stream; | void *stream; | ||||
| }; | }; | ||||
| @@ -133,6 +139,7 @@ class GPUProfiler { | |||||
| void AddEvent(Event &&event); | void AddEvent(Event &&event); | ||||
| void SetRunTimeData(const std::string &op_name, void *stream); | void SetRunTimeData(const std::string &op_name, void *stream); | ||||
| void SetRunTimeData(const std::string &op_name, const float time_elapsed); | void SetRunTimeData(const std::string &op_name, const float time_elapsed); | ||||
| void SetRunTimeData(const std::string &op_name, const uint64_t start, const float duration); | |||||
| void FixOpNameByCorrelationId(Event *event); | void FixOpNameByCorrelationId(Event *event); | ||||
| static std::shared_ptr<GPUProfiler> profiler_inst_; | static std::shared_ptr<GPUProfiler> profiler_inst_; | ||||