Some functions from ccsrc/profiler/device/cpu/cpu_data_saver* and ccsrc/profiler/device/gpu/data_saver* can be reused, and so can some functions from ccsrc/profiler/device/cpu/cpu_profiling* and ccsrc/profiler/device/gpu/gpu_profiling*. This PR simplifies that code. (tags/v1.2.0-rc1)
| @@ -1,6 +1,6 @@ | |||
| if(ENABLE_GPU) | |||
| file(GLOB_RECURSE PROFILER_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} | |||
| "device/gpu/*.cc" "device/cpu/*.cc") | |||
| "device/gpu/*.cc" "device/cpu/*.cc" "device/profiling.cc" "device/data_saver.cc") | |||
| set_property(SOURCE ${PROFILER_SRC_LIST} PROPERTY COMPILE_DEFINITIONS | |||
| SUBMODULE_ID=mindspore::SubModuleId::SM_PROFILER) | |||
| add_library(_mindspore_profiler_obj OBJECT ${PROFILER_SRC_LIST}) | |||
| @@ -8,7 +8,7 @@ endif() | |||
| if(ENABLE_D) | |||
| file(GLOB_RECURSE PROFILER_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} | |||
| "device/common/*.cc" "device/ascend/*.cc" "device/cpu/*.cc") | |||
| "device/common/*.cc" "device/ascend/*.cc" "device/cpu/*.cc" "device/profiling.cc" "device/data_saver.cc") | |||
| set_property(SOURCE ${PROFILER_SRC_LIST} PROPERTY COMPILE_DEFINITIONS | |||
| SUBMODULE_ID=mindspore::SubModuleId::SM_PROFILER) | |||
| add_library(_mindspore_profiler_obj OBJECT ${PROFILER_SRC_LIST}) | |||
| @@ -16,7 +16,8 @@ if(ENABLE_D) | |||
| endif() | |||
| if(ENABLE_CPU AND NOT (ENABLE_D OR ENABLE_GPU)) | |||
| file(GLOB_RECURSE PROFILER_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "device/cpu/*.cc") | |||
| file(GLOB_RECURSE PROFILER_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} | |||
| "device/cpu/*.cc" "device/profiling.cc" "device/data_saver.cc") | |||
| set_property(SOURCE ${PROFILER_SRC_LIST} PROPERTY COMPILE_DEFINITIONS | |||
| SUBMODULE_ID=mindspore::SubModuleId::SM_PROFILER) | |||
| add_library(_mindspore_profiler_obj OBJECT ${PROFILER_SRC_LIST}) | |||
| @@ -24,65 +24,7 @@ | |||
| namespace mindspore { | |||
| namespace profiler { | |||
| namespace cpu { | |||
| OpDetailInfo::OpDetailInfo(std::shared_ptr<OpInfo> op_info, float proportion) | |||
| : op_info_(op_info), proportion_(proportion) { | |||
| // op_full_name is like 'xxx/xxx/{op_type}-op{node_id}' | |||
| op_full_name_ = op_info->op_name; | |||
| auto op_type_begin_iter = op_full_name_.rfind('/') + 1; | |||
| auto op_type_end_iter = op_full_name_.rfind('-'); | |||
| op_type_ = op_full_name_.substr(op_type_begin_iter, op_type_end_iter - op_type_begin_iter); | |||
| op_name_ = op_full_name_.substr(op_type_begin_iter); | |||
| op_avg_time_ = op_info->op_cost_time / op_info->op_count; | |||
| } | |||
| void DataSaver::ParseOpInfo(const OpInfoMap &op_info_maps) { | |||
| const float factor_percent = 100; | |||
| op_detail_infos_.reserve(op_info_maps.size()); | |||
| float total_time_sum = GetTotalOpTime(op_info_maps); | |||
| for (auto item : op_info_maps) { | |||
| op_timestamps_map_[item.first] = item.second.start_duration; | |||
| float proportion = item.second.op_cost_time / total_time_sum * factor_percent; | |||
| auto op_info = std::make_shared<OpInfo>(item.second); | |||
| OpDetailInfo op_detail_info = OpDetailInfo(op_info, proportion); | |||
| op_detail_infos_.emplace_back(op_detail_info); | |||
| AddOpDetailInfoForType(op_detail_info); | |||
| } | |||
| // update average time of op type | |||
| for (auto &op_type : op_type_infos_) { | |||
| // device_infos: <type_name, op_type_info> | |||
| op_type.second.avg_time_ = op_type.second.total_time_ / op_type.second.count_; | |||
| } | |||
| MS_LOG(DEBUG) << "Get " << op_detail_infos_.size() << " operation items."; | |||
| MS_LOG(DEBUG) << "Get " << op_type_infos_.size() << " operation type items."; | |||
| } | |||
| void DataSaver::AddOpDetailInfoForType(const OpDetailInfo &op_detail_info) { | |||
| // Construct OpType object according to op detail info | |||
| OpType op_type = OpType{op_detail_info.op_type_, | |||
| op_detail_info.op_info_->op_count, | |||
| op_detail_info.op_info_->op_count, | |||
| op_detail_info.op_info_->op_cost_time, | |||
| 0, | |||
| op_detail_info.proportion_}; | |||
| // Set the OpType into op_type_infos_ map | |||
| std::string type_name = op_detail_info.op_type_; | |||
| auto iter = op_type_infos_.find(type_name); | |||
| if (iter == op_type_infos_.end()) { | |||
| op_type_infos_.emplace(type_name, op_type); | |||
| } else { | |||
| iter->second += op_type; | |||
| } | |||
| } | |||
| float DataSaver::GetTotalOpTime(const OpInfoMap &op_info_maps) { | |||
| float sum = 0; | |||
| sum = std::accumulate(op_info_maps.begin(), op_info_maps.end(), sum, | |||
| [](float i, auto iter) { return i + iter.second.op_cost_time; }); | |||
| MS_LOG(DEBUG) << "The total op time is " << sum; | |||
| return sum; | |||
| } | |||
| void DataSaver::WriteFile(std::string out_path_dir) { | |||
| void CpuDataSaver::WriteFile(std::string out_path_dir) { | |||
| if (op_detail_infos_.empty() || op_type_infos_.empty()) { | |||
| MS_LOG(INFO) << "No cpu operation detail infos to write."; | |||
| return; | |||
| @@ -91,84 +33,11 @@ void DataSaver::WriteFile(std::string out_path_dir) { | |||
| MS_EXCEPTION_IF_NULL(context_ptr); | |||
| auto device_id = context_ptr->get_param<uint32_t>(MS_CTX_DEVICE_ID); | |||
| device_id_ = std::to_string(device_id); | |||
| op_side_ = "cpu"; | |||
| WriteOpDetail(out_path_dir); | |||
| WriteOpType(out_path_dir); | |||
| WriteOpTimestamp(out_path_dir); | |||
| } | |||
| void DataSaver::WriteOpType(const std::string &saver_base_dir) { | |||
| std::string file_path = saver_base_dir + "/cpu_op_type_info_" + device_id_ + ".csv"; | |||
| std::ofstream ofs(file_path); | |||
| // check if the file is writable | |||
| if (!ofs.is_open()) { | |||
| MS_LOG(WARNING) << "Open file '" << file_path << "' failed!"; | |||
| return; | |||
| } | |||
| try { | |||
| // write op type info into file | |||
| ofs << OpType().GetHeader() << std::endl; | |||
| for (auto op_type_info : op_type_infos_) { | |||
| ofs << op_type_info.second << std::endl; | |||
| } | |||
| } catch (const std::exception &e) { | |||
| MS_LOG(ERROR) << "Write " << file_path << "failed: " << e.what(); | |||
| } | |||
| ofs.close(); | |||
| ChangeFileMode(file_path); | |||
| MS_LOG(INFO) << "Write " << op_type_infos_.size() << " op type infos into file: " << file_path; | |||
| } | |||
| void DataSaver::WriteOpDetail(const std::string &saver_base_dir) { | |||
| std::string file_path = saver_base_dir + "/cpu_op_detail_info_" + device_id_ + ".csv"; | |||
| std::ofstream ofs(file_path); | |||
| if (!ofs.is_open()) { | |||
| MS_LOG(WARNING) << "Open file '" << file_path << "' failed!"; | |||
| return; | |||
| } | |||
| try { | |||
| // write op detail info into file | |||
| ofs << OpDetailInfo().GetHeader() << std::endl; | |||
| for (auto op_detail : op_detail_infos_) { | |||
| ofs << op_detail << std::endl; | |||
| } | |||
| } catch (const std::exception &e) { | |||
| MS_LOG(ERROR) << "Write " << file_path << "failed: " << e.what(); | |||
| } | |||
| ofs.close(); | |||
| ChangeFileMode(file_path); | |||
| MS_LOG(INFO) << "Write " << op_detail_infos_.size() << " op detail infos into file: " << file_path; | |||
| } | |||
| void DataSaver::WriteOpTimestamp(const std::string &saver_base_dir) { | |||
| std::string file_path = saver_base_dir + "/cpu_op_execute_timestamp_" + device_id_ + ".txt"; | |||
| std::ofstream ofs(file_path); | |||
| // check if the file is writable | |||
| if (!ofs.is_open()) { | |||
| MS_LOG(WARNING) << "Open file '" << file_path << "' failed!"; | |||
| return; | |||
| } | |||
| try { | |||
| // write op timestamp info into file | |||
| for (const auto &op_timestamp_info : op_timestamps_map_) { | |||
| ofs << op_timestamp_info.first << ";host_cpu_ops;"; | |||
| for (auto start_end : op_timestamp_info.second) { | |||
| ofs << start_end.start_timestamp << "," << start_end.duration << " "; | |||
| } | |||
| ofs << std::endl; | |||
| } | |||
| } catch (const std::exception &e) { | |||
| MS_LOG(ERROR) << "Write " << file_path << "failed: " << e.what(); | |||
| } | |||
| ofs.close(); | |||
| ChangeFileMode(file_path); | |||
| } | |||
| void DataSaver::ChangeFileMode(const std::string &file_path) { | |||
| if (chmod(common::SafeCStr(file_path), S_IRUSR) == -1) { | |||
| MS_LOG(WARNING) << "Modify file: " << file_path << " to rw fail."; | |||
| return; | |||
| } | |||
| } | |||
| } // namespace cpu | |||
| } // namespace profiler | |||
| } // namespace mindspore | |||
| @@ -14,8 +14,8 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CPU_DATA_SAVER_H | |||
| #define MINDSPORE_CPU_DATA_SAVER_H | |||
| #ifndef MINDSPORE_CCSRC_PROFILER_DEVICE_CPU_CPU_DATA_SAVER_H | |||
| #define MINDSPORE_CCSRC_PROFILER_DEVICE_CPU_CPU_DATA_SAVER_H | |||
| #include <iostream> | |||
| #include <algorithm> | |||
| #include <unordered_map> | |||
| @@ -23,101 +23,24 @@ | |||
| #include <string> | |||
| #include <memory> | |||
| #include "profiler/device/cpu/cpu_profiling.h" | |||
| #include "profiler/device/data_saver.h" | |||
| namespace mindspore { | |||
| namespace profiler { | |||
| namespace cpu { | |||
| struct OpDetailInfo { | |||
| std::string op_type_; | |||
| std::string op_name_; | |||
| std::string op_full_name_; | |||
| std::shared_ptr<OpInfo> op_info_{nullptr}; | |||
| float op_avg_time_{0}; | |||
| float proportion_{0}; | |||
| OpDetailInfo() = default; | |||
| OpDetailInfo(std::shared_ptr<OpInfo> op_info, float proportion); | |||
| std::string GetHeader() const { | |||
| return "op_side,op_type,op_name,full_op_name,op_occurrences,compute_time(ms)," | |||
| "avg_execution_time(ms),total_proportion,subgraph,pid"; | |||
| } | |||
| friend std::ostream &operator<<(std::ostream &os, const OpDetailInfo &event) { | |||
| os << "Host," << event.op_type_ << ',' << event.op_name_ << ',' << event.op_full_name_ << ',' | |||
| << event.op_info_->op_count << ',' << event.op_info_->op_cost_time << ',' << event.op_avg_time_ << ',' | |||
| << event.proportion_ << ",Default," << event.op_info_->pid; | |||
| return os; | |||
| } | |||
| }; | |||
// Aggregated statistics of all operators that share one op type; one row of
// the CPU op type CSV.
struct OpType {
  std::string op_type_;
  int count_{0};
  int step_{0};
  float total_time_{0};
  float avg_time_{0};
  float proportion_{0};

  // Column names, in the order operator<< writes the values.
  std::string GetHeader() const {
    return "op_type,total_called_times,called_times(per-step),"
           "total_compute_time,compute_time(ms per-step),percent";
  }

  // Writes one CSV row without a trailing newline.
  friend std::ostream &operator<<(std::ostream &os, const OpType &event) {
    // step_ defaults to 0; integer division by zero is undefined, so report 0 instead.
    const int count_per_step = (event.step_ != 0) ? event.count_ / event.step_ : 0;
    const float time_per_step = (event.step_ != 0) ? event.total_time_ / event.step_ : 0.0f;
    os << event.op_type_ << ',' << event.count_ << ',' << count_per_step << ',' << event.total_time_ << ','
       << time_per_step << ',' << event.proportion_;
    return os;
  }

  // Accumulates another record of the same type; step_ and avg_time_ are left unchanged.
  OpType &operator+=(const OpType &other) {
    this->count_ += other.count_;
    this->total_time_ += other.total_time_;
    this->proportion_ += other.proportion_;
    return *this;
  }
};
| using OpInfoMap = std::unordered_map<std::string, OpInfo>; | |||
| using OpTypeInfos = std::unordered_map<std::string, OpType>; // <op_full_name, Optype> | |||
| using OpDetailInfos = std::vector<OpDetailInfo>; | |||
| // <op_full_name, StartDuration> | |||
| using OpTimestampInfo = std::unordered_map<std::string, std::vector<StartDuration>>; | |||
| class DataSaver { | |||
| class CpuDataSaver : public DataSaver { | |||
| public: | |||
| DataSaver() = default; | |||
| ~DataSaver() = default; | |||
| CpuDataSaver() = default; | |||
| DataSaver(const DataSaver &) = delete; | |||
| ~CpuDataSaver() = default; | |||
| DataSaver &operator=(const DataSaver &) = delete; | |||
| CpuDataSaver(const CpuDataSaver &) = delete; | |||
| void ParseOpInfo(const OpInfoMap &op_info_maps); | |||
| CpuDataSaver &operator=(const CpuDataSaver &) = delete; | |||
| void WriteFile(std::string out_path); | |||
| private: | |||
| void AddOpDetailInfoForType(const OpDetailInfo &op_detail_info); | |||
| float GetTotalOpTime(const OpInfoMap &op_info_maps); | |||
| void WriteOpType(const std::string &saver_base_dir); | |||
| void WriteOpDetail(const std::string &saver_base_dir); | |||
| void WriteOpTimestamp(const std::string &saver_base_dir); | |||
| void ChangeFileMode(const std::string &file_path); | |||
| std::string device_id_; | |||
| OpTypeInfos op_type_infos_; | |||
| OpDetailInfos op_detail_infos_; | |||
| OpTimestampInfo op_timestamps_map_; | |||
| }; | |||
| } // namespace cpu | |||
| } // namespace profiler | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CPU_DATA_SAVER_H | |||
| #endif // MINDSPORE_CCSRC_PROFILER_DEVICE_CPU_CPU_DATA_SAVER_H | |||
| @@ -29,18 +29,6 @@ namespace profiler { | |||
| namespace cpu { | |||
| std::shared_ptr<CPUProfiler> CPUProfiler::profiler_inst_ = nullptr; | |||
// Returns the current monotonic clock reading in nanoseconds, or 0 when the
// clock cannot be read.
uint64_t GetMonoTimeStamp() {
  // Zero-initialise so a failed clock_gettime never yields an indeterminate value.
  struct timespec ts = {0, 0};
#if defined(_WIN32) || defined(_WIN64)
  const int ret = clock_gettime(CLOCK_MONOTONIC, &ts);
#else
  const int ret = clock_gettime(CLOCK_MONOTONIC_RAW, &ts);
#endif
  if (ret != 0) {
    return 0;
  }
  constexpr uint64_t kNSecondInSecond = 1000000000;
  return static_cast<uint64_t>(ts.tv_sec) * kNSecondInSecond + static_cast<uint64_t>(ts.tv_nsec);
}
| std::shared_ptr<CPUProfiler> CPUProfiler::GetInstance() { | |||
| if (profiler_inst_ == nullptr) { | |||
| profiler_inst_ = std::shared_ptr<CPUProfiler>(new (std::nothrow) CPUProfiler()); | |||
| @@ -50,7 +38,7 @@ std::shared_ptr<CPUProfiler> CPUProfiler::GetInstance() { | |||
| void CPUProfiler::Init(const std::string &profileDataPath = "") { | |||
| MS_LOG(INFO) << "Initialize CPU Profiling"; | |||
| base_time_ = GetMonoTimeStamp(); | |||
| base_time_ = GetHostMonoTimeStamp(); | |||
| profile_data_path_ = profileDataPath; | |||
| MS_LOG(INFO) << " Host start time(ns): " << base_time_ << " profile data path: " << profile_data_path_; | |||
| } | |||
| @@ -75,34 +63,19 @@ void CPUProfiler::SetRunTimeData(const std::string &op_name, const uint32_t pid) | |||
| pid_ = pid; | |||
| } | |||
| void CPUProfiler::SetRunTimeData(const std::string &op_name, const float time_elapsed) { | |||
| auto iter = op_info_map_.find(op_name); | |||
| if (iter != op_info_map_.end()) { | |||
| // The time unit is ms, convert to us | |||
| iter->second.op_cost_time += time_elapsed; | |||
| } | |||
| } | |||
| void CPUProfiler::SetRunTimeData(const std::string &op_name, const uint64_t start, const float duration) { | |||
| auto iter = op_info_map_.find(op_name); | |||
| if (iter != op_info_map_.end()) { | |||
| iter->second.start_duration.emplace_back(StartDuration({start, duration})); | |||
| } | |||
| } | |||
| void CPUProfiler::OpDataProducerBegin(const std::string op_name, const uint32_t pid) { | |||
| op_time_start_ = GetMonoTimeStamp(); | |||
| op_time_mono_start_ = GetMonoTimeStamp(); | |||
| op_time_start_ = GetHostMonoTimeStamp(); | |||
| op_time_mono_start_ = GetHostMonoTimeStamp(); | |||
| SetRunTimeData(op_name, pid); | |||
| } | |||
| void CPUProfiler::OpDataProducerEnd() { | |||
| float op_time_elapsed = 0; | |||
| op_time_stop_ = GetMonoTimeStamp(); | |||
| op_time_stop_ = GetHostMonoTimeStamp(); | |||
| op_time_elapsed = (op_time_stop_ - op_time_start_) / kTimeUnit; | |||
| MS_LOG(DEBUG) << "Host Time Elapsed(us)," << op_name_ << "," << op_time_elapsed; | |||
| SetRunTimeData(op_name_, op_time_elapsed); | |||
| SetRunTimeData(op_name_, op_time_mono_start_, op_time_elapsed); | |||
| Profiler::SetRunTimeData(op_name_, op_time_elapsed); | |||
| Profiler::SetRunTimeData(op_name_, op_time_mono_start_, op_time_elapsed); | |||
| } | |||
| void CPUProfiler::Stop() { | |||
| @@ -115,7 +88,7 @@ void CPUProfiler::SaveProfileData() { | |||
| if (profile_data_path_.empty()) { | |||
| MS_LOG(WARNING) << "Profile data path is empty, skip save profile data."; | |||
| } else { | |||
| DataSaver dataSaver; | |||
| CpuDataSaver dataSaver; | |||
| dataSaver.ParseOpInfo(op_info_map_); | |||
| dataSaver.WriteFile(profile_data_path_); | |||
| } | |||
| @@ -14,73 +14,52 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CPU_PROFILING_H | |||
| #define MINDSPORE_CPU_PROFILING_H | |||
| #ifndef MINDSPORE_CCSRC_PROFILER_DEVICE_CPU_PROFILING_H | |||
| #define MINDSPORE_CCSRC_PROFILER_DEVICE_CPU_PROFILING_H | |||
| #include <algorithm> | |||
| #include <cstdio> | |||
| #include <map> | |||
| #include <memory> | |||
| #include <mutex> | |||
| #include <string> | |||
| #include <unordered_map> | |||
| #include <utility> | |||
| #include <vector> | |||
| #include "profiler/device/profiling.h" | |||
| namespace mindspore { | |||
| namespace profiler { | |||
| namespace cpu { | |||
// Start timestamp and duration of a single operator execution.
struct StartDuration {
  uint64_t start_timestamp = 0;
  float duration = 0.0f;
};

// Statistics collected for one operator.
struct OpInfo {
  std::string op_name;
  float op_cost_time = 0;                     // accumulated cost time
  int op_count = 0;                           // number of recorded executions
  std::vector<StartDuration> start_duration;  // one entry per recorded execution
  uint32_t pid = 0;                           // initialised so a default OpInfo never holds an indeterminate pid
};
| const float kTimeUnit = 1000; | |||
| class CPUProfiler { | |||
| class CPUProfiler : public Profiler { | |||
| public: | |||
| static std::shared_ptr<CPUProfiler> GetInstance(); | |||
| ~CPUProfiler() = default; | |||
| CPUProfiler(const CPUProfiler &) = delete; | |||
| CPUProfiler &operator=(const CPUProfiler &) = delete; | |||
| void Init(const std::string &profileDataPath); | |||
| void Stop(); | |||
| void StepProfilingEnable(const bool enable_flag); | |||
| bool GetEnableFlag() const { return enable_flag_; } | |||
| void Init(const std::string &profileDataPath) override; | |||
| void Stop() override; | |||
| void StepProfilingEnable(const bool enable_flag) override; | |||
| void OpDataProducerBegin(const std::string op_name, const uint32_t pid); | |||
| void OpDataProducerEnd(); | |||
| std::string ProfileDataPath() const { return profile_data_path_; } | |||
| void OpDataProducerEnd() override; | |||
| private: | |||
| CPUProfiler() = default; | |||
| void ClearInst(); | |||
| void SetRunTimeData(const std::string &op_name, const uint32_t pid); | |||
| void SetRunTimeData(const std::string &op_name, const float time_elapsed); | |||
| void SetRunTimeData(const std::string &op_name, const uint64_t start, const float duration); | |||
| void SaveProfileData() override; | |||
| void ClearInst() override; | |||
| static std::shared_ptr<CPUProfiler> profiler_inst_; | |||
| bool enable_flag_ = false; | |||
| std::unordered_map<std::string, OpInfo> op_info_map_; | |||
| uint64_t base_time_; | |||
| std::string op_name_; | |||
| uint32_t pid_; | |||
| void SaveProfileData(); | |||
| uint64_t op_time_start_; | |||
| uint64_t op_time_mono_start_; | |||
| uint64_t op_time_stop_; | |||
| std::string profile_data_path_; | |||
| }; | |||
| } // namespace cpu | |||
| } // namespace profiler | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CPU_PROFILING_H | |||
| #endif // MINDSPORE_CCSRC_PROFILER_DEVICE_CPU_PROFILING_H | |||
| @@ -0,0 +1,177 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "profiler/device/data_saver.h" | |||
| #include <fstream> | |||
| #include <numeric> | |||
| #include "sys/stat.h" | |||
| #include "utils/log_adapter.h" | |||
| #include "utils/ms_utils.h" | |||
| #include "utils/ms_context.h" | |||
| namespace mindspore { | |||
| namespace profiler { | |||
| OpDetailInfo::OpDetailInfo(std::shared_ptr<OpInfo> op_info, float proportion) | |||
| : op_info_(op_info), proportion_(proportion) { | |||
| // op_full_name is like 'xxx/xxx/{op_type}-op{node_id}' | |||
| op_full_name_ = op_info->op_name; | |||
| auto op_type_begin_iter = op_full_name_.rfind('/') + 1; | |||
| auto op_type_end_iter = op_full_name_.rfind('-'); | |||
| op_type_ = op_full_name_.substr(op_type_begin_iter, op_type_end_iter - op_type_begin_iter); | |||
| op_name_ = op_full_name_.substr(op_type_begin_iter); | |||
| op_avg_time_ = op_info->op_host_cost_time / op_info->op_count; | |||
| } | |||
| void DataSaver::ParseOpInfo(const OpInfoMap &op_info_maps) { | |||
| op_detail_infos_.reserve(op_info_maps.size()); | |||
| float total_time_sum = GetTotalOpTime(op_info_maps); | |||
| for (auto item : op_info_maps) { | |||
| op_timestamps_map_[item.first] = item.second.start_duration; | |||
| float proportion = item.second.op_host_cost_time / total_time_sum; | |||
| auto op_info = std::make_shared<OpInfo>(item.second); | |||
| OpDetailInfo op_detail_info = OpDetailInfo(op_info, proportion); | |||
| op_detail_infos_.emplace_back(op_detail_info); | |||
| AddOpDetailInfoForType(op_detail_info); | |||
| } | |||
| // update average time of op type | |||
| for (auto &op_type : op_type_infos_) { | |||
| // device_infos: <type_name, op_type_info> | |||
| op_type.second.avg_time_ = op_type.second.total_time_ / op_type.second.count_; | |||
| } | |||
| MS_LOG(DEBUG) << "Get " << op_detail_infos_.size() << " operation items."; | |||
| MS_LOG(DEBUG) << "Get " << op_type_infos_.size() << " operation type items."; | |||
| } | |||
| void DataSaver::AddOpDetailInfoForType(const OpDetailInfo &op_detail_info) { | |||
| // Construct OpType object according to op detail info | |||
| OpType op_type = OpType{op_detail_info.op_type_, | |||
| op_detail_info.op_info_->op_count, | |||
| op_detail_info.op_info_->op_count, | |||
| op_detail_info.op_info_->op_host_cost_time, | |||
| 0, | |||
| op_detail_info.proportion_}; | |||
| // Set the OpType into op_type_infos_ map | |||
| std::string type_name = op_detail_info.op_type_; | |||
| auto iter = op_type_infos_.find(type_name); | |||
| if (iter == op_type_infos_.end()) { | |||
| op_type_infos_.emplace(type_name, op_type); | |||
| } else { | |||
| iter->second += op_type; | |||
| } | |||
| } | |||
| float DataSaver::GetTotalOpTime(const OpInfoMap &op_info_maps) { | |||
| float sum = 0; | |||
| sum = std::accumulate(op_info_maps.begin(), op_info_maps.end(), sum, | |||
| [](float i, auto iter) { return i + iter.second.op_host_cost_time; }); | |||
| MS_LOG(DEBUG) << "The total op time is " << sum; | |||
| return sum; | |||
| } | |||
| void DataSaver::WriteOpType(const std::string &saver_base_dir) { | |||
| std::string file_path = saver_base_dir + "/" + op_side_ + "_op_type_info_" + device_id_ + ".csv"; | |||
| std::ofstream ofs(file_path); | |||
| // check if the file is writable | |||
| if (!ofs.is_open()) { | |||
| MS_LOG(WARNING) << "Open file '" << file_path << "' failed!"; | |||
| return; | |||
| } | |||
| try { | |||
| // write op type info into file | |||
| if (op_side_ == "cpu") { | |||
| ofs << OpType().GetCpuHeader() << std::endl; | |||
| for (auto op_type_info : op_type_infos_) { | |||
| op_type_info.second.OutputCpuOpTypeInfo(ofs); | |||
| } | |||
| } | |||
| if (op_side_ == "gpu") { | |||
| ofs << OpType().GetGpuHeader() << std::endl; | |||
| for (auto op_type_info : op_type_infos_) { | |||
| op_type_info.second.OutputGpuOpTypeInfo(ofs); | |||
| } | |||
| } | |||
| } catch (const std::exception &e) { | |||
| MS_LOG(ERROR) << "Write " << file_path << "failed: " << e.what(); | |||
| } | |||
| ofs.close(); | |||
| ChangeFileMode(file_path); | |||
| MS_LOG(INFO) << "Write " << op_type_infos_.size() << " op type infos into file: " << file_path; | |||
| } | |||
| void DataSaver::WriteOpDetail(const std::string &saver_base_dir) { | |||
| std::string file_path = saver_base_dir + "/" + op_side_ + "_op_detail_info_" + device_id_ + ".csv"; | |||
| std::ofstream ofs(file_path); | |||
| if (!ofs.is_open()) { | |||
| MS_LOG(WARNING) << "Open file '" << file_path << "' failed!"; | |||
| return; | |||
| } | |||
| try { | |||
| // write op detail info into file | |||
| if (op_side_ == "cpu") { | |||
| ofs << OpDetailInfo().GetCpuHeader() << std::endl; | |||
| for (auto op_detail : op_detail_infos_) { | |||
| op_detail.OutputCpuOpDetailInfo(ofs); | |||
| } | |||
| } | |||
| if (op_side_ == "gpu") { | |||
| ofs << OpDetailInfo().GetGpuHeader() << std::endl; | |||
| for (auto op_detail : op_detail_infos_) { | |||
| op_detail.OutputGpuOpDetailInfo(ofs); | |||
| } | |||
| } | |||
| } catch (const std::exception &e) { | |||
| MS_LOG(ERROR) << "Write " << file_path << "failed: " << e.what(); | |||
| } | |||
| ofs.close(); | |||
| ChangeFileMode(file_path); | |||
| MS_LOG(INFO) << "Write " << op_detail_infos_.size() << " op detail infos into file: " << file_path; | |||
| } | |||
| void DataSaver::WriteOpTimestamp(const std::string &saver_base_dir) { | |||
| std::string file_path = saver_base_dir + "/" + op_side_ + "_op_execute_timestamp_" + device_id_ + ".txt"; | |||
| std::ofstream ofs(file_path); | |||
| // check if the file is writable | |||
| if (!ofs.is_open()) { | |||
| MS_LOG(WARNING) << "Open file '" << file_path << "' failed!"; | |||
| return; | |||
| } | |||
| try { | |||
| // write op timestamp info into file | |||
| for (const auto &op_timestamp_info : op_timestamps_map_) { | |||
| if (op_side_ == "cpu") { | |||
| ofs << op_timestamp_info.first << ";HostCpuOps;"; | |||
| } else { | |||
| ofs << op_timestamp_info.first << ";GpuOps;"; | |||
| } | |||
| for (auto start_end : op_timestamp_info.second) { | |||
| ofs << start_end.start_timestamp << "," << start_end.duration << " "; | |||
| } | |||
| ofs << std::endl; | |||
| } | |||
| } catch (const std::exception &e) { | |||
| MS_LOG(ERROR) << "Write " << file_path << "failed: " << e.what(); | |||
| } | |||
| ofs.close(); | |||
| ChangeFileMode(file_path); | |||
| } | |||
| void DataSaver::ChangeFileMode(const std::string &file_path) { | |||
| if (chmod(common::SafeCStr(file_path), S_IRUSR | S_IWUSR) == -1) { | |||
| MS_LOG(WARNING) << "Modify file: " << file_path << " to rw fail."; | |||
| return; | |||
| } | |||
| } | |||
| } // namespace profiler | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,127 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_PROFILER_DEVICE_DATA_SAVER_H | |||
| #define MINDSPORE_CCSRC_PROFILER_DEVICE_DATA_SAVER_H | |||
| #include <iostream> | |||
| #include <algorithm> | |||
| #include <unordered_map> | |||
| #include <vector> | |||
| #include <string> | |||
| #include <memory> | |||
| #include "profiler/device/profiling.h" | |||
| namespace mindspore { | |||
| namespace profiler { | |||
| struct OpDetailInfo { | |||
| std::string op_type_; | |||
| std::string op_name_; | |||
| std::string op_full_name_; | |||
| std::shared_ptr<OpInfo> op_info_{nullptr}; | |||
| float op_avg_time_{0}; | |||
| float proportion_{0}; | |||
| OpDetailInfo() = default; | |||
| OpDetailInfo(std::shared_ptr<OpInfo> op_info, float proportion); | |||
| std::string GetCpuHeader() const { | |||
| return "op_side,op_type,op_name,full_op_name,op_occurrences,op_total_time(ms)," | |||
| "op_avg_time(ms),total_proportion,subgraph,pid"; | |||
| } | |||
| std::string GetGpuHeader() const { | |||
| return "op_side,op_type,op_name,op_full_name,op_occurrences,op_total_time(us),op_avg_time(us),total_proportion," | |||
| "cuda_activity_cost_time(us),cuda_activity_call_count"; | |||
| } | |||
| void OutputCpuOpDetailInfo(std::ostream &os) { | |||
| os << "Host," << op_type_ << ',' << op_name_ << ',' << op_full_name_ << ',' << op_info_->op_count << ',' | |||
| << op_info_->op_host_cost_time << ',' << op_avg_time_ << ',' << proportion_ << ",Default," << op_info_->pid | |||
| << std::endl; | |||
| } | |||
| void OutputGpuOpDetailInfo(std::ostream &os) { | |||
| os << "Device," << op_type_ << ',' << op_name_ << ',' << op_full_name_ << ',' << op_info_->op_count << ',' | |||
| << op_info_->op_host_cost_time << ',' << op_avg_time_ << ',' << proportion_ << ',' | |||
| << op_info_->cupti_activity_time << ',' << op_info_->op_kernel_count << std::endl; | |||
| } | |||
| }; | |||
// Aggregated statistics of all operators that share one op type; one row of
// the op type CSV. The CPU and GPU files use different columns.
struct OpType {
  std::string op_type_;
  int count_{0};
  int step_{0};
  float total_time_{0};
  float avg_time_{0};
  float proportion_{0};

  // Column names of the CPU type file, in the order OutputCpuOpTypeInfo writes them.
  std::string GetCpuHeader() const {
    return "op_type,type_occurrences,execution_frequency(per-step),"
           "total_compute_time,avg_time(ms),percent";
  }

  // Column names of the GPU type file, in the order OutputGpuOpTypeInfo writes them.
  std::string GetGpuHeader() const { return "op_type,type_occurrences,total_time(us),total_proportion,avg_time(us)"; }

  // Writes one CPU row followed by a newline.
  void OutputCpuOpTypeInfo(std::ostream &os) const {
    // step_ and count_ default to 0; integer division by zero is undefined and the
    // float division would print inf/NaN, so report 0 in both cases.
    const int frequency = (step_ != 0) ? count_ / step_ : 0;
    const float average = (count_ != 0) ? total_time_ / count_ : 0.0f;
    os << op_type_ << ',' << count_ << ',' << frequency << ',' << total_time_ << ',' << average << ','
       << proportion_ << std::endl;
  }

  // Writes one GPU row followed by a newline.
  void OutputGpuOpTypeInfo(std::ostream &os) const {
    os << op_type_ << ',' << count_ << ',' << total_time_ << ',' << proportion_ << ',' << avg_time_ << std::endl;
  }

  // Accumulates another record of the same type; step_ and avg_time_ are left unchanged.
  OpType &operator+=(const OpType &other) {
    this->count_ += other.count_;
    this->total_time_ += other.total_time_;
    this->proportion_ += other.proportion_;
    return *this;
  }
};
| using OpTimestampInfo = std::unordered_map<std::string, std::vector<StartDuration>>; // <op_full_name, StartDuration> | |||
| using OpInfoMap = std::unordered_map<std::string, OpInfo>; | |||
| using OpTypeInfos = std::unordered_map<std::string, OpType>; // <op_full_name, Optype> | |||
| using OpDetailInfos = std::vector<OpDetailInfo>; | |||
| class DataSaver { | |||
| public: | |||
| DataSaver() = default; | |||
| virtual ~DataSaver() = default; | |||
| void ParseOpInfo(const OpInfoMap &op_info_maps); | |||
| protected: | |||
| void AddOpDetailInfoForType(const OpDetailInfo &op_detail_info); | |||
| float GetTotalOpTime(const OpInfoMap &op_info_maps); | |||
| void WriteOpType(const std::string &saver_base_dir); | |||
| void WriteOpDetail(const std::string &saver_base_dir); | |||
| void WriteOpTimestamp(const std::string &saver_base_dir); | |||
| void ChangeFileMode(const std::string &file_path); | |||
| OpTypeInfos op_type_infos_; | |||
| OpDetailInfos op_detail_infos_; | |||
| OpTimestampInfo op_timestamps_map_; | |||
| std::string op_side_; | |||
| std::string device_id_; | |||
| }; | |||
| } // namespace profiler | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_PROFILER_DEVICE_DATA_SAVER_H | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -13,7 +13,7 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "profiler/device/gpu/data_saver.h" | |||
| #include "profiler/device/gpu/gpu_data_saver.h" | |||
| #include <fstream> | |||
| #include <numeric> | |||
| #include "sys/stat.h" | |||
| @@ -23,17 +23,6 @@ | |||
| namespace mindspore { | |||
| namespace profiler { | |||
| namespace gpu { | |||
| OpDetailInfo::OpDetailInfo(std::shared_ptr<OpInfo> op_info, float proportion) | |||
| : op_info_(op_info), proportion_(proportion) { | |||
| // op_full_name is like 'xxx/xxx/{op_type}-op{node_id}' | |||
| op_full_name_ = op_info->op_name; | |||
| auto op_type_begin_iter = op_full_name_.rfind('/') + 1; | |||
| auto op_type_end_iter = op_full_name_.rfind('-'); | |||
| op_type_ = op_full_name_.substr(op_type_begin_iter, op_type_end_iter - op_type_begin_iter); | |||
| op_name_ = op_full_name_.substr(op_type_begin_iter); | |||
| op_avg_time_ = op_info->op_host_cost_time / op_info->op_count; | |||
| } | |||
| ActivityData::ActivityData(std::shared_ptr<Event> data) : basic_info_(data) { | |||
| grid_dim_ = basic_info_->activity_type == ActivityType::kKernel | |||
| ? "\"" + std::to_string(basic_info_->kernel_info.grid_x) + ',' + | |||
| @@ -65,49 +54,7 @@ ActivityData &ActivityData::operator+=(const ActivityData &other) { | |||
| return *this; | |||
| } | |||
| void DataSaver::ParseOpInfo(const OpInfoMap &op_info_maps) { | |||
| op_detail_infos_.reserve(op_info_maps.size()); | |||
| float total_time_sum = GetTotalOpTime(op_info_maps); | |||
| for (auto item : op_info_maps) { | |||
| op_timestamps_map_[item.first] = item.second.start_duration; | |||
| float proportion = item.second.op_host_cost_time / total_time_sum; | |||
| auto op_info = std::make_shared<OpInfo>(item.second); | |||
| OpDetailInfo op_detail_info = OpDetailInfo(op_info, proportion); | |||
| op_detail_infos_.emplace_back(op_detail_info); | |||
| AddOpDetailInfoForType(op_detail_info); | |||
| } | |||
| // update average time of op type | |||
| for (auto &op_type : op_type_infos_) { | |||
| // device_infos: <type_name, op_type_info> | |||
| op_type.second.avg_time_ = op_type.second.total_time_ / op_type.second.count_; | |||
| } | |||
| MS_LOG(DEBUG) << "Get " << op_detail_infos_.size() << " operation items."; | |||
| MS_LOG(DEBUG) << "Get " << op_type_infos_.size() << " operation type items."; | |||
| } | |||
| void DataSaver::AddOpDetailInfoForType(const OpDetailInfo &op_detail_info) { | |||
| // Construct OpType object according to op detail info | |||
| OpType op_type = OpType{op_detail_info.op_type_, op_detail_info.op_info_->op_count, | |||
| op_detail_info.op_info_->op_host_cost_time, 0, op_detail_info.proportion_}; | |||
| // Set the OpType into op_type_infos_ map | |||
| std::string type_name = op_detail_info.op_type_; | |||
| auto iter = op_type_infos_.find(type_name); | |||
| if (iter == op_type_infos_.end()) { | |||
| op_type_infos_.emplace(type_name, op_type); | |||
| } else { | |||
| iter->second += op_type; | |||
| } | |||
| } | |||
| float DataSaver::GetTotalOpTime(const OpInfoMap &op_info_maps) { | |||
| float sum = 0; | |||
| sum = std::accumulate(op_info_maps.begin(), op_info_maps.end(), sum, | |||
| [](float i, auto iter) { return i + iter.second.op_host_cost_time; }); | |||
| MS_LOG(DEBUG) << "The total op time is " << sum; | |||
| return sum; | |||
| } | |||
| void DataSaver::ParseEvent(const std::vector<Event> &events) { | |||
| void GpuDataSaver::ParseEvent(const std::vector<Event> &events) { | |||
| // Put Kernel activity events into activity_infos_ | |||
| for (const auto &event : events) { | |||
| if (event.op_name.empty() || event.api_type != CUPTIApiType::kActivity || | |||
| @@ -127,7 +74,7 @@ void DataSaver::ParseEvent(const std::vector<Event> &events) { | |||
| } | |||
| } | |||
| void DataSaver::AddKernelEvent(const Event &event) { | |||
| void GpuDataSaver::AddKernelEvent(const Event &event) { | |||
| // Put kernel event to activity_infos according to device id | |||
| uint32_t device_id = event.device_id; | |||
| auto iter = activity_infos_.find(device_id); | |||
| @@ -139,7 +86,7 @@ void DataSaver::AddKernelEvent(const Event &event) { | |||
| } | |||
| } | |||
| void DataSaver::AddKernelEventToDevice(const Event &event, DeviceActivityInfos *device_activity_infos) { | |||
| void GpuDataSaver::AddKernelEventToDevice(const Event &event, DeviceActivityInfos *device_activity_infos) { | |||
| // Combine kernel activity with same kernel name | |||
| auto event_ptr = std::make_shared<Event>(event); | |||
| ActivityData activity_data = ActivityData(event_ptr); | |||
| @@ -153,7 +100,7 @@ void DataSaver::AddKernelEventToDevice(const Event &event, DeviceActivityInfos * | |||
| } | |||
| } | |||
| void DataSaver::WriteFile(std::string out_path_dir, const BaseTime &start_time) { | |||
| void GpuDataSaver::WriteFile(std::string out_path_dir, const BaseTime &start_time) { | |||
| if (out_path_dir.empty()) { | |||
| MS_LOG(WARNING) << "Output directory. Ignore the writing data."; | |||
| return; | |||
| @@ -164,6 +111,7 @@ void DataSaver::WriteFile(std::string out_path_dir, const BaseTime &start_time) | |||
| } | |||
| // not support multi-device for operator info per process yet | |||
| device_id_ = std::to_string(activity_infos_.begin()->first); | |||
| op_side_ = "gpu"; | |||
| WriteOpDetail(out_path_dir); | |||
| WriteOpType(out_path_dir); | |||
| WriteActivity(out_path_dir); | |||
| @@ -172,42 +120,7 @@ void DataSaver::WriteFile(std::string out_path_dir, const BaseTime &start_time) | |||
| WriteStartTime(out_path_dir, start_time); | |||
| } | |||
| void DataSaver::WriteOpType(const std::string &saver_base_dir) { | |||
| std::string file_path = saver_base_dir + "/gpu_op_type_info_" + device_id_ + ".csv"; | |||
| std::ofstream ofs(file_path); | |||
| // check if the file is writable | |||
| if (!ofs.is_open()) { | |||
| MS_LOG(WARNING) << "Open file '" << file_path << "' failed!"; | |||
| return; | |||
| } | |||
| // write op type info into file | |||
| ofs << OpType().GetHeader() << std::endl; | |||
| for (auto op_type_info : op_type_infos_) { | |||
| ofs << op_type_info.second << std::endl; | |||
| } | |||
| ofs.close(); | |||
| ChangeFileMode(file_path); | |||
| MS_LOG(INFO) << "Write " << op_type_infos_.size() << " op type infos into file: " << file_path; | |||
| } | |||
| void DataSaver::WriteOpDetail(const std::string &saver_base_dir) { | |||
| std::string file_path = saver_base_dir + "/gpu_op_detail_info_" + device_id_ + ".csv"; | |||
| std::ofstream ofs(file_path); | |||
| if (!ofs.is_open()) { | |||
| MS_LOG(WARNING) << "Open file '" << file_path << "' failed!"; | |||
| return; | |||
| } | |||
| // write op detail info into file | |||
| ofs << OpDetailInfo().GetHeader() << std::endl; | |||
| for (auto op_detail : op_detail_infos_) { | |||
| ofs << op_detail << std::endl; | |||
| } | |||
| ofs.close(); | |||
| ChangeFileMode(file_path); | |||
| MS_LOG(INFO) << "Write " << op_detail_infos_.size() << " op detail infos into file: " << file_path; | |||
| } | |||
| void DataSaver::WriteActivity(const std::string &saver_base_dir) { | |||
| void GpuDataSaver::WriteActivity(const std::string &saver_base_dir) { | |||
| std::string file_path_base = saver_base_dir + "/gpu_activity_data_"; | |||
| std::string timestamp_file_path_base = saver_base_dir + "/activity_execute_timestamp_"; | |||
| for (auto device_info : activity_infos_) { | |||
| @@ -244,27 +157,7 @@ void DataSaver::WriteActivity(const std::string &saver_base_dir) { | |||
| } | |||
| } | |||
| void DataSaver::WriteOpTimestamp(const std::string &saver_base_dir) { | |||
| std::string file_path = saver_base_dir + "/op_execute_timestamp_" + device_id_ + ".txt"; | |||
| std::ofstream ofs(file_path); | |||
| // check if the file is writable | |||
| if (!ofs.is_open()) { | |||
| MS_LOG(WARNING) << "Open file '" << file_path << "' failed!"; | |||
| return; | |||
| } | |||
| // write op timestamp info into file | |||
| for (const auto &op_timestamp_info : op_timestamps_map_) { | |||
| ofs << op_timestamp_info.first << ";Ops;"; | |||
| for (auto start_end : op_timestamp_info.second) { | |||
| ofs << start_end.start_timestamp << "," << start_end.duration << " "; | |||
| } | |||
| ofs << std::endl; | |||
| } | |||
| ofs.close(); | |||
| ChangeFileMode(file_path); | |||
| } | |||
| void DataSaver::WriteStepTrace(const std::string &saver_base_dir) { | |||
| void GpuDataSaver::WriteStepTrace(const std::string &saver_base_dir) { | |||
| std::string file_path = saver_base_dir + "/step_trace_profiling_" + device_id_ + ".txt"; | |||
| std::ofstream ofs(file_path); | |||
| // check if the file is writable | |||
| @@ -308,7 +201,7 @@ void DataSaver::WriteStepTrace(const std::string &saver_base_dir) { | |||
| MS_LOG(INFO) << "Write step trace infos into file: " << file_path; | |||
| } | |||
| void DataSaver::WriteStartTime(const std::string &saver_base_dir, const BaseTime &start_time) { | |||
| void GpuDataSaver::WriteStartTime(const std::string &saver_base_dir, const BaseTime &start_time) { | |||
| std::string file_path = saver_base_dir + "/start_time_" + device_id_ + ".txt"; | |||
| std::ofstream ofs(file_path); | |||
| // check if the file is writable | |||
| @@ -330,14 +223,7 @@ void DataSaver::WriteStartTime(const std::string &saver_base_dir, const BaseTime | |||
| MS_LOG(INFO) << "Write profiler start time infos into file: " << file_path; | |||
| } | |||
| void DataSaver::SetStepTraceOpName(ProfilingTraceInfo trace_op_name) { step_trace_op_name = trace_op_name; } | |||
| void DataSaver::ChangeFileMode(const std::string &file_path) { | |||
| if (chmod(common::SafeCStr(file_path), S_IRUSR) == -1) { | |||
| MS_LOG(WARNING) << "Modify file:" << file_path << " to rw fail."; | |||
| return; | |||
| } | |||
| } | |||
| void GpuDataSaver::SetStepTraceOpName(ProfilingTraceInfo trace_op_name) { step_trace_op_name = trace_op_name; } | |||
| } // namespace gpu | |||
| } // namespace profiler | |||
| } // namespace mindspore | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -14,8 +14,8 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_DATA_SAVER_H | |||
| #define MINDSPORE_DATA_SAVER_H | |||
| #ifndef MINDSPORE_CCSRC_PROFILER_DEVICE_GPU_GPU_DATA_SAVER_H | |||
| #define MINDSPORE_CCSRC_PROFILER_DEVICE_GPU_GPU_DATA_SAVER_H | |||
| #include <iostream> | |||
| #include <algorithm> | |||
| #include <unordered_map> | |||
| @@ -23,57 +23,10 @@ | |||
| #include <string> | |||
| #include <memory> | |||
| #include "profiler/device/gpu/gpu_profiling.h" | |||
| #include "profiler/device/data_saver.h" | |||
| namespace mindspore { | |||
| namespace profiler { | |||
| namespace gpu { | |||
| struct OpDetailInfo { | |||
| std::string op_type_; | |||
| std::string op_name_; | |||
| std::string op_full_name_; | |||
| std::shared_ptr<OpInfo> op_info_{nullptr}; | |||
| float op_avg_time_{0}; | |||
| float proportion_{0}; | |||
| OpDetailInfo() = default; | |||
| OpDetailInfo(std::shared_ptr<OpInfo> op_info, float proportion); | |||
| std::string GetHeader() const { | |||
| return "op_side,op_type,op_name,op_full_name,op_occurrences,op_total_time(us),op_avg_time(us),total_proportion," | |||
| "cuda_activity_cost_time(us),cuda_activity_call_count"; | |||
| } | |||
| friend std::ostream &operator<<(std::ostream &os, const OpDetailInfo &event) { | |||
| os << "Device," << event.op_type_ << ',' << event.op_name_ << ',' << event.op_full_name_ << ',' | |||
| << event.op_info_->op_count << ',' << event.op_info_->op_host_cost_time << ',' << event.op_avg_time_ << ',' | |||
| << event.proportion_ << ',' << event.op_info_->cupti_activity_time << ',' << event.op_info_->op_kernel_count; | |||
| return os; | |||
| } | |||
| }; | |||
// Statistics accumulated over all operators of one type, as written to the op type CSV.
struct OpType {
  std::string op_type_;
  int count_{0};
  float total_time_{0};
  float avg_time_{0};
  float proportion_{0};

  // CSV header matching the column order produced by operator<<.
  std::string GetHeader() const { return "op_type,type_occurrences,total_time(us),total_proportion,avg_time(us)"; }

  // Streams one CSV row: type, count, total time, proportion, average time.
  friend std::ostream &operator<<(std::ostream &os, const OpType &stats) {
    os << stats.op_type_ << ',' << stats.count_ << ',' << stats.total_time_;
    os << ',' << stats.proportion_ << ',' << stats.avg_time_;
    return os;
  }

  // Accumulates count, total time and proportion; op_type_ and avg_time_ are left untouched.
  OpType &operator+=(const OpType &other) {
    count_ += other.count_;
    total_time_ += other.total_time_;
    proportion_ += other.proportion_;
    return *this;
  }
};
| struct ActivityData { | |||
| std::shared_ptr<Event> basic_info_{nullptr}; | |||
| std::string block_dim_; | |||
| @@ -105,25 +58,18 @@ struct ActivityData { | |||
| ActivityData &operator+=(const ActivityData &other); | |||
| }; | |||
| using OpInfoMap = std::unordered_map<std::string, OpInfo>; | |||
| using DeviceActivityInfos = std::unordered_map<std::string, ActivityData>; // <device_id, ActivityData> | |||
| using AllActivityInfos = std::unordered_map<uint32_t, DeviceActivityInfos>; // <device_id, ActivityData> | |||
| using OpTypeInfos = std::unordered_map<std::string, OpType>; // <op_full_name, Optype> | |||
| using OpDetailInfos = std::vector<OpDetailInfo>; | |||
| // <op_full_name, StartDuration> | |||
| using OpTimestampInfo = std::unordered_map<std::string, std::vector<StartDuration>>; | |||
| class DataSaver { | |||
| class GpuDataSaver : public DataSaver { | |||
| public: | |||
| DataSaver() = default; | |||
| ~DataSaver() = default; | |||
| GpuDataSaver() = default; | |||
| DataSaver(const DataSaver &) = delete; | |||
| ~GpuDataSaver() = default; | |||
| DataSaver &operator=(const DataSaver &) = delete; | |||
| GpuDataSaver(const GpuDataSaver &) = delete; | |||
| void ParseOpInfo(const OpInfoMap &op_info_maps); | |||
| GpuDataSaver &operator=(const GpuDataSaver &) = delete; | |||
| void SetStepTraceOpName(ProfilingTraceInfo trace_op_name); | |||
| @@ -132,37 +78,21 @@ class DataSaver { | |||
| void WriteFile(std::string out_path, const BaseTime &start_time); | |||
| private: | |||
| void AddOpDetailInfoForType(const OpDetailInfo &op_detail_info); | |||
| float GetTotalOpTime(const OpInfoMap &op_info_maps); | |||
| void AddKernelEvent(const Event &event); | |||
| void AddKernelEventToDevice(const Event &event, DeviceActivityInfos *device_activity_infos); | |||
| void WriteOpType(const std::string &saver_base_dir); | |||
| void WriteOpDetail(const std::string &saver_base_dir); | |||
| void WriteActivity(const std::string &saver_base_dir); | |||
| void WriteOpTimestamp(const std::string &saver_base_dir); | |||
| void WriteStepTrace(const std::string &saver_base_dir); | |||
| void WriteStartTime(const std::string &saver_base_dir, const BaseTime &start_time); | |||
| void ChangeFileMode(const std::string &file_path); | |||
| std::string device_id_; | |||
| AllActivityInfos activity_infos_; | |||
| OpTypeInfos op_type_infos_; | |||
| OpDetailInfos op_detail_infos_; | |||
| OpTimestampInfo op_timestamps_map_; | |||
| ProfilingTraceInfo step_trace_op_name; | |||
| }; | |||
| } // namespace gpu | |||
| } // namespace profiler | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_DATA_SAVER_H | |||
| #endif // MINDSPORE_CCSRC_PROFILER_DEVICE_GPU_GPU_DATA_SAVER_H | |||
| @@ -21,7 +21,7 @@ | |||
| #include <chrono> | |||
| #include <cmath> | |||
| #include "profiler/device/gpu/cupti_interface.h" | |||
| #include "profiler/device/gpu/data_saver.h" | |||
| #include "profiler/device/gpu/gpu_data_saver.h" | |||
| #include "pybind_api/api_register.h" | |||
| #include "utils/log_adapter.h" | |||
| #include "utils/utils.h" | |||
| @@ -92,14 +92,6 @@ uint64_t GetHostTimeStamp() { | |||
| return cur_time_stamp; | |||
| } | |||
// Returns the current CLOCK_MONOTONIC_RAW reading in nanoseconds.
uint64_t GetHostMonoTimeStamp() {
  constexpr uint64_t kNSecondInSecond = 1000000000;
  struct timespec now;
  clock_gettime(CLOCK_MONOTONIC_RAW, &now);
  // seconds -> nanoseconds, plus the sub-second nanosecond part
  return now.tv_sec * kNSecondInSecond + now.tv_nsec;
}
| std::string GetKernelFunc(const char *name) { | |||
| char *demangledName = abi::__cxa_demangle(name, nullptr, nullptr, nullptr); | |||
| if (demangledName != nullptr) { | |||
| @@ -415,21 +407,6 @@ void GPUProfiler::SetRunTimeData(const std::string &op_name, void *stream) { | |||
| stream_ = stream; | |||
| } | |||
| void GPUProfiler::SetRunTimeData(const std::string &op_name, const float time_elapsed) { | |||
| auto iter = op_info_map_.find(op_name); | |||
| if (iter != op_info_map_.end()) { | |||
| // The time unit is ms ,convert to us | |||
| iter->second.op_host_cost_time += time_elapsed; | |||
| } | |||
| } | |||
| void GPUProfiler::SetRunTimeData(const std::string &op_name, const uint64_t start, const float duration) { | |||
| auto iter = op_info_map_.find(op_name); | |||
| if (iter != op_info_map_.end()) { | |||
| iter->second.start_duration.emplace_back(StartDuration({start, duration})); | |||
| } | |||
| } | |||
| void GPUProfiler::OpDataProducerBegin(const std::string op_name, void *stream) { | |||
| if (sync_enable_flag_) { | |||
| CHECK_CUDA_RET_WITH_ERROR(cudaEventCreate(&op_event_start_), "cudaEventCreate op event start failed"); | |||
| @@ -463,8 +440,8 @@ void GPUProfiler::OpDataProducerEnd() { | |||
| op_time_elapsed = (op_host_time_stop_ - op_host_time_start_) / kTimeUnit; | |||
| } | |||
| MS_LOG(DEBUG) << "Host Time Elapsed(us)," << op_name_ << "," << op_time_elapsed; | |||
| SetRunTimeData(op_name_, op_time_elapsed); | |||
| SetRunTimeData(op_name_, op_cupti_time_start_, op_time_elapsed); | |||
| Profiler::SetRunTimeData(op_name_, op_time_elapsed); | |||
| Profiler::SetRunTimeData(op_name_, op_cupti_time_start_, op_time_elapsed); | |||
| } | |||
| void GPUProfiler::StopCUPTI() { | |||
| @@ -498,7 +475,7 @@ void GPUProfiler::SaveProfileData() { | |||
| if (profile_data_path_.empty()) { | |||
| MS_LOG(WARNING) << "Profile data path is empty, skip save profile data."; | |||
| } else { | |||
| DataSaver dataSaver; | |||
| GpuDataSaver dataSaver; | |||
| dataSaver.SetStepTraceOpName(step_trace_op_name); | |||
| dataSaver.ParseOpInfo(op_info_map_); | |||
| dataSaver.ParseEvent(events_); | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -14,8 +14,8 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_GPU_PROFILING_H | |||
| #define MINDSPORE_GPU_PROFILING_H | |||
| #ifndef MINDSPORE_CCSRC_PROFILER_DEVICE_GPU_GPU_PROFILING_H | |||
| #define MINDSPORE_CCSRC_PROFILER_DEVICE_GPU_GPU_PROFILING_H | |||
| #include <cuda.h> | |||
| #include <cupti.h> | |||
| #include <algorithm> | |||
| @@ -27,6 +27,7 @@ | |||
| #include <unordered_map> | |||
| #include <utility> | |||
| #include <vector> | |||
| #include "profiler/device/profiling.h" | |||
| #include "profiler/device/gpu/gpu_profiling_utils.h" | |||
| namespace mindspore { | |||
| @@ -86,23 +87,6 @@ struct Event { | |||
| }; | |||
| }; | |||
// One execution of an operator: when it started and how long it took.
struct StartDuration {
  uint64_t start_timestamp = 0;
  float duration = 0.0f;
};

// Raw profiling record of one operator.
struct OpInfo {
  std::string op_name;
  float cupti_api_call_time = 0.0f;
  float cupti_activity_time = 0.0f;
  float op_host_cost_time = 0.0f;
  int op_kernel_api_count = 0;
  int op_kernel_count = 0;
  int op_count = 0;
  std::vector<StartDuration> start_duration;
  // Initialised like every other member: OpInfo objects are copied around (e.g. into
  // shared_ptr), and copying an indeterminate pointer value must be avoided.
  void *stream = nullptr;
};
| struct BaseTime { | |||
| // nanosecond | |||
| uint64_t host_start_time = 0l; | |||
| @@ -124,17 +108,17 @@ class ProfilingOp { | |||
| std::string op_name_; | |||
| }; | |||
| class GPUProfiler { | |||
| class GPUProfiler : public Profiler { | |||
| public: | |||
| static std::shared_ptr<GPUProfiler> GetInstance(); | |||
| ~GPUProfiler() { StopCUPTI(); } | |||
| GPUProfiler(const GPUProfiler &) = delete; | |||
| GPUProfiler &operator=(const GPUProfiler &) = delete; | |||
| void Init(const std::string &profileDataPath); | |||
| void Stop(); | |||
| void Init(const std::string &profileDataPath) override; | |||
| void Stop() override; | |||
| void StopCUPTI(); | |||
| void StepProfilingEnable(const bool enable_flag); | |||
| void StepProfilingEnable(const bool enable_flag) override; | |||
| void SyncEnable(const bool enable_flag); | |||
| bool GetEnableFlag() const { return enable_flag_; } | |||
| bool GetSyncEnableFlag() const { return sync_enable_flag_; } | |||
| @@ -143,7 +127,7 @@ class GPUProfiler { | |||
| void CUPTIAPI AllocBuffer(uint8_t **buffer, size_t *size, size_t *maxNumRecords); | |||
| void CUPTIAPI ProcessBuffer(CUcontext ctx, uint32_t streamId, uint8_t *buffer, size_t size, size_t validSize); | |||
| void OpDataProducerBegin(const std::string op_name, void *stream); | |||
| void OpDataProducerEnd(); | |||
| void OpDataProducerEnd() override; | |||
| void ProcessEvents(); | |||
| void RegisterProfilingOp(std::shared_ptr<ProfilingOp> node); | |||
| void SetStepTraceOpName(ProfilingTraceInfo trace_op_name); | |||
| @@ -153,24 +137,21 @@ class GPUProfiler { | |||
| GPUProfiler() = default; | |||
| void OpsParser(); | |||
| void EventLog(const Event &event); | |||
| void ClearInst(); | |||
| void ClearInst() override; | |||
| void HandleActivityRecord(CUpti_Activity *record); | |||
| void AddEvent(Event &&event); | |||
| void SetRunTimeData(const std::string &op_name, void *stream); | |||
| void SetRunTimeData(const std::string &op_name, const float time_elapsed); | |||
| void SetRunTimeData(const std::string &op_name, const uint64_t start, const float duration); | |||
| void FixOpNameByCorrelationId(Event *event); | |||
| static std::shared_ptr<GPUProfiler> profiler_inst_; | |||
| bool enable_flag_ = false; | |||
| bool sync_enable_flag_ = true; | |||
| std::unordered_map<std::string, OpInfo> op_info_map_; | |||
| std::unordered_map<uint32_t, std::string> op_name_map_; | |||
| std::vector<Event> events_; | |||
| BaseTime base_time_; | |||
| std::string op_name_; | |||
| void *stream_; | |||
| void SaveProfileData(); | |||
| void SaveProfileData() override; | |||
| void SaveExtraProfileData(); | |||
| std::mutex event_mutex_; | |||
| @@ -198,4 +179,4 @@ class GPUProfiler { | |||
| } // namespace profiler | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_GPU_PROFILING_H | |||
| #endif // MINDSPORE_CCSRC_PROFILER_DEVICE_GPU_GPU_PROFILING_H | |||
| @@ -0,0 +1,56 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "profiler/device/profiling.h" | |||
| #include <time.h> | |||
| #include <cxxabi.h> | |||
| #include <cmath> | |||
| #include "profiler/device/cpu/cpu_data_saver.h" | |||
| #include "pybind_api/api_register.h" | |||
| #include "utils/log_adapter.h" | |||
| #include "utils/utils.h" | |||
| namespace mindspore { | |||
| namespace profiler { | |||
| uint64_t Profiler::GetHostMonoTimeStamp() { | |||
| struct timespec ts; | |||
| #if defined(_WIN32) || defined(_WIN64) | |||
| clock_gettime(CLOCK_MONOTONIC, &ts); | |||
| #else | |||
| clock_gettime(CLOCK_MONOTONIC_RAW, &ts); | |||
| #endif | |||
| constexpr uint64_t kNSecondInSecond = 1000000000; | |||
| uint64_t cur_time_stamp = ts.tv_sec * kNSecondInSecond + ts.tv_nsec; | |||
| return cur_time_stamp; | |||
| } | |||
| void Profiler::SetRunTimeData(const std::string &op_name, const float time_elapsed) { | |||
| auto iter = op_info_map_.find(op_name); | |||
| if (iter != op_info_map_.end()) { | |||
| // The time unit is ms, convert to us | |||
| iter->second.op_host_cost_time += time_elapsed; | |||
| } | |||
| } | |||
| void Profiler::SetRunTimeData(const std::string &op_name, const uint64_t start, const float duration) { | |||
| auto iter = op_info_map_.find(op_name); | |||
| if (iter != op_info_map_.end()) { | |||
| iter->second.start_duration.emplace_back(StartDuration({start, duration})); | |||
| } | |||
| } | |||
| } // namespace profiler | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,74 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_PROFILER_DEVICE_PROFILING_H | |||
| #define MINDSPORE_CCSRC_PROFILER_DEVICE_PROFILING_H | |||
| #include <algorithm> | |||
| #include <cstdio> | |||
| #include <map> | |||
| #include <memory> | |||
| #include <mutex> | |||
| #include <string> | |||
| #include <unordered_map> | |||
| #include <utility> | |||
| #include <vector> | |||
| namespace mindspore { | |||
| namespace profiler { | |||
// One execution of an operator: when it started and how long it took.
struct StartDuration {
  uint64_t start_timestamp = 0;
  float duration = 0.0f;
};

// Raw profiling record of one operator, shared by the device profilers.
struct OpInfo {
  std::string op_name;
  float cupti_api_call_time = 0.0f;
  float cupti_activity_time = 0.0f;
  float op_host_cost_time = 0.0f;
  int op_kernel_api_count = 0;
  int op_kernel_count = 0;
  int op_count = 0;
  std::vector<StartDuration> start_duration;
  // stream and pid are initialised like every other member: OpInfo objects are copied
  // around (map values, shared_ptr copies), and reading indeterminate values must be avoided.
  void *stream = nullptr;
  uint32_t pid = 0;
};
| class Profiler { | |||
| public: | |||
| Profiler() = default; | |||
| virtual ~Profiler() = default; | |||
| virtual void Init(const std::string &profileDataPath) = 0; | |||
| virtual void Stop() = 0; | |||
| virtual void StepProfilingEnable(const bool enable_flag) = 0; | |||
| virtual void OpDataProducerEnd() = 0; | |||
| bool GetEnableFlag() const { return enable_flag_; } | |||
| std::string ProfileDataPath() const { return profile_data_path_; } | |||
| protected: | |||
| void SetRunTimeData(const std::string &op_name, const float time_elapsed); | |||
| void SetRunTimeData(const std::string &op_name, const uint64_t start, const float duration); | |||
| uint64_t GetHostMonoTimeStamp(); | |||
| virtual void SaveProfileData() = 0; | |||
| virtual void ClearInst() = 0; | |||
| bool enable_flag_ = false; | |||
| std::string profile_data_path_; | |||
| std::unordered_map<std::string, OpInfo> op_info_map_; | |||
| }; | |||
| } // namespace profiler | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_PROFILER_DEVICE_PROFILING_H | |||
| @@ -642,7 +642,7 @@ class GpuTimelineGenerator(BaseTimelineGenerator): | |||
| """Generate gpu Timeline data from file.""" | |||
| _display_filename = 'gpu_timeline_display_{}.json' | |||
| _timeline_summary_filename = 'gpu_timeline_summary_{}.json' | |||
| _output_op_execute_time_file_path = "op_execute_timestamp_{}.txt" | |||
| _output_op_execute_time_file_path = "gpu_op_execute_timestamp_{}.txt" | |||
| _output_activity_execute_time_file_path = "activity_execute_timestamp_{}.txt" | |||
| _output_gpu_activity_info_file_path = "gpu_activity_data_{}.csv" | |||
| _activity_keys_list = [] | |||
| @@ -341,7 +341,7 @@ class BaseStepTraceParser: | |||
| row_data[FP_DURATION] += row_data[TAIL] | |||
| row_data = row_data[:BP_POINT] + row_data[BP_POINT+1:TAIL] | |||
| csv_writer.writerow(row_data) | |||
| os.chmod(self._output_path, stat.S_IRUSR) | |||
| os.chmod(self._output_path, stat.S_IREAD | stat.S_IWRITE) | |||
| except (IOError, OSError) as err: | |||
| log.warning('Failed to save step trace raw info. %s', err) | |||
| raise ProfilerIOException | |||
| @@ -385,7 +385,7 @@ class GpuStepTraceParser(BaseStepTraceParser): | |||
| try: | |||
| with open(output_path, 'w') as json_file: | |||
| json.dump(points, json_file) | |||
| os.chmod(output_path, stat.S_IRUSR) | |||
| os.chmod(output_path, stat.S_IREAD | stat.S_IWRITE) | |||
| except (IOError, OSError) as err: | |||
| log.warning('Failed to save point info. %s', err) | |||
| raise ProfilerIOException | |||
| @@ -504,7 +504,7 @@ class AscendStepTraceParser(BaseStepTraceParser): | |||
| try: | |||
| with open(output_path, 'w') as json_file: | |||
| json.dump(points, json_file) | |||
| os.chmod(output_path, stat.S_IRUSR) | |||
| os.chmod(output_path, stat.S_IREAD | stat.S_IWRITE) | |||
| except (IOError, OSError) as err: | |||
| log.warning('Failed to save point info. %s', err) | |||
| raise ProfilerIOException | |||