Some functions in ccsrc/profiler/device/cpu/cpu_data_saver* and ccsrc/profiler/device/gpu/data_saver* can be shared, as can some functions in ccsrc/profiler/device/cpu/cpu_profiling* and ccsrc/profiler/device/gpu/gpu_profiling*. This PR simplifies the code by reusing them.
tags/v1.2.0-rc1
| @@ -1,6 +1,6 @@ | |||||
| if(ENABLE_GPU) | if(ENABLE_GPU) | ||||
| file(GLOB_RECURSE PROFILER_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} | file(GLOB_RECURSE PROFILER_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} | ||||
| "device/gpu/*.cc" "device/cpu/*.cc") | |||||
| "device/gpu/*.cc" "device/cpu/*.cc" "device/profiling.cc" "device/data_saver.cc") | |||||
| set_property(SOURCE ${PROFILER_SRC_LIST} PROPERTY COMPILE_DEFINITIONS | set_property(SOURCE ${PROFILER_SRC_LIST} PROPERTY COMPILE_DEFINITIONS | ||||
| SUBMODULE_ID=mindspore::SubModuleId::SM_PROFILER) | SUBMODULE_ID=mindspore::SubModuleId::SM_PROFILER) | ||||
| add_library(_mindspore_profiler_obj OBJECT ${PROFILER_SRC_LIST}) | add_library(_mindspore_profiler_obj OBJECT ${PROFILER_SRC_LIST}) | ||||
| @@ -8,7 +8,7 @@ endif() | |||||
| if(ENABLE_D) | if(ENABLE_D) | ||||
| file(GLOB_RECURSE PROFILER_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} | file(GLOB_RECURSE PROFILER_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} | ||||
| "device/common/*.cc" "device/ascend/*.cc" "device/cpu/*.cc") | |||||
| "device/common/*.cc" "device/ascend/*.cc" "device/cpu/*.cc" "device/profiling.cc" "device/data_saver.cc") | |||||
| set_property(SOURCE ${PROFILER_SRC_LIST} PROPERTY COMPILE_DEFINITIONS | set_property(SOURCE ${PROFILER_SRC_LIST} PROPERTY COMPILE_DEFINITIONS | ||||
| SUBMODULE_ID=mindspore::SubModuleId::SM_PROFILER) | SUBMODULE_ID=mindspore::SubModuleId::SM_PROFILER) | ||||
| add_library(_mindspore_profiler_obj OBJECT ${PROFILER_SRC_LIST}) | add_library(_mindspore_profiler_obj OBJECT ${PROFILER_SRC_LIST}) | ||||
| @@ -16,7 +16,8 @@ if(ENABLE_D) | |||||
| endif() | endif() | ||||
| if(ENABLE_CPU AND NOT (ENABLE_D OR ENABLE_GPU)) | if(ENABLE_CPU AND NOT (ENABLE_D OR ENABLE_GPU)) | ||||
| file(GLOB_RECURSE PROFILER_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "device/cpu/*.cc") | |||||
| file(GLOB_RECURSE PROFILER_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} | |||||
| "device/cpu/*.cc" "device/profiling.cc" "device/data_saver.cc") | |||||
| set_property(SOURCE ${PROFILER_SRC_LIST} PROPERTY COMPILE_DEFINITIONS | set_property(SOURCE ${PROFILER_SRC_LIST} PROPERTY COMPILE_DEFINITIONS | ||||
| SUBMODULE_ID=mindspore::SubModuleId::SM_PROFILER) | SUBMODULE_ID=mindspore::SubModuleId::SM_PROFILER) | ||||
| add_library(_mindspore_profiler_obj OBJECT ${PROFILER_SRC_LIST}) | add_library(_mindspore_profiler_obj OBJECT ${PROFILER_SRC_LIST}) | ||||
| @@ -24,65 +24,7 @@ | |||||
| namespace mindspore { | namespace mindspore { | ||||
| namespace profiler { | namespace profiler { | ||||
| namespace cpu { | namespace cpu { | ||||
| OpDetailInfo::OpDetailInfo(std::shared_ptr<OpInfo> op_info, float proportion) | |||||
| : op_info_(op_info), proportion_(proportion) { | |||||
| // op_full_name is like 'xxx/xxx/{op_type}-op{node_id}' | |||||
| op_full_name_ = op_info->op_name; | |||||
| auto op_type_begin_iter = op_full_name_.rfind('/') + 1; | |||||
| auto op_type_end_iter = op_full_name_.rfind('-'); | |||||
| op_type_ = op_full_name_.substr(op_type_begin_iter, op_type_end_iter - op_type_begin_iter); | |||||
| op_name_ = op_full_name_.substr(op_type_begin_iter); | |||||
| op_avg_time_ = op_info->op_cost_time / op_info->op_count; | |||||
| } | |||||
| void DataSaver::ParseOpInfo(const OpInfoMap &op_info_maps) { | |||||
| const float factor_percent = 100; | |||||
| op_detail_infos_.reserve(op_info_maps.size()); | |||||
| float total_time_sum = GetTotalOpTime(op_info_maps); | |||||
| for (auto item : op_info_maps) { | |||||
| op_timestamps_map_[item.first] = item.second.start_duration; | |||||
| float proportion = item.second.op_cost_time / total_time_sum * factor_percent; | |||||
| auto op_info = std::make_shared<OpInfo>(item.second); | |||||
| OpDetailInfo op_detail_info = OpDetailInfo(op_info, proportion); | |||||
| op_detail_infos_.emplace_back(op_detail_info); | |||||
| AddOpDetailInfoForType(op_detail_info); | |||||
| } | |||||
| // update average time of op type | |||||
| for (auto &op_type : op_type_infos_) { | |||||
| // device_infos: <type_name, op_type_info> | |||||
| op_type.second.avg_time_ = op_type.second.total_time_ / op_type.second.count_; | |||||
| } | |||||
| MS_LOG(DEBUG) << "Get " << op_detail_infos_.size() << " operation items."; | |||||
| MS_LOG(DEBUG) << "Get " << op_type_infos_.size() << " operation type items."; | |||||
| } | |||||
| void DataSaver::AddOpDetailInfoForType(const OpDetailInfo &op_detail_info) { | |||||
| // Construct OpType object according to op detail info | |||||
| OpType op_type = OpType{op_detail_info.op_type_, | |||||
| op_detail_info.op_info_->op_count, | |||||
| op_detail_info.op_info_->op_count, | |||||
| op_detail_info.op_info_->op_cost_time, | |||||
| 0, | |||||
| op_detail_info.proportion_}; | |||||
| // Set the OpType into op_type_infos_ map | |||||
| std::string type_name = op_detail_info.op_type_; | |||||
| auto iter = op_type_infos_.find(type_name); | |||||
| if (iter == op_type_infos_.end()) { | |||||
| op_type_infos_.emplace(type_name, op_type); | |||||
| } else { | |||||
| iter->second += op_type; | |||||
| } | |||||
| } | |||||
| float DataSaver::GetTotalOpTime(const OpInfoMap &op_info_maps) { | |||||
| float sum = 0; | |||||
| sum = std::accumulate(op_info_maps.begin(), op_info_maps.end(), sum, | |||||
| [](float i, auto iter) { return i + iter.second.op_cost_time; }); | |||||
| MS_LOG(DEBUG) << "The total op time is " << sum; | |||||
| return sum; | |||||
| } | |||||
| void DataSaver::WriteFile(std::string out_path_dir) { | |||||
| void CpuDataSaver::WriteFile(std::string out_path_dir) { | |||||
| if (op_detail_infos_.empty() || op_type_infos_.empty()) { | if (op_detail_infos_.empty() || op_type_infos_.empty()) { | ||||
| MS_LOG(INFO) << "No cpu operation detail infos to write."; | MS_LOG(INFO) << "No cpu operation detail infos to write."; | ||||
| return; | return; | ||||
| @@ -91,84 +33,11 @@ void DataSaver::WriteFile(std::string out_path_dir) { | |||||
| MS_EXCEPTION_IF_NULL(context_ptr); | MS_EXCEPTION_IF_NULL(context_ptr); | ||||
| auto device_id = context_ptr->get_param<uint32_t>(MS_CTX_DEVICE_ID); | auto device_id = context_ptr->get_param<uint32_t>(MS_CTX_DEVICE_ID); | ||||
| device_id_ = std::to_string(device_id); | device_id_ = std::to_string(device_id); | ||||
| op_side_ = "cpu"; | |||||
| WriteOpDetail(out_path_dir); | WriteOpDetail(out_path_dir); | ||||
| WriteOpType(out_path_dir); | WriteOpType(out_path_dir); | ||||
| WriteOpTimestamp(out_path_dir); | WriteOpTimestamp(out_path_dir); | ||||
| } | } | ||||
| void DataSaver::WriteOpType(const std::string &saver_base_dir) { | |||||
| std::string file_path = saver_base_dir + "/cpu_op_type_info_" + device_id_ + ".csv"; | |||||
| std::ofstream ofs(file_path); | |||||
| // check if the file is writable | |||||
| if (!ofs.is_open()) { | |||||
| MS_LOG(WARNING) << "Open file '" << file_path << "' failed!"; | |||||
| return; | |||||
| } | |||||
| try { | |||||
| // write op type info into file | |||||
| ofs << OpType().GetHeader() << std::endl; | |||||
| for (auto op_type_info : op_type_infos_) { | |||||
| ofs << op_type_info.second << std::endl; | |||||
| } | |||||
| } catch (const std::exception &e) { | |||||
| MS_LOG(ERROR) << "Write " << file_path << "failed: " << e.what(); | |||||
| } | |||||
| ofs.close(); | |||||
| ChangeFileMode(file_path); | |||||
| MS_LOG(INFO) << "Write " << op_type_infos_.size() << " op type infos into file: " << file_path; | |||||
| } | |||||
| void DataSaver::WriteOpDetail(const std::string &saver_base_dir) { | |||||
| std::string file_path = saver_base_dir + "/cpu_op_detail_info_" + device_id_ + ".csv"; | |||||
| std::ofstream ofs(file_path); | |||||
| if (!ofs.is_open()) { | |||||
| MS_LOG(WARNING) << "Open file '" << file_path << "' failed!"; | |||||
| return; | |||||
| } | |||||
| try { | |||||
| // write op detail info into file | |||||
| ofs << OpDetailInfo().GetHeader() << std::endl; | |||||
| for (auto op_detail : op_detail_infos_) { | |||||
| ofs << op_detail << std::endl; | |||||
| } | |||||
| } catch (const std::exception &e) { | |||||
| MS_LOG(ERROR) << "Write " << file_path << "failed: " << e.what(); | |||||
| } | |||||
| ofs.close(); | |||||
| ChangeFileMode(file_path); | |||||
| MS_LOG(INFO) << "Write " << op_detail_infos_.size() << " op detail infos into file: " << file_path; | |||||
| } | |||||
| void DataSaver::WriteOpTimestamp(const std::string &saver_base_dir) { | |||||
| std::string file_path = saver_base_dir + "/cpu_op_execute_timestamp_" + device_id_ + ".txt"; | |||||
| std::ofstream ofs(file_path); | |||||
| // check if the file is writable | |||||
| if (!ofs.is_open()) { | |||||
| MS_LOG(WARNING) << "Open file '" << file_path << "' failed!"; | |||||
| return; | |||||
| } | |||||
| try { | |||||
| // write op timestamp info into file | |||||
| for (const auto &op_timestamp_info : op_timestamps_map_) { | |||||
| ofs << op_timestamp_info.first << ";host_cpu_ops;"; | |||||
| for (auto start_end : op_timestamp_info.second) { | |||||
| ofs << start_end.start_timestamp << "," << start_end.duration << " "; | |||||
| } | |||||
| ofs << std::endl; | |||||
| } | |||||
| } catch (const std::exception &e) { | |||||
| MS_LOG(ERROR) << "Write " << file_path << "failed: " << e.what(); | |||||
| } | |||||
| ofs.close(); | |||||
| ChangeFileMode(file_path); | |||||
| } | |||||
| void DataSaver::ChangeFileMode(const std::string &file_path) { | |||||
| if (chmod(common::SafeCStr(file_path), S_IRUSR) == -1) { | |||||
| MS_LOG(WARNING) << "Modify file: " << file_path << " to rw fail."; | |||||
| return; | |||||
| } | |||||
| } | |||||
| } // namespace cpu | } // namespace cpu | ||||
| } // namespace profiler | } // namespace profiler | ||||
| } // namespace mindspore | } // namespace mindspore | ||||
| @@ -14,8 +14,8 @@ | |||||
| * limitations under the License. | * limitations under the License. | ||||
| */ | */ | ||||
| #ifndef MINDSPORE_CPU_DATA_SAVER_H | |||||
| #define MINDSPORE_CPU_DATA_SAVER_H | |||||
| #ifndef MINDSPORE_CCSRC_PROFILER_DEVICE_CPU_CPU_DATA_SAVER_H | |||||
| #define MINDSPORE_CCSRC_PROFILER_DEVICE_CPU_CPU_DATA_SAVER_H | |||||
| #include <iostream> | #include <iostream> | ||||
| #include <algorithm> | #include <algorithm> | ||||
| #include <unordered_map> | #include <unordered_map> | ||||
| @@ -23,101 +23,24 @@ | |||||
| #include <string> | #include <string> | ||||
| #include <memory> | #include <memory> | ||||
| #include "profiler/device/cpu/cpu_profiling.h" | #include "profiler/device/cpu/cpu_profiling.h" | ||||
| #include "profiler/device/data_saver.h" | |||||
| namespace mindspore { | namespace mindspore { | ||||
| namespace profiler { | namespace profiler { | ||||
| namespace cpu { | namespace cpu { | ||||
| struct OpDetailInfo { | |||||
| std::string op_type_; | |||||
| std::string op_name_; | |||||
| std::string op_full_name_; | |||||
| std::shared_ptr<OpInfo> op_info_{nullptr}; | |||||
| float op_avg_time_{0}; | |||||
| float proportion_{0}; | |||||
| OpDetailInfo() = default; | |||||
| OpDetailInfo(std::shared_ptr<OpInfo> op_info, float proportion); | |||||
| std::string GetHeader() const { | |||||
| return "op_side,op_type,op_name,full_op_name,op_occurrences,compute_time(ms)," | |||||
| "avg_execution_time(ms),total_proportion,subgraph,pid"; | |||||
| } | |||||
| friend std::ostream &operator<<(std::ostream &os, const OpDetailInfo &event) { | |||||
| os << "Host," << event.op_type_ << ',' << event.op_name_ << ',' << event.op_full_name_ << ',' | |||||
| << event.op_info_->op_count << ',' << event.op_info_->op_cost_time << ',' << event.op_avg_time_ << ',' | |||||
| << event.proportion_ << ",Default," << event.op_info_->pid; | |||||
| return os; | |||||
| } | |||||
| }; | |||||
| struct OpType { | |||||
| std::string op_type_; | |||||
| int count_{0}; | |||||
| int step_{0}; | |||||
| float total_time_{0}; | |||||
| float avg_time_{0}; | |||||
| float proportion_{0}; | |||||
| std::string GetHeader() const { | |||||
| return "op_type,total_called_times,called_times(per-step)," | |||||
| "total_compute_time,compute_time(ms per-step),percent"; | |||||
| } | |||||
| friend std::ostream &operator<<(std::ostream &os, const OpType &event) { | |||||
| os << event.op_type_ << ',' << event.count_ << ',' << event.count_ / event.step_ << ',' << event.total_time_ << ',' | |||||
| << event.total_time_ / event.step_ << ',' << event.proportion_; | |||||
| return os; | |||||
| } | |||||
| OpType &operator+=(const OpType &other) { | |||||
| this->count_ += other.count_; | |||||
| this->total_time_ += other.total_time_; | |||||
| this->proportion_ += other.proportion_; | |||||
| return *this; | |||||
| } | |||||
| }; | |||||
| using OpInfoMap = std::unordered_map<std::string, OpInfo>; | |||||
| using OpTypeInfos = std::unordered_map<std::string, OpType>; // <op_full_name, Optype> | |||||
| using OpDetailInfos = std::vector<OpDetailInfo>; | |||||
| // <op_full_name, StartDuration> | |||||
| using OpTimestampInfo = std::unordered_map<std::string, std::vector<StartDuration>>; | |||||
| class DataSaver { | |||||
| class CpuDataSaver : public DataSaver { | |||||
| public: | public: | ||||
| DataSaver() = default; | |||||
| ~DataSaver() = default; | |||||
| CpuDataSaver() = default; | |||||
| DataSaver(const DataSaver &) = delete; | |||||
| ~CpuDataSaver() = default; | |||||
| DataSaver &operator=(const DataSaver &) = delete; | |||||
| CpuDataSaver(const CpuDataSaver &) = delete; | |||||
| void ParseOpInfo(const OpInfoMap &op_info_maps); | |||||
| CpuDataSaver &operator=(const CpuDataSaver &) = delete; | |||||
| void WriteFile(std::string out_path); | void WriteFile(std::string out_path); | ||||
| private: | |||||
| void AddOpDetailInfoForType(const OpDetailInfo &op_detail_info); | |||||
| float GetTotalOpTime(const OpInfoMap &op_info_maps); | |||||
| void WriteOpType(const std::string &saver_base_dir); | |||||
| void WriteOpDetail(const std::string &saver_base_dir); | |||||
| void WriteOpTimestamp(const std::string &saver_base_dir); | |||||
| void ChangeFileMode(const std::string &file_path); | |||||
| std::string device_id_; | |||||
| OpTypeInfos op_type_infos_; | |||||
| OpDetailInfos op_detail_infos_; | |||||
| OpTimestampInfo op_timestamps_map_; | |||||
| }; | }; | ||||
| } // namespace cpu | } // namespace cpu | ||||
| } // namespace profiler | } // namespace profiler | ||||
| } // namespace mindspore | } // namespace mindspore | ||||
| #endif // MINDSPORE_CPU_DATA_SAVER_H | |||||
| #endif // MINDSPORE_CCSRC_PROFILER_DEVICE_CPU_CPU_DATA_SAVER_H | |||||
| @@ -29,18 +29,6 @@ namespace profiler { | |||||
| namespace cpu { | namespace cpu { | ||||
| std::shared_ptr<CPUProfiler> CPUProfiler::profiler_inst_ = nullptr; | std::shared_ptr<CPUProfiler> CPUProfiler::profiler_inst_ = nullptr; | ||||
| uint64_t GetMonoTimeStamp() { | |||||
| struct timespec ts; | |||||
| #if defined(_WIN32) || defined(_WIN64) | |||||
| clock_gettime(CLOCK_MONOTONIC, &ts); | |||||
| #else | |||||
| clock_gettime(CLOCK_MONOTONIC_RAW, &ts); | |||||
| #endif | |||||
| constexpr uint64_t kNSecondInSecond = 1000000000; | |||||
| uint64_t cur_time_stamp = ts.tv_sec * kNSecondInSecond + ts.tv_nsec; | |||||
| return cur_time_stamp; | |||||
| } | |||||
| std::shared_ptr<CPUProfiler> CPUProfiler::GetInstance() { | std::shared_ptr<CPUProfiler> CPUProfiler::GetInstance() { | ||||
| if (profiler_inst_ == nullptr) { | if (profiler_inst_ == nullptr) { | ||||
| profiler_inst_ = std::shared_ptr<CPUProfiler>(new (std::nothrow) CPUProfiler()); | profiler_inst_ = std::shared_ptr<CPUProfiler>(new (std::nothrow) CPUProfiler()); | ||||
| @@ -50,7 +38,7 @@ std::shared_ptr<CPUProfiler> CPUProfiler::GetInstance() { | |||||
| void CPUProfiler::Init(const std::string &profileDataPath = "") { | void CPUProfiler::Init(const std::string &profileDataPath = "") { | ||||
| MS_LOG(INFO) << "Initialize CPU Profiling"; | MS_LOG(INFO) << "Initialize CPU Profiling"; | ||||
| base_time_ = GetMonoTimeStamp(); | |||||
| base_time_ = GetHostMonoTimeStamp(); | |||||
| profile_data_path_ = profileDataPath; | profile_data_path_ = profileDataPath; | ||||
| MS_LOG(INFO) << " Host start time(ns): " << base_time_ << " profile data path: " << profile_data_path_; | MS_LOG(INFO) << " Host start time(ns): " << base_time_ << " profile data path: " << profile_data_path_; | ||||
| } | } | ||||
| @@ -75,34 +63,19 @@ void CPUProfiler::SetRunTimeData(const std::string &op_name, const uint32_t pid) | |||||
| pid_ = pid; | pid_ = pid; | ||||
| } | } | ||||
| void CPUProfiler::SetRunTimeData(const std::string &op_name, const float time_elapsed) { | |||||
| auto iter = op_info_map_.find(op_name); | |||||
| if (iter != op_info_map_.end()) { | |||||
| // The time unit is ms, convert to us | |||||
| iter->second.op_cost_time += time_elapsed; | |||||
| } | |||||
| } | |||||
| void CPUProfiler::SetRunTimeData(const std::string &op_name, const uint64_t start, const float duration) { | |||||
| auto iter = op_info_map_.find(op_name); | |||||
| if (iter != op_info_map_.end()) { | |||||
| iter->second.start_duration.emplace_back(StartDuration({start, duration})); | |||||
| } | |||||
| } | |||||
| void CPUProfiler::OpDataProducerBegin(const std::string op_name, const uint32_t pid) { | void CPUProfiler::OpDataProducerBegin(const std::string op_name, const uint32_t pid) { | ||||
| op_time_start_ = GetMonoTimeStamp(); | |||||
| op_time_mono_start_ = GetMonoTimeStamp(); | |||||
| op_time_start_ = GetHostMonoTimeStamp(); | |||||
| op_time_mono_start_ = GetHostMonoTimeStamp(); | |||||
| SetRunTimeData(op_name, pid); | SetRunTimeData(op_name, pid); | ||||
| } | } | ||||
| void CPUProfiler::OpDataProducerEnd() { | void CPUProfiler::OpDataProducerEnd() { | ||||
| float op_time_elapsed = 0; | float op_time_elapsed = 0; | ||||
| op_time_stop_ = GetMonoTimeStamp(); | |||||
| op_time_stop_ = GetHostMonoTimeStamp(); | |||||
| op_time_elapsed = (op_time_stop_ - op_time_start_) / kTimeUnit; | op_time_elapsed = (op_time_stop_ - op_time_start_) / kTimeUnit; | ||||
| MS_LOG(DEBUG) << "Host Time Elapsed(us)," << op_name_ << "," << op_time_elapsed; | MS_LOG(DEBUG) << "Host Time Elapsed(us)," << op_name_ << "," << op_time_elapsed; | ||||
| SetRunTimeData(op_name_, op_time_elapsed); | |||||
| SetRunTimeData(op_name_, op_time_mono_start_, op_time_elapsed); | |||||
| Profiler::SetRunTimeData(op_name_, op_time_elapsed); | |||||
| Profiler::SetRunTimeData(op_name_, op_time_mono_start_, op_time_elapsed); | |||||
| } | } | ||||
| void CPUProfiler::Stop() { | void CPUProfiler::Stop() { | ||||
| @@ -115,7 +88,7 @@ void CPUProfiler::SaveProfileData() { | |||||
| if (profile_data_path_.empty()) { | if (profile_data_path_.empty()) { | ||||
| MS_LOG(WARNING) << "Profile data path is empty, skip save profile data."; | MS_LOG(WARNING) << "Profile data path is empty, skip save profile data."; | ||||
| } else { | } else { | ||||
| DataSaver dataSaver; | |||||
| CpuDataSaver dataSaver; | |||||
| dataSaver.ParseOpInfo(op_info_map_); | dataSaver.ParseOpInfo(op_info_map_); | ||||
| dataSaver.WriteFile(profile_data_path_); | dataSaver.WriteFile(profile_data_path_); | ||||
| } | } | ||||
| @@ -14,73 +14,52 @@ | |||||
| * limitations under the License. | * limitations under the License. | ||||
| */ | */ | ||||
| #ifndef MINDSPORE_CPU_PROFILING_H | |||||
| #define MINDSPORE_CPU_PROFILING_H | |||||
| #ifndef MINDSPORE_CCSRC_PROFILER_DEVICE_CPU_PROFILING_H | |||||
| #define MINDSPORE_CCSRC_PROFILER_DEVICE_CPU_PROFILING_H | |||||
| #include <algorithm> | #include <algorithm> | ||||
| #include <cstdio> | #include <cstdio> | ||||
| #include <map> | #include <map> | ||||
| #include <memory> | #include <memory> | ||||
| #include <mutex> | |||||
| #include <string> | #include <string> | ||||
| #include <unordered_map> | #include <unordered_map> | ||||
| #include <utility> | #include <utility> | ||||
| #include <vector> | |||||
| #include "profiler/device/profiling.h" | |||||
| namespace mindspore { | namespace mindspore { | ||||
| namespace profiler { | namespace profiler { | ||||
| namespace cpu { | namespace cpu { | ||||
| struct StartDuration { | |||||
| uint64_t start_timestamp = 0l; | |||||
| float duration = 0l; | |||||
| }; | |||||
| struct OpInfo { | |||||
| std::string op_name; | |||||
| float op_cost_time = 0; | |||||
| int op_count = 0; | |||||
| std::vector<StartDuration> start_duration; | |||||
| uint32_t pid; | |||||
| }; | |||||
| const float kTimeUnit = 1000; | const float kTimeUnit = 1000; | ||||
| class CPUProfiler { | |||||
| class CPUProfiler : public Profiler { | |||||
| public: | public: | ||||
| static std::shared_ptr<CPUProfiler> GetInstance(); | static std::shared_ptr<CPUProfiler> GetInstance(); | ||||
| ~CPUProfiler() = default; | ~CPUProfiler() = default; | ||||
| CPUProfiler(const CPUProfiler &) = delete; | CPUProfiler(const CPUProfiler &) = delete; | ||||
| CPUProfiler &operator=(const CPUProfiler &) = delete; | CPUProfiler &operator=(const CPUProfiler &) = delete; | ||||
| void Init(const std::string &profileDataPath); | |||||
| void Stop(); | |||||
| void StepProfilingEnable(const bool enable_flag); | |||||
| bool GetEnableFlag() const { return enable_flag_; } | |||||
| void Init(const std::string &profileDataPath) override; | |||||
| void Stop() override; | |||||
| void StepProfilingEnable(const bool enable_flag) override; | |||||
| void OpDataProducerBegin(const std::string op_name, const uint32_t pid); | void OpDataProducerBegin(const std::string op_name, const uint32_t pid); | ||||
| void OpDataProducerEnd(); | |||||
| std::string ProfileDataPath() const { return profile_data_path_; } | |||||
| void OpDataProducerEnd() override; | |||||
| private: | private: | ||||
| CPUProfiler() = default; | CPUProfiler() = default; | ||||
| void ClearInst(); | |||||
| void SetRunTimeData(const std::string &op_name, const uint32_t pid); | void SetRunTimeData(const std::string &op_name, const uint32_t pid); | ||||
| void SetRunTimeData(const std::string &op_name, const float time_elapsed); | |||||
| void SetRunTimeData(const std::string &op_name, const uint64_t start, const float duration); | |||||
| void SaveProfileData() override; | |||||
| void ClearInst() override; | |||||
| static std::shared_ptr<CPUProfiler> profiler_inst_; | static std::shared_ptr<CPUProfiler> profiler_inst_; | ||||
| bool enable_flag_ = false; | |||||
| std::unordered_map<std::string, OpInfo> op_info_map_; | |||||
| uint64_t base_time_; | uint64_t base_time_; | ||||
| std::string op_name_; | std::string op_name_; | ||||
| uint32_t pid_; | uint32_t pid_; | ||||
| void SaveProfileData(); | |||||
| uint64_t op_time_start_; | uint64_t op_time_start_; | ||||
| uint64_t op_time_mono_start_; | uint64_t op_time_mono_start_; | ||||
| uint64_t op_time_stop_; | uint64_t op_time_stop_; | ||||
| std::string profile_data_path_; | |||||
| }; | }; | ||||
| } // namespace cpu | } // namespace cpu | ||||
| } // namespace profiler | } // namespace profiler | ||||
| } // namespace mindspore | } // namespace mindspore | ||||
| #endif // MINDSPORE_CPU_PROFILING_H | |||||
| #endif // MINDSPORE_CCSRC_PROFILER_DEVICE_CPU_PROFILING_H | |||||
| @@ -0,0 +1,177 @@ | |||||
| /** | |||||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "profiler/device/data_saver.h" | |||||
| #include <fstream> | |||||
| #include <numeric> | |||||
| #include "sys/stat.h" | |||||
| #include "utils/log_adapter.h" | |||||
| #include "utils/ms_utils.h" | |||||
| #include "utils/ms_context.h" | |||||
| namespace mindspore { | |||||
| namespace profiler { | |||||
| // Builds the per-operator detail record from a raw OpInfo. | |||||
| // op_info:    raw record of one operator; its op_name is expected to look like | |||||
| //             'xxx/xxx/{op_type}-op{node_id}'. | |||||
| // proportion: this operator's share of the total op time, computed by the caller. | |||||
| OpDetailInfo::OpDetailInfo(std::shared_ptr<OpInfo> op_info, float proportion) | |||||
|     : op_info_(op_info), proportion_(proportion) { | |||||
|   if (op_info_ == nullptr) { | |||||
|     // Nothing to parse: the name fields stay empty and the average stays 0. | |||||
|     MS_LOG(ERROR) << "The op info is null, skip parsing the op detail info."; | |||||
|     return; | |||||
|   } | |||||
|   op_full_name_ = op_info_->op_name; | |||||
|   // rfind returns npos when there is no '/'; npos + 1 wraps to 0, the start of the name. | |||||
|   const auto op_type_begin = op_full_name_.rfind('/') + 1; | |||||
|   // When no '-' follows, the count below is huge and substr clamps it to the end of the string. | |||||
|   const auto op_type_end = op_full_name_.rfind('-'); | |||||
|   op_type_ = op_full_name_.substr(op_type_begin, op_type_end - op_type_begin); | |||||
|   op_name_ = op_full_name_.substr(op_type_begin); | |||||
|   // Guard the division: a record with a zero count has no meaningful average. | |||||
|   if (op_info_->op_count != 0) { | |||||
|     op_avg_time_ = op_info_->op_host_cost_time / op_info_->op_count; | |||||
|   } | |||||
| } | |||||
| // Converts the raw per-operator records into the report structures | |||||
| // op_detail_infos_, op_type_infos_ and op_timestamps_map_. | |||||
| // op_info_maps: map from operator full name to its raw OpInfo. | |||||
| void DataSaver::ParseOpInfo(const OpInfoMap &op_info_maps) { | |||||
|   op_detail_infos_.reserve(op_info_maps.size()); | |||||
|   const float total_time_sum = GetTotalOpTime(op_info_maps); | |||||
|   // Iterate by reference: every entry carries a vector of timestamps that is costly to copy. | |||||
|   for (const auto &item : op_info_maps) { | |||||
|     op_timestamps_map_[item.first] = item.second.start_duration; | |||||
|     // With no recorded time the ratio would be 0/0 (NaN); report 0 instead. | |||||
|     const float proportion = total_time_sum > 0 ? item.second.op_host_cost_time / total_time_sum : 0.0f; | |||||
|     auto op_info = std::make_shared<OpInfo>(item.second); | |||||
|     OpDetailInfo op_detail_info = OpDetailInfo(op_info, proportion); | |||||
|     op_detail_infos_.emplace_back(op_detail_info); | |||||
|     AddOpDetailInfoForType(op_detail_info); | |||||
|   } | |||||
|   // update average time of op type | |||||
|   for (auto &op_type : op_type_infos_) { | |||||
|     // op_type: <type_name, op_type_info>; skip the division when nothing was counted. | |||||
|     if (op_type.second.count_ != 0) { | |||||
|       op_type.second.avg_time_ = op_type.second.total_time_ / op_type.second.count_; | |||||
|     } | |||||
|   } | |||||
|   MS_LOG(DEBUG) << "Get " << op_detail_infos_.size() << " operation items."; | |||||
|   MS_LOG(DEBUG) << "Get " << op_type_infos_.size() << " operation type items."; | |||||
| } | |||||
| // Folds one operator's detail record into the per-type statistics in op_type_infos_. | |||||
| // The first record of a type creates the entry; later records are accumulated with +=. | |||||
| void DataSaver::AddOpDetailInfoForType(const OpDetailInfo &op_detail_info) { | |||||
|   const auto &raw_info = op_detail_info.op_info_; | |||||
|   // Field order: type name, count, step, total time, average time, proportion. | |||||
|   OpType type_entry = OpType{op_detail_info.op_type_, | |||||
|                              raw_info->op_count, | |||||
|                              raw_info->op_count, | |||||
|                              raw_info->op_host_cost_time, | |||||
|                              0, | |||||
|                              op_detail_info.proportion_}; | |||||
|   // emplace leaves an existing entry untouched and reports whether it inserted. | |||||
|   const auto inserted = op_type_infos_.emplace(op_detail_info.op_type_, type_entry); | |||||
|   if (!inserted.second) { | |||||
|     inserted.first->second += type_entry; | |||||
|   } | |||||
| } | |||||
| // Returns the sum of op_host_cost_time over every operator in op_info_maps. | |||||
| // An empty map yields 0. | |||||
| float DataSaver::GetTotalOpTime(const OpInfoMap &op_info_maps) { | |||||
|   // Take each entry by const reference so the key string and the timestamp vector are not copied. | |||||
|   const float sum = std::accumulate(op_info_maps.begin(), op_info_maps.end(), 0.0f, | |||||
|                                     [](float acc, const auto &entry) { return acc + entry.second.op_host_cost_time; }); | |||||
|   MS_LOG(DEBUG) << "The total op time is " << sum; | |||||
|   return sum; | |||||
| } | |||||
| // Writes the per-type statistics to '<saver_base_dir>/<op_side_>_op_type_info_<device_id_>.csv'. | |||||
| // The header and row layout depend on op_side_ ("cpu" or "gpu"). | |||||
| void DataSaver::WriteOpType(const std::string &saver_base_dir) { | |||||
|   std::string file_path = saver_base_dir + "/" + op_side_ + "_op_type_info_" + device_id_ + ".csv"; | |||||
|   std::ofstream ofs(file_path); | |||||
|   // check if the file is writable | |||||
|   if (!ofs.is_open()) { | |||||
|     MS_LOG(WARNING) << "Open file '" << file_path << "' failed!"; | |||||
|     return; | |||||
|   } | |||||
|   try { | |||||
|     // write op type info into file | |||||
|     if (op_side_ == "cpu") { | |||||
|       ofs << OpType().GetCpuHeader() << std::endl; | |||||
|       for (auto op_type_info : op_type_infos_) { | |||||
|         op_type_info.second.OutputCpuOpTypeInfo(ofs); | |||||
|       } | |||||
|     } else if (op_side_ == "gpu") { | |||||
|       ofs << OpType().GetGpuHeader() << std::endl; | |||||
|       for (auto op_type_info : op_type_infos_) { | |||||
|         op_type_info.second.OutputGpuOpTypeInfo(ofs); | |||||
|       } | |||||
|     } else { | |||||
|       // Neither layout applies, so the file stays empty; say so instead of failing silently. | |||||
|       MS_LOG(WARNING) << "Unknown op side '" << op_side_ << "', no op type info is written."; | |||||
|     } | |||||
|   } catch (const std::exception &e) { | |||||
|     MS_LOG(ERROR) << "Write " << file_path << " failed: " << e.what(); | |||||
|   } | |||||
|   ofs.close(); | |||||
|   ChangeFileMode(file_path); | |||||
|   MS_LOG(INFO) << "Write " << op_type_infos_.size() << " op type infos into file: " << file_path; | |||||
| } | |||||
| // Writes the per-operator details to '<saver_base_dir>/<op_side_>_op_detail_info_<device_id_>.csv'. | |||||
| // The header and row layout depend on op_side_ ("cpu" or "gpu"). | |||||
| void DataSaver::WriteOpDetail(const std::string &saver_base_dir) { | |||||
|   std::string file_path = saver_base_dir + "/" + op_side_ + "_op_detail_info_" + device_id_ + ".csv"; | |||||
|   std::ofstream ofs(file_path); | |||||
|   if (!ofs.is_open()) { | |||||
|     MS_LOG(WARNING) << "Open file '" << file_path << "' failed!"; | |||||
|     return; | |||||
|   } | |||||
|   try { | |||||
|     // write op detail info into file | |||||
|     // Rows are visited by reference: each OpDetailInfo holds three strings and a shared_ptr. | |||||
|     if (op_side_ == "cpu") { | |||||
|       ofs << OpDetailInfo().GetCpuHeader() << std::endl; | |||||
|       for (auto &op_detail : op_detail_infos_) { | |||||
|         op_detail.OutputCpuOpDetailInfo(ofs); | |||||
|       } | |||||
|     } else if (op_side_ == "gpu") { | |||||
|       ofs << OpDetailInfo().GetGpuHeader() << std::endl; | |||||
|       for (auto &op_detail : op_detail_infos_) { | |||||
|         op_detail.OutputGpuOpDetailInfo(ofs); | |||||
|       } | |||||
|     } else { | |||||
|       // Neither layout applies, so the file stays empty; say so instead of failing silently. | |||||
|       MS_LOG(WARNING) << "Unknown op side '" << op_side_ << "', no op detail info is written."; | |||||
|     } | |||||
|   } catch (const std::exception &e) { | |||||
|     MS_LOG(ERROR) << "Write " << file_path << " failed: " << e.what(); | |||||
|   } | |||||
|   ofs.close(); | |||||
|   ChangeFileMode(file_path); | |||||
|   MS_LOG(INFO) << "Write " << op_detail_infos_.size() << " op detail infos into file: " << file_path; | |||||
| } | |||||
| // Writes the execution timestamps to '<saver_base_dir>/<op_side_>_op_execute_timestamp_<device_id_>.txt'. | |||||
| // Each line is '<op_full_name>;<group>;' followed by space-separated '<start_timestamp>,<duration>' pairs. | |||||
| void DataSaver::WriteOpTimestamp(const std::string &saver_base_dir) { | |||||
|   std::string file_path = saver_base_dir + "/" + op_side_ + "_op_execute_timestamp_" + device_id_ + ".txt"; | |||||
|   std::ofstream ofs(file_path); | |||||
|   // check if the file is writable | |||||
|   if (!ofs.is_open()) { | |||||
|     MS_LOG(WARNING) << "Open file '" << file_path << "' failed!"; | |||||
|     return; | |||||
|   } | |||||
|   // The group tag does not change per operator, so pick it once; any side other than "cpu" is tagged GpuOps. | |||||
|   const char *op_group = (op_side_ == "cpu") ? ";HostCpuOps;" : ";GpuOps;"; | |||||
|   try { | |||||
|     // write op timestamp info into file | |||||
|     for (const auto &op_timestamp_info : op_timestamps_map_) { | |||||
|       ofs << op_timestamp_info.first << op_group; | |||||
|       for (const auto &start_end : op_timestamp_info.second) { | |||||
|         ofs << start_end.start_timestamp << "," << start_end.duration << " "; | |||||
|       } | |||||
|       ofs << std::endl; | |||||
|     } | |||||
|   } catch (const std::exception &e) { | |||||
|     MS_LOG(ERROR) << "Write " << file_path << " failed: " << e.what(); | |||||
|   } | |||||
|   ofs.close(); | |||||
|   ChangeFileMode(file_path); | |||||
| } | |||||
| // Sets the mode of file_path to owner read and write only. | |||||
| // A chmod failure is logged as a warning and otherwise ignored. | |||||
| void DataSaver::ChangeFileMode(const std::string &file_path) { | |||||
|   const int chmod_ret = chmod(common::SafeCStr(file_path), S_IRUSR | S_IWUSR); | |||||
|   if (chmod_ret != -1) { | |||||
|     return; | |||||
|   } | |||||
|   MS_LOG(WARNING) << "Modify file: " << file_path << " to rw fail."; | |||||
| } | |||||
| } // namespace profiler | |||||
| } // namespace mindspore | |||||
| @@ -0,0 +1,127 @@ | |||||
| /** | |||||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_PROFILER_DEVICE_DATA_SAVER_H | |||||
| #define MINDSPORE_CCSRC_PROFILER_DEVICE_DATA_SAVER_H | |||||
| #include <iostream> | |||||
| #include <algorithm> | |||||
| #include <unordered_map> | |||||
| #include <vector> | |||||
| #include <string> | |||||
| #include <memory> | |||||
| #include "profiler/device/profiling.h" | |||||
| namespace mindspore { | |||||
| namespace profiler { | |||||
| struct OpDetailInfo { | |||||
| std::string op_type_; | |||||
| std::string op_name_; | |||||
| std::string op_full_name_; | |||||
| std::shared_ptr<OpInfo> op_info_{nullptr}; | |||||
| float op_avg_time_{0}; | |||||
| float proportion_{0}; | |||||
| OpDetailInfo() = default; | |||||
| OpDetailInfo(std::shared_ptr<OpInfo> op_info, float proportion); | |||||
| std::string GetCpuHeader() const { | |||||
| return "op_side,op_type,op_name,full_op_name,op_occurrences,op_total_time(ms)," | |||||
| "op_avg_time(ms),total_proportion,subgraph,pid"; | |||||
| } | |||||
| std::string GetGpuHeader() const { | |||||
| return "op_side,op_type,op_name,op_full_name,op_occurrences,op_total_time(us),op_avg_time(us),total_proportion," | |||||
| "cuda_activity_cost_time(us),cuda_activity_call_count"; | |||||
| } | |||||
| void OutputCpuOpDetailInfo(std::ostream &os) { | |||||
| os << "Host," << op_type_ << ',' << op_name_ << ',' << op_full_name_ << ',' << op_info_->op_count << ',' | |||||
| << op_info_->op_host_cost_time << ',' << op_avg_time_ << ',' << proportion_ << ",Default," << op_info_->pid | |||||
| << std::endl; | |||||
| } | |||||
| void OutputGpuOpDetailInfo(std::ostream &os) { | |||||
| os << "Device," << op_type_ << ',' << op_name_ << ',' << op_full_name_ << ',' << op_info_->op_count << ',' | |||||
| << op_info_->op_host_cost_time << ',' << op_avg_time_ << ',' << proportion_ << ',' | |||||
| << op_info_->cupti_activity_time << ',' << op_info_->op_kernel_count << std::endl; | |||||
| } | |||||
| }; | |||||
// Statistics of one operator type. Entries for the same type are merged with
// operator+=, which sums count_, total_time_ and proportion_.
struct OpType {
  std::string op_type_;
  int count_{0};
  int step_{0};
  float total_time_{0};
  float avg_time_{0};  // printed by the GPU row only; the CPU row derives its own average
  float proportion_{0};

  // Column names matching the row layout of OutputCpuOpTypeInfo.
  std::string GetCpuHeader() const {
    return "op_type,type_occurrences,execution_frequency(per-step),"
           "total_compute_time,avg_time(ms),percent";
  }

  // Column names matching the row layout of OutputGpuOpTypeInfo.
  std::string GetGpuHeader() const { return "op_type,type_occurrences,total_time(us),total_proportion,avg_time(us)"; }

  // Writes one CPU CSV row terminated by std::endl:
  // op_type, count, count/step, total time, total time/count, proportion.
  void OutputCpuOpTypeInfo(std::ostream &os) const {
    // step_ defaults to 0 and integer division by zero is undefined behaviour,
    // so a zero step count is reported as a frequency of 0.
    const int frequency = (step_ != 0) ? count_ / step_ : 0;
    // A zero count_ would print inf/nan into the CSV; report 0 instead.
    const float avg_time = (count_ != 0) ? total_time_ / count_ : 0.0f;
    os << op_type_ << ',' << count_ << ',' << frequency << ',' << total_time_ << ',' << avg_time << ','
       << proportion_ << std::endl;
  }

  // Writes one GPU CSV row terminated by std::endl:
  // op_type, count, total time, proportion, avg_time_.
  void OutputGpuOpTypeInfo(std::ostream &os) const {
    os << op_type_ << ',' << count_ << ',' << total_time_ << ',' << proportion_ << ',' << avg_time_ << std::endl;
  }

  // Accumulates another entry into this one. op_type_, step_ and avg_time_ are left untouched.
  OpType &operator+=(const OpType &other) {
    this->count_ += other.count_;
    this->total_time_ += other.total_time_;
    this->proportion_ += other.proportion_;
    return *this;
  }
};
// Container aliases used by DataSaver and the device-specific savers.
| using OpTimestampInfo = std::unordered_map<std::string, std::vector<StartDuration>>; // <op_full_name, list of StartDuration> | |||||
| using OpInfoMap = std::unordered_map<std::string, OpInfo>; | |||||
| using OpTypeInfos = std::unordered_map<std::string, OpType>; // presumably <op_type, OpType> rather than op_full_name - confirm against AddOpDetailInfoForType | |||||
| using OpDetailInfos = std::vector<OpDetailInfo>; | |||||
// Base class that keeps op-level profiling data and the file writers shared by
// device-specific savers (gpu::GpuDataSaver derives from it). The member
// functions are defined out of line in the matching .cc file.
| class DataSaver { | |||||
| public: | |||||
| DataSaver() = default; | |||||
// Virtual so that derived savers can be destroyed through a DataSaver pointer.
| virtual ~DataSaver() = default; | |||||
// Takes the per-op info map collected by a profiler; NOTE(review): presumably
// fills the three containers below - definition is not visible here, confirm.
| void ParseOpInfo(const OpInfoMap &op_info_maps); | |||||
// Protected: helpers and state are meant to be used by derived savers.
| protected: | |||||
| void AddOpDetailInfoForType(const OpDetailInfo &op_detail_info); | |||||
| float GetTotalOpTime(const OpInfoMap &op_info_maps); | |||||
// Writers take the output directory; each one builds its own file name.
| void WriteOpType(const std::string &saver_base_dir); | |||||
| void WriteOpDetail(const std::string &saver_base_dir); | |||||
// Tags every line with ";HostCpuOps;" when op_side_ is "cpu" and with
// ";GpuOps;" otherwise.
| void WriteOpTimestamp(const std::string &saver_base_dir); | |||||
// chmod()s the file to S_IRUSR | S_IWUSR and logs a warning on failure.
| void ChangeFileMode(const std::string &file_path); | |||||
| OpTypeInfos op_type_infos_; | |||||
| OpDetailInfos op_detail_infos_; | |||||
| OpTimestampInfo op_timestamps_map_; | |||||
// Compared against "cpu" by WriteOpTimestamp; GpuDataSaver::WriteFile sets it to "gpu".
| std::string op_side_; | |||||
// GpuDataSaver::WriteFile sets this from the first key of its activity map.
| std::string device_id_; | |||||
| }; | |||||
| } // namespace profiler | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_PROFILER_DEVICE_DATA_SAVER_H | |||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -13,7 +13,7 @@ | |||||
| * See the License for the specific language governing permissions and | * See the License for the specific language governing permissions and | ||||
| * limitations under the License. | * limitations under the License. | ||||
| */ | */ | ||||
| #include "profiler/device/gpu/data_saver.h" | |||||
| #include "profiler/device/gpu/gpu_data_saver.h" | |||||
| #include <fstream> | #include <fstream> | ||||
| #include <numeric> | #include <numeric> | ||||
| #include "sys/stat.h" | #include "sys/stat.h" | ||||
| @@ -23,17 +23,6 @@ | |||||
| namespace mindspore { | namespace mindspore { | ||||
| namespace profiler { | namespace profiler { | ||||
| namespace gpu { | namespace gpu { | ||||
| OpDetailInfo::OpDetailInfo(std::shared_ptr<OpInfo> op_info, float proportion) | |||||
| : op_info_(op_info), proportion_(proportion) { | |||||
| // op_full_name is like 'xxx/xxx/{op_type}-op{node_id}' | |||||
| op_full_name_ = op_info->op_name; | |||||
| auto op_type_begin_iter = op_full_name_.rfind('/') + 1; | |||||
| auto op_type_end_iter = op_full_name_.rfind('-'); | |||||
| op_type_ = op_full_name_.substr(op_type_begin_iter, op_type_end_iter - op_type_begin_iter); | |||||
| op_name_ = op_full_name_.substr(op_type_begin_iter); | |||||
| op_avg_time_ = op_info->op_host_cost_time / op_info->op_count; | |||||
| } | |||||
| ActivityData::ActivityData(std::shared_ptr<Event> data) : basic_info_(data) { | ActivityData::ActivityData(std::shared_ptr<Event> data) : basic_info_(data) { | ||||
| grid_dim_ = basic_info_->activity_type == ActivityType::kKernel | grid_dim_ = basic_info_->activity_type == ActivityType::kKernel | ||||
| ? "\"" + std::to_string(basic_info_->kernel_info.grid_x) + ',' + | ? "\"" + std::to_string(basic_info_->kernel_info.grid_x) + ',' + | ||||
| @@ -65,49 +54,7 @@ ActivityData &ActivityData::operator+=(const ActivityData &other) { | |||||
| return *this; | return *this; | ||||
| } | } | ||||
| void DataSaver::ParseOpInfo(const OpInfoMap &op_info_maps) { | |||||
| op_detail_infos_.reserve(op_info_maps.size()); | |||||
| float total_time_sum = GetTotalOpTime(op_info_maps); | |||||
| for (auto item : op_info_maps) { | |||||
| op_timestamps_map_[item.first] = item.second.start_duration; | |||||
| float proportion = item.second.op_host_cost_time / total_time_sum; | |||||
| auto op_info = std::make_shared<OpInfo>(item.second); | |||||
| OpDetailInfo op_detail_info = OpDetailInfo(op_info, proportion); | |||||
| op_detail_infos_.emplace_back(op_detail_info); | |||||
| AddOpDetailInfoForType(op_detail_info); | |||||
| } | |||||
| // update average time of op type | |||||
| for (auto &op_type : op_type_infos_) { | |||||
| // device_infos: <type_name, op_type_info> | |||||
| op_type.second.avg_time_ = op_type.second.total_time_ / op_type.second.count_; | |||||
| } | |||||
| MS_LOG(DEBUG) << "Get " << op_detail_infos_.size() << " operation items."; | |||||
| MS_LOG(DEBUG) << "Get " << op_type_infos_.size() << " operation type items."; | |||||
| } | |||||
| void DataSaver::AddOpDetailInfoForType(const OpDetailInfo &op_detail_info) { | |||||
| // Construct OpType object according to op detail info | |||||
| OpType op_type = OpType{op_detail_info.op_type_, op_detail_info.op_info_->op_count, | |||||
| op_detail_info.op_info_->op_host_cost_time, 0, op_detail_info.proportion_}; | |||||
| // Set the OpType into op_type_infos_ map | |||||
| std::string type_name = op_detail_info.op_type_; | |||||
| auto iter = op_type_infos_.find(type_name); | |||||
| if (iter == op_type_infos_.end()) { | |||||
| op_type_infos_.emplace(type_name, op_type); | |||||
| } else { | |||||
| iter->second += op_type; | |||||
| } | |||||
| } | |||||
| float DataSaver::GetTotalOpTime(const OpInfoMap &op_info_maps) { | |||||
| float sum = 0; | |||||
| sum = std::accumulate(op_info_maps.begin(), op_info_maps.end(), sum, | |||||
| [](float i, auto iter) { return i + iter.second.op_host_cost_time; }); | |||||
| MS_LOG(DEBUG) << "The total op time is " << sum; | |||||
| return sum; | |||||
| } | |||||
| void DataSaver::ParseEvent(const std::vector<Event> &events) { | |||||
| void GpuDataSaver::ParseEvent(const std::vector<Event> &events) { | |||||
| // Put Kernel activity events into activity_infos_ | // Put Kernel activity events into activity_infos_ | ||||
| for (const auto &event : events) { | for (const auto &event : events) { | ||||
| if (event.op_name.empty() || event.api_type != CUPTIApiType::kActivity || | if (event.op_name.empty() || event.api_type != CUPTIApiType::kActivity || | ||||
| @@ -127,7 +74,7 @@ void DataSaver::ParseEvent(const std::vector<Event> &events) { | |||||
| } | } | ||||
| } | } | ||||
| void DataSaver::AddKernelEvent(const Event &event) { | |||||
| void GpuDataSaver::AddKernelEvent(const Event &event) { | |||||
| // Put kernel event to activity_infos according to device id | // Put kernel event to activity_infos according to device id | ||||
| uint32_t device_id = event.device_id; | uint32_t device_id = event.device_id; | ||||
| auto iter = activity_infos_.find(device_id); | auto iter = activity_infos_.find(device_id); | ||||
| @@ -139,7 +86,7 @@ void DataSaver::AddKernelEvent(const Event &event) { | |||||
| } | } | ||||
| } | } | ||||
| void DataSaver::AddKernelEventToDevice(const Event &event, DeviceActivityInfos *device_activity_infos) { | |||||
| void GpuDataSaver::AddKernelEventToDevice(const Event &event, DeviceActivityInfos *device_activity_infos) { | |||||
| // Combine kernel activity with same kernel name | // Combine kernel activity with same kernel name | ||||
| auto event_ptr = std::make_shared<Event>(event); | auto event_ptr = std::make_shared<Event>(event); | ||||
| ActivityData activity_data = ActivityData(event_ptr); | ActivityData activity_data = ActivityData(event_ptr); | ||||
| @@ -153,7 +100,7 @@ void DataSaver::AddKernelEventToDevice(const Event &event, DeviceActivityInfos * | |||||
| } | } | ||||
| } | } | ||||
| void DataSaver::WriteFile(std::string out_path_dir, const BaseTime &start_time) { | |||||
| void GpuDataSaver::WriteFile(std::string out_path_dir, const BaseTime &start_time) { | |||||
| if (out_path_dir.empty()) { | if (out_path_dir.empty()) { | ||||
| MS_LOG(WARNING) << "Output directory. Ignore the writing data."; | MS_LOG(WARNING) << "Output directory. Ignore the writing data."; | ||||
| return; | return; | ||||
| @@ -164,6 +111,7 @@ void DataSaver::WriteFile(std::string out_path_dir, const BaseTime &start_time) | |||||
| } | } | ||||
| // not support multi-device for operator info per process yet | // not support multi-device for operator info per process yet | ||||
| device_id_ = std::to_string(activity_infos_.begin()->first); | device_id_ = std::to_string(activity_infos_.begin()->first); | ||||
| op_side_ = "gpu"; | |||||
| WriteOpDetail(out_path_dir); | WriteOpDetail(out_path_dir); | ||||
| WriteOpType(out_path_dir); | WriteOpType(out_path_dir); | ||||
| WriteActivity(out_path_dir); | WriteActivity(out_path_dir); | ||||
| @@ -172,42 +120,7 @@ void DataSaver::WriteFile(std::string out_path_dir, const BaseTime &start_time) | |||||
| WriteStartTime(out_path_dir, start_time); | WriteStartTime(out_path_dir, start_time); | ||||
| } | } | ||||
| void DataSaver::WriteOpType(const std::string &saver_base_dir) { | |||||
| std::string file_path = saver_base_dir + "/gpu_op_type_info_" + device_id_ + ".csv"; | |||||
| std::ofstream ofs(file_path); | |||||
| // check if the file is writable | |||||
| if (!ofs.is_open()) { | |||||
| MS_LOG(WARNING) << "Open file '" << file_path << "' failed!"; | |||||
| return; | |||||
| } | |||||
| // write op type info into file | |||||
| ofs << OpType().GetHeader() << std::endl; | |||||
| for (auto op_type_info : op_type_infos_) { | |||||
| ofs << op_type_info.second << std::endl; | |||||
| } | |||||
| ofs.close(); | |||||
| ChangeFileMode(file_path); | |||||
| MS_LOG(INFO) << "Write " << op_type_infos_.size() << " op type infos into file: " << file_path; | |||||
| } | |||||
| void DataSaver::WriteOpDetail(const std::string &saver_base_dir) { | |||||
| std::string file_path = saver_base_dir + "/gpu_op_detail_info_" + device_id_ + ".csv"; | |||||
| std::ofstream ofs(file_path); | |||||
| if (!ofs.is_open()) { | |||||
| MS_LOG(WARNING) << "Open file '" << file_path << "' failed!"; | |||||
| return; | |||||
| } | |||||
| // write op detail info into file | |||||
| ofs << OpDetailInfo().GetHeader() << std::endl; | |||||
| for (auto op_detail : op_detail_infos_) { | |||||
| ofs << op_detail << std::endl; | |||||
| } | |||||
| ofs.close(); | |||||
| ChangeFileMode(file_path); | |||||
| MS_LOG(INFO) << "Write " << op_detail_infos_.size() << " op detail infos into file: " << file_path; | |||||
| } | |||||
| void DataSaver::WriteActivity(const std::string &saver_base_dir) { | |||||
| void GpuDataSaver::WriteActivity(const std::string &saver_base_dir) { | |||||
| std::string file_path_base = saver_base_dir + "/gpu_activity_data_"; | std::string file_path_base = saver_base_dir + "/gpu_activity_data_"; | ||||
| std::string timestamp_file_path_base = saver_base_dir + "/activity_execute_timestamp_"; | std::string timestamp_file_path_base = saver_base_dir + "/activity_execute_timestamp_"; | ||||
| for (auto device_info : activity_infos_) { | for (auto device_info : activity_infos_) { | ||||
| @@ -244,27 +157,7 @@ void DataSaver::WriteActivity(const std::string &saver_base_dir) { | |||||
| } | } | ||||
| } | } | ||||
| void DataSaver::WriteOpTimestamp(const std::string &saver_base_dir) { | |||||
| std::string file_path = saver_base_dir + "/op_execute_timestamp_" + device_id_ + ".txt"; | |||||
| std::ofstream ofs(file_path); | |||||
| // check if the file is writable | |||||
| if (!ofs.is_open()) { | |||||
| MS_LOG(WARNING) << "Open file '" << file_path << "' failed!"; | |||||
| return; | |||||
| } | |||||
| // write op timestamp info into file | |||||
| for (const auto &op_timestamp_info : op_timestamps_map_) { | |||||
| ofs << op_timestamp_info.first << ";Ops;"; | |||||
| for (auto start_end : op_timestamp_info.second) { | |||||
| ofs << start_end.start_timestamp << "," << start_end.duration << " "; | |||||
| } | |||||
| ofs << std::endl; | |||||
| } | |||||
| ofs.close(); | |||||
| ChangeFileMode(file_path); | |||||
| } | |||||
| void DataSaver::WriteStepTrace(const std::string &saver_base_dir) { | |||||
| void GpuDataSaver::WriteStepTrace(const std::string &saver_base_dir) { | |||||
| std::string file_path = saver_base_dir + "/step_trace_profiling_" + device_id_ + ".txt"; | std::string file_path = saver_base_dir + "/step_trace_profiling_" + device_id_ + ".txt"; | ||||
| std::ofstream ofs(file_path); | std::ofstream ofs(file_path); | ||||
| // check if the file is writable | // check if the file is writable | ||||
| @@ -308,7 +201,7 @@ void DataSaver::WriteStepTrace(const std::string &saver_base_dir) { | |||||
| MS_LOG(INFO) << "Write step trace infos into file: " << file_path; | MS_LOG(INFO) << "Write step trace infos into file: " << file_path; | ||||
| } | } | ||||
| void DataSaver::WriteStartTime(const std::string &saver_base_dir, const BaseTime &start_time) { | |||||
| void GpuDataSaver::WriteStartTime(const std::string &saver_base_dir, const BaseTime &start_time) { | |||||
| std::string file_path = saver_base_dir + "/start_time_" + device_id_ + ".txt"; | std::string file_path = saver_base_dir + "/start_time_" + device_id_ + ".txt"; | ||||
| std::ofstream ofs(file_path); | std::ofstream ofs(file_path); | ||||
| // check if the file is writable | // check if the file is writable | ||||
| @@ -330,14 +223,7 @@ void DataSaver::WriteStartTime(const std::string &saver_base_dir, const BaseTime | |||||
| MS_LOG(INFO) << "Write profiler start time infos into file: " << file_path; | MS_LOG(INFO) << "Write profiler start time infos into file: " << file_path; | ||||
| } | } | ||||
| void DataSaver::SetStepTraceOpName(ProfilingTraceInfo trace_op_name) { step_trace_op_name = trace_op_name; } | |||||
| void DataSaver::ChangeFileMode(const std::string &file_path) { | |||||
| if (chmod(common::SafeCStr(file_path), S_IRUSR) == -1) { | |||||
| MS_LOG(WARNING) << "Modify file:" << file_path << " to rw fail."; | |||||
| return; | |||||
| } | |||||
| } | |||||
| void GpuDataSaver::SetStepTraceOpName(ProfilingTraceInfo trace_op_name) { step_trace_op_name = trace_op_name; } | |||||
| } // namespace gpu | } // namespace gpu | ||||
| } // namespace profiler | } // namespace profiler | ||||
| } // namespace mindspore | } // namespace mindspore | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -14,8 +14,8 @@ | |||||
| * limitations under the License. | * limitations under the License. | ||||
| */ | */ | ||||
| #ifndef MINDSPORE_DATA_SAVER_H | |||||
| #define MINDSPORE_DATA_SAVER_H | |||||
| #ifndef MINDSPORE_CCSRC_PROFILER_DEVICE_GPU_GPU_DATA_SAVER_H | |||||
| #define MINDSPORE_CCSRC_PROFILER_DEVICE_GPU_GPU_DATA_SAVER_H | |||||
| #include <iostream> | #include <iostream> | ||||
| #include <algorithm> | #include <algorithm> | ||||
| #include <unordered_map> | #include <unordered_map> | ||||
| @@ -23,57 +23,10 @@ | |||||
| #include <string> | #include <string> | ||||
| #include <memory> | #include <memory> | ||||
| #include "profiler/device/gpu/gpu_profiling.h" | #include "profiler/device/gpu/gpu_profiling.h" | ||||
| #include "profiler/device/data_saver.h" | |||||
| namespace mindspore { | namespace mindspore { | ||||
| namespace profiler { | namespace profiler { | ||||
| namespace gpu { | namespace gpu { | ||||
| struct OpDetailInfo { | |||||
| std::string op_type_; | |||||
| std::string op_name_; | |||||
| std::string op_full_name_; | |||||
| std::shared_ptr<OpInfo> op_info_{nullptr}; | |||||
| float op_avg_time_{0}; | |||||
| float proportion_{0}; | |||||
| OpDetailInfo() = default; | |||||
| OpDetailInfo(std::shared_ptr<OpInfo> op_info, float proportion); | |||||
| std::string GetHeader() const { | |||||
| return "op_side,op_type,op_name,op_full_name,op_occurrences,op_total_time(us),op_avg_time(us),total_proportion," | |||||
| "cuda_activity_cost_time(us),cuda_activity_call_count"; | |||||
| } | |||||
| friend std::ostream &operator<<(std::ostream &os, const OpDetailInfo &event) { | |||||
| os << "Device," << event.op_type_ << ',' << event.op_name_ << ',' << event.op_full_name_ << ',' | |||||
| << event.op_info_->op_count << ',' << event.op_info_->op_host_cost_time << ',' << event.op_avg_time_ << ',' | |||||
| << event.proportion_ << ',' << event.op_info_->cupti_activity_time << ',' << event.op_info_->op_kernel_count; | |||||
| return os; | |||||
| } | |||||
| }; | |||||
| struct OpType { | |||||
| std::string op_type_; | |||||
| int count_{0}; | |||||
| float total_time_{0}; | |||||
| float avg_time_{0}; | |||||
| float proportion_{0}; | |||||
| std::string GetHeader() const { return "op_type,type_occurrences,total_time(us),total_proportion,avg_time(us)"; } | |||||
| friend std::ostream &operator<<(std::ostream &os, const OpType &event) { | |||||
| os << event.op_type_ << ',' << event.count_ << ',' << event.total_time_ << ',' << event.proportion_ << ',' | |||||
| << event.avg_time_; | |||||
| return os; | |||||
| } | |||||
| OpType &operator+=(const OpType &other) { | |||||
| this->count_ += other.count_; | |||||
| this->total_time_ += other.total_time_; | |||||
| this->proportion_ += other.proportion_; | |||||
| return *this; | |||||
| } | |||||
| }; | |||||
| struct ActivityData { | struct ActivityData { | ||||
| std::shared_ptr<Event> basic_info_{nullptr}; | std::shared_ptr<Event> basic_info_{nullptr}; | ||||
| std::string block_dim_; | std::string block_dim_; | ||||
| @@ -105,25 +58,18 @@ struct ActivityData { | |||||
| ActivityData &operator+=(const ActivityData &other); | ActivityData &operator+=(const ActivityData &other); | ||||
| }; | }; | ||||
| using OpInfoMap = std::unordered_map<std::string, OpInfo>; | |||||
| using DeviceActivityInfos = std::unordered_map<std::string, ActivityData>; // <device_id, ActivityData> | using DeviceActivityInfos = std::unordered_map<std::string, ActivityData>; // <device_id, ActivityData> | ||||
| using AllActivityInfos = std::unordered_map<uint32_t, DeviceActivityInfos>; // <device_id, ActivityData> | using AllActivityInfos = std::unordered_map<uint32_t, DeviceActivityInfos>; // <device_id, ActivityData> | ||||
| using OpTypeInfos = std::unordered_map<std::string, OpType>; // <op_full_name, Optype> | |||||
| using OpDetailInfos = std::vector<OpDetailInfo>; | |||||
| // <op_full_name, StartDuration> | |||||
| using OpTimestampInfo = std::unordered_map<std::string, std::vector<StartDuration>>; | |||||
| class DataSaver { | |||||
| class GpuDataSaver : public DataSaver { | |||||
| public: | public: | ||||
| DataSaver() = default; | |||||
| ~DataSaver() = default; | |||||
| GpuDataSaver() = default; | |||||
| DataSaver(const DataSaver &) = delete; | |||||
| ~GpuDataSaver() = default; | |||||
| DataSaver &operator=(const DataSaver &) = delete; | |||||
| GpuDataSaver(const GpuDataSaver &) = delete; | |||||
| void ParseOpInfo(const OpInfoMap &op_info_maps); | |||||
| GpuDataSaver &operator=(const GpuDataSaver &) = delete; | |||||
| void SetStepTraceOpName(ProfilingTraceInfo trace_op_name); | void SetStepTraceOpName(ProfilingTraceInfo trace_op_name); | ||||
| @@ -132,37 +78,21 @@ class DataSaver { | |||||
| void WriteFile(std::string out_path, const BaseTime &start_time); | void WriteFile(std::string out_path, const BaseTime &start_time); | ||||
| private: | private: | ||||
| void AddOpDetailInfoForType(const OpDetailInfo &op_detail_info); | |||||
| float GetTotalOpTime(const OpInfoMap &op_info_maps); | |||||
| void AddKernelEvent(const Event &event); | void AddKernelEvent(const Event &event); | ||||
| void AddKernelEventToDevice(const Event &event, DeviceActivityInfos *device_activity_infos); | void AddKernelEventToDevice(const Event &event, DeviceActivityInfos *device_activity_infos); | ||||
| void WriteOpType(const std::string &saver_base_dir); | |||||
| void WriteOpDetail(const std::string &saver_base_dir); | |||||
| void WriteActivity(const std::string &saver_base_dir); | void WriteActivity(const std::string &saver_base_dir); | ||||
| void WriteOpTimestamp(const std::string &saver_base_dir); | |||||
| void WriteStepTrace(const std::string &saver_base_dir); | void WriteStepTrace(const std::string &saver_base_dir); | ||||
| void WriteStartTime(const std::string &saver_base_dir, const BaseTime &start_time); | void WriteStartTime(const std::string &saver_base_dir, const BaseTime &start_time); | ||||
| void ChangeFileMode(const std::string &file_path); | |||||
| std::string device_id_; | |||||
| AllActivityInfos activity_infos_; | AllActivityInfos activity_infos_; | ||||
| OpTypeInfos op_type_infos_; | |||||
| OpDetailInfos op_detail_infos_; | |||||
| OpTimestampInfo op_timestamps_map_; | |||||
| ProfilingTraceInfo step_trace_op_name; | ProfilingTraceInfo step_trace_op_name; | ||||
| }; | }; | ||||
| } // namespace gpu | } // namespace gpu | ||||
| } // namespace profiler | } // namespace profiler | ||||
| } // namespace mindspore | } // namespace mindspore | ||||
| #endif // MINDSPORE_DATA_SAVER_H | |||||
| #endif // MINDSPORE_CCSRC_PROFILER_DEVICE_GPU_GPU_DATA_SAVER_H | |||||
| @@ -21,7 +21,7 @@ | |||||
| #include <chrono> | #include <chrono> | ||||
| #include <cmath> | #include <cmath> | ||||
| #include "profiler/device/gpu/cupti_interface.h" | #include "profiler/device/gpu/cupti_interface.h" | ||||
| #include "profiler/device/gpu/data_saver.h" | |||||
| #include "profiler/device/gpu/gpu_data_saver.h" | |||||
| #include "pybind_api/api_register.h" | #include "pybind_api/api_register.h" | ||||
| #include "utils/log_adapter.h" | #include "utils/log_adapter.h" | ||||
| #include "utils/utils.h" | #include "utils/utils.h" | ||||
| @@ -92,14 +92,6 @@ uint64_t GetHostTimeStamp() { | |||||
| return cur_time_stamp; | return cur_time_stamp; | ||||
| } | } | ||||
| uint64_t GetHostMonoTimeStamp() { | |||||
| struct timespec ts; | |||||
| clock_gettime(CLOCK_MONOTONIC_RAW, &ts); | |||||
| constexpr uint64_t kNSecondInSecond = 1000000000; | |||||
| uint64_t cur_time_stamp = ts.tv_sec * kNSecondInSecond + ts.tv_nsec; | |||||
| return cur_time_stamp; | |||||
| } | |||||
| std::string GetKernelFunc(const char *name) { | std::string GetKernelFunc(const char *name) { | ||||
| char *demangledName = abi::__cxa_demangle(name, nullptr, nullptr, nullptr); | char *demangledName = abi::__cxa_demangle(name, nullptr, nullptr, nullptr); | ||||
| if (demangledName != nullptr) { | if (demangledName != nullptr) { | ||||
| @@ -415,21 +407,6 @@ void GPUProfiler::SetRunTimeData(const std::string &op_name, void *stream) { | |||||
| stream_ = stream; | stream_ = stream; | ||||
| } | } | ||||
| void GPUProfiler::SetRunTimeData(const std::string &op_name, const float time_elapsed) { | |||||
| auto iter = op_info_map_.find(op_name); | |||||
| if (iter != op_info_map_.end()) { | |||||
| // The time unit is ms ,convert to us | |||||
| iter->second.op_host_cost_time += time_elapsed; | |||||
| } | |||||
| } | |||||
| void GPUProfiler::SetRunTimeData(const std::string &op_name, const uint64_t start, const float duration) { | |||||
| auto iter = op_info_map_.find(op_name); | |||||
| if (iter != op_info_map_.end()) { | |||||
| iter->second.start_duration.emplace_back(StartDuration({start, duration})); | |||||
| } | |||||
| } | |||||
| void GPUProfiler::OpDataProducerBegin(const std::string op_name, void *stream) { | void GPUProfiler::OpDataProducerBegin(const std::string op_name, void *stream) { | ||||
| if (sync_enable_flag_) { | if (sync_enable_flag_) { | ||||
| CHECK_CUDA_RET_WITH_ERROR(cudaEventCreate(&op_event_start_), "cudaEventCreate op event start failed"); | CHECK_CUDA_RET_WITH_ERROR(cudaEventCreate(&op_event_start_), "cudaEventCreate op event start failed"); | ||||
| @@ -463,8 +440,8 @@ void GPUProfiler::OpDataProducerEnd() { | |||||
| op_time_elapsed = (op_host_time_stop_ - op_host_time_start_) / kTimeUnit; | op_time_elapsed = (op_host_time_stop_ - op_host_time_start_) / kTimeUnit; | ||||
| } | } | ||||
| MS_LOG(DEBUG) << "Host Time Elapsed(us)," << op_name_ << "," << op_time_elapsed; | MS_LOG(DEBUG) << "Host Time Elapsed(us)," << op_name_ << "," << op_time_elapsed; | ||||
| SetRunTimeData(op_name_, op_time_elapsed); | |||||
| SetRunTimeData(op_name_, op_cupti_time_start_, op_time_elapsed); | |||||
| Profiler::SetRunTimeData(op_name_, op_time_elapsed); | |||||
| Profiler::SetRunTimeData(op_name_, op_cupti_time_start_, op_time_elapsed); | |||||
| } | } | ||||
| void GPUProfiler::StopCUPTI() { | void GPUProfiler::StopCUPTI() { | ||||
| @@ -498,7 +475,7 @@ void GPUProfiler::SaveProfileData() { | |||||
| if (profile_data_path_.empty()) { | if (profile_data_path_.empty()) { | ||||
| MS_LOG(WARNING) << "Profile data path is empty, skip save profile data."; | MS_LOG(WARNING) << "Profile data path is empty, skip save profile data."; | ||||
| } else { | } else { | ||||
| DataSaver dataSaver; | |||||
| GpuDataSaver dataSaver; | |||||
| dataSaver.SetStepTraceOpName(step_trace_op_name); | dataSaver.SetStepTraceOpName(step_trace_op_name); | ||||
| dataSaver.ParseOpInfo(op_info_map_); | dataSaver.ParseOpInfo(op_info_map_); | ||||
| dataSaver.ParseEvent(events_); | dataSaver.ParseEvent(events_); | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -14,8 +14,8 @@ | |||||
| * limitations under the License. | * limitations under the License. | ||||
| */ | */ | ||||
| #ifndef MINDSPORE_GPU_PROFILING_H | |||||
| #define MINDSPORE_GPU_PROFILING_H | |||||
| #ifndef MINDSPORE_CCSRC_PROFILER_DEVICE_GPU_GPU_PROFILING_H | |||||
| #define MINDSPORE_CCSRC_PROFILER_DEVICE_GPU_GPU_PROFILING_H | |||||
| #include <cuda.h> | #include <cuda.h> | ||||
| #include <cupti.h> | #include <cupti.h> | ||||
| #include <algorithm> | #include <algorithm> | ||||
| @@ -27,6 +27,7 @@ | |||||
| #include <unordered_map> | #include <unordered_map> | ||||
| #include <utility> | #include <utility> | ||||
| #include <vector> | #include <vector> | ||||
| #include "profiler/device/profiling.h" | |||||
| #include "profiler/device/gpu/gpu_profiling_utils.h" | #include "profiler/device/gpu/gpu_profiling_utils.h" | ||||
| namespace mindspore { | namespace mindspore { | ||||
| @@ -86,23 +87,6 @@ struct Event { | |||||
| }; | }; | ||||
| }; | }; | ||||
| struct StartDuration { | |||||
| uint64_t start_timestamp = 0l; | |||||
| float duration = 0l; | |||||
| }; | |||||
| struct OpInfo { | |||||
| std::string op_name; | |||||
| float cupti_api_call_time = 0l; | |||||
| float cupti_activity_time = 0l; | |||||
| float op_host_cost_time = 0; | |||||
| int op_kernel_api_count = 0; | |||||
| int op_kernel_count = 0; | |||||
| int op_count = 0; | |||||
| std::vector<StartDuration> start_duration; | |||||
| void *stream; | |||||
| }; | |||||
| struct BaseTime { | struct BaseTime { | ||||
| // nanosecond | // nanosecond | ||||
| uint64_t host_start_time = 0l; | uint64_t host_start_time = 0l; | ||||
| @@ -124,17 +108,17 @@ class ProfilingOp { | |||||
| std::string op_name_; | std::string op_name_; | ||||
| }; | }; | ||||
| class GPUProfiler { | |||||
| class GPUProfiler : public Profiler { | |||||
| public: | public: | ||||
| static std::shared_ptr<GPUProfiler> GetInstance(); | static std::shared_ptr<GPUProfiler> GetInstance(); | ||||
| ~GPUProfiler() { StopCUPTI(); } | ~GPUProfiler() { StopCUPTI(); } | ||||
| GPUProfiler(const GPUProfiler &) = delete; | GPUProfiler(const GPUProfiler &) = delete; | ||||
| GPUProfiler &operator=(const GPUProfiler &) = delete; | GPUProfiler &operator=(const GPUProfiler &) = delete; | ||||
| void Init(const std::string &profileDataPath); | |||||
| void Stop(); | |||||
| void Init(const std::string &profileDataPath) override; | |||||
| void Stop() override; | |||||
| void StopCUPTI(); | void StopCUPTI(); | ||||
| void StepProfilingEnable(const bool enable_flag); | |||||
| void StepProfilingEnable(const bool enable_flag) override; | |||||
| void SyncEnable(const bool enable_flag); | void SyncEnable(const bool enable_flag); | ||||
| bool GetEnableFlag() const { return enable_flag_; } | bool GetEnableFlag() const { return enable_flag_; } | ||||
| bool GetSyncEnableFlag() const { return sync_enable_flag_; } | bool GetSyncEnableFlag() const { return sync_enable_flag_; } | ||||
| @@ -143,7 +127,7 @@ class GPUProfiler { | |||||
| void CUPTIAPI AllocBuffer(uint8_t **buffer, size_t *size, size_t *maxNumRecords); | void CUPTIAPI AllocBuffer(uint8_t **buffer, size_t *size, size_t *maxNumRecords); | ||||
| void CUPTIAPI ProcessBuffer(CUcontext ctx, uint32_t streamId, uint8_t *buffer, size_t size, size_t validSize); | void CUPTIAPI ProcessBuffer(CUcontext ctx, uint32_t streamId, uint8_t *buffer, size_t size, size_t validSize); | ||||
| void OpDataProducerBegin(const std::string op_name, void *stream); | void OpDataProducerBegin(const std::string op_name, void *stream); | ||||
| void OpDataProducerEnd(); | |||||
| void OpDataProducerEnd() override; | |||||
| void ProcessEvents(); | void ProcessEvents(); | ||||
| void RegisterProfilingOp(std::shared_ptr<ProfilingOp> node); | void RegisterProfilingOp(std::shared_ptr<ProfilingOp> node); | ||||
| void SetStepTraceOpName(ProfilingTraceInfo trace_op_name); | void SetStepTraceOpName(ProfilingTraceInfo trace_op_name); | ||||
| @@ -153,24 +137,21 @@ class GPUProfiler { | |||||
| GPUProfiler() = default; | GPUProfiler() = default; | ||||
| void OpsParser(); | void OpsParser(); | ||||
| void EventLog(const Event &event); | void EventLog(const Event &event); | ||||
| void ClearInst(); | |||||
| void ClearInst() override; | |||||
| void HandleActivityRecord(CUpti_Activity *record); | void HandleActivityRecord(CUpti_Activity *record); | ||||
| void AddEvent(Event &&event); | void AddEvent(Event &&event); | ||||
| void SetRunTimeData(const std::string &op_name, void *stream); | void SetRunTimeData(const std::string &op_name, void *stream); | ||||
| void SetRunTimeData(const std::string &op_name, const float time_elapsed); | |||||
| void SetRunTimeData(const std::string &op_name, const uint64_t start, const float duration); | |||||
| void FixOpNameByCorrelationId(Event *event); | void FixOpNameByCorrelationId(Event *event); | ||||
| static std::shared_ptr<GPUProfiler> profiler_inst_; | static std::shared_ptr<GPUProfiler> profiler_inst_; | ||||
| bool enable_flag_ = false; | bool enable_flag_ = false; | ||||
| bool sync_enable_flag_ = true; | bool sync_enable_flag_ = true; | ||||
| std::unordered_map<std::string, OpInfo> op_info_map_; | |||||
| std::unordered_map<uint32_t, std::string> op_name_map_; | std::unordered_map<uint32_t, std::string> op_name_map_; | ||||
| std::vector<Event> events_; | std::vector<Event> events_; | ||||
| BaseTime base_time_; | BaseTime base_time_; | ||||
| std::string op_name_; | std::string op_name_; | ||||
| void *stream_; | void *stream_; | ||||
| void SaveProfileData(); | |||||
| void SaveProfileData() override; | |||||
| void SaveExtraProfileData(); | void SaveExtraProfileData(); | ||||
| std::mutex event_mutex_; | std::mutex event_mutex_; | ||||
| @@ -198,4 +179,4 @@ class GPUProfiler { | |||||
| } // namespace profiler | } // namespace profiler | ||||
| } // namespace mindspore | } // namespace mindspore | ||||
| #endif // MINDSPORE_GPU_PROFILING_H | |||||
| #endif // MINDSPORE_CCSRC_PROFILER_DEVICE_GPU_PROFILING_H | |||||
| @@ -0,0 +1,56 @@ | |||||
| /** | |||||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "profiler/device/profiling.h" | |||||
| #include <time.h> | |||||
| #include <cxxabi.h> | |||||
| #include <cmath> | |||||
| #include "profiler/device/cpu/cpu_data_saver.h" | |||||
| #include "pybind_api/api_register.h" | |||||
| #include "utils/log_adapter.h" | |||||
| #include "utils/utils.h" | |||||
| namespace mindspore { | |||||
| namespace profiler { | |||||
| uint64_t Profiler::GetHostMonoTimeStamp() { | |||||
| struct timespec ts; | |||||
| #if defined(_WIN32) || defined(_WIN64) | |||||
| clock_gettime(CLOCK_MONOTONIC, &ts); | |||||
| #else | |||||
| clock_gettime(CLOCK_MONOTONIC_RAW, &ts); | |||||
| #endif | |||||
| constexpr uint64_t kNSecondInSecond = 1000000000; | |||||
| uint64_t cur_time_stamp = ts.tv_sec * kNSecondInSecond + ts.tv_nsec; | |||||
| return cur_time_stamp; | |||||
| } | |||||
| void Profiler::SetRunTimeData(const std::string &op_name, const float time_elapsed) { | |||||
| auto iter = op_info_map_.find(op_name); | |||||
| if (iter != op_info_map_.end()) { | |||||
| // The time unit is ms, convert to us | |||||
| iter->second.op_host_cost_time += time_elapsed; | |||||
| } | |||||
| } | |||||
| void Profiler::SetRunTimeData(const std::string &op_name, const uint64_t start, const float duration) { | |||||
| auto iter = op_info_map_.find(op_name); | |||||
| if (iter != op_info_map_.end()) { | |||||
| iter->second.start_duration.emplace_back(StartDuration({start, duration})); | |||||
| } | |||||
| } | |||||
| } // namespace profiler | |||||
| } // namespace mindspore | |||||
| @@ -0,0 +1,74 @@ | |||||
| /** | |||||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_PROFILER_DEVICE_PROFILING_H | |||||
| #define MINDSPORE_CCSRC_PROFILER_DEVICE_PROFILING_H | |||||
| #include <algorithm> | |||||
| #include <cstdio> | |||||
| #include <map> | |||||
| #include <memory> | |||||
| #include <mutex> | |||||
| #include <string> | |||||
| #include <unordered_map> | |||||
| #include <utility> | |||||
| #include <vector> | |||||
| namespace mindspore { | |||||
| namespace profiler { | |||||
// One recorded interval: where it started and how long it lasted.
// NOTE(review): the time units of both fields are not established in this
// header -- confirm against the code that fills them in.
struct StartDuration {
  uint64_t start_timestamp = 0;  // start of the interval
  float duration = 0.0f;         // length of the interval
};
| struct OpInfo { | |||||
| std::string op_name; | |||||
| float cupti_api_call_time = 0l; | |||||
| float cupti_activity_time = 0l; | |||||
| float op_host_cost_time = 0; | |||||
| int op_kernel_api_count = 0; | |||||
| int op_kernel_count = 0; | |||||
| int op_count = 0; | |||||
| std::vector<StartDuration> start_duration; | |||||
| void *stream; | |||||
| uint32_t pid; | |||||
| }; | |||||
| class Profiler { | |||||
| public: | |||||
| Profiler() = default; | |||||
| virtual ~Profiler() = default; | |||||
| virtual void Init(const std::string &profileDataPath) = 0; | |||||
| virtual void Stop() = 0; | |||||
| virtual void StepProfilingEnable(const bool enable_flag) = 0; | |||||
| virtual void OpDataProducerEnd() = 0; | |||||
| bool GetEnableFlag() const { return enable_flag_; } | |||||
| std::string ProfileDataPath() const { return profile_data_path_; } | |||||
| protected: | |||||
| void SetRunTimeData(const std::string &op_name, const float time_elapsed); | |||||
| void SetRunTimeData(const std::string &op_name, const uint64_t start, const float duration); | |||||
| uint64_t GetHostMonoTimeStamp(); | |||||
| virtual void SaveProfileData() = 0; | |||||
| virtual void ClearInst() = 0; | |||||
| bool enable_flag_ = false; | |||||
| std::string profile_data_path_; | |||||
| std::unordered_map<std::string, OpInfo> op_info_map_; | |||||
| }; | |||||
| } // namespace profiler | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_PROFILER_DEVICE_PROFILING_H | |||||
| @@ -642,7 +642,7 @@ class GpuTimelineGenerator(BaseTimelineGenerator): | |||||
| """Generate gpu Timeline data from file.""" | """Generate gpu Timeline data from file.""" | ||||
| _display_filename = 'gpu_timeline_display_{}.json' | _display_filename = 'gpu_timeline_display_{}.json' | ||||
| _timeline_summary_filename = 'gpu_timeline_summary_{}.json' | _timeline_summary_filename = 'gpu_timeline_summary_{}.json' | ||||
| _output_op_execute_time_file_path = "op_execute_timestamp_{}.txt" | |||||
| _output_op_execute_time_file_path = "gpu_op_execute_timestamp_{}.txt" | |||||
| _output_activity_execute_time_file_path = "activity_execute_timestamp_{}.txt" | _output_activity_execute_time_file_path = "activity_execute_timestamp_{}.txt" | ||||
| _output_gpu_activity_info_file_path = "gpu_activity_data_{}.csv" | _output_gpu_activity_info_file_path = "gpu_activity_data_{}.csv" | ||||
| _activity_keys_list = [] | _activity_keys_list = [] | ||||
| @@ -341,7 +341,7 @@ class BaseStepTraceParser: | |||||
| row_data[FP_DURATION] += row_data[TAIL] | row_data[FP_DURATION] += row_data[TAIL] | ||||
| row_data = row_data[:BP_POINT] + row_data[BP_POINT+1:TAIL] | row_data = row_data[:BP_POINT] + row_data[BP_POINT+1:TAIL] | ||||
| csv_writer.writerow(row_data) | csv_writer.writerow(row_data) | ||||
| os.chmod(self._output_path, stat.S_IRUSR) | |||||
| os.chmod(self._output_path, stat.S_IREAD | stat.S_IWRITE) | |||||
| except (IOError, OSError) as err: | except (IOError, OSError) as err: | ||||
| log.warning('Failed to save step trace raw info. %s', err) | log.warning('Failed to save step trace raw info. %s', err) | ||||
| raise ProfilerIOException | raise ProfilerIOException | ||||
| @@ -385,7 +385,7 @@ class GpuStepTraceParser(BaseStepTraceParser): | |||||
| try: | try: | ||||
| with open(output_path, 'w') as json_file: | with open(output_path, 'w') as json_file: | ||||
| json.dump(points, json_file) | json.dump(points, json_file) | ||||
| os.chmod(output_path, stat.S_IRUSR) | |||||
| os.chmod(output_path, stat.S_IREAD | stat.S_IWRITE) | |||||
| except (IOError, OSError) as err: | except (IOError, OSError) as err: | ||||
| log.warning('Failed to save point info. %s', err) | log.warning('Failed to save point info. %s', err) | ||||
| raise ProfilerIOException | raise ProfilerIOException | ||||
| @@ -504,7 +504,7 @@ class AscendStepTraceParser(BaseStepTraceParser): | |||||
| try: | try: | ||||
| with open(output_path, 'w') as json_file: | with open(output_path, 'w') as json_file: | ||||
| json.dump(points, json_file) | json.dump(points, json_file) | ||||
| os.chmod(output_path, stat.S_IRUSR) | |||||
| os.chmod(output_path, stat.S_IREAD | stat.S_IWRITE) | |||||
| except (IOError, OSError) as err: | except (IOError, OSError) as err: | ||||
| log.warning('Failed to save point info. %s', err) | log.warning('Failed to save point info. %s', err) | ||||
| raise ProfilerIOException | raise ProfilerIOException | ||||