| @@ -1,14 +1,23 @@ | |||||
| if(ENABLE_GPU) | if(ENABLE_GPU) | ||||
| file(GLOB_RECURSE PROFILER_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "device/gpu/*.cc") | |||||
| file(GLOB_RECURSE PROFILER_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} | |||||
| "device/gpu/*.cc" "device/cpu/*.cc") | |||||
| set_property(SOURCE ${PROFILER_SRC_LIST} PROPERTY COMPILE_DEFINITIONS | set_property(SOURCE ${PROFILER_SRC_LIST} PROPERTY COMPILE_DEFINITIONS | ||||
| SUBMODULE_ID=mindspore::SubModuleId::SM_PROFILER) | SUBMODULE_ID=mindspore::SubModuleId::SM_PROFILER) | ||||
| add_library(_mindspore_profiler_obj OBJECT ${PROFILER_SRC_LIST}) | add_library(_mindspore_profiler_obj OBJECT ${PROFILER_SRC_LIST}) | ||||
| endif() | endif() | ||||
| if(ENABLE_D) | if(ENABLE_D) | ||||
| file(GLOB_RECURSE PROFILER_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "device/ascend/*.cc" "device/common/*.cc") | |||||
| file(GLOB_RECURSE PROFILER_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} | |||||
| "device/common/*.cc" "device/ascend/*.cc" "device/cpu/*.cc") | |||||
| set_property(SOURCE ${PROFILER_SRC_LIST} PROPERTY COMPILE_DEFINITIONS | set_property(SOURCE ${PROFILER_SRC_LIST} PROPERTY COMPILE_DEFINITIONS | ||||
| SUBMODULE_ID=mindspore::SubModuleId::SM_PROFILER) | SUBMODULE_ID=mindspore::SubModuleId::SM_PROFILER) | ||||
| add_library(_mindspore_profiler_obj OBJECT ${PROFILER_SRC_LIST}) | add_library(_mindspore_profiler_obj OBJECT ${PROFILER_SRC_LIST}) | ||||
| add_dependencies(_mindspore_profiler_obj mindspore::protobuf) | add_dependencies(_mindspore_profiler_obj mindspore::protobuf) | ||||
| endif() | endif() | ||||
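| # CPU-only builds compile the profiler object library from device/cpu alone; GPU and Ascend builds already glob device/cpu above | |||||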
| if(ENABLE_CPU AND NOT (ENABLE_D OR ENABLE_GPU)) | |||||
| file(GLOB_RECURSE PROFILER_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "device/cpu/*.cc") | |||||
| set_property(SOURCE ${PROFILER_SRC_LIST} PROPERTY COMPILE_DEFINITIONS | |||||
| SUBMODULE_ID=mindspore::SubModuleId::SM_PROFILER) | |||||
| add_library(_mindspore_profiler_obj OBJECT ${PROFILER_SRC_LIST}) | |||||
| endif() | |||||
| @@ -0,0 +1,174 @@ | |||||
| /** | |||||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "profiler/device/cpu/cpu_data_saver.h" | |||||
| #include <fstream> | |||||
| #include <numeric> | |||||
| #include "sys/stat.h" | |||||
| #include "utils/log_adapter.h" | |||||
| #include "utils/ms_utils.h" | |||||
| #include "utils/ms_context.h" | |||||
| namespace mindspore { | |||||
| namespace profiler { | |||||
| namespace cpu { | |||||
| OpDetailInfo::OpDetailInfo(std::shared_ptr<OpInfo> op_info, float proportion) | |||||
| : op_info_(op_info), proportion_(proportion) { | |||||
| // op_full_name is like 'xxx/xxx/{op_type}-op{node_id}' | |||||
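| // e.g. a hypothetical full name 'Default/network/Conv2D-op123' gives op_type_ 'Conv2D' and op_name_ 'Conv2D-op123' | |||||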
| op_full_name_ = op_info->op_name; | |||||
| auto op_type_begin_iter = op_full_name_.rfind('/') + 1; | |||||
| auto op_type_end_iter = op_full_name_.rfind('-'); | |||||
| op_type_ = op_full_name_.substr(op_type_begin_iter, op_type_end_iter - op_type_begin_iter); | |||||
| op_name_ = op_full_name_.substr(op_type_begin_iter); | |||||
| op_avg_time_ = op_info->op_cost_time / op_info->op_count; | |||||
| } | |||||
| void DataSaver::ParseOpInfo(const OpInfoMap &op_info_maps) { | |||||
| const float factor_percent = 100; | |||||
| op_detail_infos_.reserve(op_info_maps.size()); | |||||
| float total_time_sum = GetTotalOpTime(op_info_maps); | |||||
| for (const auto &item : op_info_maps) { | |||||
| op_timestamps_map_[item.first] = item.second.start_duration; | |||||
| float proportion = item.second.op_cost_time / total_time_sum * factor_percent; | |||||
| auto op_info = std::make_shared<OpInfo>(item.second); | |||||
| OpDetailInfo op_detail_info = OpDetailInfo(op_info, proportion); | |||||
| op_detail_infos_.emplace_back(op_detail_info); | |||||
| AddOpDetailInfoForType(op_detail_info); | |||||
| } | |||||
| // update average time of op type | |||||
| for (auto &op_type : op_type_infos_) { | |||||
| // device_infos: <type_name, op_type_info> | |||||
| op_type.second.avg_time_ = op_type.second.total_time_ / op_type.second.count_; | |||||
| } | |||||
| MS_LOG(DEBUG) << "Get " << op_detail_infos_.size() << " operation items."; | |||||
| MS_LOG(DEBUG) << "Get " << op_type_infos_.size() << " operation type items."; | |||||
| } | |||||
| void DataSaver::AddOpDetailInfoForType(const OpDetailInfo &op_detail_info) { | |||||
| // Construct OpType object according to op detail info | |||||
| OpType op_type = OpType{op_detail_info.op_type_, | |||||
| op_detail_info.op_info_->op_count, | |||||
| op_detail_info.op_info_->op_count, | |||||
| op_detail_info.op_info_->op_cost_time, | |||||
| 0, | |||||
| op_detail_info.proportion_}; | |||||
| // Set the OpType into op_type_infos_ map | |||||
| std::string type_name = op_detail_info.op_type_; | |||||
| auto iter = op_type_infos_.find(type_name); | |||||
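| // accumulate count, time and proportion into the existing entry for this op type, or insert a new one | |||||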
| if (iter == op_type_infos_.end()) { | |||||
| op_type_infos_.emplace(type_name, op_type); | |||||
| } else { | |||||
| iter->second += op_type; | |||||
| } | |||||
| } | |||||
| float DataSaver::GetTotalOpTime(const OpInfoMap &op_info_maps) { | |||||
| float sum = 0; | |||||
| sum = std::accumulate(op_info_maps.begin(), op_info_maps.end(), sum, | |||||
| [](float i, const auto &iter) { return i + iter.second.op_cost_time; }); | |||||
| MS_LOG(DEBUG) << "The total op time is " << sum; | |||||
| return sum; | |||||
| } | |||||
| void DataSaver::WriteFile(std::string out_path_dir) { | |||||
| if (op_detail_infos_.empty() || op_type_infos_.empty()) { | |||||
| MS_LOG(INFO) << "No cpu operation detail infos to write."; | |||||
| return; | |||||
| } | |||||
| auto context_ptr = MsContext::GetInstance(); | |||||
| MS_EXCEPTION_IF_NULL(context_ptr); | |||||
| auto device_id = context_ptr->get_param<uint32_t>(MS_CTX_DEVICE_ID); | |||||
| device_id_ = std::to_string(device_id); | |||||
| WriteOpDetail(out_path_dir); | |||||
| WriteOpType(out_path_dir); | |||||
| WriteOpTimestamp(out_path_dir); | |||||
| } | |||||
| void DataSaver::WriteOpType(const std::string &saver_base_dir) { | |||||
| std::string file_path = saver_base_dir + "/cpu_op_type_info_" + device_id_ + ".csv"; | |||||
| std::ofstream ofs(file_path); | |||||
| // check if the file is writable | |||||
| if (!ofs.is_open()) { | |||||
| MS_LOG(WARNING) << "Open file '" << file_path << "' failed!"; | |||||
| return; | |||||
| } | |||||
| try { | |||||
| // write op type info into file | |||||
| ofs << OpType().GetHeader() << std::endl; | |||||
| for (const auto &op_type_info : op_type_infos_) { | |||||
| ofs << op_type_info.second << std::endl; | |||||
| } | |||||
| } catch (const std::exception &e) { | |||||
| MS_LOG(ERROR) << "Write " << file_path << " failed: " << e.what(); | |||||
| } | |||||
| ofs.close(); | |||||
| ChangeFileMode(file_path); | |||||
| MS_LOG(INFO) << "Write " << op_type_infos_.size() << " op type infos into file: " << file_path; | |||||
| } | |||||
| void DataSaver::WriteOpDetail(const std::string &saver_base_dir) { | |||||
| std::string file_path = saver_base_dir + "/cpu_op_detail_info_" + device_id_ + ".csv"; | |||||
| std::ofstream ofs(file_path); | |||||
| if (!ofs.is_open()) { | |||||
| MS_LOG(WARNING) << "Open file '" << file_path << "' failed!"; | |||||
| return; | |||||
| } | |||||
| try { | |||||
| // write op detail info into file | |||||
| ofs << OpDetailInfo().GetHeader() << std::endl; | |||||
| for (const auto &op_detail : op_detail_infos_) { | |||||
| ofs << op_detail << std::endl; | |||||
| } | |||||
| } catch (const std::exception &e) { | |||||
| MS_LOG(ERROR) << "Write " << file_path << " failed: " << e.what(); | |||||
| } | |||||
| ofs.close(); | |||||
| ChangeFileMode(file_path); | |||||
| MS_LOG(INFO) << "Write " << op_detail_infos_.size() << " op detail infos into file: " << file_path; | |||||
| } | |||||
| void DataSaver::WriteOpTimestamp(const std::string &saver_base_dir) { | |||||
| std::string file_path = saver_base_dir + "/cpu_op_execute_timestamp_" + device_id_ + ".txt"; | |||||
| std::ofstream ofs(file_path); | |||||
| // check if the file is writable | |||||
| if (!ofs.is_open()) { | |||||
| MS_LOG(WARNING) << "Open file '" << file_path << "' failed!"; | |||||
| return; | |||||
| } | |||||
| try { | |||||
| // write op timestamp info into file | |||||
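| // each line is '<op_full_name>;host_cpu_ops;<start_ns>,<duration_us> ...', with one start/duration pair per op launch | |||||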
| for (const auto &op_timestamp_info : op_timestamps_map_) { | |||||
| ofs << op_timestamp_info.first << ";host_cpu_ops;"; | |||||
| for (const auto &start_end : op_timestamp_info.second) { | |||||
| ofs << start_end.start_timestamp << "," << start_end.duration << " "; | |||||
| } | |||||
| ofs << std::endl; | |||||
| } | |||||
| } catch (const std::exception &e) { | |||||
| MS_LOG(ERROR) << "Write " << file_path << " failed: " << e.what(); | |||||
| } | |||||
| ofs.close(); | |||||
| ChangeFileMode(file_path); | |||||
| } | |||||
| void DataSaver::ChangeFileMode(const std::string &file_path) { | |||||
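| // restrict the generated profiling file to owner read-only | |||||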
| if (chmod(common::SafeCStr(file_path), S_IRUSR) == -1) { | |||||
| MS_LOG(WARNING) << "Modify file: " << file_path << " to rw fail."; | |||||
| return; | |||||
| } | |||||
| } | |||||
| } // namespace cpu | |||||
| } // namespace profiler | |||||
| } // namespace mindspore | |||||
| @@ -0,0 +1,123 @@ | |||||
| /** | |||||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CPU_DATA_SAVER_H | |||||
| #define MINDSPORE_CPU_DATA_SAVER_H | |||||
| #include <iostream> | |||||
| #include <algorithm> | |||||
| #include <unordered_map> | |||||
| #include <vector> | |||||
| #include <string> | |||||
| #include <memory> | |||||
| #include "profiler/device/cpu/cpu_profiling.h" | |||||
| namespace mindspore { | |||||
| namespace profiler { | |||||
| namespace cpu { | |||||
| struct OpDetailInfo { | |||||
| std::string op_type_; | |||||
| std::string op_name_; | |||||
| std::string op_full_name_; | |||||
| std::shared_ptr<OpInfo> op_info_{nullptr}; | |||||
| float op_avg_time_{0}; | |||||
| float proportion_{0}; | |||||
| OpDetailInfo() = default; | |||||
| OpDetailInfo(std::shared_ptr<OpInfo> op_info, float proportion); | |||||
| std::string GetHeader() const { | |||||
| return "op_side,op_type,op_name,full_op_name,op_occurrences,compute_time(ms)," | |||||
| "avg_execution_time(ms),total_proportion,subgraph,pid"; | |||||
| } | |||||
| friend std::ostream &operator<<(std::ostream &os, const OpDetailInfo &event) { | |||||
| os << "Host," << event.op_type_ << ',' << event.op_name_ << ',' << event.op_full_name_ << ',' | |||||
| << event.op_info_->op_count << ',' << event.op_info_->op_cost_time << ',' << event.op_avg_time_ << ',' | |||||
| << event.proportion_ << ",Default," << event.op_info_->pid; | |||||
| return os; | |||||
| } | |||||
| }; | |||||
| struct OpType { | |||||
| std::string op_type_; | |||||
| int count_{0}; | |||||
| int step_{0}; | |||||
| float total_time_{0}; | |||||
| float avg_time_{0}; | |||||
| float proportion_{0}; | |||||
| std::string GetHeader() const { | |||||
| return "op_type,total_called_times,called_times(per-step)," | |||||
| "total_compute_time,compute_time(ms per-step),percent"; | |||||
| } | |||||
| friend std::ostream &operator<<(std::ostream &os, const OpType &event) { | |||||
| os << event.op_type_ << ',' << event.count_ << ',' << event.count_ / event.step_ << ',' << event.total_time_ << ',' | |||||
| << event.total_time_ / event.step_ << ',' << event.proportion_; | |||||
| return os; | |||||
| } | |||||
| OpType &operator+=(const OpType &other) { | |||||
| this->count_ += other.count_; | |||||
| this->total_time_ += other.total_time_; | |||||
| this->proportion_ += other.proportion_; | |||||
| return *this; | |||||
| } | |||||
| }; | |||||
| using OpInfoMap = std::unordered_map<std::string, OpInfo>; | |||||
| using OpTypeInfos = std::unordered_map<std::string, OpType>;  // <op_type_name, OpType> | |||||
| using OpDetailInfos = std::vector<OpDetailInfo>; | |||||
| // <op_full_name, StartDuration> | |||||
| using OpTimestampInfo = std::unordered_map<std::string, std::vector<StartDuration>>; | |||||
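| // Aggregates parsed op statistics and writes cpu_op_type_info_<id>.csv, cpu_op_detail_info_<id>.csv and cpu_op_execute_timestamp_<id>.txt | |||||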
| class DataSaver { | |||||
| public: | |||||
| DataSaver() = default; | |||||
| ~DataSaver() = default; | |||||
| DataSaver(const DataSaver &) = delete; | |||||
| DataSaver &operator=(const DataSaver &) = delete; | |||||
| void ParseOpInfo(const OpInfoMap &op_info_maps); | |||||
| void WriteFile(std::string out_path); | |||||
| private: | |||||
| void AddOpDetailInfoForType(const OpDetailInfo &op_detail_info); | |||||
| float GetTotalOpTime(const OpInfoMap &op_info_maps); | |||||
| void WriteOpType(const std::string &saver_base_dir); | |||||
| void WriteOpDetail(const std::string &saver_base_dir); | |||||
| void WriteOpTimestamp(const std::string &saver_base_dir); | |||||
| void ChangeFileMode(const std::string &file_path); | |||||
| std::string device_id_; | |||||
| OpTypeInfos op_type_infos_; | |||||
| OpDetailInfos op_detail_infos_; | |||||
| OpTimestampInfo op_timestamps_map_; | |||||
| }; | |||||
| } // namespace cpu | |||||
| } // namespace profiler | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CPU_DATA_SAVER_H | |||||
| @@ -0,0 +1,136 @@ | |||||
| /** | |||||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "profiler/device/cpu/cpu_profiling.h" | |||||
| #include <time.h> | |||||
| #include <cxxabi.h> | |||||
| #include <cmath> | |||||
| #include "profiler/device/cpu/cpu_data_saver.h" | |||||
| #include "pybind_api/api_register.h" | |||||
| #include "utils/log_adapter.h" | |||||
| #include "utils/utils.h" | |||||
| namespace mindspore { | |||||
| namespace profiler { | |||||
| namespace cpu { | |||||
| std::shared_ptr<CPUProfiler> CPUProfiler::profiler_inst_ = nullptr; | |||||
| uint64_t GetMonoTimeStamp() { | |||||
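| // monotonic timestamp in nanoseconds; CLOCK_MONOTONIC_RAW is not available on Windows, so CLOCK_MONOTONIC is used there | |||||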
| struct timespec ts; | |||||
| #if defined(_WIN32) || defined(_WIN64) | |||||
| clock_gettime(CLOCK_MONOTONIC, &ts); | |||||
| #else | |||||
| clock_gettime(CLOCK_MONOTONIC_RAW, &ts); | |||||
| #endif | |||||
| constexpr uint64_t kNSecondInSecond = 1000000000; | |||||
| uint64_t cur_time_stamp = ts.tv_sec * kNSecondInSecond + ts.tv_nsec; | |||||
| return cur_time_stamp; | |||||
| } | |||||
| std::shared_ptr<CPUProfiler> CPUProfiler::GetInstance() { | |||||
| if (profiler_inst_ == nullptr) { | |||||
| profiler_inst_ = std::shared_ptr<CPUProfiler>(new (std::nothrow) CPUProfiler()); | |||||
| } | |||||
| return profiler_inst_; | |||||
| } | |||||
| void CPUProfiler::Init(const std::string &profileDataPath = "") { | |||||
| MS_LOG(INFO) << "Initialize CPU Profiling"; | |||||
| base_time_ = GetMonoTimeStamp(); | |||||
| profile_data_path_ = profileDataPath; | |||||
| MS_LOG(INFO) << " Host start time(ns): " << base_time_ << " profile data path: " << profile_data_path_; | |||||
| } | |||||
| void CPUProfiler::StepProfilingEnable(const bool enable_flag) { | |||||
| MS_LOG(INFO) << "CPU Profiler enable flag: " << enable_flag; | |||||
| enable_flag_ = enable_flag; | |||||
| } | |||||
| void CPUProfiler::SetRunTimeData(const std::string &op_name, const uint32_t pid) { | |||||
| auto iter = op_info_map_.find(op_name); | |||||
| if (iter != op_info_map_.end()) { | |||||
| iter->second.op_count += 1; | |||||
| } else { | |||||
| OpInfo op_info; | |||||
| op_info.op_name = op_name; | |||||
| op_info.pid = pid; | |||||
| op_info.op_count = 1; | |||||
| op_info_map_[op_name] = op_info; | |||||
| } | |||||
| op_name_ = op_name; | |||||
| pid_ = pid; | |||||
| } | |||||
| void CPUProfiler::SetRunTimeData(const std::string &op_name, const float time_elapsed) { | |||||
| auto iter = op_info_map_.find(op_name); | |||||
| if (iter != op_info_map_.end()) { | |||||
| // op_cost_time is accumulated in microseconds (us) | |||||
| iter->second.op_cost_time += time_elapsed; | |||||
| } | |||||
| } | |||||
| void CPUProfiler::SetRunTimeData(const std::string &op_name, const uint64_t start, const float duration) { | |||||
| auto iter = op_info_map_.find(op_name); | |||||
| if (iter != op_info_map_.end()) { | |||||
| iter->second.start_duration.emplace_back(StartDuration({start, duration})); | |||||
| } | |||||
| } | |||||
| void CPUProfiler::OpDataProducerBegin(const std::string op_name, const uint32_t pid) { | |||||
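| // record the op start timestamps and register the op; on CPU both the elapsed-time base and the timestamp base use the same monotonic clock | |||||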
| op_time_start_ = GetMonoTimeStamp(); | |||||
| op_time_mono_start_ = GetMonoTimeStamp(); | |||||
| SetRunTimeData(op_name, pid); | |||||
| } | |||||
| void CPUProfiler::OpDataProducerEnd() { | |||||
| float op_time_elapsed = 0; | |||||
| op_time_stop_ = GetMonoTimeStamp(); | |||||
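| // convert the elapsed time from ns to us (kTimeUnit == 1000) | |||||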
| op_time_elapsed = (op_time_stop_ - op_time_start_) / kTimeUnit; | |||||
| MS_LOG(DEBUG) << "Host Time Elapsed(us)," << op_name_ << "," << op_time_elapsed; | |||||
| SetRunTimeData(op_name_, op_time_elapsed); | |||||
| SetRunTimeData(op_name_, op_time_mono_start_, op_time_elapsed); | |||||
| } | |||||
| void CPUProfiler::Stop() { | |||||
| MS_LOG(INFO) << "Stop CPU Profiling"; | |||||
| SaveProfileData(); | |||||
| ClearInst(); | |||||
| } | |||||
| void CPUProfiler::SaveProfileData() { | |||||
| if (profile_data_path_.empty()) { | |||||
| MS_LOG(WARNING) << "Profile data path is empty, skip save profile data."; | |||||
| } else { | |||||
| DataSaver dataSaver; | |||||
| dataSaver.ParseOpInfo(op_info_map_); | |||||
| dataSaver.WriteFile(profile_data_path_); | |||||
| } | |||||
| } | |||||
| void CPUProfiler::ClearInst() { op_info_map_.clear(); } | |||||
| REGISTER_PYBIND_DEFINE(CPUProfiler_, ([](const py::module *m) { | |||||
| (void)py::class_<CPUProfiler, std::shared_ptr<CPUProfiler>>(*m, "CPUProfiler") | |||||
| .def_static("get_instance", &CPUProfiler::GetInstance, "CPUProfiler get_instance.") | |||||
| .def("init", &CPUProfiler::Init, py::arg("profile_data_path"), "init") | |||||
| .def("stop", &CPUProfiler::Stop, "stop") | |||||
| .def("step_profiling_enable", &CPUProfiler::StepProfilingEnable, py::arg("enable_flag"), | |||||
| "enable or disable step profiling"); | |||||
| })); | |||||
| } // namespace cpu | |||||
| } // namespace profiler | |||||
| } // namespace mindspore | |||||
| @@ -0,0 +1,86 @@ | |||||
| /** | |||||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CPU_PROFILING_H | |||||
| #define MINDSPORE_CPU_PROFILING_H | |||||
| #include <algorithm> | |||||
| #include <cstdio> | |||||
| #include <map> | |||||
| #include <memory> | |||||
| #include <mutex> | |||||
| #include <string> | |||||
| #include <unordered_map> | |||||
| #include <utility> | |||||
| #include <vector> | |||||
| namespace mindspore { | |||||
| namespace profiler { | |||||
| namespace cpu { | |||||
| struct StartDuration { | |||||
| uint64_t start_timestamp = 0l; | |||||
| float duration = 0; | |||||
| }; | |||||
| struct OpInfo { | |||||
| std::string op_name; | |||||
| float op_cost_time = 0; | |||||
| int op_count = 0; | |||||
| std::vector<StartDuration> start_duration; | |||||
| uint32_t pid = 0; | |||||
| }; | |||||
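| // nanoseconds per microsecond, used to convert elapsed time to us | |||||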
| const float kTimeUnit = 1000; | |||||
| class CPUProfiler { | |||||
| public: | |||||
| static std::shared_ptr<CPUProfiler> GetInstance(); | |||||
| ~CPUProfiler() = default; | |||||
| CPUProfiler(const CPUProfiler &) = delete; | |||||
| CPUProfiler &operator=(const CPUProfiler &) = delete; | |||||
| void Init(const std::string &profileDataPath); | |||||
| void Stop(); | |||||
| void StepProfilingEnable(const bool enable_flag); | |||||
| bool GetEnableFlag() const { return enable_flag_; } | |||||
| void OpDataProducerBegin(const std::string op_name, const uint32_t pid); | |||||
| void OpDataProducerEnd(); | |||||
| std::string ProfileDataPath() const { return profile_data_path_; } | |||||
| private: | |||||
| CPUProfiler() = default; | |||||
| void ClearInst(); | |||||
| void SetRunTimeData(const std::string &op_name, const uint32_t pid); | |||||
| void SetRunTimeData(const std::string &op_name, const float time_elapsed); | |||||
| void SetRunTimeData(const std::string &op_name, const uint64_t start, const float duration); | |||||
| static std::shared_ptr<CPUProfiler> profiler_inst_; | |||||
| bool enable_flag_ = false; | |||||
| std::unordered_map<std::string, OpInfo> op_info_map_; | |||||
| uint64_t base_time_ = 0; | |||||
| std::string op_name_; | |||||
| uint32_t pid_ = 0; | |||||
| void SaveProfileData(); | |||||
| uint64_t op_time_start_ = 0; | |||||
| uint64_t op_time_mono_start_ = 0; | |||||
| uint64_t op_time_stop_ = 0; | |||||
| std::string profile_data_path_; | |||||
| }; | |||||
| } // namespace cpu | |||||
| } // namespace profiler | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CPU_PROFILING_H | |||||
| @@ -153,7 +153,7 @@ void DataSaver::AddKernelEventToDevice(const Event &event, DeviceActivityInfos * | |||||
| } | } | ||||
| } | } | ||||
| void DataSaver::WriteFile(std::string out_path_dir) { | |||||
| void DataSaver::WriteFile(std::string out_path_dir, const BaseTime &start_time) { | |||||
| if (out_path_dir.empty()) { | if (out_path_dir.empty()) { | ||||
| MS_LOG(WARNING) << "Output directory. Ignore the writing data."; | MS_LOG(WARNING) << "Output directory. Ignore the writing data."; | ||||
| return; | return; | ||||
| @@ -169,6 +169,7 @@ void DataSaver::WriteFile(std::string out_path_dir) { | |||||
| WriteActivity(out_path_dir); | WriteActivity(out_path_dir); | ||||
| WriteOpTimestamp(out_path_dir); | WriteOpTimestamp(out_path_dir); | ||||
| WriteStepTrace(out_path_dir); | WriteStepTrace(out_path_dir); | ||||
| WriteStartTime(out_path_dir, start_time); | |||||
| } | } | ||||
| void DataSaver::WriteOpType(const std::string &saver_base_dir) { | void DataSaver::WriteOpType(const std::string &saver_base_dir) { | ||||
| @@ -307,6 +308,28 @@ void DataSaver::WriteStepTrace(const std::string &saver_base_dir) { | |||||
| MS_LOG(INFO) << "Write step trace infos into file: " << file_path; | MS_LOG(INFO) << "Write step trace infos into file: " << file_path; | ||||
| } | } | ||||
| void DataSaver::WriteStartTime(const std::string &saver_base_dir, const BaseTime &start_time) { | |||||
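| // the host monotonic and GPU start times written here let the timeline generator shift CPU op timestamps into the GPU clock domain | |||||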
| std::string file_path = saver_base_dir + "/start_time_" + device_id_ + ".txt"; | |||||
| std::ofstream ofs(file_path); | |||||
| // check if the file is writable | |||||
| if (!ofs.is_open()) { | |||||
| MS_LOG(WARNING) << "Open file '" << file_path << "' failed!"; | |||||
| return; | |||||
| } | |||||
| // write start time info into file | |||||
| try { | |||||
| ofs << "host_monotonic_raw_time(ns): " << start_time.host_start_monotonic_raw_time << std::endl; | |||||
| ofs << "gpu_start_time(ns): " << start_time.gpu_start_time << std::endl; | |||||
| } catch (const std::exception &e) { | |||||
| MS_LOG(ERROR) << "Write " << file_path << " failed: " << e.what(); | |||||
| } | |||||
| ofs.close(); | |||||
| ChangeFileMode(file_path); | |||||
| MS_LOG(INFO) << "Write profiler start time infos into file: " << file_path; | |||||
| } | |||||
| void DataSaver::SetStepTraceOpName(ProfilingTraceInfo trace_op_name) { step_trace_op_name = trace_op_name; } | void DataSaver::SetStepTraceOpName(ProfilingTraceInfo trace_op_name) { step_trace_op_name = trace_op_name; } | ||||
| void DataSaver::ChangeFileMode(const std::string &file_path) { | void DataSaver::ChangeFileMode(const std::string &file_path) { | ||||
| @@ -129,7 +129,7 @@ class DataSaver { | |||||
| void ParseEvent(const std::vector<Event> &events); | void ParseEvent(const std::vector<Event> &events); | ||||
| void WriteFile(std::string out_path); | |||||
| void WriteFile(std::string out_path, const BaseTime &start_time); | |||||
| private: | private: | ||||
| void AddOpDetailInfoForType(const OpDetailInfo &op_detail_info); | void AddOpDetailInfoForType(const OpDetailInfo &op_detail_info); | ||||
| @@ -150,6 +150,8 @@ class DataSaver { | |||||
| void WriteStepTrace(const std::string &saver_base_dir); | void WriteStepTrace(const std::string &saver_base_dir); | ||||
| void WriteStartTime(const std::string &saver_base_dir, const BaseTime &start_time); | |||||
| void ChangeFileMode(const std::string &file_path); | void ChangeFileMode(const std::string &file_path); | ||||
| std::string device_id_; | std::string device_id_; | ||||
| @@ -1,5 +1,5 @@ | |||||
| /** | /** | ||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||||
| * | * | ||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| * you may not use this file except in compliance with the License. | * you may not use this file except in compliance with the License. | ||||
| @@ -16,6 +16,7 @@ | |||||
| #include "profiler/device/gpu/gpu_profiling.h" | #include "profiler/device/gpu/gpu_profiling.h" | ||||
| #include <time.h> | |||||
| #include <cxxabi.h> | #include <cxxabi.h> | ||||
| #include <chrono> | #include <chrono> | ||||
| #include <cmath> | #include <cmath> | ||||
| @@ -91,6 +92,14 @@ uint64_t GetHostTimeStamp() { | |||||
| return cur_time_stamp; | return cur_time_stamp; | ||||
| } | } | ||||
| uint64_t GetHostMonoTimeStamp() { | |||||
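| // same monotonic raw clock as the CPU profiler, so CPU op timestamps can later be aligned with the GPU start time | |||||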
| struct timespec ts; | |||||
| clock_gettime(CLOCK_MONOTONIC_RAW, &ts); | |||||
| constexpr uint64_t kNSecondInSecond = 1000000000; | |||||
| uint64_t cur_time_stamp = ts.tv_sec * kNSecondInSecond + ts.tv_nsec; | |||||
| return cur_time_stamp; | |||||
| } | |||||
| std::string GetKernelFunc(const char *name) { | std::string GetKernelFunc(const char *name) { | ||||
| char *demangledName = abi::__cxa_demangle(name, nullptr, nullptr, nullptr); | char *demangledName = abi::__cxa_demangle(name, nullptr, nullptr, nullptr); | ||||
| if (demangledName != nullptr) { | if (demangledName != nullptr) { | ||||
| @@ -100,16 +109,24 @@ std::string GetKernelFunc(const char *name) { | |||||
| } | } | ||||
| } | } | ||||
| void CUPTIApiExit(const std::shared_ptr<GPUProfiler> &gpu_profiler_inst, CUpti_CallbackId cb_id, | |||||
| const CUpti_CallbackData *cb_data) { | |||||
| uint64_t start_timestamp = *cb_data->correlationData; | |||||
| uint64_t end_timestamp = GetCUPTITimeStamp(); | |||||
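| // helpers extracted from CUPTIApiExit: async and sync memcpy driver callbacks are grouped so both are reported as "cuMemcpy" events | |||||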
| bool IsMemcpyAsyncEvent(CUpti_CallbackId cb_id) { | |||||
| switch (cb_id) { | |||||
| case CUPTI_DRIVER_TRACE_CBID_cuMemcpyAsync: | |||||
| case CUPTI_DRIVER_TRACE_CBID_cuMemcpyHtoDAsync_v2: | |||||
| case CUPTI_DRIVER_TRACE_CBID_cuMemcpyDtoHAsync_v2: | |||||
| case CUPTI_DRIVER_TRACE_CBID_cuMemcpyDtoDAsync_v2: | |||||
| case CUPTI_DRIVER_TRACE_CBID_cuMemcpyAtoHAsync_v2: | |||||
| case CUPTI_DRIVER_TRACE_CBID_cuMemcpy2DAsync_v2: | |||||
| case CUPTI_DRIVER_TRACE_CBID_cuMemcpy3DAsync_v2: | |||||
| case CUPTI_DRIVER_TRACE_CBID_cuMemcpyHtoAAsync_v2: | |||||
| case CUPTI_DRIVER_TRACE_CBID_cuMemcpyPeerAsync: | |||||
| return true; | |||||
| } | |||||
| return false; | |||||
| } | |||||
| bool IsMemcpySyncEvent(CUpti_CallbackId cb_id) { | |||||
| switch (cb_id) { | switch (cb_id) { | ||||
| case CUPTI_DRIVER_TRACE_CBID_cuLaunchKernel: | |||||
| case CUPTI_DRIVER_TRACE_CBID_cuLaunchCooperativeKernel: | |||||
| case CUPTI_DRIVER_TRACE_CBID_cuLaunchCooperativeKernelMultiDevice: | |||||
| gpu_profiler_inst->EventHandleProcess(cb_id, cb_data, "cuLaunchKernel", start_timestamp, end_timestamp); | |||||
| break; | |||||
| case CUPTI_DRIVER_TRACE_CBID_cuMemcpy: | case CUPTI_DRIVER_TRACE_CBID_cuMemcpy: | ||||
| case CUPTI_DRIVER_TRACE_CBID_cuMemcpyHtoD_v2: | case CUPTI_DRIVER_TRACE_CBID_cuMemcpyHtoD_v2: | ||||
| case CUPTI_DRIVER_TRACE_CBID_cuMemcpyDtoH_v2: | case CUPTI_DRIVER_TRACE_CBID_cuMemcpyDtoH_v2: | ||||
| @@ -122,17 +139,21 @@ void CUPTIApiExit(const std::shared_ptr<GPUProfiler> &gpu_profiler_inst, CUpti_C | |||||
| case CUPTI_DRIVER_TRACE_CBID_cuMemcpy2DUnaligned_v2: | case CUPTI_DRIVER_TRACE_CBID_cuMemcpy2DUnaligned_v2: | ||||
| case CUPTI_DRIVER_TRACE_CBID_cuMemcpy3D_v2: | case CUPTI_DRIVER_TRACE_CBID_cuMemcpy3D_v2: | ||||
| case CUPTI_DRIVER_TRACE_CBID_cuMemcpyHtoA_v2: | case CUPTI_DRIVER_TRACE_CBID_cuMemcpyHtoA_v2: | ||||
| case CUPTI_DRIVER_TRACE_CBID_cuMemcpyAsync: | |||||
| case CUPTI_DRIVER_TRACE_CBID_cuMemcpyHtoDAsync_v2: | |||||
| case CUPTI_DRIVER_TRACE_CBID_cuMemcpyDtoHAsync_v2: | |||||
| case CUPTI_DRIVER_TRACE_CBID_cuMemcpyDtoDAsync_v2: | |||||
| case CUPTI_DRIVER_TRACE_CBID_cuMemcpyAtoHAsync_v2: | |||||
| case CUPTI_DRIVER_TRACE_CBID_cuMemcpy2DAsync_v2: | |||||
| case CUPTI_DRIVER_TRACE_CBID_cuMemcpy3DAsync_v2: | |||||
| case CUPTI_DRIVER_TRACE_CBID_cuMemcpyHtoAAsync_v2: | |||||
| case CUPTI_DRIVER_TRACE_CBID_cuMemcpyPeer: | case CUPTI_DRIVER_TRACE_CBID_cuMemcpyPeer: | ||||
| case CUPTI_DRIVER_TRACE_CBID_cuMemcpyPeerAsync: | |||||
| gpu_profiler_inst->EventHandleProcess(cb_id, cb_data, "cuMemcpy", start_timestamp, end_timestamp); | |||||
| return true; | |||||
| } | |||||
| return false; | |||||
| } | |||||
| void CUPTIApiExit(const std::shared_ptr<GPUProfiler> &gpu_profiler_inst, CUpti_CallbackId cb_id, | |||||
| const CUpti_CallbackData *cb_data) { | |||||
| uint64_t start_timestamp = *cb_data->correlationData; | |||||
| uint64_t end_timestamp = GetCUPTITimeStamp(); | |||||
| switch (cb_id) { | |||||
| case CUPTI_DRIVER_TRACE_CBID_cuLaunchKernel: | |||||
| case CUPTI_DRIVER_TRACE_CBID_cuLaunchCooperativeKernel: | |||||
| case CUPTI_DRIVER_TRACE_CBID_cuLaunchCooperativeKernelMultiDevice: | |||||
| gpu_profiler_inst->EventHandleProcess(cb_id, cb_data, "cuLaunchKernel", start_timestamp, end_timestamp); | |||||
| break; | break; | ||||
| case CUPTI_DRIVER_TRACE_CBID_cuMemAlloc: | case CUPTI_DRIVER_TRACE_CBID_cuMemAlloc: | ||||
| case CUPTI_DRIVER_TRACE_CBID_cuMemAlloc_v2: | case CUPTI_DRIVER_TRACE_CBID_cuMemAlloc_v2: | ||||
| @@ -151,6 +172,9 @@ void CUPTIApiExit(const std::shared_ptr<GPUProfiler> &gpu_profiler_inst, CUpti_C | |||||
| gpu_profiler_inst->EventHandleProcess(cb_id, cb_data, "others_api", start_timestamp, end_timestamp); | gpu_profiler_inst->EventHandleProcess(cb_id, cb_data, "others_api", start_timestamp, end_timestamp); | ||||
| break; | break; | ||||
| } | } | ||||
| if (IsMemcpyAsyncEvent(cb_id) || IsMemcpySyncEvent(cb_id)) { | |||||
| gpu_profiler_inst->EventHandleProcess(cb_id, cb_data, "cuMemcpy", start_timestamp, end_timestamp); | |||||
| } | |||||
| } | } | ||||
| void CUPTICallBackFunc(void *user_data, CUpti_CallbackDomain domain, CUpti_CallbackId cb_id, | void CUPTICallBackFunc(void *user_data, CUpti_CallbackDomain domain, CUpti_CallbackId cb_id, | ||||
| @@ -369,6 +393,7 @@ void GPUProfiler::Init(const std::string &profileDataPath = "") { | |||||
| base_time_.gpu_start_time = GetCUPTITimeStamp(); | base_time_.gpu_start_time = GetCUPTITimeStamp(); | ||||
| base_time_.host_start_time = GetHostTimeStamp(); | base_time_.host_start_time = GetHostTimeStamp(); | ||||
| base_time_.host_start_monotonic_raw_time = GetHostMonoTimeStamp(); | |||||
| profile_data_path_ = profileDataPath; | profile_data_path_ = profileDataPath; | ||||
| MS_LOG(INFO) << "GPU start time(ns):" << base_time_.gpu_start_time | MS_LOG(INFO) << "GPU start time(ns):" << base_time_.gpu_start_time | ||||
| @@ -477,7 +502,7 @@ void GPUProfiler::SaveProfileData() { | |||||
| dataSaver.SetStepTraceOpName(step_trace_op_name); | dataSaver.SetStepTraceOpName(step_trace_op_name); | ||||
| dataSaver.ParseOpInfo(op_info_map_); | dataSaver.ParseOpInfo(op_info_map_); | ||||
| dataSaver.ParseEvent(events_); | dataSaver.ParseEvent(events_); | ||||
| dataSaver.WriteFile(profile_data_path_); | |||||
| dataSaver.WriteFile(profile_data_path_, base_time_); | |||||
| SaveExtraProfileData(); | SaveExtraProfileData(); | ||||
| } | } | ||||
| } | } | ||||
| @@ -106,6 +106,7 @@ struct OpInfo { | |||||
| struct BaseTime { | struct BaseTime { | ||||
| // nanosecond | // nanosecond | ||||
| uint64_t host_start_time = 0l; | uint64_t host_start_time = 0l; | ||||
| uint64_t host_start_monotonic_raw_time = 0l; | |||||
| uint64_t gpu_start_time = 0l; | uint64_t gpu_start_time = 0l; | ||||
| }; | }; | ||||
| @@ -14,6 +14,7 @@ | |||||
| * limitations under the License. | * limitations under the License. | ||||
| */ | */ | ||||
| #include "runtime/device/cpu/cpu_kernel_runtime.h" | #include "runtime/device/cpu/cpu_kernel_runtime.h" | ||||
| #include <unistd.h> | |||||
| #include <string> | #include <string> | ||||
| #include <vector> | #include <vector> | ||||
| #include <memory> | #include <memory> | ||||
| @@ -29,6 +30,7 @@ | |||||
| #include "backend/session/anf_runtime_algorithm.h" | #include "backend/session/anf_runtime_algorithm.h" | ||||
| #include "backend/session/session_basic.h" | #include "backend/session/session_basic.h" | ||||
| #include "frontend/operator/ops.h" | #include "frontend/operator/ops.h" | ||||
| #include "profiler/device/cpu/cpu_profiling.h" | |||||
| #include "utils/shape_utils.h" | #include "utils/shape_utils.h" | ||||
| #include "utils/profile.h" | #include "utils/profile.h" | ||||
| #include "utils/trace_base.h" | #include "utils/trace_base.h" | ||||
| @@ -376,6 +378,8 @@ bool CPUKernelRuntime::Run(session::KernelGraph *kernel_graph, bool is_task_sink | |||||
| static_cast<CPUMemoryManager *>(mem_manager_.get())->IncreaseAddressRefCount(kernel_graph); | static_cast<CPUMemoryManager *>(mem_manager_.get())->IncreaseAddressRefCount(kernel_graph); | ||||
| auto kernels = kernel_graph->execution_order(); | auto kernels = kernel_graph->execution_order(); | ||||
| auto profiler_inst = profiler::cpu::CPUProfiler::GetInstance(); | |||||
| MS_EXCEPTION_IF_NULL(profiler_inst); | |||||
| for (const auto &kernel : kernels) { | for (const auto &kernel : kernels) { | ||||
| #ifdef ENABLE_PROFILE | #ifdef ENABLE_PROFILE | ||||
| double start_time = GetTime(); | double start_time = GetTime(); | ||||
| @@ -406,11 +410,18 @@ bool CPUKernelRuntime::Run(session::KernelGraph *kernel_graph, bool is_task_sink | |||||
| AddRuntimeAddress(device_address, &kernel_workspaces); | AddRuntimeAddress(device_address, &kernel_workspaces); | ||||
| } | } | ||||
| bool ret = true; | bool ret = true; | ||||
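| // wrap the kernel launch with the CPU profiler so op name, pid and elapsed time are recorded when profiling is enabled | |||||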
| if (profiler_inst->GetEnableFlag()) { | |||||
| uint32_t pid = getpid(); | |||||
| profiler_inst->OpDataProducerBegin(kernel->fullname_with_scope(), pid); | |||||
| } | |||||
| try { | try { | ||||
| ret = kernel_mod->Launch(kernel_inputs, kernel_workspaces, kernel_outputs, 0); | ret = kernel_mod->Launch(kernel_inputs, kernel_workspaces, kernel_outputs, 0); | ||||
| } catch (std::exception &e) { | } catch (std::exception &e) { | ||||
| MS_LOG(EXCEPTION) << e.what() << "\nTrace:" << trace::DumpSourceLines(kernel); | MS_LOG(EXCEPTION) << e.what() << "\nTrace:" << trace::DumpSourceLines(kernel); | ||||
| } | } | ||||
| if (profiler_inst->GetEnableFlag()) { | |||||
| profiler_inst->OpDataProducerEnd(); | |||||
| } | |||||
| if (!ret) { | if (!ret) { | ||||
| MS_LOG(EXCEPTION) << "Launch kernel failed. Trace:" << trace::DumpSourceLines(kernel); | MS_LOG(EXCEPTION) << "Launch kernel failed. Trace:" << trace::DumpSourceLines(kernel); | ||||
| } | } | ||||
| @@ -26,6 +26,7 @@ | |||||
| #include "backend/session/session_basic.h" | #include "backend/session/session_basic.h" | ||||
| #include "backend/session/anf_runtime_algorithm.h" | #include "backend/session/anf_runtime_algorithm.h" | ||||
| #include "utils/any.h" | #include "utils/any.h" | ||||
| #include "profiler/device/cpu/cpu_profiling.h" | |||||
| namespace mindspore { | namespace mindspore { | ||||
| namespace device { | namespace device { | ||||
| namespace cpu { | namespace cpu { | ||||
| @@ -1,4 +1,4 @@ | |||||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||||
| # Copyright 2020-2021 Huawei Technologies Co., Ltd | |||||
| # | # | ||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | # Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| # you may not use this file except in compliance with the License. | # you may not use this file except in compliance with the License. | ||||
| @@ -666,7 +666,7 @@ class GpuTimelineGenerator(BaseTimelineGenerator): | |||||
| ) | ) | ||||
| file_path = validate_and_normalize_path(file_path) | file_path = validate_and_normalize_path(file_path) | ||||
| if not os.path.exists(file_path): | if not os.path.exists(file_path): | ||||
| logger.error("Failed to find parsed timeline file.") | |||||
| logger.error(f"Failed to find parsed timeline file {file_path}.") | |||||
| raise ProfilerFileNotFoundException('parsed timeline file') | raise ProfilerFileNotFoundException('parsed timeline file') | ||||
| return file_path | return file_path | ||||
| @@ -712,10 +712,33 @@ class GpuTimelineGenerator(BaseTimelineGenerator): | |||||
| timeline_list = self._load_op_data(op_file_path) + \ | timeline_list = self._load_op_data(op_file_path) + \ | ||||
| self._load_activity_data(activity_file_path, activity_args_file_path) | self._load_activity_data(activity_file_path, activity_args_file_path) | ||||
| cpu_timeline_generator = CpuTimelineGenerator(self._profiling_dir, self._device_id) | |||||
| cpu_timeline_list = cpu_timeline_generator.load_cpu_op_data() | |||||
| if cpu_timeline_list: | |||||
| self._clock_synchronize_to_gpu(cpu_timeline_list) | |||||
| timeline_list.extend(cpu_timeline_list) | |||||
| timeline_list.sort(key=lambda x: float(x[2])) | timeline_list.sort(key=lambda x: float(x[2])) | ||||
| return timeline_list | return timeline_list | ||||
| def _clock_synchronize_to_gpu(self, timeline_list): | |||||
| """Synchronize the timestamp from device to host.""" | |||||
| start_time_file_path = os.path.join(self._profiling_dir, f"start_time_{self._device_id}.txt") | |||||
| try: | |||||
| with open(start_time_file_path) as f: | |||||
| lines = f.readlines() | |||||
| host_monotonic_start_time = int(lines[0].strip().split(':')[-1]) | |||||
| gpu_start_time = int(lines[1].strip().split(':')[-1]) | |||||
| except (IOError, OSError) as err: | |||||
| logger.error(f'Error occurred when reading {start_time_file_path}: {err}') | |||||
| raise ProfilerIOException | |||||
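| # CPU op timestamps are host monotonic times (ns); adding the GPU/host start-time difference moves them into the GPU clock domain (column 2 holds the start time) | |||||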
| time_diff = gpu_start_time - host_monotonic_start_time | |||||
| start_time = 2 | |||||
| for idx, time_item in enumerate(timeline_list): | |||||
| timeline_list[idx][start_time] = int(time_item[start_time]) + time_diff | |||||
| def _load_op_data(self, op_file_path): | def _load_op_data(self, op_file_path): | ||||
| """Load operator data from file""" | """Load operator data from file""" | ||||
| op_timeline_list = [] | op_timeline_list = [] | ||||
| @@ -847,7 +870,7 @@ class AscendTimelineGenerator(BaseTimelineGenerator): | |||||
| timeline_dict['pid'] = op_meta.pid | timeline_dict['pid'] = op_meta.pid | ||||
| self._timeline_meta.append(timeline_dict) | self._timeline_meta.append(timeline_dict) | ||||
| def init_timeline(self, all_reduce_info, framework_info, aicpu_info, min_cycle_counter): | |||||
| def init_timeline(self, all_reduce_info, framework_info, aicpu_info, min_cycle_counter, source_path): | |||||
| """ | """ | ||||
| Init timeline metadata, adding all collected info. | Init timeline metadata, adding all collected info. | ||||
| @@ -862,6 +885,12 @@ class AscendTimelineGenerator(BaseTimelineGenerator): | |||||
| logger.info('Initiating timeline...') | logger.info('Initiating timeline...') | ||||
| timeline_list = self._load_timeline_data() | timeline_list = self._load_timeline_data() | ||||
| cpu_timeline_generator = CpuTimelineGenerator(self._profiling_dir, self._device_id) | |||||
| cpu_timeline_list = cpu_timeline_generator.get_timeline_data() | |||||
| if cpu_timeline_list: | |||||
| self._clock_synchronize_to_host(timeline_list, source_path) | |||||
| timeline_list.extend(cpu_timeline_list) | |||||
| timeline_list.sort(key=lambda x: float(x[2])) | |||||
| self._timeline_summary['op_exe_times'] = len(timeline_list) | self._timeline_summary['op_exe_times'] = len(timeline_list) | ||||
| # Add AllReduce info to timeline temp list and sort by start time. | # Add AllReduce info to timeline temp list and sort by start time. | ||||
| @@ -897,3 +926,72 @@ class AscendTimelineGenerator(BaseTimelineGenerator): | |||||
| # Update timeline summary info | # Update timeline summary info | ||||
| self._timeline_summary['num_of_streams'] += len(stream_count_dict.keys()) | self._timeline_summary['num_of_streams'] += len(stream_count_dict.keys()) | ||||
| def _clock_synchronize_to_host(self, timeline_list, source_path): | |||||
| """Synchronize the timestamp from device to host.""" | |||||
| host_start_file_path = os.path.join(source_path, f"host_start.log.{self._device_id}") | |||||
| dev_start_file_path = os.path.join(source_path, f"dev_start.log.{self._device_id}") | |||||
| try: | |||||
| with open(host_start_file_path) as f: | |||||
| lines = f.readlines() | |||||
| host_monotonic = int(lines[2].strip().split(':')[1]) | |||||
| except (IOError, OSError) as err: | |||||
| logger.error('Error occurred when reading host_start.log: %s', err) | |||||
| raise ProfilerIOException | |||||
| try: | |||||
| with open(dev_start_file_path) as f: | |||||
| lines = f.readlines() | |||||
| dev_cntvct = int(lines[2].strip().split(':')[1]) | |||||
| except (IOError, OSError) as err: | |||||
| logger.error('Error occurred when reading dev_start.log: %s', err) | |||||
| raise ProfilerIOException | |||||
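| # assumes the device cycle counter ticks every 10 ns (100 MHz): op times in ms are converted to that resolution, offset against dev_cntvct and mapped onto the host monotonic clock | |||||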
| factor_ns_to_ms = 1e6 | |||||
| factor_ms_to_ten_ns = 1e5 | |||||
| factor_ten_ns_to_ns = 10 | |||||
| start_time = 2 | |||||
| for idx, time_item in enumerate(timeline_list): | |||||
| cycle_counter = int(float(time_item[start_time]) * factor_ms_to_ten_ns) | |||||
| host_monotonic_time = host_monotonic + (cycle_counter - dev_cntvct) * factor_ten_ns_to_ns | |||||
| timeline_list[idx][start_time] = host_monotonic_time / factor_ns_to_ms | |||||
| class CpuTimelineGenerator(GpuTimelineGenerator): | |||||
| """Generate gpu Timeline data from file.""" | |||||
| _output_op_execute_time_file_path = "cpu_op_execute_timestamp_{}.txt" | |||||
| def _get_and_validate_path(self, file_name): | |||||
| """Generate op or activity file path from file name, and validate this path.""" | |||||
| file_path = os.path.join( | |||||
| self._profiling_dir, | |||||
| file_name.format(self._device_id) | |||||
| ) | |||||
| file_path = validate_and_normalize_path(file_path) | |||||
| return file_path | |||||
| def load_cpu_op_data(self): | |||||
| """Load cpu operator data from file""" | |||||
| op_file_path = self._get_and_validate_path( | |||||
| self._output_op_execute_time_file_path) | |||||
| timeline_list = [] | |||||
| if not os.path.exists(op_file_path): | |||||
| logger.info("No cpu operator info.") | |||||
| return timeline_list | |||||
| timeline_list = self._load_op_data(op_file_path) | |||||
| return timeline_list | |||||
| def get_timeline_data(self): | |||||
| """Get timeline data from file.""" | |||||
| timeline_list = self.load_cpu_op_data() | |||||
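| # raw CPU op start times are in ns and durations in us; both are converted to ms for the timeline | |||||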
| factor_ns_to_ms = 1e6 | |||||
| factor_us_to_ms = 1e3 | |||||
| start_time = 2 | |||||
| duration = 3 | |||||
| for idx, time_item in enumerate(timeline_list): | |||||
| time_item[start_time] = float(time_item[start_time]) / factor_ns_to_ms | |||||
| time_item[duration] = float(time_item[duration]) / factor_us_to_ms | |||||
| timeline_list[idx] = time_item | |||||
| return timeline_list | |||||
| @@ -96,6 +96,11 @@ class Profiler: | |||||
| os.environ['PROFILING_MODE'] = 'true' | os.environ['PROFILING_MODE'] = 'true' | ||||
| os.environ['MINDDATA_PROFILING_DIR'] = self._output_path | os.environ['MINDDATA_PROFILING_DIR'] = self._output_path | ||||
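| # CPU profiling is started whenever a device target is set; the GPU/Ascend specific profilers are initialized below | |||||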
| if self._device_target: | |||||
| from mindspore._c_expression import CPUProfiler | |||||
| self._cpu_profiler = CPUProfiler.get_instance() | |||||
| self._cpu_profiler.init(self._output_path) | |||||
| self._cpu_profiler.step_profiling_enable(True) | |||||
| if self._device_target and self._device_target == "GPU": | if self._device_target and self._device_target == "GPU": | ||||
| from mindspore._c_expression import GPUProfiler | from mindspore._c_expression import GPUProfiler | ||||
| self._gpu_profiler = GPUProfiler.get_instance() | self._gpu_profiler = GPUProfiler.get_instance() | ||||
| @@ -172,6 +177,7 @@ class Profiler: | |||||
| >>> model.train() | >>> model.train() | ||||
| >>> profiler.analyse() | >>> profiler.analyse() | ||||
| """ | """ | ||||
| self._cpu_profiler.stop() | |||||
| if self._device_target and self._device_target == "GPU": | if self._device_target and self._device_target == "GPU": | ||||
| self._gpu_analyse() | self._gpu_analyse() | ||||
| @@ -244,7 +250,7 @@ class Profiler: | |||||
| # analyse timeline info | # analyse timeline info | ||||
| try: | try: | ||||
| self._analyse_timeline(aicpu_data_parser, optime_parser) | |||||
| self._analyse_timeline(aicpu_data_parser, optime_parser, source_path) | |||||
| except (ProfilerIOException, ProfilerFileNotFoundException, RuntimeError) as err: | except (ProfilerIOException, ProfilerFileNotFoundException, RuntimeError) as err: | ||||
| logger.warning('Fail to write timeline data: %s', err) | logger.warning('Fail to write timeline data: %s', err) | ||||
| @@ -336,7 +342,7 @@ class Profiler: | |||||
| return point_info | return point_info | ||||
| def _analyse_timeline(self, aicpu_parser, optime_parser): | |||||
| def _analyse_timeline(self, aicpu_parser, optime_parser, source_path): | |||||
| """ | """ | ||||
| Analyse and parse timeline info. | Analyse and parse timeline info. | ||||
| @@ -368,7 +374,8 @@ class Profiler: | |||||
| # Add info into timeline, such as AI CPU, AllReduce, framework info. | # Add info into timeline, such as AI CPU, AllReduce, framework info. | ||||
| aicpu_info = aicpu_parser.query_aicpu_data() | aicpu_info = aicpu_parser.query_aicpu_data() | ||||
| min_cycle_counter = min(aicpu_parser.min_cycle_counter, optime_parser.min_cycle_counter) | min_cycle_counter = min(aicpu_parser.min_cycle_counter, optime_parser.min_cycle_counter) | ||||
| timeline_analyser.init_timeline(all_reduce_info, framework_info, aicpu_info, min_cycle_counter) | |||||
| timeline_analyser.init_timeline(all_reduce_info, framework_info, aicpu_info, | |||||
| min_cycle_counter, source_path) | |||||
| size_limit = 20 * 1024 * 1024 # 20MB | size_limit = 20 * 1024 * 1024 # 20MB | ||||
| timeline_analyser.write_timeline(size_limit) | timeline_analyser.write_timeline(size_limit) | ||||
| timeline_analyser.write_timeline_summary() | timeline_analyser.write_timeline_summary() | ||||