| @@ -126,7 +126,7 @@ CUptiResult CuptiGetStreamId(CUcontext context, CUstream stream, uint32_t *strea | |||
| } | |||
| CUptiResult CuptiGetDeviceId(CUcontext context, uint32_t *deviceId) { | |||
| static auto func_ptr = reinterpret_cast<CuptiGetDeviceIdFunc>(GetCUPTIFunc("cuptiSubscribe")); | |||
| static auto func_ptr = reinterpret_cast<CuptiGetDeviceIdFunc>(GetCUPTIFunc("cuptiGetDeviceId")); | |||
| return func_ptr(context, deviceId); | |||
| } | |||
| } // namespace gpu | |||
| @@ -0,0 +1,223 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "profiler/device/gpu/data_saver.h" | |||
| #include <fstream> | |||
| #include <numeric> | |||
| #include "utils/log_adapter.h" | |||
| namespace mindspore { | |||
| namespace profiler { | |||
| namespace gpu { | |||
| OpDetailInfo::OpDetailInfo(std::shared_ptr<OpInfo> op_info, float proportion) | |||
| : op_info_(op_info), proportion_(proportion) { | |||
| // op_full_name is like 'xxx/xxx/{op_type}-op{node_id}' | |||
| op_full_name_ = op_info->op_name; | |||
| auto op_type_begin_iter = op_full_name_.rfind('/') + 1; | |||
| auto op_type_end_iter = op_full_name_.rfind('-'); | |||
| op_type_ = op_full_name_.substr(op_type_begin_iter, op_type_end_iter - op_type_begin_iter); | |||
| op_name_ = op_full_name_.substr(op_type_begin_iter); | |||
| op_avg_time_ = op_info->op_host_cost_time / op_info->op_count; | |||
| } | |||
| ActivityData::ActivityData(std::shared_ptr<Event> data) : basic_info_(data) { | |||
| grid_dim_ = basic_info_->activity_type == ActivityType::kKernel | |||
| ? "\"" + std::to_string(basic_info_->kernel_info.grid_x) + ',' + | |||
| std::to_string(basic_info_->kernel_info.grid_y) + ',' + | |||
| std::to_string(basic_info_->kernel_info.grid_z) + "\"" | |||
| : ""; | |||
| block_dim_ = basic_info_->activity_type == ActivityType::kKernel | |||
| ? "\"" + std::to_string(basic_info_->kernel_info.block_x) + ',' + | |||
| std::to_string(basic_info_->kernel_info.block_y) + ',' + | |||
| std::to_string(basic_info_->kernel_info.block_z) + "\"" | |||
| : ""; | |||
| count_ = 1; | |||
| total_duration_ = (basic_info_->end_time_stamp - basic_info_->start_time_stamp) / kTimeUnit; | |||
| avg_duration_ = total_duration_; | |||
| max_duration_ = total_duration_; | |||
| min_duration_ = total_duration_; | |||
| } | |||
| ActivityData &ActivityData::operator+=(const ActivityData &other) { | |||
| this->count_ += other.count_; | |||
| this->total_duration_ += other.total_duration_; | |||
| // update max or min duration | |||
| if (other.total_duration_ > this->max_duration_) { | |||
| this->max_duration_ = other.total_duration_; | |||
| } else if (other.max_duration_ < this->min_duration_) { | |||
| this->min_duration_ = other.total_duration_; | |||
| } | |||
| return *this; | |||
| } | |||
| void DataSaver::ParseOpInfo(const OpInfoMap &op_info_maps) { | |||
| op_detail_infos_.reserve(op_info_maps.size()); | |||
| float total_time_sum = GetTotalOpTime(op_info_maps); | |||
| for (auto item : op_info_maps) { | |||
| float proportion = item.second.op_host_cost_time / total_time_sum; | |||
| auto op_info = std::make_shared<OpInfo>(item.second); | |||
| OpDetailInfo op_detail_info = OpDetailInfo(op_info, proportion); | |||
| op_detail_infos_.emplace_back(op_detail_info); | |||
| AddOpDetailInfoForType(op_detail_info); | |||
| } | |||
| // update average time of op type | |||
| for (auto &op_type : op_type_infos_) { | |||
| // device_infos: <type_name, op_type_info> | |||
| op_type.second.avg_time_ = op_type.second.total_time_ / op_type.second.count_; | |||
| } | |||
| MS_LOG(DEBUG) << "Get " << op_detail_infos_.size() << " operation items."; | |||
| MS_LOG(DEBUG) << "Get " << op_type_infos_.size() << " operation type items."; | |||
| } | |||
| void DataSaver::AddOpDetailInfoForType(const OpDetailInfo &op_detail_info) { | |||
| // Construct OpType object according to op detail info | |||
| OpType op_type = OpType{op_detail_info.op_type_, op_detail_info.op_info_->op_count, | |||
| op_detail_info.op_info_->op_host_cost_time, 0, op_detail_info.proportion_}; | |||
| // Set the OpType into op_type_infos_ map | |||
| std::string type_name = op_detail_info.op_type_; | |||
| auto iter = op_type_infos_.find(type_name); | |||
| if (iter == op_type_infos_.end()) { | |||
| op_type_infos_.emplace(type_name, op_type); | |||
| } else { | |||
| iter->second += op_type; | |||
| } | |||
| } | |||
| float DataSaver::GetTotalOpTime(const OpInfoMap &op_info_maps) { | |||
| float sum = 0; | |||
| sum = std::accumulate(op_info_maps.begin(), op_info_maps.end(), sum, | |||
| [](float i, auto iter) { return i + iter.second.op_host_cost_time; }); | |||
| MS_LOG(DEBUG) << "The total op time is " << sum; | |||
| return sum; | |||
| } | |||
| void DataSaver::ParseEvent(const std::vector<Event> &events) { | |||
| // Put Kernel activity events into activity_infos_ | |||
| for (const auto &event : events) { | |||
| if (event.op_name.empty() || event.api_type != CUPTIApiType::kActivity || | |||
| event.activity_type != ActivityType::kKernel) { | |||
| continue; | |||
| } | |||
| AddKernelEvent(event); | |||
| } | |||
| // update average time of kernel op cost | |||
| for (auto &device_infos : activity_infos_) { | |||
| // device_infos: <device_id, DeviceActivityInfos> | |||
| for (auto &activity_info : device_infos.second) { | |||
| // activity_info: <kernel_name, Activity> | |||
| activity_info.second.avg_duration_ = activity_info.second.total_duration_ / activity_info.second.count_; | |||
| } | |||
| MS_LOG(DEBUG) << "Get " << device_infos.second.size() << " activity items for device:" << device_infos.first; | |||
| } | |||
| } | |||
| void DataSaver::AddKernelEvent(const Event &event) { | |||
| // Put kernel event to activity_infos according to device id | |||
| uint32_t device_id = event.device_id; | |||
| auto iter = activity_infos_.find(device_id); | |||
| if (iter == activity_infos_.end()) { | |||
| auto res_flag = activity_infos_.emplace(device_id, DeviceActivityInfos()); | |||
| AddKernelEventToDevice(event, &res_flag.first->second); | |||
| } else { | |||
| AddKernelEventToDevice(event, &iter->second); | |||
| } | |||
| } | |||
| void DataSaver::AddKernelEventToDevice(const Event &event, DeviceActivityInfos *device_activity_infos) { | |||
| // Combine kernel activity with same kernel name | |||
| auto event_ptr = std::make_shared<Event>(event); | |||
| ActivityData activity_data = ActivityData(event_ptr); | |||
| std::string kernel_name = event.kernel_name; | |||
| auto iter = device_activity_infos->find(kernel_name); | |||
| if (iter == device_activity_infos->end()) { | |||
| device_activity_infos->emplace(kernel_name, activity_data); | |||
| } else { | |||
| iter->second += activity_data; | |||
| } | |||
| } | |||
| void DataSaver::WriteFile(std::string out_path_dir) { | |||
| if (out_path_dir.empty()) { | |||
| MS_LOG(WARNING) << "Output directory. Ignore the writing data."; | |||
| return; | |||
| } | |||
| if (op_detail_infos_.empty() || op_type_infos_.empty() || activity_infos_.empty()) { | |||
| MS_LOG(WARNING) << "No operation detail infos to write."; | |||
| return; | |||
| } | |||
| // not support multi-device for operator info per process yet | |||
| device_id_ = std::to_string(activity_infos_.begin()->first); | |||
| WriteOpDetail(out_path_dir); | |||
| WriteOpType(out_path_dir); | |||
| WriteActivity(out_path_dir); | |||
| } | |||
| void DataSaver::WriteOpType(const std::string &saver_base_dir) { | |||
| std::string file_path = saver_base_dir + "/gpu_op_type_info_" + device_id_ + ".csv"; | |||
| std::ofstream ofs(file_path); | |||
| // check if the file is writable | |||
| if (!ofs.is_open()) { | |||
| MS_LOG(WARNING) << "Open file '" << file_path << "' failed!"; | |||
| return; | |||
| } | |||
| // write op type info into file | |||
| ofs << OpType().GetHeader() << std::endl; | |||
| for (auto op_type_info : op_type_infos_) { | |||
| ofs << op_type_info.second << std::endl; | |||
| } | |||
| ofs.close(); | |||
| MS_LOG(INFO) << "Write " << op_type_infos_.size() << " op type infos into file: " << file_path; | |||
| } | |||
| void DataSaver::WriteOpDetail(const std::string &saver_base_dir) { | |||
| std::string file_path = saver_base_dir + "/gpu_op_detail_info_" + device_id_ + ".csv"; | |||
| std::ofstream ofs(file_path); | |||
| if (!ofs.is_open()) { | |||
| MS_LOG(WARNING) << "Open file '" << file_path << "' failed!"; | |||
| return; | |||
| } | |||
| // write op detail info into file | |||
| ofs << OpDetailInfo().GetHeader() << std::endl; | |||
| for (auto op_detail : op_detail_infos_) { | |||
| ofs << op_detail << std::endl; | |||
| } | |||
| ofs.close(); | |||
| MS_LOG(INFO) << "Write " << op_detail_infos_.size() << " op detail infos into file: " << file_path; | |||
| } | |||
| void DataSaver::WriteActivity(const std::string &saver_base_dir) { | |||
| std::string file_path_base = saver_base_dir + "/gpu_activity_data_"; | |||
| for (auto device_info : activity_infos_) { | |||
| std::string file_path = file_path_base + std::to_string(device_info.first) + ".csv"; | |||
| std::ofstream ofs(file_path); | |||
| if (!ofs.is_open()) { | |||
| MS_LOG(WARNING) << "Open file '" << file_path << "' failed!"; | |||
| return; | |||
| } | |||
| // write activity data into file | |||
| ofs << ActivityData().GetHeader() << std::endl; | |||
| for (auto activity_data : device_info.second) { | |||
| ofs << activity_data.second << std::endl; | |||
| } | |||
| ofs.close(); | |||
| MS_LOG(INFO) << "Write " << device_info.second.size() << " activity infos into file: " << file_path; | |||
| } | |||
| } | |||
| } // namespace gpu | |||
| } // namespace profiler | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,153 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_DATA_SAVER_H | |||
| #define MINDSPORE_DATA_SAVER_H | |||
| #include <iostream> | |||
| #include <unordered_map> | |||
| #include <vector> | |||
| #include <string> | |||
| #include <memory> | |||
| #include "profiler/device/gpu/gpu_profiling.h" | |||
| namespace mindspore { | |||
| namespace profiler { | |||
| namespace gpu { | |||
| struct OpDetailInfo { | |||
| std::string op_type_; | |||
| std::string op_name_; | |||
| std::string op_full_name_; | |||
| std::shared_ptr<OpInfo> op_info_{nullptr}; | |||
| float op_avg_time_{0}; | |||
| float proportion_{0}; | |||
| OpDetailInfo() = default; | |||
| OpDetailInfo(std::shared_ptr<OpInfo> op_info, float proportion); | |||
| std::string GetHeader() const { | |||
| return "op_side,op_type,op_name,op_full_name,op_occurrences,op_total_time(us),op_avg_time(us),total_proportion," | |||
| "cuda_activity_cost_time(us),cuda_activity_call_count"; | |||
| } | |||
| friend std::ostream &operator<<(std::ostream &os, const OpDetailInfo &event) { | |||
| os << "Device," << event.op_type_ << ',' << event.op_name_ << ',' << event.op_full_name_ << ',' | |||
| << event.op_info_->op_count << ',' << event.op_info_->op_host_cost_time << ',' << event.op_avg_time_ << ',' | |||
| << event.proportion_ << ',' << event.op_info_->cupti_activity_time << ',' << event.op_info_->op_kernel_count; | |||
| return os; | |||
| } | |||
| }; | |||
// One row of the per-operator-type CSV report. Kept as a plain aggregate so it can be brace-initialized.
struct OpType {
  std::string op_type_;  // operator type name
  int count_{0};         // accumulated occurrence count
  float total_time_{0};  // accumulated time
  float avg_time_{0};    // not maintained by operator+=; callers set it after accumulation
  float proportion_{0};  // accumulated proportion

  // Column titles, in the same order operator<< emits the values.
  std::string GetHeader() const { return "op_type,type_occurrences,total_time(us),total_proportion,avg_time(us)"; }

  // Streams the row as comma separated values (note: proportion comes before the average).
  friend std::ostream &operator<<(std::ostream &os, const OpType &event) {
    os << event.op_type_ << ',' << event.count_ << ',' << event.total_time_;
    os << ',' << event.proportion_ << ',' << event.avg_time_;
    return os;
  }

  // Adds the count, total time and proportion of `other`; op_type_ and avg_time_ stay as they are.
  OpType &operator+=(const OpType &other) {
    count_ += other.count_;
    total_time_ += other.total_time_;
    proportion_ += other.proportion_;
    return *this;
  }
};
| struct ActivityData { | |||
| std::shared_ptr<Event> basic_info_{nullptr}; | |||
| std::string block_dim_; | |||
| std::string grid_dim_; | |||
| int count_{0}; | |||
| float total_duration_{0}; | |||
| float avg_duration_{0}; | |||
| float max_duration_{0}; | |||
| float min_duration_{0}; | |||
| ActivityData() = default; | |||
| explicit ActivityData(std::shared_ptr<Event> data); | |||
| std::string GetHeader() const { | |||
| return "name,type,op_full_name,stream_id,block_dim,grid_dim,occurrences," | |||
| "total_duration(us),avg_duration(us),max_duration(us),min_duration(us)"; | |||
| } | |||
| friend std::ostream &operator<<(std::ostream &os, const ActivityData &event) { | |||
| os << "\"" << event.basic_info_->kernel_name << "\"," << event.basic_info_->kernel_type << ',' | |||
| << event.basic_info_->op_name << ',' << event.basic_info_->stream_id << ',' << event.block_dim_ << ',' | |||
| << event.grid_dim_ << ',' << event.count_ << ',' << event.total_duration_ << ',' << event.avg_duration_ << ',' | |||
| << event.max_duration_ << ',' << event.min_duration_; | |||
| return os; | |||
| } | |||
| ActivityData &operator+=(const ActivityData &other); | |||
| }; | |||
| using OpInfoMap = std::unordered_map<std::string, OpInfo>; | |||
| using DeviceActivityInfos = std::unordered_map<std::string, ActivityData>; // <kernel_name, ActivityData> | |||
| using AllActivityInfos = std::unordered_map<uint32_t, DeviceActivityInfos>; // <device_id, DeviceActivityInfos> | |||
| using OpTypeInfos = std::unordered_map<std::string, OpType>; // <op_type, OpType> | |||
| using OpDetailInfos = std::vector<OpDetailInfo>; | |||
| class DataSaver { | |||
| public: | |||
| DataSaver() = default; | |||
| ~DataSaver() = default; | |||
| DataSaver(const DataSaver &) = delete; | |||
| DataSaver &operator=(const DataSaver &) = delete; | |||
| void ParseOpInfo(const OpInfoMap &op_info_maps); | |||
| void ParseEvent(const std::vector<Event> &events); | |||
| void WriteFile(std::string out_path); | |||
| private: | |||
| void AddOpDetailInfoForType(const OpDetailInfo &op_detail_info); | |||
| float GetTotalOpTime(const OpInfoMap &op_info_maps); | |||
| void AddKernelEvent(const Event &event); | |||
| void AddKernelEventToDevice(const Event &event, DeviceActivityInfos *device_activity_infos); | |||
| void WriteOpType(const std::string &saver_base_dir); | |||
| void WriteOpDetail(const std::string &saver_base_dir); | |||
| void WriteActivity(const std::string &saver_base_dir); | |||
| std::string device_id_; | |||
| AllActivityInfos activity_infos_; | |||
| OpTypeInfos op_type_infos_; | |||
| OpDetailInfos op_detail_infos_; | |||
| }; | |||
| } // namespace gpu | |||
| } // namespace profiler | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_DATA_SAVER_H | |||
| @@ -19,6 +19,7 @@ | |||
| #include <chrono> | |||
| #include "profiler/device/gpu/gpu_profiling.h" | |||
| #include "profiler/device/gpu/cupti_interface.h" | |||
| #include "profiler/device/gpu/data_saver.h" | |||
| #include "utils/log_adapter.h" | |||
| #include "pybind_api/api_register.h" | |||
| @@ -478,7 +479,11 @@ void GPUProfiler::Stop() { | |||
| void GPUProfiler::SaveProfileData() { | |||
| if (profile_data_path_.empty()) { | |||
| MS_LOG(WARNING) << "profile_data_path is empty, skip save profile data."; | |||
| return; | |||
| } else { | |||
| DataSaver dataSaver; | |||
| dataSaver.ParseOpInfo(op_info_map_); | |||
| dataSaver.ParseEvent(events_); | |||
| dataSaver.WriteFile(profile_data_path_); | |||
| } | |||
| op_info_map_.clear(); | |||
| op_name_map_.clear(); | |||
| @@ -43,17 +43,21 @@ class MinddataParser: | |||
| node_name, node_start, node_end, queue_size = "", 0, 0, 0 | |||
| if node_info: | |||
| node_name = node_info[0].replace("Node:", "") | |||
| if len(node_info) > 2: | |||
| if len(node_info) > 3 and "queue" in node_info[1]: | |||
| queue_size = node_info[1].replace("queue size:", "") | |||
| queue_size = int(queue_size) if queue_size.isdigit() else queue_size | |||
| node_start = node_info[2].replace("Run start:", "") | |||
| node_start = int(node_start) if node_start.isdigit() else node_start | |||
| node_end = node_info[3].replace("Run end:", "") | |||
| node_end = int(node_end) if node_end.isdigit() else node_end | |||
| elif len(node_info) > 3 and "Run" in node_info[1]: | |||
| queue_size = node_info[3].replace("queue size:", "") | |||
| queue_size = int(queue_size) if queue_size.isdigit() else queue_size | |||
| node_start = node_info[1].replace("Run start:", "") | |||
| if node_start.isdigit(): | |||
| node_start = int(node_start) | |||
| node_start = int(node_start) if node_start.isdigit() else node_start | |||
| node_end = node_info[2].replace("Run end:", "") | |||
| if node_end.isdigit(): | |||
| node_end = int(node_end) | |||
| if len(node_info) > 3: | |||
| queue_size = node_info[3].replace("queue size:", "") | |||
| if queue_size.isdigit(): | |||
| queue_size = int(queue_size) | |||
| node_end = int(node_end) if node_end.isdigit() else node_end | |||
| one_step_list = [node_name, node_start, node_end, queue_size] | |||
| result.append(one_step_list) | |||
| @@ -79,35 +79,42 @@ class Profiler: | |||
| optypes_to_deal='', optypes_not_deal='Variable', job_id=""): | |||
| # get device_id and device_target | |||
| self._get_devid_and_devtarget() | |||
| self._container_path = os.path.join(self._base_profiling_container_path, self._dev_id) | |||
| data_path = os.path.join(self._container_path, "data") | |||
| if not os.path.exists(data_path): | |||
| os.makedirs(data_path, exist_ok=True) | |||
| self._output_path = validate_and_normalize_path(output_path) | |||
| self._output_path = os.path.join(self._output_path, "profiler") | |||
| if not os.path.exists(self._output_path): | |||
| os.makedirs(self._output_path, exist_ok=True) | |||
| os.environ['PROFILING_MODE'] = 'true' | |||
| os.environ['PROFILING_OPTIONS'] = 'training_trace:task_trace' | |||
| os.environ['MINDDATA_PROFILING_DIR'] = self._output_path | |||
| os.environ['DEVICE_ID'] = self._dev_id | |||
| os.environ['AICPU_PROFILING_MODE'] = 'true' | |||
| os.environ['PROFILING_DIR'] = str(self._container_path) | |||
| # use context interface to open profiling, for the new mindspore version(after 2020.5.21) | |||
| context.set_context(enable_profiling=True, profiling_options="training_trace:task_trace") | |||
| self._subgraph = check_subgraph(subgraph) | |||
| self._valid_optype_name = optypes_to_deal.split(",") if optypes_to_deal else [] | |||
| self._filt_optype_names = optypes_not_deal.split(",") if optypes_not_deal else [] | |||
| self._detail = check_bool(is_detail, 'is_detail') | |||
| self._withfullpath = check_bool(is_show_op_path, 'is_show_op_path') | |||
| self._profiling_job_id = job_id | |||
| # add job id env through user input later | |||
| self._job_id_env = 0 | |||
| self._start_time = int(time.time() * 10000000) | |||
| logger.info("Profiling: profiling start time: %d", self._start_time) | |||
| if self._device_target and self._device_target == "GPU": | |||
| from mindspore._c_expression import GPUProfiler | |||
| self._gpu_profiler = GPUProfiler.get_instance() | |||
| self._gpu_profiler.init(self._output_path) | |||
| self._gpu_profiler.step_profiling_enable(True) | |||
| elif self._device_target and (self._device_target == "Ascend" or self._device_target != "Davinci"): | |||
| self._container_path = os.path.join(self._base_profiling_container_path, self._dev_id) | |||
| data_path = os.path.join(self._container_path, "data") | |||
| if not os.path.exists(data_path): | |||
| os.makedirs(data_path, exist_ok=True) | |||
| os.environ['PROFILING_MODE'] = 'true' | |||
| os.environ['PROFILING_OPTIONS'] = 'training_trace:task_trace' | |||
| os.environ['MINDDATA_PROFILING_DIR'] = self._output_path | |||
| os.environ['DEVICE_ID'] = self._dev_id | |||
| os.environ['AICPU_PROFILING_MODE'] = 'true' | |||
| os.environ['PROFILING_DIR'] = str(self._container_path) | |||
| # use context interface to open profiling, for the new mindspore version(after 2020.5.21) | |||
| context.set_context(enable_profiling=True, profiling_options="training_trace:task_trace") | |||
| self._subgraph = check_subgraph(subgraph) | |||
| self._valid_optype_name = optypes_to_deal.split(",") if optypes_to_deal else [] | |||
| self._filt_optype_names = optypes_not_deal.split(",") if optypes_not_deal else [] | |||
| self._detail = check_bool(is_detail, 'is_detail') | |||
| self._withfullpath = check_bool(is_show_op_path, 'is_show_op_path') | |||
| self._profiling_job_id = job_id | |||
| # add job id env through user input later | |||
| self._job_id_env = 0 | |||
| self._start_time = int(time.time() * 10000000) | |||
| logger.info("Profiling: profiling start time: %d", self._start_time) | |||
| def analyse(self): | |||
| """ | |||
| @@ -123,71 +130,74 @@ class Profiler: | |||
| >>> model.train() | |||
| >>> profiler.analyse() | |||
| """ | |||
| release() | |||
| job_id = self._get_profiling_job_id() | |||
| logger.info("Profiling: job id is %s ", job_id) | |||
| source_path = os.path.join(PROFILING_LOG_BASE_PATH, job_id) | |||
| # parse hwts.log.data.45.dev file, and get task profiling data | |||
| hwts_output_filename = self._hwts_output_filename_target + self._dev_id + ".txt" | |||
| hwts_output_filename = os.path.join(self._output_path, hwts_output_filename) | |||
| hwtslog_parser = HWTSLogParser(source_path, hwts_output_filename) | |||
| result = hwtslog_parser.execute() | |||
| if not result: | |||
| logger.error("Profiling: fail to parse hwts log file.") | |||
| return | |||
| # parse Framework file, and get the relation of op and tasks | |||
| framework_parser = FrameworkParser(job_id, self._dev_id, self._output_path) | |||
| framework_parser.parse() | |||
| op_task_dict = framework_parser.to_task_id_full_op_name_dict() | |||
| if not op_task_dict: | |||
| logger.error("Profiling: fail to parse framework files.") | |||
| return | |||
| # get op compute time from hwts data and framework data, write output_op_compute_time.txt | |||
| opcompute_output_filename = self._opcompute_output_filename_target + self._dev_id + ".txt" | |||
| opcompute_output_filename = os.path.join(self._output_path, opcompute_output_filename) | |||
| optime_parser = OPComputeTimeParser( | |||
| hwts_output_filename, opcompute_output_filename, | |||
| op_task_dict, self._output_path, self._dev_id | |||
| ) | |||
| optime_parser.execute() | |||
| # parse DATA_PREPROCESS.dev.AICPU file, write output_data_preprocess_aicpu_x.txt | |||
| output_data_preprocess_aicpu = self._aicpu_op_output_filename_target + self._dev_id + ".txt" | |||
| output_data_preprocess_aicpu = os.path.join(self._output_path, output_data_preprocess_aicpu) | |||
| aicpu_data_parser = DataPreProcessParser(source_path, output_data_preprocess_aicpu) | |||
| aicpu_data_parser.execute() | |||
| # Parsing minddata AICPU profiling | |||
| MinddataParser.execute(source_path, self._output_path, self._dev_id) | |||
| # parse minddata pipeline operator and queue | |||
| try: | |||
| pipeline_parser = MinddataPipelineParser(self._output_path, self._dev_id, self._output_path) | |||
| pipeline_parser.parse() | |||
| except ProfilerException as err: | |||
| logger.warning(err.message) | |||
| # analyse op compute time info | |||
| try: | |||
| self._analyser_op_info() | |||
| except ProfilerException as err: | |||
| logger.warning(err.message) | |||
| # analyse step trace info | |||
| try: | |||
| self._analyse_step_trace(source_path, framework_parser) | |||
| except ProfilerException as err: | |||
| logger.warning(err.message) | |||
| # analyse timeline info | |||
| try: | |||
| self._analyse_timeline(aicpu_data_parser, optime_parser) | |||
| except (ProfilerIOException, ProfilerFileNotFoundException, RuntimeError) as err: | |||
| logger.warning('Fail to write timeline data: %s', err) | |||
| if self._device_target and self._device_target == "GPU": | |||
| self._gpu_profiler.stop() | |||
| elif self._device_target and (self._device_target == "Ascend" or self._device_target != "Davinci"): | |||
| release() | |||
| job_id = self._get_profiling_job_id() | |||
| logger.info("Profiling: job id is %s ", job_id) | |||
| source_path = os.path.join(PROFILING_LOG_BASE_PATH, job_id) | |||
| # parse hwts.log.data.45.dev file, and get task profiling data | |||
| hwts_output_filename = self._hwts_output_filename_target + self._dev_id + ".txt" | |||
| hwts_output_filename = os.path.join(self._output_path, hwts_output_filename) | |||
| hwtslog_parser = HWTSLogParser(source_path, hwts_output_filename) | |||
| result = hwtslog_parser.execute() | |||
| if not result: | |||
| logger.error("Profiling: fail to parse hwts log file.") | |||
| return | |||
| # parse Framework file, and get the relation of op and tasks | |||
| framework_parser = FrameworkParser(job_id, self._dev_id, self._output_path) | |||
| framework_parser.parse() | |||
| op_task_dict = framework_parser.to_task_id_full_op_name_dict() | |||
| if not op_task_dict: | |||
| logger.error("Profiling: fail to parse framework files.") | |||
| return | |||
| # get op compute time from hwts data and framework data, write output_op_compute_time.txt | |||
| opcompute_output_filename = self._opcompute_output_filename_target + self._dev_id + ".txt" | |||
| opcompute_output_filename = os.path.join(self._output_path, opcompute_output_filename) | |||
| optime_parser = OPComputeTimeParser( | |||
| hwts_output_filename, opcompute_output_filename, | |||
| op_task_dict, self._output_path, self._dev_id | |||
| ) | |||
| optime_parser.execute() | |||
| # parse DATA_PREPROCESS.dev.AICPU file, write output_data_preprocess_aicpu_x.txt | |||
| output_data_preprocess_aicpu = self._aicpu_op_output_filename_target + self._dev_id + ".txt" | |||
| output_data_preprocess_aicpu = os.path.join(self._output_path, output_data_preprocess_aicpu) | |||
| aicpu_data_parser = DataPreProcessParser(source_path, output_data_preprocess_aicpu) | |||
| aicpu_data_parser.execute() | |||
| # Parsing minddata AICPU profiling | |||
| MinddataParser.execute(source_path, self._output_path, self._dev_id) | |||
| # parse minddata pipeline operator and queue | |||
| try: | |||
| pipeline_parser = MinddataPipelineParser(self._output_path, self._dev_id, self._output_path) | |||
| pipeline_parser.parse() | |||
| except ProfilerException as err: | |||
| logger.warning(err.message) | |||
| # analyse op compute time info | |||
| try: | |||
| self._analyser_op_info() | |||
| except ProfilerException as err: | |||
| logger.warning(err.message) | |||
| # analyse step trace info | |||
| try: | |||
| self._analyse_step_trace(source_path, framework_parser) | |||
| except ProfilerException as err: | |||
| logger.warning(err.message) | |||
| # analyse timeline info | |||
| try: | |||
| self._analyse_timeline(aicpu_data_parser, optime_parser) | |||
| except (ProfilerIOException, ProfilerFileNotFoundException, RuntimeError) as err: | |||
| logger.warning('Fail to write timeline data: %s', err) | |||
| def _analyse_step_trace(self, source_path, framework_parser): | |||
| """ | |||
| @@ -416,12 +426,12 @@ class Profiler: | |||
| dev_id = "0" | |||
| logger.error("Fail to get DEVICE_ID, use 0 instead.") | |||
| if device_target and device_target != "Davinci" \ | |||
| and device_target != "Ascend": | |||
| if device_target and device_target not in ["Davinci", "Ascend", "GPU"]: | |||
| msg = "Profiling: unsupport backend: %s" % device_target | |||
| raise RuntimeError(msg) | |||
| self._dev_id = dev_id | |||
| self._device_target = device_target | |||
| @staticmethod | |||
| def trainable_parameters(network): | |||