diff --git a/mindspore/ccsrc/profiler/device/gpu/cupti_interface.cc b/mindspore/ccsrc/profiler/device/gpu/cupti_interface.cc index b7d60fd958..2958ba243e 100644 --- a/mindspore/ccsrc/profiler/device/gpu/cupti_interface.cc +++ b/mindspore/ccsrc/profiler/device/gpu/cupti_interface.cc @@ -126,7 +126,7 @@ CUptiResult CuptiGetStreamId(CUcontext context, CUstream stream, uint32_t *strea } CUptiResult CuptiGetDeviceId(CUcontext context, uint32_t *deviceId) { - static auto func_ptr = reinterpret_cast(GetCUPTIFunc("cuptiSubscribe")); + static auto func_ptr = reinterpret_cast(GetCUPTIFunc("cuptiGetDeviceId")); return func_ptr(context, deviceId); } } // namespace gpu diff --git a/mindspore/ccsrc/profiler/device/gpu/data_saver.cc b/mindspore/ccsrc/profiler/device/gpu/data_saver.cc new file mode 100644 index 0000000000..d048d03a92 --- /dev/null +++ b/mindspore/ccsrc/profiler/device/gpu/data_saver.cc @@ -0,0 +1,223 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "profiler/device/gpu/data_saver.h" +#include +#include +#include "utils/log_adapter.h" + +namespace mindspore { +namespace profiler { +namespace gpu { + +OpDetailInfo::OpDetailInfo(std::shared_ptr op_info, float proportion) + : op_info_(op_info), proportion_(proportion) { + // op_full_name is like 'xxx/xxx/{op_type}-op{node_id}' + op_full_name_ = op_info->op_name; + auto op_type_begin_iter = op_full_name_.rfind('/') + 1; + auto op_type_end_iter = op_full_name_.rfind('-'); + op_type_ = op_full_name_.substr(op_type_begin_iter, op_type_end_iter - op_type_begin_iter); + op_name_ = op_full_name_.substr(op_type_begin_iter); + op_avg_time_ = op_info->op_host_cost_time / op_info->op_count; +} + +ActivityData::ActivityData(std::shared_ptr data) : basic_info_(data) { + grid_dim_ = basic_info_->activity_type == ActivityType::kKernel + ? "\"" + std::to_string(basic_info_->kernel_info.grid_x) + ',' + + std::to_string(basic_info_->kernel_info.grid_y) + ',' + + std::to_string(basic_info_->kernel_info.grid_z) + "\"" + : ""; + block_dim_ = basic_info_->activity_type == ActivityType::kKernel + ? 
"\"" + std::to_string(basic_info_->kernel_info.block_x) + ',' + + std::to_string(basic_info_->kernel_info.block_y) + ',' + + std::to_string(basic_info_->kernel_info.block_z) + "\"" + : ""; + count_ = 1; + total_duration_ = (basic_info_->end_time_stamp - basic_info_->start_time_stamp) / kTimeUnit; + avg_duration_ = total_duration_; + max_duration_ = total_duration_; + min_duration_ = total_duration_; +} + +ActivityData &ActivityData::operator+=(const ActivityData &other) { + this->count_ += other.count_; + this->total_duration_ += other.total_duration_; + if (other.max_duration_ > this->max_duration_) { // max and min are merged independently, from other's own extremes + this->max_duration_ = other.max_duration_; + } + if (other.min_duration_ < this->min_duration_) { // other may itself be an aggregate, so its total is not a bound + this->min_duration_ = other.min_duration_; + } + return *this; +} + +void DataSaver::ParseOpInfo(const OpInfoMap &op_info_maps) { + op_detail_infos_.reserve(op_info_maps.size()); + float total_time_sum = GetTotalOpTime(op_info_maps); + for (const auto &item : op_info_maps) { + float proportion = item.second.op_host_cost_time / total_time_sum; + auto op_info = std::make_shared(item.second); + OpDetailInfo op_detail_info = OpDetailInfo(op_info, proportion); + op_detail_infos_.emplace_back(op_detail_info); + AddOpDetailInfoForType(op_detail_info); + } + // update average time of op type + for (auto &op_type : op_type_infos_) { + // op_type: (type name, OpType) pair + op_type.second.avg_time_ = op_type.second.total_time_ / op_type.second.count_; + } + MS_LOG(DEBUG) << "Get " << op_detail_infos_.size() << " operation items."; + MS_LOG(DEBUG) << "Get " << op_type_infos_.size() << " operation type items."; +} + +void DataSaver::AddOpDetailInfoForType(const OpDetailInfo &op_detail_info) { + // Construct OpType object according to op detail info + OpType op_type = OpType{op_detail_info.op_type_, op_detail_info.op_info_->op_count, + op_detail_info.op_info_->op_host_cost_time, 0, op_detail_info.proportion_}; + // Set the OpType into op_type_infos_ map + 
std::string type_name = op_detail_info.op_type_; + auto iter = op_type_infos_.find(type_name); + if (iter == op_type_infos_.end()) { + op_type_infos_.emplace(type_name, op_type); + } else { + iter->second += op_type; + } +} + +float DataSaver::GetTotalOpTime(const OpInfoMap &op_info_maps) { + float sum = 0; + sum = std::accumulate(op_info_maps.begin(), op_info_maps.end(), sum, + [](float i, auto iter) { return i + iter.second.op_host_cost_time; }); + MS_LOG(DEBUG) << "The total op time is " << sum; + return sum; +} + +void DataSaver::ParseEvent(const std::vector &events) { + // Put Kernel activity events into activity_infos_ + for (const auto &event : events) { + if (event.op_name.empty() || event.api_type != CUPTIApiType::kActivity || + event.activity_type != ActivityType::kKernel) { + continue; + } + AddKernelEvent(event); + } + // update average time of kernel op cost + for (auto &device_infos : activity_infos_) { + // device_infos: + for (auto &activity_info : device_infos.second) { + // activity_info: + activity_info.second.avg_duration_ = activity_info.second.total_duration_ / activity_info.second.count_; + } + MS_LOG(DEBUG) << "Get " << device_infos.second.size() << " activity items for device:" << device_infos.first; + } +} + +void DataSaver::AddKernelEvent(const Event &event) { + // Put kernel event to activity_infos according to device id + uint32_t device_id = event.device_id; + auto iter = activity_infos_.find(device_id); + if (iter == activity_infos_.end()) { + auto res_flag = activity_infos_.emplace(device_id, DeviceActivityInfos()); + AddKernelEventToDevice(event, &res_flag.first->second); + } else { + AddKernelEventToDevice(event, &iter->second); + } +} + +void DataSaver::AddKernelEventToDevice(const Event &event, DeviceActivityInfos *device_activity_infos) { + // Combine kernel activity with same kernel name + auto event_ptr = std::make_shared(event); + ActivityData activity_data = ActivityData(event_ptr); + std::string kernel_name = 
event.kernel_name; + auto iter = device_activity_infos->find(kernel_name); + if (iter == device_activity_infos->end()) { + device_activity_infos->emplace(kernel_name, activity_data); + } else { + iter->second += activity_data; + } +} + +void DataSaver::WriteFile(std::string out_path_dir) { + if (out_path_dir.empty()) { + MS_LOG(WARNING) << "Output directory is empty. Ignore the writing data."; + return; + } + if (op_detail_infos_.empty() || op_type_infos_.empty() || activity_infos_.empty()) { + MS_LOG(WARNING) << "No operation detail infos to write."; + return; + } + // not support multi-device for operator info per process yet + device_id_ = std::to_string(activity_infos_.begin()->first); + WriteOpDetail(out_path_dir); + WriteOpType(out_path_dir); + WriteActivity(out_path_dir); +} + +void DataSaver::WriteOpType(const std::string &saver_base_dir) { + std::string file_path = saver_base_dir + "/gpu_op_type_info_" + device_id_ + ".csv"; + std::ofstream ofs(file_path); + // check if the file is writable + if (!ofs.is_open()) { + MS_LOG(WARNING) << "Open file '" << file_path << "' failed!"; + return; + } + // write op type info into file + ofs << OpType().GetHeader() << std::endl; + for (const auto &op_type_info : op_type_infos_) { + ofs << op_type_info.second << std::endl; + } + ofs.close(); + MS_LOG(INFO) << "Write " << op_type_infos_.size() << " op type infos into file: " << file_path; +} + +void DataSaver::WriteOpDetail(const std::string &saver_base_dir) { + std::string file_path = saver_base_dir + "/gpu_op_detail_info_" + device_id_ + ".csv"; + std::ofstream ofs(file_path); + if (!ofs.is_open()) { + MS_LOG(WARNING) << "Open file '" << file_path << "' failed!"; + return; + } + // write op detail info into file + ofs << OpDetailInfo().GetHeader() << std::endl; + for (const auto &op_detail : op_detail_infos_) { + ofs << op_detail << std::endl; + } + ofs.close(); + MS_LOG(INFO) << "Write " << op_detail_infos_.size() << " op detail infos into file: " << file_path; +} + +void 
DataSaver::WriteActivity(const std::string &saver_base_dir) { + std::string file_path_base = saver_base_dir + "/gpu_activity_data_"; + for (const auto &device_info : activity_infos_) { + std::string file_path = file_path_base + std::to_string(device_info.first) + ".csv"; + std::ofstream ofs(file_path); + if (!ofs.is_open()) { + MS_LOG(WARNING) << "Open file '" << file_path << "' failed!"; + continue; // one unwritable file must not drop the data of the remaining devices + } + // write activity data into file + ofs << ActivityData().GetHeader() << std::endl; + for (const auto &activity_data : device_info.second) { + ofs << activity_data.second << std::endl; + } + ofs.close(); + MS_LOG(INFO) << "Write " << device_info.second.size() << " activity infos into file: " << file_path; + } +} + +} // namespace gpu +} // namespace profiler +} // namespace mindspore diff --git a/mindspore/ccsrc/profiler/device/gpu/data_saver.h b/mindspore/ccsrc/profiler/device/gpu/data_saver.h new file mode 100644 index 0000000000..36e6c810d0 --- /dev/null +++ b/mindspore/ccsrc/profiler/device/gpu/data_saver.h @@ -0,0 +1,153 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_DATA_SAVER_H +#define MINDSPORE_DATA_SAVER_H +#include +#include +#include +#include +#include +#include "profiler/device/gpu/gpu_profiling.h" +namespace mindspore { +namespace profiler { +namespace gpu { + +struct OpDetailInfo { + std::string op_type_; + std::string op_name_; + std::string op_full_name_; + std::shared_ptr op_info_{nullptr}; + float op_avg_time_{0}; + float proportion_{0}; + + OpDetailInfo() = default; + + OpDetailInfo(std::shared_ptr op_info, float proportion); + + std::string GetHeader() const { + return "op_side,op_type,op_name,op_full_name,op_occurrences,op_total_time(us),op_avg_time(us),total_proportion," + "cuda_activity_cost_time(us),cuda_activity_call_count"; + } + + friend std::ostream &operator<<(std::ostream &os, const OpDetailInfo &event) { + os << "Device," << event.op_type_ << ',' << event.op_name_ << ',' << event.op_full_name_ << ',' + << event.op_info_->op_count << ',' << event.op_info_->op_host_cost_time << ',' << event.op_avg_time_ << ',' + << event.proportion_ << ',' << event.op_info_->cupti_activity_time << ',' << event.op_info_->op_kernel_count; + return os; + } +}; + +struct OpType { + std::string op_type_; + int count_{0}; + float total_time_{0}; + float avg_time_{0}; + float proportion_{0}; + + std::string GetHeader() const { return "op_type,type_occurrences,total_time(us),total_proportion,avg_time(us)"; } + + friend std::ostream &operator<<(std::ostream &os, const OpType &event) { + os << event.op_type_ << ',' << event.count_ << ',' << event.total_time_ << ',' << event.proportion_ << ',' + << event.avg_time_; + return os; + } + + OpType &operator+=(const OpType &other) { + this->count_ += other.count_; + this->total_time_ += other.total_time_; + this->proportion_ += other.proportion_; + return *this; + } +}; + +struct ActivityData { + std::shared_ptr basic_info_{nullptr}; + std::string block_dim_; + std::string grid_dim_; + int count_{0}; + float total_duration_{0}; + float avg_duration_{0}; + 
float max_duration_{0}; + float min_duration_{0}; + + ActivityData() = default; + + explicit ActivityData(std::shared_ptr data); + + std::string GetHeader() const { + return "name,type,op_full_name,stream_id,block_dim,grid_dim,occurrences," + "total_duration(us),avg_duration(us),max_duration(us),min_duration(us)"; + } + + friend std::ostream &operator<<(std::ostream &os, const ActivityData &event) { + os << "\"" << event.basic_info_->kernel_name << "\"," << event.basic_info_->kernel_type << ',' + << event.basic_info_->op_name << ',' << event.basic_info_->stream_id << ',' << event.block_dim_ << ',' + << event.grid_dim_ << ',' << event.count_ << ',' << event.total_duration_ << ',' << event.avg_duration_ << ',' + << event.max_duration_ << ',' << event.min_duration_; + return os; + } + + ActivityData &operator+=(const ActivityData &other); +}; + +using OpInfoMap = std::unordered_map; +using DeviceActivityInfos = std::unordered_map; // +using AllActivityInfos = std::unordered_map; // +using OpTypeInfos = std::unordered_map; // +using OpDetailInfos = std::vector; + +class DataSaver { + public: + DataSaver() = default; + + ~DataSaver() = default; + + DataSaver(const DataSaver &) = delete; + + DataSaver &operator=(const DataSaver &) = delete; + + void ParseOpInfo(const OpInfoMap &op_info_maps); + + void ParseEvent(const std::vector &events); + + void WriteFile(std::string out_path); + + private: + void AddOpDetailInfoForType(const OpDetailInfo &op_detail_info); + + float GetTotalOpTime(const OpInfoMap &op_info_maps); + + void AddKernelEvent(const Event &event); + + void AddKernelEventToDevice(const Event &event, DeviceActivityInfos *device_activity_infos); + + void WriteOpType(const std::string &saver_base_dir); + + void WriteOpDetail(const std::string &saver_base_dir); + + void WriteActivity(const std::string &saver_base_dir); + + std::string device_id_; + AllActivityInfos activity_infos_; + OpTypeInfos op_type_infos_; + OpDetailInfos op_detail_infos_; +}; +} // namespace 
gpu +} // namespace profiler +} // namespace mindspore + +#endif // MINDSPORE_DATA_SAVER_H diff --git a/mindspore/ccsrc/profiler/device/gpu/gpu_profiling.cc b/mindspore/ccsrc/profiler/device/gpu/gpu_profiling.cc index 7719522cb2..3a5efa9822 100644 --- a/mindspore/ccsrc/profiler/device/gpu/gpu_profiling.cc +++ b/mindspore/ccsrc/profiler/device/gpu/gpu_profiling.cc @@ -19,6 +19,7 @@ #include #include "profiler/device/gpu/gpu_profiling.h" #include "profiler/device/gpu/cupti_interface.h" +#include "profiler/device/gpu/data_saver.h" #include "utils/log_adapter.h" #include "pybind_api/api_register.h" @@ -478,7 +479,11 @@ void GPUProfiler::Stop() { void GPUProfiler::SaveProfileData() { if (profile_data_path_.empty()) { MS_LOG(WARNING) << "profile_data_path is empty, skip save profile data."; - return; + } else { + DataSaver dataSaver; + dataSaver.ParseOpInfo(op_info_map_); + dataSaver.ParseEvent(events_); + dataSaver.WriteFile(profile_data_path_); } op_info_map_.clear(); op_name_map_.clear(); diff --git a/mindspore/profiler/parser/minddata_parser.py b/mindspore/profiler/parser/minddata_parser.py index 27ab95f705..faa8d15512 100644 --- a/mindspore/profiler/parser/minddata_parser.py +++ b/mindspore/profiler/parser/minddata_parser.py @@ -43,17 +43,21 @@ class MinddataParser: node_name, node_start, node_end, queue_size = "", 0, 0, 0 if node_info: node_name = node_info[0].replace("Node:", "") - if len(node_info) > 2: + + if len(node_info) > 3 and "queue" in node_info[1]: + queue_size = node_info[1].replace("queue size:", "") + queue_size = int(queue_size) if queue_size.isdigit() else queue_size + node_start = node_info[2].replace("Run start:", "") + node_start = int(node_start) if node_start.isdigit() else node_start + node_end = node_info[3].replace("Run end:", "") + node_end = int(node_end) if node_end.isdigit() else node_end + elif len(node_info) > 3 and "Run" in node_info[1]: + queue_size = node_info[3].replace("queue size:", "") + queue_size = int(queue_size) if 
queue_size.isdigit() else queue_size node_start = node_info[1].replace("Run start:", "") - if node_start.isdigit(): - node_start = int(node_start) + node_start = int(node_start) if node_start.isdigit() else node_start node_end = node_info[2].replace("Run end:", "") - if node_end.isdigit(): - node_end = int(node_end) - if len(node_info) > 3: - queue_size = node_info[3].replace("queue size:", "") - if queue_size.isdigit(): - queue_size = int(queue_size) + node_end = int(node_end) if node_end.isdigit() else node_end one_step_list = [node_name, node_start, node_end, queue_size] result.append(one_step_list) diff --git a/mindspore/profiler/profiling.py b/mindspore/profiler/profiling.py index 220c02e0d4..bfea46276d 100644 --- a/mindspore/profiler/profiling.py +++ b/mindspore/profiler/profiling.py @@ -79,35 +79,42 @@ class Profiler: optypes_to_deal='', optypes_not_deal='Variable', job_id=""): # get device_id and device_target self._get_devid_and_devtarget() - self._container_path = os.path.join(self._base_profiling_container_path, self._dev_id) - data_path = os.path.join(self._container_path, "data") - if not os.path.exists(data_path): - os.makedirs(data_path, exist_ok=True) self._output_path = validate_and_normalize_path(output_path) self._output_path = os.path.join(self._output_path, "profiler") if not os.path.exists(self._output_path): os.makedirs(self._output_path, exist_ok=True) - os.environ['PROFILING_MODE'] = 'true' - os.environ['PROFILING_OPTIONS'] = 'training_trace:task_trace' - os.environ['MINDDATA_PROFILING_DIR'] = self._output_path - os.environ['DEVICE_ID'] = self._dev_id - os.environ['AICPU_PROFILING_MODE'] = 'true' - os.environ['PROFILING_DIR'] = str(self._container_path) - - # use context interface to open profiling, for the new mindspore version(after 2020.5.21) - context.set_context(enable_profiling=True, profiling_options="training_trace:task_trace") - - self._subgraph = check_subgraph(subgraph) - self._valid_optype_name = optypes_to_deal.split(",") if 
optypes_to_deal else [] - self._filt_optype_names = optypes_not_deal.split(",") if optypes_not_deal else [] - self._detail = check_bool(is_detail, 'is_detail') - self._withfullpath = check_bool(is_show_op_path, 'is_show_op_path') - self._profiling_job_id = job_id - # add job id env through user input later - self._job_id_env = 0 - self._start_time = int(time.time() * 10000000) - logger.info("Profiling: profiling start time: %d", self._start_time) + if self._device_target and self._device_target == "GPU": + from mindspore._c_expression import GPUProfiler + self._gpu_profiler = GPUProfiler.get_instance() + self._gpu_profiler.init(self._output_path) + self._gpu_profiler.step_profiling_enable(True) + elif self._device_target and (self._device_target == "Ascend" or self._device_target == "Davinci"): + self._container_path = os.path.join(self._base_profiling_container_path, self._dev_id) + data_path = os.path.join(self._container_path, "data") + if not os.path.exists(data_path): + os.makedirs(data_path, exist_ok=True) + + os.environ['PROFILING_MODE'] = 'true' + os.environ['PROFILING_OPTIONS'] = 'training_trace:task_trace' + os.environ['MINDDATA_PROFILING_DIR'] = self._output_path + os.environ['DEVICE_ID'] = self._dev_id + os.environ['AICPU_PROFILING_MODE'] = 'true' + os.environ['PROFILING_DIR'] = str(self._container_path) + + # use context interface to open profiling, for the new mindspore version(after 2020.5.21) + context.set_context(enable_profiling=True, profiling_options="training_trace:task_trace") + + self._subgraph = check_subgraph(subgraph) + self._valid_optype_name = optypes_to_deal.split(",") if optypes_to_deal else [] + self._filt_optype_names = optypes_not_deal.split(",") if optypes_not_deal else [] + self._detail = check_bool(is_detail, 'is_detail') + self._withfullpath = check_bool(is_show_op_path, 'is_show_op_path') + self._profiling_job_id = job_id + # add job id env through user input later + self._job_id_env = 0 + self._start_time = int(time.time() * 
10000000) + logger.info("Profiling: profiling start time: %d", self._start_time) def analyse(self): """ @@ -123,71 +130,74 @@ class Profiler: >>> model.train() >>> profiler.analyse() """ - release() - - job_id = self._get_profiling_job_id() - logger.info("Profiling: job id is %s ", job_id) - - source_path = os.path.join(PROFILING_LOG_BASE_PATH, job_id) - # parse hwts.log.data.45.dev file, and get task profiling data - hwts_output_filename = self._hwts_output_filename_target + self._dev_id + ".txt" - hwts_output_filename = os.path.join(self._output_path, hwts_output_filename) - hwtslog_parser = HWTSLogParser(source_path, hwts_output_filename) - result = hwtslog_parser.execute() - if not result: - logger.error("Profiling: fail to parse hwts log file.") - return - - # parse Framework file, and get the relation of op and tasks - framework_parser = FrameworkParser(job_id, self._dev_id, self._output_path) - framework_parser.parse() - op_task_dict = framework_parser.to_task_id_full_op_name_dict() - if not op_task_dict: - logger.error("Profiling: fail to parse framework files.") - return - - # get op compute time from hwts data and framework data, write output_op_compute_time.txt - opcompute_output_filename = self._opcompute_output_filename_target + self._dev_id + ".txt" - opcompute_output_filename = os.path.join(self._output_path, opcompute_output_filename) - optime_parser = OPComputeTimeParser( - hwts_output_filename, opcompute_output_filename, - op_task_dict, self._output_path, self._dev_id - ) - optime_parser.execute() - - # parse DATA_PREPROCESS.dev.AICPU file, write output_data_preprocess_aicpu_x.txt - output_data_preprocess_aicpu = self._aicpu_op_output_filename_target + self._dev_id + ".txt" - output_data_preprocess_aicpu = os.path.join(self._output_path, output_data_preprocess_aicpu) - aicpu_data_parser = DataPreProcessParser(source_path, output_data_preprocess_aicpu) - aicpu_data_parser.execute() - - # Parsing minddata AICPU profiling - 
MinddataParser.execute(source_path, self._output_path, self._dev_id) - - # parse minddata pipeline operator and queue - try: - pipeline_parser = MinddataPipelineParser(self._output_path, self._dev_id, self._output_path) - pipeline_parser.parse() - except ProfilerException as err: - logger.warning(err.message) - - # analyse op compute time info - try: - self._analyser_op_info() - except ProfilerException as err: - logger.warning(err.message) - - # analyse step trace info - try: - self._analyse_step_trace(source_path, framework_parser) - except ProfilerException as err: - logger.warning(err.message) - - # analyse timeline info - try: - self._analyse_timeline(aicpu_data_parser, optime_parser) - except (ProfilerIOException, ProfilerFileNotFoundException, RuntimeError) as err: - logger.warning('Fail to write timeline data: %s', err) + if self._device_target and self._device_target == "GPU": + self._gpu_profiler.stop() + elif self._device_target and (self._device_target == "Ascend" or self._device_target == "Davinci"): + release() + + job_id = self._get_profiling_job_id() + logger.info("Profiling: job id is %s ", job_id) + + source_path = os.path.join(PROFILING_LOG_BASE_PATH, job_id) + # parse hwts.log.data.45.dev file, and get task profiling data + hwts_output_filename = self._hwts_output_filename_target + self._dev_id + ".txt" + hwts_output_filename = os.path.join(self._output_path, hwts_output_filename) + hwtslog_parser = HWTSLogParser(source_path, hwts_output_filename) + result = hwtslog_parser.execute() + if not result: + logger.error("Profiling: fail to parse hwts log file.") + return + + # parse Framework file, and get the relation of op and tasks + framework_parser = FrameworkParser(job_id, self._dev_id, self._output_path) + framework_parser.parse() + op_task_dict = framework_parser.to_task_id_full_op_name_dict() + if not op_task_dict: + logger.error("Profiling: fail to parse framework files.") + return + + # get op compute time from hwts data and framework data, 
write output_op_compute_time.txt + opcompute_output_filename = self._opcompute_output_filename_target + self._dev_id + ".txt" + opcompute_output_filename = os.path.join(self._output_path, opcompute_output_filename) + optime_parser = OPComputeTimeParser( + hwts_output_filename, opcompute_output_filename, + op_task_dict, self._output_path, self._dev_id + ) + optime_parser.execute() + + # parse DATA_PREPROCESS.dev.AICPU file, write output_data_preprocess_aicpu_x.txt + output_data_preprocess_aicpu = self._aicpu_op_output_filename_target + self._dev_id + ".txt" + output_data_preprocess_aicpu = os.path.join(self._output_path, output_data_preprocess_aicpu) + aicpu_data_parser = DataPreProcessParser(source_path, output_data_preprocess_aicpu) + aicpu_data_parser.execute() + + # Parsing minddata AICPU profiling + MinddataParser.execute(source_path, self._output_path, self._dev_id) + + # parse minddata pipeline operator and queue + try: + pipeline_parser = MinddataPipelineParser(self._output_path, self._dev_id, self._output_path) + pipeline_parser.parse() + except ProfilerException as err: + logger.warning(err.message) + + # analyse op compute time info + try: + self._analyser_op_info() + except ProfilerException as err: + logger.warning(err.message) + + # analyse step trace info + try: + self._analyse_step_trace(source_path, framework_parser) + except ProfilerException as err: + logger.warning(err.message) + + # analyse timeline info + try: + self._analyse_timeline(aicpu_data_parser, optime_parser) + except (ProfilerIOException, ProfilerFileNotFoundException, RuntimeError) as err: + logger.warning('Fail to write timeline data: %s', err) def _analyse_step_trace(self, source_path, framework_parser): """ @@ -416,12 +426,12 @@ class Profiler: dev_id = "0" logger.error("Fail to get DEVICE_ID, use 0 instead.") - if device_target and device_target != "Davinci" \ - and device_target != "Ascend": + if device_target and device_target not in ["Davinci", "Ascend", "GPU"]: msg = 
"Profiling: unsupport backend: %s" % device_target raise RuntimeError(msg) self._dev_id = dev_id + self._device_target = device_target @staticmethod def trainable_parameters(network):