Browse Source

Add cpu profiler

tags/v1.2.0-rc1
gzhcv 5 years ago
parent
commit
2617f6c620
13 changed files with 727 additions and 31 deletions
  1. +11
    -2
      mindspore/ccsrc/profiler/CMakeLists.txt
  2. +174
    -0
      mindspore/ccsrc/profiler/device/cpu/cpu_data_saver.cc
  3. +123
    -0
      mindspore/ccsrc/profiler/device/cpu/cpu_data_saver.h
  4. +136
    -0
      mindspore/ccsrc/profiler/device/cpu/cpu_profiling.cc
  5. +86
    -0
      mindspore/ccsrc/profiler/device/cpu/cpu_profiling.h
  6. +24
    -1
      mindspore/ccsrc/profiler/device/gpu/data_saver.cc
  7. +3
    -1
      mindspore/ccsrc/profiler/device/gpu/data_saver.h
  8. +46
    -21
      mindspore/ccsrc/profiler/device/gpu/gpu_profiling.cc
  9. +1
    -0
      mindspore/ccsrc/profiler/device/gpu/gpu_profiling.h
  10. +11
    -0
      mindspore/ccsrc/runtime/device/cpu/cpu_kernel_runtime.cc
  11. +1
    -0
      mindspore/ccsrc/runtime/device/cpu/cpu_kernel_runtime.h
  12. +101
    -3
      mindspore/profiler/parser/integrator.py
  13. +10
    -3
      mindspore/profiler/profiling.py

+ 11
- 2
mindspore/ccsrc/profiler/CMakeLists.txt View File

@@ -1,14 +1,23 @@
if(ENABLE_GPU) if(ENABLE_GPU)
file(GLOB_RECURSE PROFILER_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "device/gpu/*.cc")
file(GLOB_RECURSE PROFILER_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
"device/gpu/*.cc" "device/cpu/*.cc")
set_property(SOURCE ${PROFILER_SRC_LIST} PROPERTY COMPILE_DEFINITIONS set_property(SOURCE ${PROFILER_SRC_LIST} PROPERTY COMPILE_DEFINITIONS
SUBMODULE_ID=mindspore::SubModuleId::SM_PROFILER) SUBMODULE_ID=mindspore::SubModuleId::SM_PROFILER)
add_library(_mindspore_profiler_obj OBJECT ${PROFILER_SRC_LIST}) add_library(_mindspore_profiler_obj OBJECT ${PROFILER_SRC_LIST})
endif() endif()


if(ENABLE_D) if(ENABLE_D)
file(GLOB_RECURSE PROFILER_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "device/ascend/*.cc" "device/common/*.cc")
file(GLOB_RECURSE PROFILER_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
"device/common/*.cc" "device/ascend/*.cc" "device/cpu/*.cc")
set_property(SOURCE ${PROFILER_SRC_LIST} PROPERTY COMPILE_DEFINITIONS set_property(SOURCE ${PROFILER_SRC_LIST} PROPERTY COMPILE_DEFINITIONS
SUBMODULE_ID=mindspore::SubModuleId::SM_PROFILER) SUBMODULE_ID=mindspore::SubModuleId::SM_PROFILER)
add_library(_mindspore_profiler_obj OBJECT ${PROFILER_SRC_LIST}) add_library(_mindspore_profiler_obj OBJECT ${PROFILER_SRC_LIST})
add_dependencies(_mindspore_profiler_obj mindspore::protobuf) add_dependencies(_mindspore_profiler_obj mindspore::protobuf)
endif() endif()

if(ENABLE_CPU AND NOT (ENABLE_D OR ENABLE_GPU))
file(GLOB_RECURSE PROFILER_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "device/cpu/*.cc")
set_property(SOURCE ${PROFILER_SRC_LIST} PROPERTY COMPILE_DEFINITIONS
SUBMODULE_ID=mindspore::SubModuleId::SM_PROFILER)
add_library(_mindspore_profiler_obj OBJECT ${PROFILER_SRC_LIST})
endif()

+ 174
- 0
mindspore/ccsrc/profiler/device/cpu/cpu_data_saver.cc View File

@@ -0,0 +1,174 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "profiler/device/cpu/cpu_data_saver.h"
#include <fstream>
#include <numeric>
#include "sys/stat.h"
#include "utils/log_adapter.h"
#include "utils/ms_utils.h"
#include "utils/ms_context.h"

namespace mindspore {
namespace profiler {
namespace cpu {
OpDetailInfo::OpDetailInfo(std::shared_ptr<OpInfo> op_info, float proportion)
: op_info_(op_info), proportion_(proportion) {
// op_full_name is like 'xxx/xxx/{op_type}-op{node_id}'
op_full_name_ = op_info->op_name;
auto op_type_begin_iter = op_full_name_.rfind('/') + 1;
auto op_type_end_iter = op_full_name_.rfind('-');
op_type_ = op_full_name_.substr(op_type_begin_iter, op_type_end_iter - op_type_begin_iter);
op_name_ = op_full_name_.substr(op_type_begin_iter);
op_avg_time_ = op_info->op_cost_time / op_info->op_count;
}

void DataSaver::ParseOpInfo(const OpInfoMap &op_info_maps) {
const float factor_percent = 100;
op_detail_infos_.reserve(op_info_maps.size());
float total_time_sum = GetTotalOpTime(op_info_maps);
for (auto item : op_info_maps) {
op_timestamps_map_[item.first] = item.second.start_duration;
float proportion = item.second.op_cost_time / total_time_sum * factor_percent;
auto op_info = std::make_shared<OpInfo>(item.second);
OpDetailInfo op_detail_info = OpDetailInfo(op_info, proportion);
op_detail_infos_.emplace_back(op_detail_info);
AddOpDetailInfoForType(op_detail_info);
}
// update average time of op type
for (auto &op_type : op_type_infos_) {
// device_infos: <type_name, op_type_info>
op_type.second.avg_time_ = op_type.second.total_time_ / op_type.second.count_;
}
MS_LOG(DEBUG) << "Get " << op_detail_infos_.size() << " operation items.";
MS_LOG(DEBUG) << "Get " << op_type_infos_.size() << " operation type items.";
}

void DataSaver::AddOpDetailInfoForType(const OpDetailInfo &op_detail_info) {
// Construct OpType object according to op detail info
OpType op_type = OpType{op_detail_info.op_type_,
op_detail_info.op_info_->op_count,
op_detail_info.op_info_->op_count,
op_detail_info.op_info_->op_cost_time,
0,
op_detail_info.proportion_};
// Set the OpType into op_type_infos_ map
std::string type_name = op_detail_info.op_type_;
auto iter = op_type_infos_.find(type_name);
if (iter == op_type_infos_.end()) {
op_type_infos_.emplace(type_name, op_type);
} else {
iter->second += op_type;
}
}

float DataSaver::GetTotalOpTime(const OpInfoMap &op_info_maps) {
float sum = 0;
sum = std::accumulate(op_info_maps.begin(), op_info_maps.end(), sum,
[](float i, auto iter) { return i + iter.second.op_cost_time; });
MS_LOG(DEBUG) << "The total op time is " << sum;
return sum;
}

void DataSaver::WriteFile(std::string out_path_dir) {
if (op_detail_infos_.empty() || op_type_infos_.empty()) {
MS_LOG(INFO) << "No cpu operation detail infos to write.";
return;
}
auto context_ptr = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(context_ptr);
auto device_id = context_ptr->get_param<uint32_t>(MS_CTX_DEVICE_ID);
device_id_ = std::to_string(device_id);
WriteOpDetail(out_path_dir);
WriteOpType(out_path_dir);
WriteOpTimestamp(out_path_dir);
}

void DataSaver::WriteOpType(const std::string &saver_base_dir) {
std::string file_path = saver_base_dir + "/cpu_op_type_info_" + device_id_ + ".csv";
std::ofstream ofs(file_path);
// check if the file is writable
if (!ofs.is_open()) {
MS_LOG(WARNING) << "Open file '" << file_path << "' failed!";
return;
}
try {
// write op type info into file
ofs << OpType().GetHeader() << std::endl;
for (auto op_type_info : op_type_infos_) {
ofs << op_type_info.second << std::endl;
}
} catch (const std::exception &e) {
MS_LOG(ERROR) << "Write " << file_path << "failed: " << e.what();
}
ofs.close();
ChangeFileMode(file_path);
MS_LOG(INFO) << "Write " << op_type_infos_.size() << " op type infos into file: " << file_path;
}

void DataSaver::WriteOpDetail(const std::string &saver_base_dir) {
std::string file_path = saver_base_dir + "/cpu_op_detail_info_" + device_id_ + ".csv";
std::ofstream ofs(file_path);
if (!ofs.is_open()) {
MS_LOG(WARNING) << "Open file '" << file_path << "' failed!";
return;
}
try {
// write op detail info into file
ofs << OpDetailInfo().GetHeader() << std::endl;
for (auto op_detail : op_detail_infos_) {
ofs << op_detail << std::endl;
}
} catch (const std::exception &e) {
MS_LOG(ERROR) << "Write " << file_path << "failed: " << e.what();
}
ofs.close();
ChangeFileMode(file_path);
MS_LOG(INFO) << "Write " << op_detail_infos_.size() << " op detail infos into file: " << file_path;
}

void DataSaver::WriteOpTimestamp(const std::string &saver_base_dir) {
std::string file_path = saver_base_dir + "/cpu_op_execute_timestamp_" + device_id_ + ".txt";
std::ofstream ofs(file_path);
// check if the file is writable
if (!ofs.is_open()) {
MS_LOG(WARNING) << "Open file '" << file_path << "' failed!";
return;
}
try {
// write op timestamp info into file
for (const auto &op_timestamp_info : op_timestamps_map_) {
ofs << op_timestamp_info.first << ";host_cpu_ops;";
for (auto start_end : op_timestamp_info.second) {
ofs << start_end.start_timestamp << "," << start_end.duration << " ";
}
ofs << std::endl;
}
} catch (const std::exception &e) {
MS_LOG(ERROR) << "Write " << file_path << "failed: " << e.what();
}
ofs.close();
ChangeFileMode(file_path);
}

void DataSaver::ChangeFileMode(const std::string &file_path) {
if (chmod(common::SafeCStr(file_path), S_IRUSR) == -1) {
MS_LOG(WARNING) << "Modify file: " << file_path << " to rw fail.";
return;
}
}
} // namespace cpu
} // namespace profiler
} // namespace mindspore

+ 123
- 0
mindspore/ccsrc/profiler/device/cpu/cpu_data_saver.h View File

@@ -0,0 +1,123 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_CPU_DATA_SAVER_H
#define MINDSPORE_CPU_DATA_SAVER_H
#include <iostream>
#include <algorithm>
#include <unordered_map>
#include <vector>
#include <string>
#include <memory>
#include "profiler/device/cpu/cpu_profiling.h"
namespace mindspore {
namespace profiler {
namespace cpu {
struct OpDetailInfo {
std::string op_type_;
std::string op_name_;
std::string op_full_name_;
std::shared_ptr<OpInfo> op_info_{nullptr};
float op_avg_time_{0};
float proportion_{0};

OpDetailInfo() = default;

OpDetailInfo(std::shared_ptr<OpInfo> op_info, float proportion);

std::string GetHeader() const {
return "op_side,op_type,op_name,full_op_name,op_occurrences,compute_time(ms),"
"avg_execution_time(ms),total_proportion,subgraph,pid";
}

friend std::ostream &operator<<(std::ostream &os, const OpDetailInfo &event) {
os << "Host," << event.op_type_ << ',' << event.op_name_ << ',' << event.op_full_name_ << ','
<< event.op_info_->op_count << ',' << event.op_info_->op_cost_time << ',' << event.op_avg_time_ << ','
<< event.proportion_ << ",Default," << event.op_info_->pid;
return os;
}
};

struct OpType {
std::string op_type_;
int count_{0};
int step_{0};
float total_time_{0};
float avg_time_{0};
float proportion_{0};

std::string GetHeader() const {
return "op_type,total_called_times,called_times(per-step),"
"total_compute_time,compute_time(ms per-step),percent";
}

friend std::ostream &operator<<(std::ostream &os, const OpType &event) {
os << event.op_type_ << ',' << event.count_ << ',' << event.count_ / event.step_ << ',' << event.total_time_ << ','
<< event.total_time_ / event.step_ << ',' << event.proportion_;
return os;
}

OpType &operator+=(const OpType &other) {
this->count_ += other.count_;
this->total_time_ += other.total_time_;
this->proportion_ += other.proportion_;
return *this;
}
};

using OpInfoMap = std::unordered_map<std::string, OpInfo>;
using OpTypeInfos = std::unordered_map<std::string, OpType>; // <op_full_name, Optype>
using OpDetailInfos = std::vector<OpDetailInfo>;
// <op_full_name, StartDuration>
using OpTimestampInfo = std::unordered_map<std::string, std::vector<StartDuration>>;

class DataSaver {
public:
DataSaver() = default;

~DataSaver() = default;

DataSaver(const DataSaver &) = delete;

DataSaver &operator=(const DataSaver &) = delete;

void ParseOpInfo(const OpInfoMap &op_info_maps);

void WriteFile(std::string out_path);

private:
void AddOpDetailInfoForType(const OpDetailInfo &op_detail_info);

float GetTotalOpTime(const OpInfoMap &op_info_maps);

void WriteOpType(const std::string &saver_base_dir);

void WriteOpDetail(const std::string &saver_base_dir);

void WriteOpTimestamp(const std::string &saver_base_dir);

void ChangeFileMode(const std::string &file_path);

std::string device_id_;
OpTypeInfos op_type_infos_;
OpDetailInfos op_detail_infos_;
OpTimestampInfo op_timestamps_map_;
};
} // namespace cpu
} // namespace profiler
} // namespace mindspore

#endif // MINDSPORE_CPU_DATA_SAVER_H

+ 136
- 0
mindspore/ccsrc/profiler/device/cpu/cpu_profiling.cc View File

@@ -0,0 +1,136 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "profiler/device/cpu/cpu_profiling.h"

#include <time.h>
#include <cxxabi.h>
#include <cmath>
#include "profiler/device/cpu/cpu_data_saver.h"
#include "pybind_api/api_register.h"
#include "utils/log_adapter.h"
#include "utils/utils.h"

namespace mindspore {
namespace profiler {
namespace cpu {
std::shared_ptr<CPUProfiler> CPUProfiler::profiler_inst_ = nullptr;

uint64_t GetMonoTimeStamp() {
struct timespec ts;
#if defined(_WIN32) || defined(_WIN64)
clock_gettime(CLOCK_MONOTONIC, &ts);
#else
clock_gettime(CLOCK_MONOTONIC_RAW, &ts);
#endif
constexpr uint64_t kNSecondInSecond = 1000000000;
uint64_t cur_time_stamp = ts.tv_sec * kNSecondInSecond + ts.tv_nsec;
return cur_time_stamp;
}

std::shared_ptr<CPUProfiler> CPUProfiler::GetInstance() {
if (profiler_inst_ == nullptr) {
profiler_inst_ = std::shared_ptr<CPUProfiler>(new (std::nothrow) CPUProfiler());
}
return profiler_inst_;
}

void CPUProfiler::Init(const std::string &profileDataPath = "") {
MS_LOG(INFO) << "Initialize CPU Profiling";
base_time_ = GetMonoTimeStamp();
profile_data_path_ = profileDataPath;
MS_LOG(INFO) << " Host start time(ns): " << base_time_ << " profile data path: " << profile_data_path_;
}

void CPUProfiler::StepProfilingEnable(const bool enable_flag) {
MS_LOG(INFO) << "CPU Profiler enable flag: " << enable_flag;
enable_flag_ = enable_flag;
}

void CPUProfiler::SetRunTimeData(const std::string &op_name, const uint32_t pid) {
auto iter = op_info_map_.find(op_name);
if (iter != op_info_map_.end()) {
iter->second.op_count += 1;
} else {
OpInfo op_info;
op_info.op_name = op_name;
op_info.pid = pid;
op_info.op_count = 1;
op_info_map_[op_name] = op_info;
}
op_name_ = op_name;
pid_ = pid;
}

void CPUProfiler::SetRunTimeData(const std::string &op_name, const float time_elapsed) {
auto iter = op_info_map_.find(op_name);
if (iter != op_info_map_.end()) {
// The time unit is ms, convert to us
iter->second.op_cost_time += time_elapsed;
}
}

void CPUProfiler::SetRunTimeData(const std::string &op_name, const uint64_t start, const float duration) {
auto iter = op_info_map_.find(op_name);
if (iter != op_info_map_.end()) {
iter->second.start_duration.emplace_back(StartDuration({start, duration}));
}
}

void CPUProfiler::OpDataProducerBegin(const std::string op_name, const uint32_t pid) {
op_time_start_ = GetMonoTimeStamp();
op_time_mono_start_ = GetMonoTimeStamp();
SetRunTimeData(op_name, pid);
}

void CPUProfiler::OpDataProducerEnd() {
float op_time_elapsed = 0;
op_time_stop_ = GetMonoTimeStamp();
op_time_elapsed = (op_time_stop_ - op_time_start_) / kTimeUnit;
MS_LOG(DEBUG) << "Host Time Elapsed(us)," << op_name_ << "," << op_time_elapsed;
SetRunTimeData(op_name_, op_time_elapsed);
SetRunTimeData(op_name_, op_time_mono_start_, op_time_elapsed);
}

void CPUProfiler::Stop() {
MS_LOG(INFO) << "Stop CPU Profiling";
SaveProfileData();
ClearInst();
}

void CPUProfiler::SaveProfileData() {
if (profile_data_path_.empty()) {
MS_LOG(WARNING) << "Profile data path is empty, skip save profile data.";
} else {
DataSaver dataSaver;
dataSaver.ParseOpInfo(op_info_map_);
dataSaver.WriteFile(profile_data_path_);
}
}

void CPUProfiler::ClearInst() { op_info_map_.clear(); }

REGISTER_PYBIND_DEFINE(CPUProfiler_, ([](const py::module *m) {
(void)py::class_<CPUProfiler, std::shared_ptr<CPUProfiler>>(*m, "CPUProfiler")
.def_static("get_instance", &CPUProfiler::GetInstance, "CPUProfiler get_instance.")
.def("init", &CPUProfiler::Init, py::arg("profile_data_path"), "init")
.def("stop", &CPUProfiler::Stop, "stop")
.def("step_profiling_enable", &CPUProfiler::StepProfilingEnable, py::arg("enable_flag"),
"enable or disable step profiling");
}));
} // namespace cpu
} // namespace profiler
} // namespace mindspore

+ 86
- 0
mindspore/ccsrc/profiler/device/cpu/cpu_profiling.h View File

@@ -0,0 +1,86 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_CPU_PROFILING_H
#define MINDSPORE_CPU_PROFILING_H
#include <algorithm>
#include <cstdio>
#include <map>
#include <memory>
#include <mutex>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

namespace mindspore {
namespace profiler {
namespace cpu {
struct StartDuration {
uint64_t start_timestamp = 0l;
float duration = 0l;
};

struct OpInfo {
std::string op_name;
float op_cost_time = 0;
int op_count = 0;
std::vector<StartDuration> start_duration;
uint32_t pid;
};

const float kTimeUnit = 1000;

class CPUProfiler {
public:
static std::shared_ptr<CPUProfiler> GetInstance();
~CPUProfiler() = default;
CPUProfiler(const CPUProfiler &) = delete;
CPUProfiler &operator=(const CPUProfiler &) = delete;

void Init(const std::string &profileDataPath);
void Stop();
void StepProfilingEnable(const bool enable_flag);
bool GetEnableFlag() const { return enable_flag_; }
void OpDataProducerBegin(const std::string op_name, const uint32_t pid);
void OpDataProducerEnd();
std::string ProfileDataPath() const { return profile_data_path_; }

private:
CPUProfiler() = default;
void ClearInst();
void SetRunTimeData(const std::string &op_name, const uint32_t pid);
void SetRunTimeData(const std::string &op_name, const float time_elapsed);
void SetRunTimeData(const std::string &op_name, const uint64_t start, const float duration);

static std::shared_ptr<CPUProfiler> profiler_inst_;
bool enable_flag_ = false;
std::unordered_map<std::string, OpInfo> op_info_map_;
uint64_t base_time_;
std::string op_name_;
uint32_t pid_;
void SaveProfileData();

uint64_t op_time_start_;
uint64_t op_time_mono_start_;
uint64_t op_time_stop_;
std::string profile_data_path_;
};
} // namespace cpu
} // namespace profiler
} // namespace mindspore

#endif // MINDSPORE_CPU_PROFILING_H

+ 24
- 1
mindspore/ccsrc/profiler/device/gpu/data_saver.cc View File

@@ -153,7 +153,7 @@ void DataSaver::AddKernelEventToDevice(const Event &event, DeviceActivityInfos *
} }
} }


void DataSaver::WriteFile(std::string out_path_dir) {
void DataSaver::WriteFile(std::string out_path_dir, const BaseTime &start_time) {
if (out_path_dir.empty()) { if (out_path_dir.empty()) {
MS_LOG(WARNING) << "Output directory. Ignore the writing data."; MS_LOG(WARNING) << "Output directory. Ignore the writing data.";
return; return;
@@ -169,6 +169,7 @@ void DataSaver::WriteFile(std::string out_path_dir) {
WriteActivity(out_path_dir); WriteActivity(out_path_dir);
WriteOpTimestamp(out_path_dir); WriteOpTimestamp(out_path_dir);
WriteStepTrace(out_path_dir); WriteStepTrace(out_path_dir);
WriteStartTime(out_path_dir, start_time);
} }


void DataSaver::WriteOpType(const std::string &saver_base_dir) { void DataSaver::WriteOpType(const std::string &saver_base_dir) {
@@ -307,6 +308,28 @@ void DataSaver::WriteStepTrace(const std::string &saver_base_dir) {
MS_LOG(INFO) << "Write step trace infos into file: " << file_path; MS_LOG(INFO) << "Write step trace infos into file: " << file_path;
} }


void DataSaver::WriteStartTime(const std::string &saver_base_dir, const BaseTime &start_time) {
std::string file_path = saver_base_dir + "/start_time_" + device_id_ + ".txt";
std::ofstream ofs(file_path);
// check if the file is writable
if (!ofs.is_open()) {
MS_LOG(WARNING) << "Open file '" << file_path << "' failed!";
return;
}

// write start time info into file
try {
ofs << "host_monotonic_raw_time(ns): " << start_time.host_start_monotonic_raw_time << std::endl;
ofs << "gpu_start_time(ns): " << start_time.gpu_start_time << std::endl;
} catch (const std::exception &e) {
MS_LOG(ERROR) << "Write " << file_path << "failed:" << e.what();
}

ofs.close();
ChangeFileMode(file_path);
MS_LOG(INFO) << "Write profiler start time infos into file: " << file_path;
}

void DataSaver::SetStepTraceOpName(ProfilingTraceInfo trace_op_name) { step_trace_op_name = trace_op_name; } void DataSaver::SetStepTraceOpName(ProfilingTraceInfo trace_op_name) { step_trace_op_name = trace_op_name; }


void DataSaver::ChangeFileMode(const std::string &file_path) { void DataSaver::ChangeFileMode(const std::string &file_path) {


+ 3
- 1
mindspore/ccsrc/profiler/device/gpu/data_saver.h View File

@@ -129,7 +129,7 @@ class DataSaver {


void ParseEvent(const std::vector<Event> &events); void ParseEvent(const std::vector<Event> &events);


void WriteFile(std::string out_path);
void WriteFile(std::string out_path, const BaseTime &start_time);


private: private:
void AddOpDetailInfoForType(const OpDetailInfo &op_detail_info); void AddOpDetailInfoForType(const OpDetailInfo &op_detail_info);
@@ -150,6 +150,8 @@ class DataSaver {


void WriteStepTrace(const std::string &saver_base_dir); void WriteStepTrace(const std::string &saver_base_dir);


void WriteStartTime(const std::string &saver_base_dir, const BaseTime &start_time);

void ChangeFileMode(const std::string &file_path); void ChangeFileMode(const std::string &file_path);


std::string device_id_; std::string device_id_;


+ 46
- 21
mindspore/ccsrc/profiler/device/gpu/gpu_profiling.cc View File

@@ -1,5 +1,5 @@
/** /**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2021 Huawei Technologies Co., Ltd
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.
@@ -16,6 +16,7 @@


#include "profiler/device/gpu/gpu_profiling.h" #include "profiler/device/gpu/gpu_profiling.h"


#include <time.h>
#include <cxxabi.h> #include <cxxabi.h>
#include <chrono> #include <chrono>
#include <cmath> #include <cmath>
@@ -91,6 +92,14 @@ uint64_t GetHostTimeStamp() {
return cur_time_stamp; return cur_time_stamp;
} }


uint64_t GetHostMonoTimeStamp() {
struct timespec ts;
clock_gettime(CLOCK_MONOTONIC_RAW, &ts);
constexpr uint64_t kNSecondInSecond = 1000000000;
uint64_t cur_time_stamp = ts.tv_sec * kNSecondInSecond + ts.tv_nsec;
return cur_time_stamp;
}

std::string GetKernelFunc(const char *name) { std::string GetKernelFunc(const char *name) {
char *demangledName = abi::__cxa_demangle(name, nullptr, nullptr, nullptr); char *demangledName = abi::__cxa_demangle(name, nullptr, nullptr, nullptr);
if (demangledName != nullptr) { if (demangledName != nullptr) {
@@ -100,16 +109,24 @@ std::string GetKernelFunc(const char *name) {
} }
} }


void CUPTIApiExit(const std::shared_ptr<GPUProfiler> &gpu_profiler_inst, CUpti_CallbackId cb_id,
const CUpti_CallbackData *cb_data) {
uint64_t start_timestamp = *cb_data->correlationData;
uint64_t end_timestamp = GetCUPTITimeStamp();
bool IsMemcpyAsyncEvent(CUpti_CallbackId cb_id) {
switch (cb_id) {
case CUPTI_DRIVER_TRACE_CBID_cuMemcpyAsync:
case CUPTI_DRIVER_TRACE_CBID_cuMemcpyHtoDAsync_v2:
case CUPTI_DRIVER_TRACE_CBID_cuMemcpyDtoHAsync_v2:
case CUPTI_DRIVER_TRACE_CBID_cuMemcpyDtoDAsync_v2:
case CUPTI_DRIVER_TRACE_CBID_cuMemcpyAtoHAsync_v2:
case CUPTI_DRIVER_TRACE_CBID_cuMemcpy2DAsync_v2:
case CUPTI_DRIVER_TRACE_CBID_cuMemcpy3DAsync_v2:
case CUPTI_DRIVER_TRACE_CBID_cuMemcpyHtoAAsync_v2:
case CUPTI_DRIVER_TRACE_CBID_cuMemcpyPeerAsync:
return true;
}
return false;
}

bool IsMemcpySyncEvent(CUpti_CallbackId cb_id) {
switch (cb_id) { switch (cb_id) {
case CUPTI_DRIVER_TRACE_CBID_cuLaunchKernel:
case CUPTI_DRIVER_TRACE_CBID_cuLaunchCooperativeKernel:
case CUPTI_DRIVER_TRACE_CBID_cuLaunchCooperativeKernelMultiDevice:
gpu_profiler_inst->EventHandleProcess(cb_id, cb_data, "cuLaunchKernel", start_timestamp, end_timestamp);
break;
case CUPTI_DRIVER_TRACE_CBID_cuMemcpy: case CUPTI_DRIVER_TRACE_CBID_cuMemcpy:
case CUPTI_DRIVER_TRACE_CBID_cuMemcpyHtoD_v2: case CUPTI_DRIVER_TRACE_CBID_cuMemcpyHtoD_v2:
case CUPTI_DRIVER_TRACE_CBID_cuMemcpyDtoH_v2: case CUPTI_DRIVER_TRACE_CBID_cuMemcpyDtoH_v2:
@@ -122,17 +139,21 @@ void CUPTIApiExit(const std::shared_ptr<GPUProfiler> &gpu_profiler_inst, CUpti_C
case CUPTI_DRIVER_TRACE_CBID_cuMemcpy2DUnaligned_v2: case CUPTI_DRIVER_TRACE_CBID_cuMemcpy2DUnaligned_v2:
case CUPTI_DRIVER_TRACE_CBID_cuMemcpy3D_v2: case CUPTI_DRIVER_TRACE_CBID_cuMemcpy3D_v2:
case CUPTI_DRIVER_TRACE_CBID_cuMemcpyHtoA_v2: case CUPTI_DRIVER_TRACE_CBID_cuMemcpyHtoA_v2:
case CUPTI_DRIVER_TRACE_CBID_cuMemcpyAsync:
case CUPTI_DRIVER_TRACE_CBID_cuMemcpyHtoDAsync_v2:
case CUPTI_DRIVER_TRACE_CBID_cuMemcpyDtoHAsync_v2:
case CUPTI_DRIVER_TRACE_CBID_cuMemcpyDtoDAsync_v2:
case CUPTI_DRIVER_TRACE_CBID_cuMemcpyAtoHAsync_v2:
case CUPTI_DRIVER_TRACE_CBID_cuMemcpy2DAsync_v2:
case CUPTI_DRIVER_TRACE_CBID_cuMemcpy3DAsync_v2:
case CUPTI_DRIVER_TRACE_CBID_cuMemcpyHtoAAsync_v2:
case CUPTI_DRIVER_TRACE_CBID_cuMemcpyPeer: case CUPTI_DRIVER_TRACE_CBID_cuMemcpyPeer:
case CUPTI_DRIVER_TRACE_CBID_cuMemcpyPeerAsync:
gpu_profiler_inst->EventHandleProcess(cb_id, cb_data, "cuMemcpy", start_timestamp, end_timestamp);
return true;
}
return false;
}

void CUPTIApiExit(const std::shared_ptr<GPUProfiler> &gpu_profiler_inst, CUpti_CallbackId cb_id,
const CUpti_CallbackData *cb_data) {
uint64_t start_timestamp = *cb_data->correlationData;
uint64_t end_timestamp = GetCUPTITimeStamp();
switch (cb_id) {
case CUPTI_DRIVER_TRACE_CBID_cuLaunchKernel:
case CUPTI_DRIVER_TRACE_CBID_cuLaunchCooperativeKernel:
case CUPTI_DRIVER_TRACE_CBID_cuLaunchCooperativeKernelMultiDevice:
gpu_profiler_inst->EventHandleProcess(cb_id, cb_data, "cuLaunchKernel", start_timestamp, end_timestamp);
break; break;
case CUPTI_DRIVER_TRACE_CBID_cuMemAlloc: case CUPTI_DRIVER_TRACE_CBID_cuMemAlloc:
case CUPTI_DRIVER_TRACE_CBID_cuMemAlloc_v2: case CUPTI_DRIVER_TRACE_CBID_cuMemAlloc_v2:
@@ -151,6 +172,9 @@ void CUPTIApiExit(const std::shared_ptr<GPUProfiler> &gpu_profiler_inst, CUpti_C
gpu_profiler_inst->EventHandleProcess(cb_id, cb_data, "others_api", start_timestamp, end_timestamp); gpu_profiler_inst->EventHandleProcess(cb_id, cb_data, "others_api", start_timestamp, end_timestamp);
break; break;
} }
if (IsMemcpyAsyncEvent(cb_id) || IsMemcpySyncEvent(cb_id)) {
gpu_profiler_inst->EventHandleProcess(cb_id, cb_data, "cuMemcpy", start_timestamp, end_timestamp);
}
} }


void CUPTICallBackFunc(void *user_data, CUpti_CallbackDomain domain, CUpti_CallbackId cb_id, void CUPTICallBackFunc(void *user_data, CUpti_CallbackDomain domain, CUpti_CallbackId cb_id,
@@ -369,6 +393,7 @@ void GPUProfiler::Init(const std::string &profileDataPath = "") {


base_time_.gpu_start_time = GetCUPTITimeStamp(); base_time_.gpu_start_time = GetCUPTITimeStamp();
base_time_.host_start_time = GetHostTimeStamp(); base_time_.host_start_time = GetHostTimeStamp();
base_time_.host_start_monotonic_raw_time = GetHostMonoTimeStamp();


profile_data_path_ = profileDataPath; profile_data_path_ = profileDataPath;
MS_LOG(INFO) << "GPU start time(ns):" << base_time_.gpu_start_time MS_LOG(INFO) << "GPU start time(ns):" << base_time_.gpu_start_time
@@ -477,7 +502,7 @@ void GPUProfiler::SaveProfileData() {
dataSaver.SetStepTraceOpName(step_trace_op_name); dataSaver.SetStepTraceOpName(step_trace_op_name);
dataSaver.ParseOpInfo(op_info_map_); dataSaver.ParseOpInfo(op_info_map_);
dataSaver.ParseEvent(events_); dataSaver.ParseEvent(events_);
dataSaver.WriteFile(profile_data_path_);
dataSaver.WriteFile(profile_data_path_, base_time_);
SaveExtraProfileData(); SaveExtraProfileData();
} }
} }


+ 1
- 0
mindspore/ccsrc/profiler/device/gpu/gpu_profiling.h View File

@@ -106,6 +106,7 @@ struct OpInfo {
struct BaseTime { struct BaseTime {
// nanosecond // nanosecond
uint64_t host_start_time = 0l; uint64_t host_start_time = 0l;
uint64_t host_start_monotonic_raw_time = 0l;
uint64_t gpu_start_time = 0l; uint64_t gpu_start_time = 0l;
}; };




+ 11
- 0
mindspore/ccsrc/runtime/device/cpu/cpu_kernel_runtime.cc View File

@@ -14,6 +14,7 @@
* limitations under the License. * limitations under the License.
*/ */
#include "runtime/device/cpu/cpu_kernel_runtime.h" #include "runtime/device/cpu/cpu_kernel_runtime.h"
#include <unistd.h>
#include <string> #include <string>
#include <vector> #include <vector>
#include <memory> #include <memory>
@@ -29,6 +30,7 @@
#include "backend/session/anf_runtime_algorithm.h" #include "backend/session/anf_runtime_algorithm.h"
#include "backend/session/session_basic.h" #include "backend/session/session_basic.h"
#include "frontend/operator/ops.h" #include "frontend/operator/ops.h"
#include "profiler/device/cpu/cpu_profiling.h"
#include "utils/shape_utils.h" #include "utils/shape_utils.h"
#include "utils/profile.h" #include "utils/profile.h"
#include "utils/trace_base.h" #include "utils/trace_base.h"
@@ -376,6 +378,8 @@ bool CPUKernelRuntime::Run(session::KernelGraph *kernel_graph, bool is_task_sink
static_cast<CPUMemoryManager *>(mem_manager_.get())->IncreaseAddressRefCount(kernel_graph); static_cast<CPUMemoryManager *>(mem_manager_.get())->IncreaseAddressRefCount(kernel_graph);


auto kernels = kernel_graph->execution_order(); auto kernels = kernel_graph->execution_order();
auto profiler_inst = profiler::cpu::CPUProfiler::GetInstance();
MS_EXCEPTION_IF_NULL(profiler_inst);
for (const auto &kernel : kernels) { for (const auto &kernel : kernels) {
#ifdef ENABLE_PROFILE #ifdef ENABLE_PROFILE
double start_time = GetTime(); double start_time = GetTime();
@@ -406,11 +410,18 @@ bool CPUKernelRuntime::Run(session::KernelGraph *kernel_graph, bool is_task_sink
AddRuntimeAddress(device_address, &kernel_workspaces); AddRuntimeAddress(device_address, &kernel_workspaces);
} }
bool ret = true; bool ret = true;
if (profiler_inst->GetEnableFlag()) {
uint32_t pid = getpid();
profiler_inst->OpDataProducerBegin(kernel->fullname_with_scope(), pid);
}
try { try {
ret = kernel_mod->Launch(kernel_inputs, kernel_workspaces, kernel_outputs, 0); ret = kernel_mod->Launch(kernel_inputs, kernel_workspaces, kernel_outputs, 0);
} catch (std::exception &e) { } catch (std::exception &e) {
MS_LOG(EXCEPTION) << e.what() << "\nTrace:" << trace::DumpSourceLines(kernel); MS_LOG(EXCEPTION) << e.what() << "\nTrace:" << trace::DumpSourceLines(kernel);
} }
if (profiler_inst->GetEnableFlag()) {
profiler_inst->OpDataProducerEnd();
}
if (!ret) { if (!ret) {
MS_LOG(EXCEPTION) << "Launch kernel failed. Trace:" << trace::DumpSourceLines(kernel); MS_LOG(EXCEPTION) << "Launch kernel failed. Trace:" << trace::DumpSourceLines(kernel);
} }


+ 1
- 0
mindspore/ccsrc/runtime/device/cpu/cpu_kernel_runtime.h View File

@@ -26,6 +26,7 @@
#include "backend/session/session_basic.h" #include "backend/session/session_basic.h"
#include "backend/session/anf_runtime_algorithm.h" #include "backend/session/anf_runtime_algorithm.h"
#include "utils/any.h" #include "utils/any.h"
#include "profiler/device/cpu/cpu_profiling.h"
namespace mindspore { namespace mindspore {
namespace device { namespace device {
namespace cpu { namespace cpu {


+ 101
- 3
mindspore/profiler/parser/integrator.py View File

@@ -1,4 +1,4 @@
# Copyright 2020 Huawei Technologies Co., Ltd
# Copyright 2020-2021 Huawei Technologies Co., Ltd
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
@@ -666,7 +666,7 @@ class GpuTimelineGenerator(BaseTimelineGenerator):
) )
file_path = validate_and_normalize_path(file_path) file_path = validate_and_normalize_path(file_path)
if not os.path.exists(file_path): if not os.path.exists(file_path):
logger.error("Failed to find parsed timeline file.")
logger.error(f"Failed to find parsed timeline file {file_path}.")
raise ProfilerFileNotFoundException('parsed timeline file') raise ProfilerFileNotFoundException('parsed timeline file')


return file_path return file_path
@@ -712,10 +712,33 @@ class GpuTimelineGenerator(BaseTimelineGenerator):


timeline_list = self._load_op_data(op_file_path) + \ timeline_list = self._load_op_data(op_file_path) + \
self._load_activity_data(activity_file_path, activity_args_file_path) self._load_activity_data(activity_file_path, activity_args_file_path)
cpu_timeline_generator = CpuTimelineGenerator(self._profiling_dir, self._device_id)
cpu_timeline_list = cpu_timeline_generator.load_cpu_op_data()
if cpu_timeline_list:
self._clock_synchronize_to_gpu(cpu_timeline_list)
timeline_list.extend(cpu_timeline_list)
timeline_list.sort(key=lambda x: float(x[2])) timeline_list.sort(key=lambda x: float(x[2]))


return timeline_list return timeline_list


def _clock_synchronize_to_gpu(self, timeline_list):
"""Synchronize the timestamp from device to host."""
start_time_file_path = os.path.join(self._profiling_dir, f"start_time_{self._device_id}.txt")

try:
with open(start_time_file_path) as f:
lines = f.readlines()
host_monotonic_start_time = int(lines[0].strip().split(':')[-1])
gpu_start_time = int(lines[1].strip().split(':')[-1])
except (IOError, OSError) as err:
logger.error(f'Error occurred when read {start_time_file_path}: {err}')
raise ProfilerIOException

time_diff = gpu_start_time - host_monotonic_start_time
start_time = 2
for idx, time_item in enumerate(timeline_list):
timeline_list[idx][start_time] = int(time_item[start_time]) + time_diff

def _load_op_data(self, op_file_path): def _load_op_data(self, op_file_path):
"""Load operator data from file""" """Load operator data from file"""
op_timeline_list = [] op_timeline_list = []
@@ -847,7 +870,7 @@ class AscendTimelineGenerator(BaseTimelineGenerator):
timeline_dict['pid'] = op_meta.pid timeline_dict['pid'] = op_meta.pid
self._timeline_meta.append(timeline_dict) self._timeline_meta.append(timeline_dict)


def init_timeline(self, all_reduce_info, framework_info, aicpu_info, min_cycle_counter):
def init_timeline(self, all_reduce_info, framework_info, aicpu_info, min_cycle_counter, source_path):
""" """
Init timeline metadata, adding all collected info. Init timeline metadata, adding all collected info.


@@ -862,6 +885,12 @@ class AscendTimelineGenerator(BaseTimelineGenerator):


logger.info('Initiating timeline...') logger.info('Initiating timeline...')
timeline_list = self._load_timeline_data() timeline_list = self._load_timeline_data()
cpu_timeline_generator = CpuTimelineGenerator(self._profiling_dir, self._device_id)
cpu_timeline_list = cpu_timeline_generator.get_timeline_data()
if cpu_timeline_list:
self._clock_synchronize_to_host(timeline_list, source_path)
timeline_list.extend(cpu_timeline_list)
timeline_list.sort(key=lambda x: float(x[2]))
self._timeline_summary['op_exe_times'] = len(timeline_list) self._timeline_summary['op_exe_times'] = len(timeline_list)


# Add AllReduce info to timeline temp list and sort by start time. # Add AllReduce info to timeline temp list and sort by start time.
@@ -897,3 +926,72 @@ class AscendTimelineGenerator(BaseTimelineGenerator):


# Update timeline summary info # Update timeline summary info
self._timeline_summary['num_of_streams'] += len(stream_count_dict.keys()) self._timeline_summary['num_of_streams'] += len(stream_count_dict.keys())

def _clock_synchronize_to_host(self, timeline_list, source_path):
"""Synchronize the timestamp from device to host."""
host_start_file_path = os.path.join(source_path, f"host_start.log.{self._device_id}")
dev_start_file_path = os.path.join(source_path, f"dev_start.log.{self._device_id}")

try:
with open(host_start_file_path) as f:
lines = f.readlines()
host_monotonic = int(lines[2].strip().split(':')[1])
except (IOError, OSError) as err:
logger.error('Error occurred when read host_start.log: %s', err)
raise ProfilerIOException
try:
with open(dev_start_file_path) as f:
lines = f.readlines()
dev_cntvct = int(lines[2].strip().split(':')[1])
except (IOError, OSError) as err:
logger.error('Error occurred when read dev_start.log: %s', err)
raise ProfilerIOException

factor_ns_to_ms = 1e6
factor_ms_to_ten_ns = 1e5
factor_ten_ns_to_ns = 10
start_time = 2
for idx, time_item in enumerate(timeline_list):
cycle_counter = int(float(time_item[start_time]) * factor_ms_to_ten_ns)
host_monotonic_time = host_monotonic + (cycle_counter - dev_cntvct) * factor_ten_ns_to_ns
timeline_list[idx][start_time] = host_monotonic_time / factor_ns_to_ms

class CpuTimelineGenerator(GpuTimelineGenerator):
"""Generate gpu Timeline data from file."""
_output_op_execute_time_file_path = "cpu_op_execute_timestamp_{}.txt"

def _get_and_validate_path(self, file_name):
"""Generate op or activity file path from file name, and validate this path."""
file_path = os.path.join(
self._profiling_dir,
file_name.format(self._device_id)
)
file_path = validate_and_normalize_path(file_path)

return file_path

def load_cpu_op_data(self):
"""Load cpu operator data from file"""
op_file_path = self._get_and_validate_path(
self._output_op_execute_time_file_path)
timeline_list = []
if not os.path.exists(op_file_path):
logger.info("No cpu operator info.")
return timeline_list
timeline_list = self._load_op_data(op_file_path)

return timeline_list

def get_timeline_data(self):
"""Get timeline data from file."""
timeline_list = self.load_cpu_op_data()
factor_ns_to_ms = 1e6
factor_us_to_ms = 1e3
start_time = 2
duration = 3
for idx, time_item in enumerate(timeline_list):
time_item[start_time] = float(time_item[start_time]) / factor_ns_to_ms
time_item[duration] = float(time_item[duration]) / factor_us_to_ms
timeline_list[idx] = time_item

return timeline_list

+ 10
- 3
mindspore/profiler/profiling.py View File

@@ -96,6 +96,11 @@ class Profiler:
os.environ['PROFILING_MODE'] = 'true' os.environ['PROFILING_MODE'] = 'true'
os.environ['MINDDATA_PROFILING_DIR'] = self._output_path os.environ['MINDDATA_PROFILING_DIR'] = self._output_path


if self._device_target:
from mindspore._c_expression import CPUProfiler
self._cpu_profiler = CPUProfiler.get_instance()
self._cpu_profiler.init(self._output_path)
self._cpu_profiler.step_profiling_enable(True)
if self._device_target and self._device_target == "GPU": if self._device_target and self._device_target == "GPU":
from mindspore._c_expression import GPUProfiler from mindspore._c_expression import GPUProfiler
self._gpu_profiler = GPUProfiler.get_instance() self._gpu_profiler = GPUProfiler.get_instance()
@@ -172,6 +177,7 @@ class Profiler:
>>> model.train() >>> model.train()
>>> profiler.analyse() >>> profiler.analyse()
""" """
self._cpu_profiler.stop()
if self._device_target and self._device_target == "GPU": if self._device_target and self._device_target == "GPU":
self._gpu_analyse() self._gpu_analyse()


@@ -244,7 +250,7 @@ class Profiler:


# analyse timeline info # analyse timeline info
try: try:
self._analyse_timeline(aicpu_data_parser, optime_parser)
self._analyse_timeline(aicpu_data_parser, optime_parser, source_path)
except (ProfilerIOException, ProfilerFileNotFoundException, RuntimeError) as err: except (ProfilerIOException, ProfilerFileNotFoundException, RuntimeError) as err:
logger.warning('Fail to write timeline data: %s', err) logger.warning('Fail to write timeline data: %s', err)


@@ -336,7 +342,7 @@ class Profiler:


return point_info return point_info


def _analyse_timeline(self, aicpu_parser, optime_parser):
def _analyse_timeline(self, aicpu_parser, optime_parser, source_path):
""" """
Analyse and parse timeline info. Analyse and parse timeline info.


@@ -368,7 +374,8 @@ class Profiler:
# Add info into timeline, such as AI CPU, AllReduce, framework info. # Add info into timeline, such as AI CPU, AllReduce, framework info.
aicpu_info = aicpu_parser.query_aicpu_data() aicpu_info = aicpu_parser.query_aicpu_data()
min_cycle_counter = min(aicpu_parser.min_cycle_counter, optime_parser.min_cycle_counter) min_cycle_counter = min(aicpu_parser.min_cycle_counter, optime_parser.min_cycle_counter)
timeline_analyser.init_timeline(all_reduce_info, framework_info, aicpu_info, min_cycle_counter)
timeline_analyser.init_timeline(all_reduce_info, framework_info, aicpu_info,
min_cycle_counter, source_path)
size_limit = 20 * 1024 * 1024 # 20MB size_limit = 20 * 1024 * 1024 # 20MB
timeline_analyser.write_timeline(size_limit) timeline_analyser.write_timeline(size_limit)
timeline_analyser.write_timeline_summary() timeline_analyser.write_timeline_summary()


Loading…
Cancel
Save