Browse Source

!12801 Fix the bug of incorrect cuda kernel name info

From: @gzhcv
Reviewed-by: 
Signed-off-by:
tags/v1.2.0-rc1
mindspore-ci-bot Gitee 5 years ago
parent
commit
2e6cea5dbf
5 changed files with 17 additions and 4 deletions
  1. +1
    -1
      mindspore/ccsrc/profiler/device/cpu/cpu_profiling.cc
  2. +1
    -1
      mindspore/ccsrc/profiler/device/cpu/cpu_profiling.h
  3. +14
    -0
      mindspore/ccsrc/profiler/device/gpu/gpu_profiling.cc
  4. +1
    -1
      mindspore/ccsrc/profiler/device/gpu/gpu_profiling_utils.cc
  5. +0
    -1
      mindspore/ccsrc/profiler/device/profiling.cc

+ 1
- 1
mindspore/ccsrc/profiler/device/cpu/cpu_profiling.cc View File

@@ -72,7 +72,7 @@ void CPUProfiler::OpDataProducerBegin(const std::string op_name, const uint32_t
void CPUProfiler::OpDataProducerEnd() {
float op_time_elapsed = 0;
op_time_stop_ = GetHostMonoTimeStamp();
op_time_elapsed = (op_time_stop_ - op_time_start_) / kTimeUnit;
op_time_elapsed = (op_time_stop_ - op_time_start_) / kNanosecondToMillisecond;
MS_LOG(DEBUG) << "Host Time Elapsed(us)," << op_name_ << "," << op_time_elapsed;
Profiler::SetRunTimeData(op_name_, op_time_elapsed);
Profiler::SetRunTimeData(op_name_, op_time_mono_start_, op_time_elapsed);


+ 1
- 1
mindspore/ccsrc/profiler/device/cpu/cpu_profiling.h View File

@@ -28,7 +28,7 @@
namespace mindspore {
namespace profiler {
namespace cpu {
const float kTimeUnit = 1000;
const float kNanosecondToMillisecond = 1000000;

class CPUProfiler : public Profiler {
public:


+ 14
- 0
mindspore/ccsrc/profiler/device/gpu/gpu_profiling.cc View File

@@ -194,6 +194,18 @@ void CUPTICallBackFunc(void *user_data, CUpti_CallbackDomain domain, CUpti_Callb
}
}

std::string GetKernelFuncName(std::string kernel_name) {
// remove the return type name (void) in kernel_name.
std::string search_pattern("void ");
auto func_name_begin_iter = kernel_name.find(search_pattern);
if (func_name_begin_iter == kernel_name.npos) {
func_name_begin_iter = 0;
} else {
func_name_begin_iter += search_pattern.length();
}
return kernel_name.substr(func_name_begin_iter);
}

std::shared_ptr<GPUProfiler> GPUProfiler::GetInstance() {
if (profiler_inst_ == nullptr) {
profiler_inst_ = std::shared_ptr<GPUProfiler>(new (std::nothrow) GPUProfiler());
@@ -341,6 +353,7 @@ void GPUProfiler::EventHandleProcess(CUpti_CallbackId cbid, const CUpti_Callback
uint32_t device_id = -1;
CuptiGetDeviceId(cbdata->context, &device_id);
event.kernel_name = cbdata->symbolName ? GetKernelFunc(cbdata->symbolName) : cbdata->functionName;
event.kernel_name = GetKernelFuncName(event.kernel_name);
event.kernel_type = typestring;
event.api_type = CUPTIApiType::kCallback;
event.start_time_stamp = startTimestamp;
@@ -609,6 +622,7 @@ void HandleActivityKernelRecord(Event *profilingData, CUpti_Activity *record) {
profilingData->activity_type = ActivityType::kKernel;
profilingData->api_type = CUPTIApiType::kActivity;
profilingData->kernel_name = GetKernelFunc(kernel->name);
profilingData->kernel_name = GetKernelFuncName(profilingData->kernel_name);
profilingData->kernel_type = "cuLaunchKernel";
profilingData->start_time_stamp = kernel->start;
profilingData->end_time_stamp = kernel->end;


+ 1
- 1
mindspore/ccsrc/profiler/device/gpu/gpu_profiling_utils.cc View File

@@ -92,7 +92,7 @@ void ProfilingUtils::SetTraceFpStart(const std::vector<CNodePtr> &cnode_exec_ord
if (cnode_exec_order.size() > 1) {
profiling_trace.trace_fp_start = cnode_exec_order.at(1)->fullname_with_scope();
} else {
MS_LOG(ERROR) << "No Op Behind the GetNext Op" << std::endl;
MS_LOG(WARNING) << "No Op Behind the GetNext Op" << std::endl;
}
} else {
profiling_trace.trace_fp_start = first_node->fullname_with_scope();


+ 0
- 1
mindspore/ccsrc/profiler/device/profiling.cc View File

@@ -41,7 +41,6 @@ uint64_t Profiler::GetHostMonoTimeStamp() {
void Profiler::SetRunTimeData(const std::string &op_name, const float time_elapsed) {
auto iter = op_info_map_.find(op_name);
if (iter != op_info_map_.end()) {
// The time unit is ms, convert to us
iter->second.op_host_cost_time += time_elapsed;
}
}


Loading…
Cancel
Save