Browse Source

Print an error message if profiling is enabled after HCCL initialization

tags/v1.2.0-rc1
yanghaitao1 4 years ago
parent
commit
c8a4a2e9a5
4 changed files with 24 additions and 3 deletions
  1. +10
    -0
      mindspore/ccsrc/pipeline/jit/pipeline.cc
  2. +9
    -1
      mindspore/ccsrc/runtime/device/ascend/profiling/profiling_manager.cc
  3. +2
    -0
      mindspore/ccsrc/runtime/device/ascend/profiling/profiling_manager.h
  4. +3
    -2
      mindspore/profiler/profiling.py

+ 10
- 0
mindspore/ccsrc/pipeline/jit/pipeline.cc View File

@@ -62,6 +62,7 @@
#include "transform/graph_ir/convert.h" #include "transform/graph_ir/convert.h"
#include "transform/graph_ir/df_graph_manager.h" #include "transform/graph_ir/df_graph_manager.h"
#include "transform/graph_ir/op_adapter_map.h" #include "transform/graph_ir/op_adapter_map.h"
#include "runtime/device/ascend/profiling/profiling_manager.h"
#endif #endif
#ifdef ENABLE_DUMP_IR #ifdef ENABLE_DUMP_IR
#include "debug/rdr/running_data_recorder.h" #include "debug/rdr/running_data_recorder.h"
@@ -79,6 +80,10 @@ using mindspore::abstract::AbstractTensorPtr;
using mindspore::abstract::AbstractTuple; using mindspore::abstract::AbstractTuple;
using mindspore::abstract::AbstractTuplePtr; using mindspore::abstract::AbstractTuplePtr;


#if (ENABLE_GE || ENABLE_D)
using mindspore::device::ascend::ProfilingManager;
#endif

const char IR_TYPE_ANF[] = "anf_ir"; const char IR_TYPE_ANF[] = "anf_ir";
const char IR_TYPE_ONNX[] = "onnx_ir"; const char IR_TYPE_ONNX[] = "onnx_ir";
const char IR_TYPE_MINDIR[] = "mind_ir"; const char IR_TYPE_MINDIR[] = "mind_ir";
@@ -1078,6 +1083,11 @@ void InitHccl() {
(void)context::OpenTsd(ms_context); (void)context::OpenTsd(ms_context);
} }
#endif #endif
#if (ENABLE_GE || ENABLE_D)
if (!ProfilingManager::GetInstance().IsProfiling()) {
ProfilingManager::GetInstance().SetHcclEnabledBefProfilingEnabled();
}
#endif
} }


void FinalizeHccl() { void FinalizeHccl() {


+ 9
- 1
mindspore/ccsrc/runtime/device/ascend/profiling/profiling_manager.cc View File

@@ -42,7 +42,7 @@ ProfilingManager &ProfilingManager::GetInstance() {
return inst; return inst;
} }


ProfilingManager::ProfilingManager() : device_id_(0), prof_cb_({0}) {}
ProfilingManager::ProfilingManager() : device_id_(0), prof_cb_({0}), hccl_enabled_bef_profiling_enabled_(false) {}


uint64_t ProfilingManager::GetJobId() const { uint64_t ProfilingManager::GetJobId() const {
const char *job_id = std::getenv("JOB_ID"); const char *job_id = std::getenv("JOB_ID");
@@ -139,6 +139,14 @@ bool ProfilingManager::StartupProfiling(uint32_t device_id) {
MS_LOG(INFO) << "No need profiling. please export PROFILING_MODE and in train mode."; MS_LOG(INFO) << "No need profiling. please export PROFILING_MODE and in train mode.";
return true; return true;
} }

if (hccl_enabled_bef_profiling_enabled_) {
MS_LOG(ERROR)
<< "Please check the Profiler object initialized before mindspore.context.set_auto_parallel_context() "
"and mindspore.communication.management.init(). Profiler should be initialized before these code.";
return false;
}

device_id_ = device_id; device_id_ = device_id;


struct MsprofGeOptions prof_conf = {0}; struct MsprofGeOptions prof_conf = {0};


+ 2
- 0
mindspore/ccsrc/runtime/device/ascend/profiling/profiling_manager.h View File

@@ -61,6 +61,7 @@ class ProfilingManager {
void SetMsprofReporterCallback(MsprofReporterCallback func) { prof_cb_.msprofReporterCallback = func; } void SetMsprofReporterCallback(MsprofReporterCallback func) { prof_cb_.msprofReporterCallback = func; }
void SetMsprofSetDeviceCallback(MsprofSetDeviceCallback func) { prof_cb_.msprofSetDeviceCallback = func; } void SetMsprofSetDeviceCallback(MsprofSetDeviceCallback func) { prof_cb_.msprofSetDeviceCallback = func; }
Status GetProfConf(NotNull<MsprofGeOptions *> prof); Status GetProfConf(NotNull<MsprofGeOptions *> prof);
void SetHcclEnabledBefProfilingEnabled() { hccl_enabled_bef_profiling_enabled_ = true; }


protected: protected:
ProfilingManager(); ProfilingManager();
@@ -70,6 +71,7 @@ class ProfilingManager {
bool ProfStartUp(NotNull<MsprofGeOptions *> prof_conf); bool ProfStartUp(NotNull<MsprofGeOptions *> prof_conf);
uint32_t device_id_; uint32_t device_id_;
MsprofCallback prof_cb_; MsprofCallback prof_cb_;
bool hccl_enabled_bef_profiling_enabled_;
}; };


Status RegProfCtrlCallback(MsprofCtrlCallback func); Status RegProfCtrlCallback(MsprofCtrlCallback func);


+ 3
- 2
mindspore/profiler/profiling.py View File

@@ -256,8 +256,9 @@ class Profiler:
"""Collect and analyse gpu performance data""" """Collect and analyse gpu performance data"""
if context.get_auto_parallel_context('device_num') > 1 and self._dev_id != str(get_rank()): if context.get_auto_parallel_context('device_num') > 1 and self._dev_id != str(get_rank()):
self._dev_id = str(get_rank()) self._dev_id = str(get_rank())
logger.error('Please check the Profiler object initialized after set_auto_parallel_context() '
'and init(). Profiler should be initialized after these code. ')
logger.error('Please check the Profiler object initialized after mindspore.context.set_auto_parallel_'
'context() and mindspore.communication.management.init(). Profiler should be initialized'
' after these code.')
self._gpu_profiler.stop() self._gpu_profiler.stop()
timeline_generator = self._generate_timeline() timeline_generator = self._generate_timeline()




Loading…
Cancel
Save