
gpu loopsink

tags/v1.1.0
VectorSL 5 years ago
commit 2cabcbcf7e
5 changed files with 46 additions and 10 deletions
  1. mindspore/ccsrc/backend/session/gpu_session.cc (+6, -1)
  2. mindspore/ccsrc/pipeline/jit/pipeline.cc (+25, -4)
  3. mindspore/ccsrc/pipeline/jit/resource.h (+4, -0)
  4. mindspore/ccsrc/utils/config_manager.h (+5, -0)
  5. mindspore/train/dataset_helper.py (+6, -5)

mindspore/ccsrc/backend/session/gpu_session.cc (+6, -1)

@@ -348,7 +348,12 @@ void GPUSession::RunGraphImpl(const GraphId &graph_id, const std::vector<tensor:
   InitPSParamAndOptim(kernel_graph, inputs);
 #endif
   MS_EXCEPTION_IF_NULL(kernel_graph);
-  Execute(kernel_graph);
+  // It's InitDataset graph if kernel_num == 1, skip the loop.
+  int kernel_num = kernel_graph->execution_order().size();
+  int64_t loopsize = (kernel_num > 1) ? ConfigManager::GetInstance().gpu_loopsink_size() : 1;
+  for (int64_t i = 0; i < loopsize; i++) {
+    Execute(kernel_graph);
+  }
   PostLoadTensor(kernel_graph);
   // Summary
   auto context_ptr = MsContext::GetInstance();
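
A minimal standalone sketch of the device-side loopsink behavior in the hunk above, assuming hypothetical stand-ins (FakeGraph, RunWithLoopsink) for KernelGraph and GPUSession::RunGraphImpl: the compiled graph is launched gpu_loopsink_size times per host call, while the single-kernel InitDataset graph still runs once.

// Standalone sketch, not MindSpore code: FakeGraph, Execute and RunWithLoopsink
// are hypothetical stand-ins for KernelGraph, GPUSession::Execute and the
// RunGraphImpl loop shown above.
#include <cstdint>
#include <iostream>
#include <vector>

struct FakeGraph {
  std::vector<int> kernels;  // stand-in for kernel_graph->execution_order()
};

void Execute(const FakeGraph &graph) {  // stand-in for one device launch
  std::cout << "launch graph with " << graph.kernels.size() << " kernels\n";
}

void RunWithLoopsink(const FakeGraph &graph, int64_t gpu_loopsink_size) {
  // A single-kernel graph is the InitDataset graph, so the loop is skipped.
  int64_t loopsize = (graph.kernels.size() > 1) ? gpu_loopsink_size : 1;
  for (int64_t i = 0; i < loopsize; i++) {
    Execute(graph);
  }
}

int main() {
  RunWithLoopsink({{1, 2, 3}}, 4);  // normal graph: launched 4 times
  RunWithLoopsink({{1}}, 4);        // InitDataset graph: launched once
  return 0;
}

Launching the graph repeatedly on the device is what lets one host-side call cover many training steps without a Python round trip per step.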


mindspore/ccsrc/pipeline/jit/pipeline.cc (+25, -4)

@@ -526,7 +526,6 @@ static std::string PrintArgs(const py::tuple &args) {
 
 bool ExecutorPy::Compile(const py::object &obj, const py::tuple &args, const py::object &phase, bool use_vm) {
   bool ret_value = false;
-
   try {
     MS_LOG(DEBUG) << PrintArgs(args);
     ret_value = CompileInner(obj, args, phase, use_vm);
@@ -567,7 +566,6 @@ bool ExecutorPy::Compile(const py::object &obj, const py::tuple &args, const py:
     std::string exName(abi::__cxa_current_exception_type()->name());
     MS_LOG(EXCEPTION) << "Error occurred when compile graph. Exception name: " << exName;
   }
-
   return ret_value;
 }


@@ -648,6 +646,17 @@ void Pipeline::Run() {
 #endif
         MS_LOG(DEBUG) << "Action " << action.first << " end.";
       };
+      if (action.first == "task_emit") {
+        auto func_graph = resource_->func_graph();
+        if (func_graph != nullptr && func_graph->manager() != nullptr) {
+          auto manager = func_graph->manager();
+          size_t graph_nums = manager->func_graphs().size();
+          if (graph_nums == 1) {
+            resource_->set_gpu_loopsink_flag(true);
+            MS_LOG(INFO) << "Change gpu_loopsink_flag_ to true.";
+          }
+        }
+      }
       if (!result) {
         MS_LOG(EXCEPTION) << "Pipeline running to end, failed in step:" << action.first;
       }
@@ -813,10 +822,22 @@ py::object ExecutorPy::Run(const py::tuple &args, const py::object &phase) {
   if (run == nullptr) {
     MS_LOG(EXCEPTION) << "Can't find run graph func for " << phase_s;
   }
+  // Set loopsink size for each phase.
+  bool is_loopsink = info_[phase_s]->resource->gpu_loopsink_flag();
+  int64_t sinksize = ConfigManager::GetInstance().iter_num();
+  ConfigManager::GetInstance().set_gpu_loopsink_size(is_loopsink ? sinksize : 1);
+  // If target is not gpu or is loopsink, keep vmloop 1.
+  bool g = (MsContext::GetInstance()->get_param<std::string>(MS_CTX_DEVICE_TARGET) == kGPUDevice);
+  int64_t vm_loop = (!g || is_loopsink) ? 1 : sinksize;
+  MS_LOG(INFO) << "VM loop size " << vm_loop << ", loopsink size " << (is_loopsink ? sinksize : 1);
+  py::object ret;
   MS_LOG(DEBUG) << "Eval run" << backend;
-  BaseRef value = (*run)(execute_info->arg_list);
+  for (int64_t i = 0; i < vm_loop; i++) {
+    BaseRef value = (*run)(execute_info->arg_list);
+    ret = BaseRefToPyData(value);
+  }
   MS_LOG(DEBUG) << "Run end";
-  return BaseRefToPyData(value);
+  return ret;
 }
 
 FuncGraphPtr ExecutorPy::BuildGraph(const py::dict &init_params, const std::string &phase,
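
A standalone sketch of how ExecutorPy::Run above splits the configured iteration count between the device-side loopsink loop and the host-side VM loop; SplitIterations is a hypothetical helper, not a MindSpore API. When a GPU model compiles into a single func_graph (the gpu_loopsink_flag set during task_emit), every iteration sinks to the device and the host runs once; otherwise the host VM loops iter_num times and the device loopsink size stays at 1.

// Hypothetical helper (not MindSpore API) mirroring the arithmetic in ExecutorPy::Run.
#include <cstdint>
#include <iostream>
#include <utility>

// Returns {vm_loop, loopsink_size} for one execution phase.
std::pair<int64_t, int64_t> SplitIterations(bool is_gpu, bool gpu_loopsink_flag, int64_t iter_num) {
  int64_t loopsink_size = gpu_loopsink_flag ? iter_num : 1;         // repeats on the device
  int64_t vm_loop = (!is_gpu || gpu_loopsink_flag) ? 1 : iter_num;  // repeats on the host
  return {vm_loop, loopsink_size};
}

int main() {
  auto single_graph = SplitIterations(true, true, 100);   // {1, 100}: sink on device
  auto multi_graph = SplitIterations(true, false, 100);   // {100, 1}: loop on host
  std::cout << single_graph.first << " " << single_graph.second << "\n"
            << multi_graph.first << " " << multi_graph.second << "\n";
  return 0;
}

For GPU targets the product of the two loop counts equals the configured iteration count, so a single Python-level call still covers a full sink epoch either way.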


mindspore/ccsrc/pipeline/jit/resource.h (+4, -0)

@@ -74,6 +74,9 @@ class Resource : public ResourceBase {
   const abstract::AbstractBasePtrList &args_spec() const { return args_spec_; }
   void set_args_spec(const abstract::AbstractBasePtrList &args_spec) { args_spec_ = args_spec; }
 
+  void set_gpu_loopsink_flag(const bool &flag) { gpu_loopsink_flag_ = flag; }
+  bool gpu_loopsink_flag() { return gpu_loopsink_flag_; }
+
   // Reclaim resource and clear the cache.
   // ExecutorPy::Compile() can be called multiple times, so cache
   // should be cleared.
@@ -85,6 +88,7 @@ class Resource : public ResourceBase {
   abstract::AbstractBasePtrList args_spec_;
   py::object input_;
   bool is_cleaned_;
+  bool gpu_loopsink_flag_{false};
 };
 
 using ResourcePtr = std::shared_ptr<pipeline::Resource>;


mindspore/ccsrc/utils/config_manager.h (+5, -0)

@@ -106,6 +106,10 @@ class ConfigManager {
 
   std::map<std::string, std::string> ge_initialize_options_;
 
+  int64_t gpu_loopsink_size() const { return gpu_loopsink_size_; }
+
+  void set_gpu_loopsink_size(const int64_t size) { gpu_loopsink_size_ = size; }
+
  private:
   ConfigManager() = default;
   ~ConfigManager() = default;
@@ -115,6 +119,7 @@ class ConfigManager {
   DatasetGraphParam dataset_param_{"", 0, 0, {}, {}, {}};
   int64_t iter_num_{1};
   std::string dataset_phase_{""};
+  int64_t gpu_loopsink_size_{1};
 };
 
 } // namespace mindspore
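
A toy illustration, not the real ConfigManager, of the process-wide singleton accessor pattern the header above extends: the pipeline writes the per-phase loopsink size into the shared config object, and the GPU session reads it back when launching the kernel graph.

// Toy singleton sketch; ToyConfigManager is a hypothetical stand-in for ConfigManager.
#include <cstdint>
#include <iostream>

class ToyConfigManager {
 public:
  static ToyConfigManager &GetInstance() {
    static ToyConfigManager instance;  // one process-wide instance
    return instance;
  }
  int64_t gpu_loopsink_size() const { return gpu_loopsink_size_; }
  void set_gpu_loopsink_size(const int64_t size) { gpu_loopsink_size_ = size; }

 private:
  ToyConfigManager() = default;
  ~ToyConfigManager() = default;
  int64_t gpu_loopsink_size_{1};
};

int main() {
  ToyConfigManager::GetInstance().set_gpu_loopsink_size(100);                // written by the pipeline
  std::cout << ToyConfigManager::GetInstance().gpu_loopsink_size() << "\n";  // read by the session
  return 0;
}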


mindspore/train/dataset_helper.py (+6, -5)

@@ -45,7 +45,7 @@ def connect_network_with_dataset(network, dataset_helper):
     data channel corresponding to the 'queue_name' and passed to the input network during forward computation.
 
     Note:
-        In the case of running the network on Ascend in graph mode, this function will wrap the input network with
+        In the case of running the network on Ascend/GPU in graph mode, this function will wrap the input network with
         'GetNext', in other cases, the input network will be returned with no change.
         The 'GetNext' is required to get data only in sink mode, so this function is not applicable to no-sink mode.


@@ -88,8 +88,8 @@ def connect_network_with_dataset(network, dataset_helper):
     if isinstance(dataset_iter, _DatasetIterNormal):
         raise RuntimeError("Dataset should be connected with network only in sink mode.")
 
-    if not hasattr(dataset, '__ME_INITED__') and context.get_context("device_target") == "Ascend" and \
-            not context.get_context("enable_ge"):
+    if not hasattr(dataset, '__ME_INITED__') and (context.get_context("device_target") == "Ascend" \
+            or context.get_context("device_target") == "GPU") and not context.get_context("enable_ge"):
         dataset.__ME_INITED__ = True
         dataset_types, dataset_shapes = dataset_helper.types_shapes()
         queue_name = dataset.__TRANSFER_DATASET__.queue_name
@@ -139,7 +139,7 @@ class DatasetHelper:
                 if ms_role in ("MS_PSERVER", "MS_SCHED"):
                     iterclass = _DatasetIterPSLite
                 else:
-                    iterclass = _DatasetIterMS
+                    iterclass = _DatasetIterMSLoopSink
             elif context.get_context("device_target") == "CPU":
                 raise RuntimeError("Currently dataset sink mode is not supported when the device target is CPU.")
             self.iter = iterclass(dataset, sink_size, epoch_num)
@@ -218,7 +218,8 @@ class _DatasetIter:
         if hasattr(self.dataset, '__loop_size__'):
             sink_size = self.dataset.__loop_size__
         else:
-            if context.get_context("enable_ge") or context.get_context("device_target") == "Ascend":
+            if context.get_context("enable_ge") or context.get_context("device_target") == "Ascend" \
+                    or context.get_context("device_target") == "GPU":
                 if self.sink_size > 0:
                     sink_size = self.sink_size
                 else:
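
A standalone sketch of the sink-size selection after this change: GPU now follows the Ascend/GE path instead of stepping one batch at a time. GetSinkSize is a hypothetical helper written in C++ for illustration, and the fall-back to the full dataset size in the else branch is an assumption taken from the surrounding MindSpore code, not shown in this diff.

// Hypothetical illustration of _DatasetIter sink-size selection; the dataset-size
// fallback and the default of 1 are assumptions, not taken from this diff.
#include <cstdint>
#include <iostream>
#include <string>

int64_t GetSinkSize(const std::string &device_target, bool enable_ge,
                    int64_t requested_sink_size, int64_t dataset_size) {
  if (enable_ge || device_target == "Ascend" || device_target == "GPU") {
    // Sink either the requested number of steps or (assumed) the whole dataset.
    return requested_sink_size > 0 ? requested_sink_size : dataset_size;
  }
  return 1;  // assumed default for targets that do not sink iterations
}

int main() {
  std::cout << GetSinkSize("GPU", false, -1, 1875) << "\n";     // 1875: whole (assumed) dataset
  std::cout << GetSinkSize("GPU", false, 100, 1875) << "\n";    // 100: explicit sink size
  std::cout << GetSinkSize("Ascend", false, 100, 1875) << "\n"; // 100: unchanged Ascend path
  return 0;
}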

