
add auto parallel pipeline

tags/v1.1.0
lichenever committed 5 years ago
commit 2e1c43483e
20 changed files with 815 additions and 7 deletions
1. mindspore/ccsrc/frontend/optimizer/ad/dfunctor.cc (+2 -0)
2. mindspore/ccsrc/frontend/optimizer/irpass/inline.h (+2 -2)
3. mindspore/ccsrc/frontend/parallel/context.h (+1 -1)
4. mindspore/ccsrc/frontend/parallel/pipeline_transformer/pipeline_transformer.cc (+442 -0)
5. mindspore/ccsrc/frontend/parallel/pipeline_transformer/pipeline_transformer.h (+72 -0)
6. mindspore/ccsrc/pipeline/jit/CMakeLists.txt (+1 -0)
7. mindspore/ccsrc/pipeline/jit/action.cc (+7 -0)
8. mindspore/ccsrc/pipeline/jit/parse/data_converter.cc (+4 -0)
9. mindspore/ccsrc/pipeline/jit/parse/parse_base.h (+1 -0)
10. mindspore/ccsrc/pipeline/jit/pass.cc (+3 -0)
11. mindspore/ccsrc/pipeline/jit/pass.h (+1 -0)
12. mindspore/ccsrc/pipeline/jit/pipeline_split.cc (+99 -0)
13. mindspore/ccsrc/pipeline/jit/pipeline_split.h (+28 -0)
14. mindspore/core/ir/anf.h (+6 -1)
15. mindspore/core/ir/func_graph.cc (+2 -1)
16. mindspore/core/ir/func_graph.h (+3 -0)
17. mindspore/core/ir/func_graph_cloner.cc (+2 -0)
18. mindspore/ops/_grad/grad_comm_ops.py (+27 -1)
19. mindspore/ops/operations/__init__.py (+1 -1)
20. mindspore/ops/operations/comm_ops.py (+111 -0)
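
For orientation, a minimal usage sketch of what this commit enables, not taken from the diff itself: the parse change below reads a cell's pipeline stage from its `stage` attribute (STAGE_NAME), and the split pass is driven by the stage count held in ParallelContext. The `pipeline_stages` keyword, the device counts, and the toy two-stage network are assumptions for illustration only; a real run also needs a multi-device launch.

import numpy as np
import mindspore.nn as nn
from mindspore import Tensor, context
from mindspore.communication import init

# Hypothetical launch on an 8-device job split into 2 pipeline stages.
init()
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel", device_num=8,
                                  pipeline_stages=2)  # kwarg name is an assumption

class TwoStageNet(nn.Cell):
    def __init__(self):
        super(TwoStageNet, self).__init__()
        self.block0 = nn.Dense(16, 16)
        self.block1 = nn.Dense(16, 16)
        # data_converter.cc below reads this "stage" attribute when converting the cell.
        self.block0.stage = 0
        self.block1.stage = 1

    def construct(self, x):
        return self.block1(self.block0(x))

net = TwoStageNet()
out = net(Tensor(np.ones([4, 16]).astype(np.float32)))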

mindspore/ccsrc/frontend/optimizer/ad/dfunctor.cc (+2 -0)

@@ -49,10 +49,12 @@ DFunctor::DFunctor(const FuncGraphPtr &primal_graph, const pipeline::ResourceBas
}
// To keep switch_layer's inputs from being inlined
k_graph_->set_switch_layer_input(primal_graph->switch_layer_input());
k_graph_->set_stage(primal_graph->stage());
TraceManager::EndTrace();

TraceManager::DebugTrace(std::make_shared<TraceGradBprop>(primal_graph->debug_info()));
tape_ = std::make_shared<FuncGraph>();
tape_->set_stage(primal_graph->stage());
// Add "_Grad" postfix
if (primal_graph->has_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL)) {
std::string grad_op_name = GetValue<std::string>(primal_graph->get_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL)) + "_Grad";


mindspore/ccsrc/frontend/optimizer/irpass/inline.h (+2 -2)

@@ -41,7 +41,7 @@ class ReplaceApplicator : public AnfVisitor {
}

auto fg = GetValueNode<FuncGraphPtr>(node);
if (fg->has_flag(FUNC_GRAPH_FLAG_DEFER_INLINE) || fg->stub() || *(fg->switch_layer_input())) {
if (fg->has_flag(FUNC_GRAPH_FLAG_DEFER_INLINE) || fg->stage() != -1 || fg->stub() || *(fg->switch_layer_input())) {
return nullptr;
}

@@ -124,7 +124,7 @@ class InlinerBase : public AnfVisitor {

// G
auto fg = GetValueNode<FuncGraphPtr>(inputs[0]);
if (fg->has_flag(FUNC_GRAPH_FLAG_DEFER_INLINE) || fg->stub()) {
if (fg->has_flag(FUNC_GRAPH_FLAG_DEFER_INLINE) || fg->stage() != -1 || fg->stub()) {
return nullptr;
}



mindspore/ccsrc/frontend/parallel/context.h (+1 -1)

@@ -122,7 +122,7 @@ class ParallelContext {
std::string parallel_mode_;
std::string strategy_search_mode_;
std::vector<int64_t> stages_;
int32_t pipeline_stage_split_num_;
int64_t pipeline_stage_split_num_ = 0;
bool parameter_broadcast_;
bool device_num_is_set_;
bool global_rank_is_set_;


mindspore/ccsrc/frontend/parallel/pipeline_transformer/pipeline_transformer.cc (+442 -0)

@@ -0,0 +1,442 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <unordered_map>
#include <set>
#include <vector>
#include <string>
#include <utility>
#include <algorithm>
#include <memory>
#include "frontend/parallel/pipeline_transformer/pipeline_transformer.h"
#include "frontend/parallel/graph_util/generate_graph.h"
#include "frontend/parallel/auto_parallel/graph_costmodel.h"
#include "frontend/parallel/ops_info/ops_utils.h"
#include "frontend/parallel/group_manager.h"
#include "frontend/parallel/context.h"
#include "utils/comm_manager.h"
#include "utils/ms_context.h"

namespace mindspore {
namespace parallel {
static std::unordered_map<AnfNodePtr, std::set<int>> parameter_color_map;
static std::pair<bool, int> IsSharedNode(const AnfNodePtr &node, const AnfNodeIndexSet &node_users);
static bool IsSomePrimitive(const CNodePtr &cnode, const std::string &name);
static int send_tag = 0;
static int recv_tag = 0;

void PipelineTransformer::Coloring() {
auto need_coloring = true;
while (need_coloring) {
need_coloring = false;
for (auto &fg : manager_->func_graphs()) {
auto value_nodes = fg->value_nodes();
for (auto &value_pair : value_nodes) {
auto node = value_pair.first;
if (!IsValueNode<FuncGraph>(node)) {
continue;
}
auto graph = GetValueNode<FuncGraphPtr>(node);
auto node_users = manager_->node_users()[node];
for (auto &user_pair : node_users) {
auto user_node = user_pair.first->cast<CNodePtr>();
user_node->set_stage(graph->stage());
auto user_node_graph = user_node->func_graph();
if (graph->stage() == stage_ && user_node_graph->stage() == -1) {
user_node_graph->set_stage(graph->stage());
need_coloring = true;
}
}
}
}
}
return;
}

void PipelineTransformer::BroadCastColoring() {
for (auto &fg : manager_->func_graphs()) {
DoBroadCast(fg);
}
}

void PipelineTransformer::DoBroadCast(const FuncGraphPtr &func) {
auto need_coloring = true;
while (need_coloring) {
need_coloring = false;
auto all_nodes = func->nodes();
for (auto &node : all_nodes) {
// only cnode can broadcast color.
if (!node->isa<CNode>()) {
continue;
}
auto cnode = node->cast<CNodePtr>();
if (cnode->stage() == -1) {
// broadcast from inputs to outputs
for (auto &input : cnode->inputs()) {
if (input->isa<CNode>() && input->stage() == stage_) {
cnode->set_stage(input->stage());
need_coloring = true;
}
}
} else if (cnode->stage() == stage_) {
// broadcast from outputs to inputs
for (auto &input : cnode->inputs()) {
if (input->stage() != -1 || !input->isa<CNode>()) {
continue;
}
auto input_cnode = input->cast<CNodePtr>();
auto prim = GetValueNode<PrimitivePtr>(input_cnode->input(0));
if (prim != nullptr && prim->name() == VIRTUAL_DATA_SET) {
continue;
}
input->set_stage(cnode->stage());
need_coloring = true;
}
}
}
}
}

void PipelineTransformer::HandleSharedParameter() {
auto parameters = root_->parameters();
for (auto &parameter : parameters) {
auto parameter_stage = parameter_color_map[parameter];
if (parameter_stage.size() <= 1) {
continue;
}
auto users = manager_->node_users()[parameter];
for (auto &user : users) {
auto node = user.first;
auto graph = node->func_graph();
if (graph != root_ && graph->stage() == -1) {
MS_LOG(EXCEPTION) << "Don't support this situation.";
}
if (graph == root_ || graph->stage() != stage_) {
continue;
}
if (stage_ == *parameter_stage.begin()) {
std::vector<AnfNodePtr> make_tuple_input = {NewValueNode(prim::kPrimMakeTuple)};
for (auto &stage : parameter_stage) {
if (stage == stage_) {
continue;
} else {
auto send_out = InsertSend(graph, parameter, stage, stage_);
make_tuple_input.push_back(send_out.depend);
}
}
auto make_tuple = graph->NewCNode(make_tuple_input);
OperatorAttrs depend_attrs;
auto depend_op = CreatOpInstance(depend_attrs, DEPEND, "");
std::vector<AnfNodePtr> depend_input = {NewValueNode(depend_op), parameter, make_tuple};
auto depend = graph->NewCNode(depend_input);
manager_->SetEdge(node, user.second, depend);
break;
} else {
InsertReceive(graph, parameter, node, user.second, stage_, *parameter_stage.begin());
break;
}
}
}
}

void PipelineTransformer::ParameterColoring() {
auto parameters = root_->parameters();
for (auto &parameter : parameters) {
auto users = manager_->node_users()[parameter];
std::set<int> parameter_stage;
for (auto &user : users) {
auto node = user.first;
auto graph = node->func_graph();
if (graph != root_ && graph->stage() != -1) {
parameter_stage.insert(graph->stage());
parameter->set_stage(graph->stage());
}
}
parameter_color_map[parameter] = parameter_stage;
}
}

static std::pair<ValueListPtr, TypePtr> GetShapeType(const AnfNodePtr &node) {
abstract::ShapePtr shape_ptr;
TypePtr type;
std::vector<int64_t> shape;
auto cnode = node->cast<CNodePtr>();
if (cnode != nullptr && IsValueNode<FuncGraph>(cnode->input(0))) {
auto graph = GetValueNode<FuncGraphPtr>(cnode->input(0));
auto graph_return = graph->get_return();
shape_ptr = dyn_cast<abstract::Shape>(graph_return->Shape());
type = graph_return->Type();
} else {
shape_ptr = dyn_cast<abstract::Shape>(node->Shape());
type = node->Type();
}
MS_EXCEPTION_IF_NULL(shape_ptr);
MS_EXCEPTION_IF_NULL(type);
auto shape_int = shape_ptr->shape();
std::vector<ValuePtr> element;
std::transform(shape_int.begin(), shape_int.end(), std::back_inserter(element),
[](int elem) { return MakeValue(elem); });
auto shape_list = std::make_shared<ValueList>(element);
auto tensor_type = type->cast<mindspore::TensorTypePtr>();
MS_EXCEPTION_IF_NULL(tensor_type);
auto dtype = tensor_type->element();
MS_EXCEPTION_IF_NULL(dtype);
return std::make_pair(shape_list, dtype);
}

SendAttr PipelineTransformer::InsertSend(const FuncGraphPtr &graph, const AnfNodePtr &parameter,
const int &user_node_stage, const int &node_stage) {
Attr attr_tag = std::make_pair("sr_tag", MakeValue(send_tag));
send_tag += 1;
auto dest_rank = global_rank_ + (user_node_stage - node_stage) * per_stage_rank_num_;
Attr attr_rank = std::make_pair("dest_rank", MakeValue(dest_rank));
OperatorAttrs attrs = {attr_tag, attr_rank};
auto send_op = CreatOpInstance(attrs, "Send", "send");
auto send_node = NewValueNode(send_op);
auto prim = GetValueNode<PrimitivePtr>(send_node);
auto shape_type_pair = GetShapeType(parameter);
prim->set_attr("shape", shape_type_pair.first);
prim->set_attr("dtype", shape_type_pair.second);
std::vector<AnfNodePtr> send_input = {send_node, parameter};
auto send = graph->NewCNode(send_input);
OperatorAttrs depend_attrs;
auto depend_op = CreatOpInstance(depend_attrs, "Depend", "depend");
std::vector<AnfNodePtr> depend_input = {NewValueNode(depend_op), parameter, send};
auto depend = graph->NewCNode(depend_input);
SendAttr send_out = {shape_type_pair.first, shape_type_pair.second, depend};
return send_out;
}

void PipelineTransformer::InsertReceive(const FuncGraphPtr &graph, const AnfNodePtr &node, const AnfNodePtr &use_node,
const int &index, const int &user_node_stage, const int &node_stage) {
Attr attr_tag = std::make_pair("sr_tag", MakeValue(recv_tag));
recv_tag += 1;
auto src_rank = global_rank_ + (user_node_stage - node_stage) * per_stage_rank_num_;
Attr attr_rank = std::make_pair("src_rank", MakeValue(src_rank));
auto shape_type_pair = GetShapeType(node);
Attr attr_shape = std::make_pair("shape", shape_type_pair.first);
Attr attr_dtype = std::make_pair("dtype", shape_type_pair.second);
OperatorAttrs attrs = {attr_tag, attr_rank, attr_shape, attr_dtype};
auto recv_op = CreatOpInstance(attrs, "Receive", "recv");
std::vector<AnfNodePtr> recv_input = {NewValueNode(recv_op)};
auto recv = graph->NewCNode(recv_input);
manager_->SetEdge(use_node, index, recv);
}

static std::pair<bool, int> IsSharedNode(const AnfNodePtr &node, const AnfNodeIndexSet &node_users) {
std::set<int> tag_set;
auto node_stage = node->stage();
int min_tag = node_stage;
for (auto &user_pair : node_users) {
auto user_node = user_pair.first;
auto user_node_stage = user_node->stage();
tag_set.insert(user_node_stage);
if (user_node_stage == -1) {
continue;
}
min_tag = min_tag > user_node_stage ? user_node_stage : min_tag;
}
bool is_shared = tag_set.size() > 1;
return std::make_pair(is_shared, min_tag);
}

void PipelineTransformer::CutBorder(const FuncGraphPtr &graph) {
OperatorAttrs depend_attrs;
auto depend_op = CreatOpInstance(depend_attrs, "Depend", "");
std::vector<AnfNodePtr> out_input = {NewValueNode(depend_op)};
auto all_nodes = graph->nodes();
for (auto &node : all_nodes) {
if (!node->isa<CNode>() || node->stage() == -1) {
continue;
}
auto node_users = manager_->node_users()[node];
auto shared_min_tag_pair = IsSharedNode(node, node_users);
auto is_shared = shared_min_tag_pair.first;
auto min_tag = shared_min_tag_pair.second;
for (auto &user_pair : node_users) {
auto user_node = user_pair.first;
auto node_stage = node->stage();
auto user_node_stage = user_node->stage();
if (node_stage != stage_ && user_node_stage != stage_) {
continue;
}
if (node_stage < user_node_stage) {
if (is_shared && (min_tag != node_stage)) {
continue;
}
if (node_stage == stage_) {
auto send_out = InsertSend(graph, node, user_node_stage, node_stage);
out_input.insert(out_input.begin() + 1, send_out.depend);
type_ptr_ = send_out.type;
shape_ = send_out.shape;
} else {
InsertReceive(graph, node, user_node, user_pair.second, user_node_stage, node_stage);
}
continue;
}
if (node_stage == user_node_stage) {
if (is_shared && (min_tag != node_stage)) {
InsertReceive(graph, node, user_node, user_pair.second, min_tag, stage_);
}
continue;
}
if (node_stage > user_node_stage) {
auto cnode = node->cast<CNodePtr>();
auto user_cnode = user_node->cast<CNodePtr>();
if (IsValueNode<FuncGraph>(cnode->input(0)) && IsValueNode<FuncGraph>(user_cnode->input(0))) {
MS_LOG(EXCEPTION) << "Don't support this situation";
}
continue;
}
}
}
if (out_input.size() == 2) {
manager_->Replace(graph->output(), out_input[1]);
}
if (out_input.size() > 2) {
std::vector<AnfNodePtr> make_tuple_inputs = {NewValueNode(prim::kPrimMakeTuple)};
make_tuple_inputs.insert(make_tuple_inputs.begin() + 1, out_input.begin() + 2, out_input.end());
auto make_tuple = graph->NewCNode(make_tuple_inputs);
std::vector<AnfNodePtr> out_depend_inputs = {out_input[0], out_input[1], make_tuple};
auto out_node = graph->NewCNode(out_depend_inputs);
manager_->Replace(graph->output(), out_node);
}
}

void PipelineTransformer::CutGraph() {
for (auto &fg : manager_->func_graphs()) {
if (fg == root_) {
ElimRootParameter();
continue;
}
CutBorder(fg);
}
}

void PipelineTransformer::ElimRootParameter() {
auto output = root_->output()->cast<CNodePtr>();
MS_EXCEPTION_IF_NULL(output);
auto prim = GetValueNode<PrimitivePtr>(output->input(0));
if (prim->name() == DEPEND) {
auto opt_cnode = output->input(2)->cast<CNodePtr>();
auto prim_make_tuple = GetValueNode<PrimitivePtr>(opt_cnode->input(0));
if (prim_make_tuple->name() == MAKE_TUPLE) {
std::vector<AnfNodePtr> new_node_input = {opt_cnode->input(0)};
for (auto &input : opt_cnode->inputs()) {
if (input->isa<CNode>()) {
if (IsStageNode(input->cast<CNodePtr>())) {
new_node_input.push_back(input);
}
}
}
auto new_node = root_->NewCNode(new_node_input);
manager_->Replace(opt_cnode, new_node);
}
}
}

bool PipelineTransformer::IsStageNode(const CNodePtr &node) {
for (auto &input : node->inputs()) {
if (input->isa<Parameter>()) {
return (*parameter_color_map[input].begin() == stage_ || input->stage() == -1);
} else if (input->isa<CNode>()) {
auto pre_node = input->cast<CNodePtr>();
return IsStageNode(pre_node);
} else {
continue;
}
}
return true;
}

void PipelineTransformer::ElimGraphStage() {
for (auto &fg : manager_->func_graphs()) {
fg->set_stage(-1);
}
}

static bool IsSomePrimitive(const CNodePtr &cnode, const std::string &name) {
ValueNodePtr anf_node = cnode->input(0)->cast<ValueNodePtr>();
MS_EXCEPTION_IF_NULL(anf_node);
PrimitivePtr prim = anf_node->value()->cast<PrimitivePtr>();
return (prim->name() == name);
}

std::pair<CNodePtr, FuncGraphPtr> PipelineTransformer::FindSensNode() {
std::pair<CNodePtr, FuncGraphPtr> sens_graph_pair;
CNodePtr sens_cnode;
FuncGraphPtr func_graph;
for (auto &node : root_->nodes()) {
if (!node->isa<CNode>()) {
continue;
}
sens_cnode = node->cast<CNodePtr>();
AnfNodePtr expect_tuple_getitem = sens_cnode->input(0);
MS_EXCEPTION_IF_NULL(expect_tuple_getitem);
if (!expect_tuple_getitem->isa<CNode>()) {
continue;
}

auto expect_tuple_getitem_cnode = expect_tuple_getitem->cast<CNodePtr>();
if (!IsSomePrimitive(expect_tuple_getitem_cnode, TUPLE_GETITEM)) {
continue;
}
auto expect_anonymous = expect_tuple_getitem_cnode->input(1);
if (!expect_anonymous->isa<CNode>()) {
continue;
}
auto expect_anonymous_cnode = expect_anonymous->cast<CNodePtr>();
AnfNodePtr expect_j = expect_anonymous_cnode->input(0);
if (!expect_j->isa<CNode>()) {
continue;
}
auto expect_j_cnode = expect_j->cast<CNodePtr>();
if (!IsSomePrimitive(expect_j_cnode, J)) {
continue;
}
func_graph = GetValueNode<FuncGraphPtr>(expect_j_cnode->input(1));
break;
}
sens_graph_pair = std::make_pair(sens_cnode, func_graph);
return sens_graph_pair;
}

void PipelineTransformer::CoverSensShape() {
auto sens_graph_pair = FindSensNode();
auto sens_cnode = sens_graph_pair.first;
MS_EXCEPTION_IF_NULL(sens_cnode);
OperatorAttrs attrs;
auto fill_op = CreatOpInstance(attrs, "Fill", "");
std::vector<AnfNodePtr> fill_input = {NewValueNode(fill_op), NewValueNode(type_ptr_),
NewValueNode(MakeValue(shape_->value())), NewValueNode(0)};
auto fill = root_->NewCNode(fill_input);
std::vector<AnfNodePtr> new_sens_input = {sens_cnode->input(0), fill};
auto new_sens_node = root_->NewCNode(new_sens_input);
manager_->Replace(sens_cnode, new_sens_node);
}

void PipelineTransformer::ElimParameter() {
auto parameters = root_->parameters();
std::vector<AnfNodePtr> parameter_list;
for (auto &parameter : parameters) {
if (!manager_->node_users()[parameter].empty()) {
parameter_list.push_back(parameter);
}
}
manager_->SetParameters(root_, parameter_list);
}
} // namespace parallel
} // namespace mindspore
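
To make the peer-rank arithmetic in InsertSend/InsertReceive above concrete, a small worked sketch of the formula global_rank_ + (user_node_stage - node_stage) * per_stage_rank_num_; the 8-device, 2-stage layout is an assumption for illustration.

# Worked example of the dest_rank computation in InsertSend (illustrative values).
device_num = 8
stage_num = 2
per_stage_rank_num = device_num // stage_num   # 4 ranks per stage

def send_dest_rank(global_rank, node_stage, user_node_stage):
    # Mirrors: dest_rank = global_rank_ + (user_node_stage - node_stage) * per_stage_rank_num_
    return global_rank + (user_node_stage - node_stage) * per_stage_rank_num

# A border node produced on stage 0 by rank 1 and consumed on stage 1 is sent to
# the rank at the same offset within the next stage, i.e. rank 5.
assert send_dest_rank(global_rank=1, node_stage=0, user_node_stage=1) == 5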

mindspore/ccsrc/frontend/parallel/pipeline_transformer/pipeline_transformer.h (+72 -0)

@@ -0,0 +1,72 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_FRONTEND_PARALLEL_PIPELINE_TRANSFORMER_PIPELINE_TRANSFORMER_H_
#define MINDSPORE_CCSRC_FRONTEND_PARALLEL_PIPELINE_TRANSFORMER_PIPELINE_TRANSFORMER_H_

#include <utility>
#include "ir/value.h"
#include "ir/graph_utils.h"
#include "base/base.h"

namespace mindspore {
namespace parallel {
typedef struct {
ValueListPtr shape;
TypePtr type;
AnfNodePtr depend;
} SendAttr;

class PipelineTransformer {
public:
PipelineTransformer(const FuncGraphManagerPtr &manager, const int &stage, const FuncGraphPtr &root,
const int64_t &global_rank, const int64_t &per_stage_rank_num)
: manager_(manager),
stage_(stage),
root_(root),
global_rank_(global_rank),
per_stage_rank_num_(per_stage_rank_num) {}
void Coloring();
void BroadCastColoring();
void HandleSharedParameter();
void CutGraph();
void ParameterColoring();
void CoverSensShape();
void ElimGraphStage();
void ElimParameter();

private:
void DoBroadCast(const FuncGraphPtr &func);
SendAttr InsertSend(const FuncGraphPtr &graph, const AnfNodePtr &parameter, const int &user_node_stage,
const int &node_stage);
void InsertReceive(const FuncGraphPtr &graph, const AnfNodePtr &node, const AnfNodePtr &use_node, const int &index,
const int &user_node_stage, const int &node_stage);
void CutBorder(const FuncGraphPtr &graph);
void ElimRootParameter();
bool IsStageNode(const CNodePtr &node);
std::pair<CNodePtr, FuncGraphPtr> FindSensNode();
FuncGraphManagerPtr manager_;
int64_t stage_;
FuncGraphPtr root_;
int64_t global_rank_;
int64_t per_stage_rank_num_;
TypePtr type_ptr_;
ValueListPtr shape_;
};
} // namespace parallel
} // namespace mindspore

#endif // MINDSPORE_CCSRC_FRONTEND_PARALLEL_PIPELINE_TRANSFORMER_PIPELINE_TRANSFORMER_H_

mindspore/ccsrc/pipeline/jit/CMakeLists.txt (+1 -0)

@@ -5,6 +5,7 @@ file(GLOB_RECURSE _PIPELINE_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
"action.cc"
"validator.cc"
"remove_value_node_dup.cc"
"pipeline_split.cc"
"parse/*.cc"
"static_analysis/*.cc"
)


mindspore/ccsrc/pipeline/jit/action.cc (+7 -0)

@@ -302,6 +302,10 @@ bool OptimizeAction(const ResourcePtr &res, const std::vector<PassItem> &passes)
}

bool OptInlineAction(const ResourcePtr &res) {
if (parallel::ParallelContext::GetInstance()->parallel_mode() == "semi_auto_parallel" ||
parallel::ParallelContext::GetInstance()->parallel_mode() == "auto_parallel") {
return OptimizeAction(res, kInlinePasses);
}
if (opt::python_pass::PyPassManager::GetInstance()->GetPassGroup(opt::python_pass::Phase::PREAD)->size() != 0) {
return OptimizeAction(res, kInlinePasses);
}
@@ -480,6 +484,7 @@ bool RemoveValueNodeDuplicationsAction(const ResourcePtr &res) {
return true;
}

bool PipelineSplitAction(const ResourcePtr &res) { return PipelineSplitPass(res); }
bool ValidateAction(const ResourcePtr &res) { return ValidatePass(res); }

bool ActionPyStub(const ResourcePtr &res, opt::python_pass::Phase phase) {
@@ -559,6 +564,8 @@ static std::vector<ActionItem> CommonPipeline() {
actions.emplace_back(std::make_pair("inline", OptInlineAction));
// Add pre-ad, post-inline python pass stub
actions.emplace_back(std::make_pair("py_pre_ad", PreAdActionPyStub));
// Do PipelineSplit
actions.emplace_back(std::make_pair("pipeline_split", PipelineSplitAction));

return actions;
}


mindspore/ccsrc/pipeline/jit/parse/data_converter.cc (+4 -0)

@@ -246,6 +246,10 @@ bool ConvertCellObjToFuncGraph(const CellPtr &cell, ValuePtr *const data) {
func_graph->set_flag(FUNC_GRAPH_FLAG_DEFER_INLINE, true);
}
}
if (py::hasattr(obj, STAGE_NAME)) {
auto stage = py::cast<int>(py::getattr(obj, STAGE_NAME));
func_graph->set_stage(stage);
}
*data = func_graph;
return true;
}


mindspore/ccsrc/pipeline/jit/parse/parse_base.h (+1 -0)

@@ -132,6 +132,7 @@ const char PYTHON_EXTERN_MINDSPORE_FLAG[] = "_mindspore_flags";
// define the parse constant
const int64_t MAX_COMPARISON_OPS_SUPPORTED = 1;
const char CUSTOM_BPROP_NAME[] = "bprop";
const char STAGE_NAME[] = "stage";

// define the Namespace name
const char RESOLVE_NAMESPACE_NAME_AST[] = "Ast"; // for ast type namespace


mindspore/ccsrc/pipeline/jit/pass.cc (+3 -0)

@@ -38,6 +38,7 @@
#include "frontend/parallel/step_auto_parallel.h"
#include "frontend/parallel/allreduce_fusion/step_allreduce_fusion.h"
#include "utils/log_adapter.h"
#include "pipeline/jit/pipeline_split.h"

namespace mindspore {
namespace pipeline {
@@ -418,6 +419,8 @@ bool TransformTopGraphPass(const ResourcePtr &res) {
return true;
}

bool PipelineSplitPass(const ResourcePtr &res) { return PipelineSplit(res); }

bool ValidatePass(const ResourcePtr &res) {
MS_EXCEPTION_IF_NULL(res->func_graph());
FuncGraphPtr func_graph = res->func_graph();


mindspore/ccsrc/pipeline/jit/pass.h (+1 -0)

@@ -33,6 +33,7 @@ extern std::vector<PassItem> kInlinePasses;
extern std::vector<PassItem> kPynativePasses;

bool CconvPass(const ResourcePtr &res);
bool PipelineSplitPass(const ResourcePtr &res);
bool ValidatePass(const ResourcePtr &res);
bool ConvertPrepareAdapt(const ResourcePtr &res);
bool AddControlDependPass(const ResourcePtr &res);


mindspore/ccsrc/pipeline/jit/pipeline_split.cc (+99 -0)

@@ -0,0 +1,99 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <string>
#include <memory>
#include "pipeline/jit/pipeline_split.h"
#include "utils/ms_context.h"
#include "utils/comm_manager.h"
#include "frontend/parallel/context.h"
#include "frontend/parallel/pipeline_transformer/pipeline_transformer.h"

namespace mindspore {
namespace pipeline {

static int64_t GetRank();
static int64_t InferStage(const int64_t &rank_id, const int64_t &stage_num, const int64_t &device_num);
static int64_t GetRank() {
auto ms_context = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(ms_context);
std::string world_group;
std::string backend = ms_context->get_param<std::string>(MS_CTX_DEVICE_TARGET);
if (backend == kAscendDevice) {
world_group = parallel::HCCL_WORLD_GROUP;
} else if (backend == kGPUDevice) {
world_group = parallel::NCCL_WORLD_GROUP;
} else {
MS_LOG(EXCEPTION) << "Invalid backend: " << backend;
}
int64_t global_rank = parallel::ParallelContext::GetInstance()->global_rank();
uint32_t rank_id;
if (!parallel::ParallelContext::GetInstance()->global_rank_is_set()) {
if (!CommManager::GetInstance().GetRankID(world_group, &rank_id)) {
MS_LOG(EXCEPTION) << "Get rank id failed.";
}
global_rank = UintToInt(rank_id);
}
return global_rank;
}

static int64_t InferStage(const int64_t &rank_id, const int64_t &stage_num, const int64_t &device_num) {
if (device_num % stage_num != 0) {
MS_LOG(EXCEPTION) << "Device_num must be divisible by the stage_num, got device_num: " << device_num
<< ", stage_num: " << stage_num;
}
auto per_stage_rank_num = device_num / stage_num;
return rank_id / per_stage_rank_num;
}

// Only auto_parallel and semi_auto_parallel support PipelineSplit
bool PipelineSplit(const ResourcePtr &res) {
auto parallel_mode = parallel::ParallelContext::GetInstance()->parallel_mode();
if (parallel_mode != parallel::SEMI_AUTO_PARALLEL && parallel_mode != parallel::AUTO_PARALLEL) {
MS_LOG(INFO) << "Only auto_parallel and semi_auto_parallel support pipeline split.";
return true;
}
auto stage_num = parallel::ParallelContext::GetInstance()->pipeline_stage_split_num();
if (stage_num <= 1) {
MS_LOG(INFO) << "stage num is: " << stage_num << ". No need to do pipeline split.";
return true;
}
auto manager = res->manager();
auto root = res->func_graph();
auto global_rank = GetRank();
auto device_num = parallel::ParallelContext::GetInstance()->device_num();
auto stage = InferStage(global_rank, stage_num, device_num);
auto per_stage_rank_num = device_num / stage_num;
auto transformer =
std::make_shared<parallel::PipelineTransformer>(manager, stage, root, global_rank, per_stage_rank_num);
// step1: Do color graph
transformer->Coloring();
// step2: Do color broadcast
transformer->BroadCastColoring();
// step3: Handle shared parameters
transformer->ParameterColoring();
transformer->HandleSharedParameter();
// step4: Cut Graph
transformer->CutGraph();
// step5: Handle Sens
transformer->CoverSensShape();
// step6: Elim graph stages and unused parameters
transformer->ElimGraphStage();
transformer->ElimParameter();
return true;
}
} // namespace pipeline
} // namespace mindspore
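
A quick numeric sketch of the rank-to-stage mapping computed by GetRank/InferStage above; the 8-device, 2-stage figures are assumptions for illustration.

# Mirrors InferStage: stage = rank_id / (device_num / stage_num), with the
# divisibility check raising instead of MS_LOG(EXCEPTION).
def infer_stage(rank_id, stage_num, device_num):
    if device_num % stage_num != 0:
        raise ValueError("device_num must be divisible by stage_num")
    per_stage_rank_num = device_num // stage_num
    return rank_id // per_stage_rank_num

# With 8 devices and 2 stages, ranks 0-3 form stage 0 and ranks 4-7 form stage 1.
assert [infer_stage(r, 2, 8) for r in range(8)] == [0, 0, 0, 0, 1, 1, 1, 1]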

mindspore/ccsrc/pipeline/jit/pipeline_split.h (+28 -0)

@@ -0,0 +1,28 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_PIPELINE_JIT_PIPELINE_SPLIT_H_
#define MINDSPORE_CCSRC_PIPELINE_JIT_PIPELINE_SPLIT_H_

#include "pipeline/jit/resource.h"

namespace mindspore {
namespace pipeline {
bool PipelineSplit(const ResourcePtr &res);
} // namespace pipeline
} // namespace mindspore

#endif // MINDSPORE_CCSRC_PIPELINE_JIT_PIPELINE_SPLIT_H_

mindspore/core/ir/anf.h (+6 -1)

@@ -98,7 +98,8 @@ class AnfNode : public Base {
debug_info_(std::make_shared<NodeDebugInfo>()),
fullname_with_scope_(""),
hash_(std::hash<const AnfNode *>()),
kernel_info_(nullptr) {
kernel_info_(nullptr),
stage_(-1) {
scope_ = ScopeManager::GetInstance().GetCurrentScope();
}

@@ -184,6 +185,9 @@ class AnfNode : public Base {
return user_data_.has(T::key);
}

int64_t stage() { return stage_; }
void set_stage(const int &stage) { stage_ = stage; }

protected:
// Hold a weak ref to Graph as Graph also hold ref to AnfNode.
// Otherwise, func_graph_ and AnfNode will make a reference cycle.
@@ -198,6 +202,7 @@ class AnfNode : public Base {
ScopePtr scope_;
KernelInfoDevicePtr kernel_info_;
UserData user_data_;
int64_t stage_;
};

// CNode represents the complex node with a set of arguments.


mindspore/core/ir/func_graph.cc (+2 -1)

@@ -46,7 +46,8 @@ FuncGraph::FuncGraph()
is_generated_(false),
return_(nullptr),
manager_(std::weak_ptr<FuncGraphManager>()),
stub_(false) {
stub_(false),
stage_(-1) {
debug_info_ = std::make_shared<GraphDebugInfo>();
switch_layer_input_ = std::make_shared<bool>(false);
}


mindspore/core/ir/func_graph.h (+3 -0)

@@ -355,6 +355,8 @@ class FuncGraph : public FuncGraphBase {
std::shared_ptr<bool> switch_layer_input() const { return switch_layer_input_; }
void set_switch_layer_input(std::shared_ptr<bool> switch_layer_input) { switch_layer_input_ = switch_layer_input; }
bool ContainMultiTarget() const;
int64_t stage() { return stage_; }
void set_stage(int64_t stage) { stage_ = stage; }

private:
// graph is manipulated by manager and others
@@ -419,6 +421,7 @@ class FuncGraph : public FuncGraphBase {
// Design switch_layer_input as a ptr to
// share between derived backpropagator and cloned graphs
std::shared_ptr<bool> switch_layer_input_;
int64_t stage_;
std::unordered_map<AbstractBasePtrList, FuncGraphPtr, abstract::AbstractBasePtrListHasher,
abstract::AbstractBasePtrListEqual>
func_graph_cache_;


mindspore/core/ir/func_graph_cloner.cc (+2 -0)

@@ -186,6 +186,7 @@ void Cloner::CloneFuncGraphValueNodes(const FuncGraphPtr &func_graph, const Func
MS_EXCEPTION_IF_NULL(target_func_graph);
MS_EXCEPTION_IF_NULL(manager_);

target_func_graph->set_stage(func_graph->stage());
auto old_return = func_graph->get_return();
if (old_return != nullptr) {
auto return_node = repl_node_[old_return]->cast<CNodePtr>();
@@ -668,6 +669,7 @@ FuncGraphPtr TransformableClone(const FuncGraphPtr &func_graph, const TraceInfoP
if (func_graph->has_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL)) {
new_func_graph->set_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL, func_graph->get_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL));
}
new_func_graph->set_stage(func_graph->stage());

return new_func_graph;
}


mindspore/ops/_grad/grad_comm_ops.py (+27 -1)

@@ -20,7 +20,7 @@ from .. import operations as P
from ...common.tensor import RowTensor
from ..composite.multitype_ops.zeros_like_impl import zeros_like
from ..operations.comm_ops import (AllGather, _HostAllGather, AllReduce, _AlltoAll, Broadcast,
_GetTensorSlice, _MirrorOperator, ReduceOp,
_GetTensorSlice, _MirrorOperator, ReduceOp, Send, Receive,
ReduceScatter, _HostReduceScatter, _VirtualDiv)
from .grad_base import bprop_getters

@@ -70,6 +70,32 @@ def get_bprop_all_reduce(self):
return bprop


@bprop_getters.register(Send)
def get_bprop_send(self):
"""Generate bprop for Send."""
shape = self.get_attr_dict()["shape"]
dtype = self.get_attr_dict()["dtype"]
send_grad = Receive(self.sr_tag, self.rank, shape, dtype, self.group)

def bprop(x, out, dout):
dx = send_grad()
return (dx,)
return bprop


@bprop_getters.register(Receive)
def get_bprop_receive(self):
"""Generate bprop for Receive."""
receive_grad = Send(self.tag, self.rank, self.group)
depend = P.Depend()

def bprop(out, dout):
send_out = receive_grad(dout)
dx = depend(dout, send_out)
return (dx,)
return bprop


@bprop_getters.register(Broadcast)
def get_bprop_broad_cast(self):
"""Generate bprop for Broadcast."""


mindspore/ops/operations/__init__.py (+1 -1)

@@ -36,7 +36,7 @@ from .array_ops import (Argmax, Argmin, Cast, Concat, Pack, Unpack,
Unique, GatherD, Identity, RepeatElements)
from .comm_ops import (AllGather, AllReduce, _AlltoAll, ReduceScatter, Broadcast,
_MirrorOperator, ReduceOp, _VirtualDataset,
_VirtualDiv, _GetTensorSlice,
_VirtualDiv, _GetTensorSlice, Send, Receive,
_HostAllGather, _HostReduceScatter)
from .debug_ops import (ImageSummary, InsertGradientOf, HookBackward, ScalarSummary,
TensorSummary, HistogramSummary, Print, Assert)


mindspore/ops/operations/comm_ops.py (+111 -0)

@@ -109,6 +109,117 @@ class AllReduce(PrimitiveWithInfer):
return x_dtype


class Send(PrimitiveWithInfer):
"""
Send tensors from the local rank to the specified dest_rank.

Note:
Send and Receive must be used in combination and have the same sr_tag.
Send must be used between servers.

Args:
sr_tag (int): A required integer identifying the send/recv message tag. The message will
be received by the Receive op with the same "sr_tag".
dest_rank (int): A required integer identifying the destination rank.
group (str): The communication group to work on. Default: "hccl_world_group" on Ascend, "nccl_world_group" on GPU.

Inputs:
- **input_x** (Tensor) - The shape of tensor is :math:`(x_1, x_2, ..., x_R)`.

Examples:
>>> import mindspore.ops.operations as P
>>> import mindspore.nn as nn
>>> from mindspore.communication import init
>>> from mindspore import Tensor
>>> import numpy as np
>>>
>>> init()
>>> class Net(nn.Cell):
>>>     def __init__(self):
>>>         super(Net, self).__init__()
>>>         self.depend = P.Depend()
>>>         self.send = P.Send(sr_tag=0, dest_rank=8, group="hccl_world_group")
>>>
>>>     def construct(self, x):
>>>         out = self.depend(x, self.send(x))
>>>         return out
>>>
>>> input_ = Tensor(np.ones([2, 8]).astype(np.float32))
>>> net = Net()
>>> output = net(input_)
"""
@prim_attr_register
def __init__(self, sr_tag, dest_rank, group=GlobalComm.WORLD_COMM_GROUP):
self.rank = get_rank(_get_group(group))
self.sr_tag = sr_tag
self.group = group

def infer_shape(self, x_shape):
self.add_prim_attr("shape", x_shape)
return x_shape

def infer_dtype(self, x_dtype):
self.add_prim_attr("dtype", x_dtype)
return x_dtype


class Receive(PrimitiveWithInfer):
"""
Receive tensors from the specified src_rank.

Note:
Send and Receive must be used in combination and have the same sr_tag.
Receive must be used between servers.

Args:
sr_tag (int): A required integer identifying the send/recv message tag. The message will
be sent by the Send op with the same "sr_tag".
src_rank (int): A required integer identifying the source rank.
shape (list[int]): A required list identifying the shape of the tensor to be received.
dtype (Type): A required Type identifying the type of the tensor to be received. The supported types:
int8, int16, int32, float16, float32.
group (str): The communication group to work on. Default: "hccl_world_group" on Ascend, "nccl_world_group" on GPU.

Outputs:
Tensor, with the shape given by "shape" and the data type given by "dtype".

Examples:
>>> import mindspore.ops.operations as P
>>> import mindspore.nn as nn
>>> from mindspore.communication import init
>>> from mindspore import Tensor
>>> import numpy as np
>>>
>>> init()
>>> class Net(nn.Cell):
>>>     def __init__(self):
>>>         super(Net, self).__init__()
>>>         self.recv = P.Receive(sr_tag=0, src_rank=0, shape=[2, 8], dtype=np.float32,
>>>                               group="hccl_world_group")
>>>
>>>     def construct(self):
>>>         out = self.recv()
>>>         return out
>>>
>>> net = Net()
>>> output = net()
"""
@prim_attr_register
def __init__(self, sr_tag, src_rank, shape, dtype, group=GlobalComm.WORLD_COMM_GROUP):
self.rank = get_rank(_get_group(group))
self.tag = sr_tag
self.shape = shape
self.dtype = dtype
self.group = group

def infer_shape(self):
return self.shape

def infer_dtype(self):
return self.dtype


class AllGather(PrimitiveWithInfer):
"""
Gathers tensors from the specified communication group.


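To complement the per-op docstrings above, a hedged two-rank sketch of how Send and Receive pair up with a matching sr_tag; the rank numbers, shape, and group name are assumptions for illustration, and a real run needs a multi-device launch.

import numpy as np
import mindspore.nn as nn
import mindspore.ops.operations as P
from mindspore import Tensor
from mindspore.communication import init, get_rank

init()

class SendNet(nn.Cell):
    def __init__(self):
        super(SendNet, self).__init__()
        self.depend = P.Depend()
        self.send = P.Send(sr_tag=0, dest_rank=4, group="hccl_world_group")

    def construct(self, x):
        # Depend keeps the otherwise unused Send alive in the graph.
        return self.depend(x, self.send(x))

class RecvNet(nn.Cell):
    def __init__(self):
        super(RecvNet, self).__init__()
        self.recv = P.Receive(sr_tag=0, src_rank=0, shape=[2, 8], dtype=np.float32,
                              group="hccl_world_group")

    def construct(self):
        return self.recv()

if get_rank() == 0:
    out = SendNet()(Tensor(np.ones([2, 8]).astype(np.float32)))
else:
    out = RecvNet()()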