@@ -49,10 +49,12 @@ DFunctor::DFunctor(const FuncGraphPtr &primal_graph, const pipeline::ResourceBas
  }
  // To keep switch_layer's inputs from being inlined
  k_graph_->set_switch_layer_input(primal_graph->switch_layer_input());
  k_graph_->set_stage(primal_graph->stage());
  TraceManager::EndTrace();

  TraceManager::DebugTrace(std::make_shared<TraceGradBprop>(primal_graph->debug_info()));
  tape_ = std::make_shared<FuncGraph>();
  tape_->set_stage(primal_graph->stage());
  // Add "_Grad" postfix
  if (primal_graph->has_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL)) {
    std::string grad_op_name = GetValue<std::string>(primal_graph->get_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL)) + "_Grad";
| @@ -41,7 +41,7 @@ class ReplaceApplicator : public AnfVisitor { | |||
| } | |||
| auto fg = GetValueNode<FuncGraphPtr>(node); | |||
| if (fg->has_flag(FUNC_GRAPH_FLAG_DEFER_INLINE) || fg->stub() || *(fg->switch_layer_input())) { | |||
| if (fg->has_flag(FUNC_GRAPH_FLAG_DEFER_INLINE) || fg->stage() != -1 || fg->stub() || *(fg->switch_layer_input())) { | |||
| return nullptr; | |||
| } | |||
@@ -124,7 +124,7 @@ class InlinerBase : public AnfVisitor {
    // G
    auto fg = GetValueNode<FuncGraphPtr>(inputs[0]);
-   if (fg->has_flag(FUNC_GRAPH_FLAG_DEFER_INLINE) || fg->stub()) {
+   if (fg->has_flag(FUNC_GRAPH_FLAG_DEFER_INLINE) || fg->stage() != -1 || fg->stub()) {
      return nullptr;
    }
@@ -122,7 +122,7 @@ class ParallelContext {
  std::string parallel_mode_;
  std::string strategy_search_mode_;
  std::vector<int64_t> stages_;
- int32_t pipeline_stage_split_num_;
+ int64_t pipeline_stage_split_num_ = 0;
  bool parameter_broadcast_;
  bool device_num_is_set_;
  bool global_rank_is_set_;
@@ -0,0 +1,442 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <unordered_map>
#include <set>
#include <vector>
#include <string>
#include <utility>
#include <algorithm>
#include <memory>
#include "frontend/parallel/pipeline_transformer/pipeline_transformer.h"
#include "frontend/parallel/graph_util/generate_graph.h"
#include "frontend/parallel/auto_parallel/graph_costmodel.h"
#include "frontend/parallel/ops_info/ops_utils.h"
#include "frontend/parallel/group_manager.h"
#include "frontend/parallel/context.h"
#include "utils/comm_manager.h"
#include "utils/ms_context.h"

namespace mindspore {
namespace parallel {
static std::unordered_map<AnfNodePtr, std::set<int>> parameter_color_map;
static std::pair<bool, int> IsSharedNode(const AnfNodePtr &node, const AnfNodeIndexSet &node_users);
static bool IsSomePrimitive(const CNodePtr &cnode, const std::string &name);
static int send_tag = 0;
static int recv_tag = 0;
void PipelineTransformer::Coloring() {
  auto need_coloring = true;
  while (need_coloring) {
    need_coloring = false;
    for (auto &fg : manager_->func_graphs()) {
      auto value_nodes = fg->value_nodes();
      for (auto &value_pair : value_nodes) {
        auto node = value_pair.first;
        if (!IsValueNode<FuncGraph>(node)) {
          continue;
        }
        auto graph = GetValueNode<FuncGraphPtr>(node);
        auto node_users = manager_->node_users()[node];
        for (auto &user_pair : node_users) {
          auto user_node = user_pair.first->cast<CNodePtr>();
          user_node->set_stage(graph->stage());
          auto user_node_graph = user_node->func_graph();
          if (graph->stage() == stage_ && user_node_graph->stage() == -1) {
            user_node_graph->set_stage(graph->stage());
            need_coloring = true;
          }
        }
      }
    }
  }
  return;
}
void PipelineTransformer::BroadCastColoring() {
  for (auto &fg : manager_->func_graphs()) {
    DoBroadCast(fg);
  }
}

void PipelineTransformer::DoBroadCast(const FuncGraphPtr &func) {
  auto need_coloring = true;
  while (need_coloring) {
    need_coloring = false;
    auto all_nodes = func->nodes();
    for (auto &node : all_nodes) {
      // only cnode can broadcast color.
      if (!node->isa<CNode>()) {
        continue;
      }
      auto cnode = node->cast<CNodePtr>();
      if (cnode->stage() == -1) {
        // broadcast from inputs to outputs
        for (auto &input : cnode->inputs()) {
          if (input->isa<CNode>() && input->stage() == stage_) {
            cnode->set_stage(input->stage());
            need_coloring = true;
          }
        }
      } else if (cnode->stage() == stage_) {
        // broadcast from outputs to inputs
        for (auto &input : cnode->inputs()) {
          if (input->stage() != -1 || !input->isa<CNode>()) {
            continue;
          }
          auto input_cnode = input->cast<CNodePtr>();
          auto prim = GetValueNode<PrimitivePtr>(input_cnode->input(0));
          if (prim != nullptr && prim->name() == VIRTUAL_DATA_SET) {
            continue;
          }
          input->set_stage(cnode->stage());
          need_coloring = true;
        }
      }
    }
  }
}
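
The two coloring passes above are a fixed-point propagation: stages flow from funcgraph value nodes to their users (Coloring), then along data-flow edges until no assignment changes (DoBroadCast). A minimal Python sketch of the same fixed-point idea on a toy graph — the `-1` "uncolored" sentinel mirrors the C++ code, everything else is illustrative:

```python
# Toy fixed-point stage propagation, mirroring DoBroadCast's forward pass.
# Assumptions: a graph is a dict of node -> list of input nodes, and
# stage -1 means "not yet colored", as in the C++ above.
def broadcast_stage(inputs_of, stage_of, my_stage):
    changed = True
    while changed:  # iterate until a fixed point, like `need_coloring`
        changed = False
        for node, inputs in inputs_of.items():
            if stage_of[node] != -1:
                continue
            # color a node from any already-colored input on our stage
            if any(stage_of[i] == my_stage for i in inputs):
                stage_of[node] = my_stage
                changed = True
    return stage_of

# b depends on a (stage 0), c depends on b; both end up colored 0
stages = broadcast_stage({"a": [], "b": ["a"], "c": ["b"]},
                         {"a": 0, "b": -1, "c": -1}, my_stage=0)
assert stages == {"a": 0, "b": 0, "c": 0}
```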
void PipelineTransformer::HandleSharedParameter() {
  auto parameters = root_->parameters();
  for (auto &parameter : parameters) {
    auto parameter_stage = parameter_color_map[parameter];
    if (parameter_stage.size() <= 1) {
      continue;
    }
    auto users = manager_->node_users()[parameter];
    for (auto &user : users) {
      auto node = user.first;
      auto graph = node->func_graph();
      if (graph != root_ && graph->stage() == -1) {
        MS_LOG(EXCEPTION) << "Don't support this situation.";
      }
      if (graph == root_ || graph->stage() != stage_) {
        continue;
      }
      if (stage_ == *parameter_stage.begin()) {
        std::vector<AnfNodePtr> make_tuple_input = {NewValueNode(prim::kPrimMakeTuple)};
        for (auto &stage : parameter_stage) {
          if (stage == stage_) {
            continue;
          } else {
            auto send_out = InsertSend(graph, parameter, stage, stage_);
            make_tuple_input.push_back(send_out.depend);
          }
        }
        auto make_tuple = graph->NewCNode(make_tuple_input);
        OperatorAttrs depend_attrs;
        auto depend_op = CreatOpInstance(depend_attrs, DEPEND, "");
        std::vector<AnfNodePtr> depend_input = {NewValueNode(depend_op), parameter, make_tuple};
        auto depend = graph->NewCNode(depend_input);
        manager_->SetEdge(node, user.second, depend);
        break;
      } else {
        InsertReceive(graph, parameter, node, user.second, stage_, *parameter_stage.begin());
        break;
      }
    }
  }
}
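
For a parameter shared across stages, the smallest owning stage keeps the real parameter and sends it to every other consumer stage; the other stages replace their use with a Receive. A hedged sketch of that per-stage decision rule (toy types; the stage sets stand in for `parameter_color_map` above):

```python
# Toy version of HandleSharedParameter's per-stage decision.
# Assumption: the smallest stage in the color set owns the parameter,
# matching `stage_ == *parameter_stage.begin()` on the ordered std::set.
def shared_param_action(my_stage, param_stages):
    stages = sorted(param_stages)
    if len(stages) <= 1:
        return "local"                         # not shared, nothing to do
    if my_stage == stages[0]:
        peers = [s for s in stages if s != my_stage]
        return f"send to stages {peers}"       # owner: insert Send per peer
    return f"receive from stage {stages[0]}"   # consumer: insert Receive

print(shared_param_action(0, {0, 2, 3}))  # send to stages [2, 3]
print(shared_param_action(2, {0, 2, 3}))  # receive from stage 0
```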
void PipelineTransformer::ParameterColoring() {
  auto parameters = root_->parameters();
  for (auto &parameter : parameters) {
    auto users = manager_->node_users()[parameter];
    std::set<int> parameter_stage;
    for (auto &user : users) {
      auto node = user.first;
      auto graph = node->func_graph();
      if (graph != root_ && graph->stage() != -1) {
        parameter_stage.insert(graph->stage());
        parameter->set_stage(graph->stage());
      }
    }
    parameter_color_map[parameter] = parameter_stage;
  }
}
static std::pair<ValueListPtr, TypePtr> GetShapeType(const AnfNodePtr &node) {
  abstract::ShapePtr shape_ptr;
  TypePtr type;
  std::vector<int64_t> shape;
  auto cnode = node->cast<CNodePtr>();
  if (cnode != nullptr && IsValueNode<FuncGraph>(cnode->input(0))) {
    auto graph = GetValueNode<FuncGraphPtr>(cnode->input(0));
    auto graph_return = graph->get_return();
    shape_ptr = dyn_cast<abstract::Shape>(graph_return->Shape());
    type = graph_return->Type();
  } else {
    shape_ptr = dyn_cast<abstract::Shape>(node->Shape());
    type = node->Type();
  }
  MS_EXCEPTION_IF_NULL(shape_ptr);
  MS_EXCEPTION_IF_NULL(type);
  auto shape_int = shape_ptr->shape();
  std::vector<ValuePtr> element;
  // shape() yields std::vector<int64_t>, so take each element as int64_t to avoid narrowing
  std::transform(shape_int.begin(), shape_int.end(), std::back_inserter(element),
                 [](int64_t elem) { return MakeValue(elem); });
  auto shape_list = std::make_shared<ValueList>(element);
  auto tensor_type = type->cast<mindspore::TensorTypePtr>();
  MS_EXCEPTION_IF_NULL(tensor_type);
  auto dtype = tensor_type->element();
  MS_EXCEPTION_IF_NULL(dtype);
  return std::make_pair(shape_list, dtype);
}
SendAttr PipelineTransformer::InsertSend(const FuncGraphPtr &graph, const AnfNodePtr &parameter,
                                         const int &user_node_stage, const int &node_stage) {
  Attr attr_tag = std::make_pair("sr_tag", MakeValue(send_tag));
  send_tag += 1;
  auto dest_rank = global_rank_ + (user_node_stage - node_stage) * per_stage_rank_num_;
  Attr attr_rank = std::make_pair("dest_rank", MakeValue(dest_rank));
  OperatorAttrs attrs = {attr_tag, attr_rank};
  auto send_op = CreatOpInstance(attrs, "Send", "send");
  auto send_node = NewValueNode(send_op);
  auto prim = GetValueNode<PrimitivePtr>(send_node);
  auto shape_type_pair = GetShapeType(parameter);
  prim->set_attr("shape", shape_type_pair.first);
  prim->set_attr("dtype", shape_type_pair.second);
  std::vector<AnfNodePtr> send_input = {send_node, parameter};
  auto send = graph->NewCNode(send_input);
  OperatorAttrs depend_attrs;
  auto depend_op = CreatOpInstance(depend_attrs, "Depend", "depend");
  std::vector<AnfNodePtr> depend_input = {NewValueNode(depend_op), parameter, send};
  auto depend = graph->NewCNode(depend_input);
  SendAttr send_out = {shape_type_pair.first, shape_type_pair.second, depend};
  return send_out;
}

void PipelineTransformer::InsertReceive(const FuncGraphPtr &graph, const AnfNodePtr &node, const AnfNodePtr &use_node,
                                        const int &index, const int &user_node_stage, const int &node_stage) {
  Attr attr_tag = std::make_pair("sr_tag", MakeValue(recv_tag));
  recv_tag += 1;
  auto src_rank = global_rank_ + (user_node_stage - node_stage) * per_stage_rank_num_;
  Attr attr_rank = std::make_pair("src_rank", MakeValue(src_rank));
  auto shape_type_pair = GetShapeType(node);
  Attr attr_shape = std::make_pair("shape", shape_type_pair.first);
  Attr attr_dtype = std::make_pair("dtype", shape_type_pair.second);
  OperatorAttrs attrs = {attr_tag, attr_rank, attr_shape, attr_dtype};
  auto recv_op = CreatOpInstance(attrs, "Receive", "recv");
  std::vector<AnfNodePtr> recv_input = {NewValueNode(recv_op)};
  auto recv = graph->NewCNode(recv_input);
  manager_->SetEdge(use_node, index, recv);
}
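
Both insertion helpers derive the peer's global rank from the stage distance: ranks are laid out stage-major, so corresponding ranks on adjacent stages sit `per_stage_rank_num_` apart. A worked example of the Send side under that layout assumption (the device counts are illustrative):

```python
# Worked example of InsertSend's dest_rank arithmetic.
# Assumption: 8 devices split into 2 stages -> per_stage_rank_num = 4,
# so rank 2 on stage 0 pairs with rank 6 on stage 1.
def send_dest_rank(global_rank, user_node_stage, node_stage, per_stage_rank_num):
    return global_rank + (user_node_stage - node_stage) * per_stage_rank_num

assert send_dest_rank(2, user_node_stage=1, node_stage=0, per_stage_rank_num=4) == 6
```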
static std::pair<bool, int> IsSharedNode(const AnfNodePtr &node, const AnfNodeIndexSet &node_users) {
  std::set<int> tag_set;
  auto node_stage = node->stage();
  int min_tag = node_stage;
  for (auto &user_pair : node_users) {
    auto user_node = user_pair.first;
    auto user_node_stage = user_node->stage();
    tag_set.insert(user_node_stage);
    if (user_node_stage == -1) {
      continue;
    }
    min_tag = min_tag > user_node_stage ? user_node_stage : min_tag;
  }
  bool is_shared = tag_set.size() > 1;
  return std::make_pair(is_shared, min_tag);
}

void PipelineTransformer::CutBorder(const FuncGraphPtr &graph) {
  OperatorAttrs depend_attrs;
  auto depend_op = CreatOpInstance(depend_attrs, "Depend", "");
  std::vector<AnfNodePtr> out_input = {NewValueNode(depend_op)};
  auto all_nodes = graph->nodes();
  for (auto &node : all_nodes) {
    if (!node->isa<CNode>() || node->stage() == -1) {
      continue;
    }
    auto node_users = manager_->node_users()[node];
    auto shared_min_tag_pair = IsSharedNode(node, node_users);
    auto is_shared = shared_min_tag_pair.first;
    auto min_tag = shared_min_tag_pair.second;
    for (auto &user_pair : node_users) {
      auto user_node = user_pair.first;
      auto node_stage = node->stage();
      auto user_node_stage = user_node->stage();
      if (node_stage != stage_ && user_node_stage != stage_) {
        continue;
      }
      if (node_stage < user_node_stage) {
        if (is_shared && (min_tag != node_stage)) {
          continue;
        }
        if (node_stage == stage_) {
          auto send_out = InsertSend(graph, node, user_node_stage, node_stage);
          out_input.insert(out_input.begin() + 1, send_out.depend);
          type_ptr_ = send_out.type;
          shape_ = send_out.shape;
        } else {
          InsertReceive(graph, node, user_node, user_pair.second, user_node_stage, node_stage);
        }
        continue;
      }
      if (node_stage == user_node_stage) {
        if (is_shared && (min_tag != node_stage)) {
          InsertReceive(graph, node, user_node, user_pair.second, min_tag, stage_);
        }
        continue;
      }
      if (node_stage > user_node_stage) {
        auto cnode = node->cast<CNodePtr>();
        auto user_cnode = user_node->cast<CNodePtr>();
        if (IsValueNode<FuncGraph>(cnode->input(0)) && IsValueNode<FuncGraph>(user_cnode->input(0))) {
          MS_LOG(EXCEPTION) << "Don't support this situation";
        }
        continue;
      }
    }
  }
  if (out_input.size() == 2) {
    manager_->Replace(graph->output(), out_input[1]);
  }
  if (out_input.size() > 2) {
    std::vector<AnfNodePtr> make_tuple_inputs = {NewValueNode(prim::kPrimMakeTuple)};
    make_tuple_inputs.insert(make_tuple_inputs.begin() + 1, out_input.begin() + 2, out_input.end());
    auto make_tuple = graph->NewCNode(make_tuple_inputs);
    std::vector<AnfNodePtr> out_depend_inputs = {out_input[0], out_input[1], make_tuple};
    auto out_node = graph->NewCNode(out_depend_inputs);
    manager_->Replace(graph->output(), out_node);
  }
}
void PipelineTransformer::CutGraph() {
  for (auto &fg : manager_->func_graphs()) {
    if (fg == root_) {
      ElimRootParameter();
      continue;
    }
    CutBorder(fg);
  }
}

void PipelineTransformer::ElimRootParameter() {
  auto output = root_->output()->cast<CNodePtr>();
  MS_EXCEPTION_IF_NULL(output);
  auto prim = GetValueNode<PrimitivePtr>(output->input(0));
  if (prim->name() == DEPEND) {
    auto opt_cnode = output->input(2)->cast<CNodePtr>();
    auto prim_make_tuple = GetValueNode<PrimitivePtr>(opt_cnode->input(0));
    if (prim_make_tuple->name() == MAKE_TUPLE) {
      std::vector<AnfNodePtr> new_node_input = {opt_cnode->input(0)};
      for (auto &input : opt_cnode->inputs()) {
        if (input->isa<CNode>()) {
          if (IsStageNode(input->cast<CNodePtr>())) {
            new_node_input.push_back(input);
          }
        }
      }
      auto new_node = root_->NewCNode(new_node_input);
      manager_->Replace(opt_cnode, new_node);
    }
  }
}

bool PipelineTransformer::IsStageNode(const CNodePtr &node) {
  for (auto &input : node->inputs()) {
    if (input->isa<Parameter>()) {
      return (*parameter_color_map[input].begin() == stage_ || input->stage() == -1);
    } else if (input->isa<CNode>()) {
      auto pre_node = input->cast<CNodePtr>();
      return IsStageNode(pre_node);
    } else {
      continue;
    }
  }
  return true;
}

void PipelineTransformer::ElimGraphStage() {
  for (auto &fg : manager_->func_graphs()) {
    fg->set_stage(-1);
  }
}

static bool IsSomePrimitive(const CNodePtr &cnode, const std::string &name) {
  ValueNodePtr anf_node = cnode->input(0)->cast<ValueNodePtr>();
  MS_EXCEPTION_IF_NULL(anf_node);
  PrimitivePtr prim = anf_node->value()->cast<PrimitivePtr>();
  return (prim->name() == name);
}
std::pair<CNodePtr, FuncGraphPtr> PipelineTransformer::FindSensNode() {
  std::pair<CNodePtr, FuncGraphPtr> sens_graph_pair;
  CNodePtr sens_cnode;
  FuncGraphPtr func_graph;
  for (auto &node : root_->nodes()) {
    if (!node->isa<CNode>()) {
      continue;
    }
    sens_cnode = node->cast<CNodePtr>();
    AnfNodePtr expect_tuple_getitem = sens_cnode->input(0);
    MS_EXCEPTION_IF_NULL(expect_tuple_getitem);
    if (!expect_tuple_getitem->isa<CNode>()) {
      continue;
    }
    auto expect_tuple_getitem_cnode = expect_tuple_getitem->cast<CNodePtr>();
    if (!IsSomePrimitive(expect_tuple_getitem_cnode, TUPLE_GETITEM)) {
      continue;
    }
    auto expect_anonymous = expect_tuple_getitem_cnode->input(1);
    if (!expect_anonymous->isa<CNode>()) {
      continue;
    }
    auto expect_anonymous_cnode = expect_anonymous->cast<CNodePtr>();
    AnfNodePtr expect_j = expect_anonymous_cnode->input(0);
    if (!expect_j->isa<CNode>()) {
      continue;
    }
    auto expect_j_cnode = expect_j->cast<CNodePtr>();
    if (!IsSomePrimitive(expect_j_cnode, J)) {
      continue;
    }
    func_graph = GetValueNode<FuncGraphPtr>(expect_j_cnode->input(1));
    break;
  }
  sens_graph_pair = std::make_pair(sens_cnode, func_graph);
  return sens_graph_pair;
}

void PipelineTransformer::CoverSensShape() {
  auto sens_graph_pair = FindSensNode();
  auto sens_cnode = sens_graph_pair.first;
  MS_EXCEPTION_IF_NULL(sens_cnode);
  OperatorAttrs attrs;
  auto fill_op = CreatOpInstance(attrs, "Fill", "");
  std::vector<AnfNodePtr> fill_input = {NewValueNode(fill_op), NewValueNode(type_ptr_),
                                        NewValueNode(MakeValue(shape_->value())), NewValueNode(0)};
  auto fill = root_->NewCNode(fill_input);
  std::vector<AnfNodePtr> new_sens_input = {sens_cnode->input(0), fill};
  auto new_sens_node = root_->NewCNode(new_sens_input);
  manager_->Replace(sens_cnode, new_sens_node);
}

void PipelineTransformer::ElimParameter() {
  auto parameters = root_->parameters();
  std::vector<AnfNodePtr> parameter_list;
  for (auto &parameter : parameters) {
    if (!manager_->node_users()[parameter].empty()) {
      parameter_list.push_back(parameter);
    }
  }
  manager_->SetParameters(root_, parameter_list);
}
}  // namespace parallel
}  // namespace mindspore
@@ -0,0 +1,72 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_CCSRC_FRONTEND_PARALLEL_PIPELINE_TRANSFORMER_PIPELINE_TRANSFORMER_H_
#define MINDSPORE_CCSRC_FRONTEND_PARALLEL_PIPELINE_TRANSFORMER_PIPELINE_TRANSFORMER_H_

#include <utility>
#include "ir/value.h"
#include "ir/graph_utils.h"
#include "base/base.h"

namespace mindspore {
namespace parallel {
typedef struct {
  ValueListPtr shape;
  TypePtr type;
  AnfNodePtr depend;
} SendAttr;

class PipelineTransformer {
 public:
  PipelineTransformer(const FuncGraphManagerPtr &manager, const int &stage, const FuncGraphPtr &root,
                      const int64_t &global_rank, const int64_t &per_stage_rank_num)
      : manager_(manager),
        stage_(stage),
        root_(root),
        global_rank_(global_rank),
        per_stage_rank_num_(per_stage_rank_num) {}
  void Coloring();
  void BroadCastColoring();
  void HandleSharedParameter();
  void CutGraph();
  void ParameterColoring();
  void CoverSensShape();
  void ElimGraphStage();
  void ElimParameter();

 private:
  void DoBroadCast(const FuncGraphPtr &func);
  SendAttr InsertSend(const FuncGraphPtr &graph, const AnfNodePtr &parameter, const int &user_node_stage,
                      const int &node_stage);
  void InsertReceive(const FuncGraphPtr &graph, const AnfNodePtr &node, const AnfNodePtr &use_node, const int &index,
                     const int &user_node_stage, const int &node_stage);
  void CutBorder(const FuncGraphPtr &graph);
  void ElimRootParameter();
  bool IsStageNode(const CNodePtr &node);
  std::pair<CNodePtr, FuncGraphPtr> FindSensNode();
  FuncGraphManagerPtr manager_;
  int64_t stage_;
  FuncGraphPtr root_;
  int64_t global_rank_;
  int64_t per_stage_rank_num_;
  TypePtr type_ptr_;
  ValueListPtr shape_;
};
}  // namespace parallel
}  // namespace mindspore
#endif  // MINDSPORE_CCSRC_FRONTEND_PARALLEL_PIPELINE_TRANSFORMER_PIPELINE_TRANSFORMER_H_
@@ -5,6 +5,7 @@ file(GLOB_RECURSE _PIPELINE_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
    "action.cc"
    "validator.cc"
    "remove_value_node_dup.cc"
    "pipeline_split.cc"
    "parse/*.cc"
    "static_analysis/*.cc"
)
@@ -302,6 +302,10 @@ bool OptimizeAction(const ResourcePtr &res, const std::vector<PassItem> &passes)
}

bool OptInlineAction(const ResourcePtr &res) {
  if (parallel::ParallelContext::GetInstance()->parallel_mode() == "semi_auto_parallel" ||
      parallel::ParallelContext::GetInstance()->parallel_mode() == "auto_parallel") {
    return OptimizeAction(res, kInlinePasses);
  }
  if (opt::python_pass::PyPassManager::GetInstance()->GetPassGroup(opt::python_pass::Phase::PREAD)->size() != 0) {
    return OptimizeAction(res, kInlinePasses);
  }

@@ -480,6 +484,7 @@ bool RemoveValueNodeDuplicationsAction(const ResourcePtr &res) {
  return true;
}

bool PipelineSplitAction(const ResourcePtr &res) { return PipelineSplitPass(res); }

bool ValidateAction(const ResourcePtr &res) { return ValidatePass(res); }

bool ActionPyStub(const ResourcePtr &res, opt::python_pass::Phase phase) {

@@ -559,6 +564,8 @@ static std::vector<ActionItem> CommonPipeline() {
  actions.emplace_back(std::make_pair("inline", OptInlineAction));
  // Add pre-ad, post-inline python pass stub
  actions.emplace_back(std::make_pair("py_pre_ad", PreAdActionPyStub));
  // Do PipelineSplit
  actions.emplace_back(std::make_pair("pipeline_split", PipelineSplitAction));

  return actions;
}
@@ -246,6 +246,10 @@ bool ConvertCellObjToFuncGraph(const CellPtr &cell, ValuePtr *const data) {
      func_graph->set_flag(FUNC_GRAPH_FLAG_DEFER_INLINE, true);
    }
  }
  if (py::hasattr(obj, STAGE_NAME)) {
    auto stage = py::cast<int>(py::getattr(obj, STAGE_NAME));
    func_graph->set_stage(stage);
  }
  *data = func_graph;
  return true;
}

@@ -132,6 +132,7 @@ const char PYTHON_EXTERN_MINDSPORE_FLAG[] = "_mindspore_flags";
// define the parse constant
const int64_t MAX_COMPARISON_OPS_SUPPORTED = 1;
const char CUSTOM_BPROP_NAME[] = "bprop";
const char STAGE_NAME[] = "stage";

// define the Namespace name
const char RESOLVE_NAMESPACE_NAME_AST[] = "Ast";  // for ast type namespace
@@ -38,6 +38,7 @@
#include "frontend/parallel/step_auto_parallel.h"
#include "frontend/parallel/allreduce_fusion/step_allreduce_fusion.h"
#include "utils/log_adapter.h"
#include "pipeline/jit/pipeline_split.h"

namespace mindspore {
namespace pipeline {

@@ -418,6 +419,8 @@ bool TransformTopGraphPass(const ResourcePtr &res) {
  return true;
}

bool PipelineSplitPass(const ResourcePtr &res) { return PipelineSplit(res); }

bool ValidatePass(const ResourcePtr &res) {
  MS_EXCEPTION_IF_NULL(res->func_graph());
  FuncGraphPtr func_graph = res->func_graph();

@@ -33,6 +33,7 @@ extern std::vector<PassItem> kInlinePasses;
extern std::vector<PassItem> kPynativePasses;
bool CconvPass(const ResourcePtr &res);
bool PipelineSplitPass(const ResourcePtr &res);
bool ValidatePass(const ResourcePtr &res);
bool ConvertPrepareAdapt(const ResourcePtr &res);
bool AddControlDependPass(const ResourcePtr &res);
@@ -0,0 +1,99 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <string>
#include <memory>
#include "pipeline/jit/pipeline_split.h"
#include "utils/ms_context.h"
#include "utils/comm_manager.h"
#include "frontend/parallel/context.h"
#include "frontend/parallel/pipeline_transformer/pipeline_transformer.h"

namespace mindspore {
namespace pipeline {
static int64_t GetRank();
static int64_t InferStage(const int64_t &rank_id, const int64_t &stage_num, const int64_t &device_num);

static int64_t GetRank() {
  auto ms_context = MsContext::GetInstance();
  MS_EXCEPTION_IF_NULL(ms_context);
  std::string world_group;
  std::string backend = ms_context->get_param<std::string>(MS_CTX_DEVICE_TARGET);
  if (backend == kAscendDevice) {
    world_group = parallel::HCCL_WORLD_GROUP;
  } else if (backend == kGPUDevice) {
    world_group = parallel::NCCL_WORLD_GROUP;
  } else {
    MS_LOG(EXCEPTION) << "Invalid backend: " << backend;
  }
  int64_t global_rank = parallel::ParallelContext::GetInstance()->global_rank();
  uint32_t rank_id;
  if (!parallel::ParallelContext::GetInstance()->global_rank_is_set()) {
    if (!CommManager::GetInstance().GetRankID(world_group, &rank_id)) {
      MS_LOG(EXCEPTION) << "Get rank id failed.";
    }
    global_rank = UintToInt(rank_id);
  }
  return global_rank;
}

static int64_t InferStage(const int64_t &rank_id, const int64_t &stage_num, const int64_t &device_num) {
  if (device_num % stage_num != 0) {
    MS_LOG(EXCEPTION) << "Device_num must be divisible by the stage_num, got device_num: " << device_num
                      << ", stage_num: " << stage_num;
  }
  auto per_stage_rank_num = device_num / stage_num;
  return rank_id / per_stage_rank_num;
}
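
InferStage assigns contiguous blocks of ranks to each stage. A quick worked check of that arithmetic (the device counts here are illustrative):

```python
# Rank-to-stage mapping from InferStage: contiguous rank blocks per stage.
# Assumption: 8 devices, 2 stages -> ranks 0-3 form stage 0, ranks 4-7 stage 1.
def infer_stage(rank_id, stage_num, device_num):
    assert device_num % stage_num == 0, "device_num must be divisible by stage_num"
    return rank_id // (device_num // stage_num)

assert [infer_stage(r, 2, 8) for r in range(8)] == [0, 0, 0, 0, 1, 1, 1, 1]
```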
// Only auto_parallel and semi_auto_parallel support PipelineSplit
bool PipelineSplit(const ResourcePtr &res) {
  auto parallel_mode = parallel::ParallelContext::GetInstance()->parallel_mode();
  if (parallel_mode != parallel::SEMI_AUTO_PARALLEL && parallel_mode != parallel::AUTO_PARALLEL) {
    MS_LOG(INFO) << "Only auto_parallel and semi_auto_parallel support pipeline split.";
    return true;
  }
  auto stage_num = parallel::ParallelContext::GetInstance()->pipeline_stage_split_num();
  if (stage_num <= 1) {
    MS_LOG(INFO) << "The stage num is: " << stage_num << ". No need to split the graph for pipeline.";
    return true;
  }
  auto manager = res->manager();
  auto root = res->func_graph();
  auto global_rank = GetRank();
  auto device_num = parallel::ParallelContext::GetInstance()->device_num();
  auto stage = InferStage(global_rank, stage_num, device_num);
  auto per_stage_rank_num = device_num / stage_num;
  auto transformer =
      std::make_shared<parallel::PipelineTransformer>(manager, stage, root, global_rank, per_stage_rank_num);
  // step1: Color the graph by stage
  transformer->Coloring();
  // step2: Broadcast the coloring along data-flow edges
  transformer->BroadCastColoring();
  // step3: Handle shared parameters
  transformer->ParameterColoring();
  transformer->HandleSharedParameter();
  // step4: Cut the graph at stage borders
  transformer->CutGraph();
  // step5: Handle the sens of the backward graph
  transformer->CoverSensShape();
  // step6: Eliminate graph stages and unused parameters
  transformer->ElimGraphStage();
  transformer->ElimParameter();
  return true;
}
}  // namespace pipeline
}  // namespace mindspore
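
End to end, the pass only activates when the parallel mode is (semi_)auto_parallel and more than one pipeline stage is configured. A hedged sketch of how a frontend script would drive it, based on the `stage` cell attribute read in ConvertCellObjToFuncGraph above; the `pipeline_stages` context knob is an assumption, since the Python setter for `pipeline_stage_split_num_` is not part of this diff:

```python
# Minimal sketch, assuming a context knob mapping to
# ParallelContext::pipeline_stage_split_num and that cells expose a
# `stage` attribute (STAGE_NAME above); the knob name is illustrative.
import mindspore.nn as nn
from mindspore import context

context.set_auto_parallel_context(parallel_mode="semi_auto_parallel",
                                  pipeline_stages=2)  # assumed knob

class TwoStageNet(nn.Cell):
    def __init__(self):
        super(TwoStageNet, self).__init__()
        self.block0 = nn.Dense(16, 16)
        self.block1 = nn.Dense(16, 16)
        self.block0.stage = 0  # consumed by ConvertCellObjToFuncGraph,
        self.block1.stage = 1  # then propagated by Coloring()

    def construct(self, x):
        return self.block1(self.block0(x))
```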
@@ -0,0 +1,28 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_CCSRC_PIPELINE_JIT_PIPELINE_SPLIT_H_
#define MINDSPORE_CCSRC_PIPELINE_JIT_PIPELINE_SPLIT_H_

#include "pipeline/jit/resource.h"

namespace mindspore {
namespace pipeline {
bool PipelineSplit(const ResourcePtr &res);
}  // namespace pipeline
}  // namespace mindspore
#endif  // MINDSPORE_CCSRC_PIPELINE_JIT_PIPELINE_SPLIT_H_
@@ -98,7 +98,8 @@ class AnfNode : public Base {
        debug_info_(std::make_shared<NodeDebugInfo>()),
        fullname_with_scope_(""),
        hash_(std::hash<const AnfNode *>()),
-       kernel_info_(nullptr) {
+       kernel_info_(nullptr),
+       stage_(-1) {
    scope_ = ScopeManager::GetInstance().GetCurrentScope();
  }

@@ -184,6 +185,9 @@ class AnfNode : public Base {
    return user_data_.has(T::key);
  }

  int64_t stage() { return stage_; }
  void set_stage(int64_t stage) { stage_ = stage; }

 protected:
  // Hold a weak ref to Graph as Graph also hold ref to AnfNode.
  // Otherwise, func_graph_ and AnfNode will make a reference cycle.

@@ -198,6 +202,7 @@ class AnfNode : public Base {
  ScopePtr scope_;
  KernelInfoDevicePtr kernel_info_;
  UserData user_data_;
  int64_t stage_;
};

// CNode represents the complex node with a set of arguments.
@@ -46,7 +46,8 @@ FuncGraph::FuncGraph()
      is_generated_(false),
      return_(nullptr),
      manager_(std::weak_ptr<FuncGraphManager>()),
-     stub_(false) {
+     stub_(false),
+     stage_(-1) {
  debug_info_ = std::make_shared<GraphDebugInfo>();
  switch_layer_input_ = std::make_shared<bool>(false);
}

@@ -355,6 +355,8 @@ class FuncGraph : public FuncGraphBase {
  std::shared_ptr<bool> switch_layer_input() const { return switch_layer_input_; }
  void set_switch_layer_input(std::shared_ptr<bool> switch_layer_input) { switch_layer_input_ = switch_layer_input; }
  bool ContainMultiTarget() const;
  int64_t stage() { return stage_; }
  void set_stage(int64_t stage) { stage_ = stage; }

 private:
  // graph is manipulated by manager and others

@@ -419,6 +421,7 @@ class FuncGraph : public FuncGraphBase {
  // Design switch_layer_input as a ptr to
  // share between derived backpropagator and cloned graphs
  std::shared_ptr<bool> switch_layer_input_;
  int64_t stage_;
  std::unordered_map<AbstractBasePtrList, FuncGraphPtr, abstract::AbstractBasePtrListHasher,
                     abstract::AbstractBasePtrListEqual>
      func_graph_cache_;
@@ -186,6 +186,7 @@ void Cloner::CloneFuncGraphValueNodes(const FuncGraphPtr &func_graph, const Func
  MS_EXCEPTION_IF_NULL(target_func_graph);
  MS_EXCEPTION_IF_NULL(manager_);
  target_func_graph->set_stage(func_graph->stage());
  auto old_return = func_graph->get_return();
  if (old_return != nullptr) {
    auto return_node = repl_node_[old_return]->cast<CNodePtr>();

@@ -668,6 +669,7 @@ FuncGraphPtr TransformableClone(const FuncGraphPtr &func_graph, const TraceInfoP
  if (func_graph->has_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL)) {
    new_func_graph->set_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL, func_graph->get_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL));
  }
  new_func_graph->set_stage(func_graph->stage());
  return new_func_graph;
}
@@ -20,7 +20,7 @@ from .. import operations as P
from ...common.tensor import RowTensor
from ..composite.multitype_ops.zeros_like_impl import zeros_like
from ..operations.comm_ops import (AllGather, _HostAllGather, AllReduce, _AlltoAll, Broadcast,
-                                  _GetTensorSlice, _MirrorOperator, ReduceOp,
+                                  _GetTensorSlice, _MirrorOperator, ReduceOp, Send, Receive,
                                   ReduceScatter, _HostReduceScatter, _VirtualDiv)
from .grad_base import bprop_getters

@@ -70,6 +70,32 @@ def get_bprop_all_reduce(self):
    return bprop
@bprop_getters.register(Send)
def get_bprop_send(self):
    """Generate bprop for Send."""
    shape = self.get_attr_dict()["shape"]
    dtype = self.get_attr_dict()["dtype"]
    send_grad = Receive(self.sr_tag, self.rank, shape, dtype, self.group)

    def bprop(x, out, dout):
        dx = send_grad()
        return (dx,)

    return bprop


@bprop_getters.register(Receive)
def get_bprop_receive(self):
    """Generate bprop for Receive."""
    receive_grad = Send(self.tag, self.rank, self.group)
    depend = P.Depend()

    def bprop(out, dout):
        send_out = receive_grad(dout)
        dx = depend(dout, send_out)
        return (dx,)

    return bprop
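
The two bprops mirror each other: the gradient of a Send is a Receive of `dout` from the same peer with the same tag, and the gradient of a Receive is a Send back to the peer, kept alive by a Depend. A toy check of that pairing (plain Python, no communication; the tuple encoding is illustrative):

```python
# Toy check that forward Send/Receive ops map to mirrored backward ops.
# Assumption: an op is modeled as a ("Send"|"Receive", sr_tag, peer_rank) tuple.
def grad_op(op):
    kind, tag, peer = op
    return ("Receive" if kind == "Send" else "Send", tag, peer)

assert grad_op(("Send", 0, 6)) == ("Receive", 0, 6)
assert grad_op(("Receive", 0, 2)) == ("Send", 0, 2)
```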
@bprop_getters.register(Broadcast)
def get_bprop_broad_cast(self):
    """Generate bprop for Broadcast."""

@@ -36,7 +36,7 @@ from .array_ops import (Argmax, Argmin, Cast, Concat, Pack, Unpack,
                        Unique, GatherD, Identity, RepeatElements)
from .comm_ops import (AllGather, AllReduce, _AlltoAll, ReduceScatter, Broadcast,
                       _MirrorOperator, ReduceOp, _VirtualDataset,
-                      _VirtualDiv, _GetTensorSlice,
+                      _VirtualDiv, _GetTensorSlice, Send, Receive,
                       _HostAllGather, _HostReduceScatter)
from .debug_ops import (ImageSummary, InsertGradientOf, HookBackward, ScalarSummary,
                        TensorSummary, HistogramSummary, Print, Assert)

@@ -109,6 +109,117 @@ class AllReduce(PrimitiveWithInfer):
        return x_dtype
class Send(PrimitiveWithInfer):
    """
    Send tensors to the specified dest_rank.

    Note:
        Send and Receive must be used in combination and must have the same sr_tag.
        Send must be used between servers.

    Args:
        sr_tag (int): A required integer identifying the send/recv message tag. The message
            will be received by the Receive op with the same "sr_tag".
        dest_rank (int): A required integer identifying the destination rank.
        group (str): The communication group to work on. Default: "hccl_world_group/nccl_world_group".

    Inputs:
        - **input_x** (Tensor) - The shape of tensor is :math:`(x_1, x_2, ..., x_R)`.

    Examples:
        >>> import mindspore.ops.operations as P
        >>> import mindspore.nn as nn
        >>> from mindspore.communication import init
        >>> from mindspore import Tensor
        >>> import numpy as np
        >>>
        >>> init()
        >>> class Net(nn.Cell):
        >>>     def __init__(self):
        >>>         super(Net, self).__init__()
        >>>         self.depend = P.Depend()
        >>>         self.send = P.Send(sr_tag=0, dest_rank=8, group="hccl_world_group")
        >>>
        >>>     def construct(self, x):
        >>>         out = self.depend(x, self.send(x))
        >>>         return out
        >>>
        >>> input_ = Tensor(np.ones([2, 8]).astype(np.float32))
        >>> net = Net()
        >>> output = net(input_)
    """

    @prim_attr_register
    def __init__(self, sr_tag, dest_rank, group=GlobalComm.WORLD_COMM_GROUP):
        self.rank = get_rank(_get_group(group))
        self.sr_tag = sr_tag
        self.group = group

    def infer_shape(self, x_shape):
        self.add_prim_attr("shape", x_shape)
        return x_shape

    def infer_dtype(self, x_dtype):
        self.add_prim_attr("dtype", x_dtype)
        return x_dtype
class Receive(PrimitiveWithInfer):
    """
    Receive tensors from src_rank.

    Note:
        Send and Receive must be used in combination and must have the same sr_tag.
        Receive must be used between servers.

    Args:
        sr_tag (int): A required integer identifying the send/recv message tag. The message
            will be sent by the Send op with the same "sr_tag".
        src_rank (int): A required integer identifying the source rank.
        shape (list[int]): A required list identifying the shape of the tensor to be received.
        dtype (Type): A required Type identifying the type of the tensor to be received. The supported types:
            int8, int16, int32, float16, float32.
        group (str): The communication group to work on. Default: "hccl_world_group/nccl_world_group".

    Outputs:
        - **output** (Tensor) - The shape of the received tensor is :math:`(x_1, x_2, ..., x_R)`.

    Examples:
        >>> import mindspore.ops.operations as P
        >>> import mindspore.nn as nn
        >>> from mindspore.communication import init
        >>> from mindspore import Tensor
        >>> import numpy as np
        >>>
        >>> init()
        >>> class Net(nn.Cell):
        >>>     def __init__(self):
        >>>         super(Net, self).__init__()
        >>>         self.recv = P.Receive(sr_tag=0, src_rank=0, shape=[2, 8], dtype=np.float32,
        >>>                               group="hccl_world_group")
        >>>
        >>>     def construct(self):
        >>>         out = self.recv()
        >>>         return out
        >>>
        >>> net = Net()
        >>> output = net()
    """

    @prim_attr_register
    def __init__(self, sr_tag, src_rank, shape, dtype, group=GlobalComm.WORLD_COMM_GROUP):
        self.rank = get_rank(_get_group(group))
        self.tag = sr_tag
        self.shape = shape
        self.dtype = dtype
        self.group = group

    def infer_shape(self):
        return self.shape

    def infer_dtype(self):
        return self.dtype
class AllGather(PrimitiveWithInfer):
    """
    Gathers tensors from the specified communication group.