| @@ -14,10 +14,8 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/optimizer/ascend/ir_fission/transdata_split.h" | |||
| #include "backend/optimizer/ascend/ascend_helper.h" | |||
| #include "backend/session/anf_runtime_algorithm.h" | |||
| #include <set> | |||
| #include "debug/anf_ir_dump.h" | |||
| #include "utils/trace_base.h" | |||
| namespace mindspore { | |||
| namespace opt { | |||
| @@ -14,15 +14,9 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/optimizer/ascend/mindir/bn_grad_unify_mindir.h" | |||
| #include <vector> | |||
| #include <memory> | |||
| #include "utils/utils.h" | |||
| #include "utils/ms_context.h" | |||
| #include "backend/optimizer/common/helper.h" | |||
| #include "backend/session/anf_runtime_algorithm.h" | |||
| #include "utils/trace_base.h" | |||
| namespace mindspore { | |||
| namespace opt { | |||
| @@ -14,15 +14,9 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/optimizer/ascend/mindir/dynamic_reshape_unify_mindir.h" | |||
| #include <vector> | |||
| #include <memory> | |||
| #include "utils/utils.h" | |||
| #include "utils/ms_context.h" | |||
| #include "backend/optimizer/common/helper.h" | |||
| #include "backend/session/anf_runtime_algorithm.h" | |||
| #include "utils/trace_base.h" | |||
| namespace mindspore { | |||
| namespace opt { | |||
| @@ -1,149 +0,0 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/optimizer/gpu/adjust_depend_for_parallel_optimizer_recompute_all_gather_fusion.h" | |||
| #include <algorithm> | |||
| #include "utils/hash_map.h" | |||
| #include "backend/session/anf_runtime_algorithm.h" | |||
| #include "utils/utils.h" | |||
| namespace mindspore { | |||
| namespace opt { | |||
| bool AdjustDependForParallelOptimizerRecomputeAllGatherFusion::Run(const FuncGraphPtr &graph) { | |||
| MS_EXCEPTION_IF_NULL(graph); | |||
| mindspore::HashMap<int64_t, bool> forward_allgather_recompute_value_in_fusion_group; | |||
| std::vector<AnfNodePtr> node_list = TopoSort(graph->get_return()); | |||
| std::vector<int64_t> parallel_optimizer_recompute_allgather_fusion_ids; | |||
| std::vector<AnfNodePtr> parallel_optimizer_recompute_allgathers; | |||
| std::vector<AnfNodePtr> parallel_optimizer_recompute_first_fusion_allgathers; | |||
| int64_t unrecompute_max_fusion_id = -1; | |||
| int64_t recompute_min_fusion_id = 0; | |||
| for (auto &node : node_list) { | |||
| MS_EXCEPTION_IF_NULL(node); | |||
| if (!node->cast<CNodePtr>() || !AnfUtils::IsRealKernel(node)) { | |||
| continue; | |||
| } | |||
| auto cnode = node->cast<CNodePtr>(); | |||
| if (!AnfAlgo::IsAllgather(cnode) || !AnfAlgo::IsFusion(cnode) || !AnfAlgo::IsFromParallelOptimizer(cnode)) { | |||
| continue; | |||
| } | |||
| if (AnfAlgo::IsRecompute(cnode)) { | |||
| int64_t fusion_id = AnfAlgo::GetNodeAttr<int64_t>(cnode, kAttrFusion); | |||
| if (std::find(parallel_optimizer_recompute_allgather_fusion_ids.begin(), | |||
| parallel_optimizer_recompute_allgather_fusion_ids.end(), | |||
| fusion_id) == parallel_optimizer_recompute_allgather_fusion_ids.end()) { | |||
| parallel_optimizer_recompute_allgather_fusion_ids.push_back(fusion_id); | |||
| if (recompute_min_fusion_id == 0 || fusion_id < recompute_min_fusion_id) { | |||
| recompute_min_fusion_id = fusion_id; | |||
| } | |||
| parallel_optimizer_recompute_first_fusion_allgathers.push_back(node); | |||
| } else { | |||
| parallel_optimizer_recompute_allgathers.push_back(node); | |||
| } | |||
| } else { | |||
| int64_t unrecompute_fusion_id = AnfAlgo::GetNodeAttr<int64_t>(cnode, kAttrFusion); | |||
| unrecompute_max_fusion_id = std::max(unrecompute_fusion_id, unrecompute_max_fusion_id); | |||
| bool would_be_recomputed = | |||
| AnfAlgo::HasNodeAttr(kAttrRecompute, cnode) && AnfAlgo::GetNodeAttr<bool>(cnode, kAttrRecompute); | |||
| auto [iter, inserted] = | |||
| forward_allgather_recompute_value_in_fusion_group.emplace(unrecompute_fusion_id, would_be_recomputed); | |||
| if (!inserted && iter->second != would_be_recomputed) { | |||
| MS_LOG(EXCEPTION) << "In same fusion group, the allgather recompute attribute should be equal. " | |||
| "The normal node is:" | |||
| << cnode->fullname_with_scope(); | |||
| } | |||
| } | |||
| } | |||
| IncreaseAllgatherFusionId(parallel_optimizer_recompute_allgathers, | |||
| parallel_optimizer_recompute_first_fusion_allgathers, unrecompute_max_fusion_id, | |||
| recompute_min_fusion_id); | |||
| return AdjustAllgatherDepend(graph, parallel_optimizer_recompute_allgathers); | |||
| } | |||
| void AdjustDependForParallelOptimizerRecomputeAllGatherFusion::IncreaseAllgatherFusionId( | |||
| const std::vector<AnfNodePtr> ¶llel_optimizer_recompute_allgathers, | |||
| const std::vector<AnfNodePtr> ¶llel_optimizer_recompute_first_fusion_allgathers, | |||
| int64_t unrecompute_max_fusion_id, int64_t recompute_min_fusion_id) { | |||
| // means that there may some forward allgather and duplicated allgather would be fused. | |||
| if (recompute_min_fusion_id <= unrecompute_max_fusion_id) { | |||
| MS_LOG(WARNING) << "Increase the duplicated allgather fusion id"; | |||
| for (auto &adjust_node : parallel_optimizer_recompute_first_fusion_allgathers) { | |||
| int64_t current_fusion_id = AnfAlgo::GetNodeAttr<int64_t>(adjust_node, kAttrFusion); | |||
| int64_t destination_fusion_id = current_fusion_id + unrecompute_max_fusion_id - recompute_min_fusion_id + 2; | |||
| AnfAlgo::SetNodeAttr(kAttrFusion, MakeValue(destination_fusion_id), adjust_node); | |||
| } | |||
| for (auto &adjust_node : parallel_optimizer_recompute_allgathers) { | |||
| int64_t current_fusion_id = AnfAlgo::GetNodeAttr<int64_t>(adjust_node, kAttrFusion); | |||
| int64_t destination_fusion_id = current_fusion_id + unrecompute_max_fusion_id - recompute_min_fusion_id + 2; | |||
| AnfAlgo::SetNodeAttr(kAttrFusion, MakeValue(destination_fusion_id), adjust_node); | |||
| } | |||
| } | |||
| } | |||
| bool AdjustDependForParallelOptimizerRecomputeAllGatherFusion::AdjustAllgatherDepend( | |||
| const FuncGraphPtr &graph, const std::vector<AnfNodePtr> ¶llel_optimizer_recompute_allgathers) { | |||
| FuncGraphManagerPtr manager = graph->manager(); | |||
| bool changed = false; | |||
| for (auto &node : parallel_optimizer_recompute_allgathers) { | |||
| auto cnode = node->cast<CNodePtr>(); | |||
| auto depend_node = AnfAlgo::GetInputNode(cnode, 0); | |||
| if (IsPrimitiveCNode(depend_node, prim::kPrimDepend)) { | |||
| auto depend_cnode = depend_node->cast<CNodePtr>(); | |||
| AnfNodeIndexSet allgather_node_set = manager->node_users()[cnode]; | |||
| for (auto &node_pair : allgather_node_set) { | |||
| auto allgather_next_node = node_pair.first; | |||
| CNodePtr allgather_next_cnode = node_pair.first->cast<CNodePtr>(); | |||
| if (allgather_next_cnode == nullptr || !IsValueNode<Primitive>(allgather_next_cnode->input(0))) { | |||
| continue; | |||
| } | |||
| std::vector<AnfNodePtr> inputs = {NewValueNode(std::make_shared<Primitive>(prim::kPrimDepend->name())), | |||
| allgather_next_node, AnfAlgo::GetInputNode(depend_cnode, 1)}; | |||
| auto new_depend = graph->NewCNode(inputs); | |||
| new_depend->set_abstract(depend_node->abstract()); | |||
| manager->SetEdge(node, 1, AnfAlgo::GetInputNode(depend_cnode, 0)); | |||
| (void)manager->Replace(allgather_next_node, new_depend); | |||
| changed = true; | |||
| } | |||
| } else if (IsPrimitiveCNode(depend_node, prim::kPrimCast) && | |||
| IsPrimitiveCNode(AnfAlgo::GetInputNode(depend_node->cast<CNodePtr>(), 0), prim::kPrimDepend)) { | |||
| auto cast_cnode = depend_node->cast<CNodePtr>(); | |||
| auto cast_depend_node = AnfAlgo::GetInputNode(cast_cnode, 0); | |||
| auto cast_depend_cnode = cast_depend_node->cast<CNodePtr>(); | |||
| AnfNodeIndexSet allgather_node_set = manager->node_users()[cnode]; | |||
| for (auto &node_pair : allgather_node_set) { | |||
| auto allgather_next_node = node_pair.first; | |||
| CNodePtr allgather_next_cnode = node_pair.first->cast<CNodePtr>(); | |||
| if (allgather_next_cnode == nullptr || !IsValueNode<Primitive>(allgather_next_cnode->input(0))) { | |||
| continue; | |||
| } | |||
| std::vector<AnfNodePtr> inputs = {NewValueNode(std::make_shared<Primitive>(prim::kPrimDepend->name())), | |||
| allgather_next_node, AnfAlgo::GetInputNode(cast_depend_cnode, 1)}; | |||
| auto new_depend = graph->NewCNode(inputs); | |||
| new_depend->set_abstract(cast_depend_node->abstract()); | |||
| manager->SetEdge(depend_node, 1, AnfAlgo::GetInputNode(cast_depend_cnode, 0)); | |||
| (void)manager->Replace(allgather_next_node, new_depend); | |||
| changed = true; | |||
| } | |||
| } else { | |||
| MS_LOG(WARNING) << "The parallel optimizer recompute allgather has no depend edge"; | |||
| } | |||
| } | |||
| return changed; | |||
| } | |||
| } // namespace opt | |||
| } // namespace mindspore | |||
| @@ -1,44 +0,0 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_OPTIMIZER_PASS_ADJUST_DEPEND_FOR_PARALLEL_OPTIMIZER_RECOMPUTE_ALL_GATHER_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_OPTIMIZER_PASS_ADJUST_DEPEND_FOR_PARALLEL_OPTIMIZER_RECOMPUTE_ALL_GATHER_H_ | |||
| #include <vector> | |||
| #include <string> | |||
| #include <utility> | |||
| #include <memory> | |||
| #include "backend/optimizer/common/optimizer.h" | |||
| namespace mindspore { | |||
| namespace opt { | |||
| class AdjustDependForParallelOptimizerRecomputeAllGatherFusion : public Pass { | |||
| public: | |||
| explicit AdjustDependForParallelOptimizerRecomputeAllGatherFusion(const std::string &name) | |||
| : Pass("adjust_depend_for_parallel_optimizer_recompute_all_gather") {} | |||
| ~AdjustDependForParallelOptimizerRecomputeAllGatherFusion() override = default; | |||
| bool Run(const FuncGraphPtr &graph) override; | |||
| private: | |||
| bool AdjustAllgatherDepend(const FuncGraphPtr &graph, | |||
| const std::vector<AnfNodePtr> ¶llel_optimizer_recompute_allgathers); | |||
| void IncreaseAllgatherFusionId(const std::vector<AnfNodePtr> ¶llel_optimizer_recompute_allgathers, | |||
| const std::vector<AnfNodePtr> ¶llel_optimizer_recompute_first_fusion_allgathers, | |||
| int64_t unrecompute_max_fusion_id, int64_t recompute_min_fusion_id); | |||
| }; | |||
| } // namespace opt | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_BACKEND_OPTIMIZER_PASS_ADJUST_DEPEND_FOR_PARALLEL_OPTIMIZER_RECOMPUTE_ALL_GATHER_H_ | |||
| @@ -15,7 +15,7 @@ | |||
| */ | |||
| #include "backend/optimizer/pass/adjust_depend_for_parallel_optimizer_recompute_all_gather.h" | |||
| #include "utils/utils.h" | |||
| #include <algorithm> | |||
| #include "backend/session/anf_runtime_algorithm.h" | |||
| namespace mindspore { | |||
| @@ -26,20 +26,17 @@ | |||
| #include "ir/anf.h" | |||
| #include "backend/optimizer/common/helper.h" | |||
| #include "backend/optimizer/common/optimizer.h" | |||
| #include "backend/optimizer/ascend/ascend_helper.h" | |||
| namespace mindspore { | |||
| namespace opt { | |||
| class AdjustDependForParallelOptimizerRecomputeAllGather : public Pass { | |||
| public: | |||
| AdjustDependForParallelOptimizerRecomputeAllGather() | |||
| : Pass("adjust_depend_for_parallel_optimizer_recompute_all_gather"), | |||
| kernel_select_(std::make_shared<KernelSelect>()) {} | |||
| : Pass("adjust_depend_for_parallel_optimizer_recompute_all_gather") {} | |||
| ~AdjustDependForParallelOptimizerRecomputeAllGather() override = default; | |||
| bool Run(const FuncGraphPtr &graph) override; | |||
| private: | |||
| KernelSelectPtr kernel_select_; | |||
| bool AdjustAllgatherDepend(const FuncGraphPtr &graph, | |||
| const std::vector<AnfNodePtr> ¶llel_optimizer_recompute_allgathers); | |||
| void IncreaseAllgatherFusionId(const std::vector<AnfNodePtr> ¶llel_optimizer_recompute_allgathers, | |||
| @@ -16,15 +16,7 @@ | |||
| #include "backend/optimizer/pass/reduce_sum_optimizer.h" | |||
| #include <vector> | |||
| #include "backend/optimizer/common/helper.h" | |||
| #include "backend/session/anf_runtime_algorithm.h" | |||
| #include "utils/utils.h" | |||
| #include "abstract/abstract_value.h" | |||
| #include "base/core_ops.h" | |||
| #include "ir/anf.h" | |||
| #include "ir/dtype.h" | |||
| #include "ir/scalar.h" | |||
| #include "utils/anf_utils.h" | |||
| namespace mindspore { | |||
| namespace opt { | |||
| @@ -119,7 +111,6 @@ AnfNodePtr ReduceSumOptimizer::InsertAssistNode(const CNodePtr &cnode, const Ker | |||
| // create a new assist value node to deal with the following two cases: | |||
| // 1: the axis_input is empty, the new tensor of the new value node should be 'range(shape.size())', | |||
| // the shape is the first input's shape of ReduceSum; | |||
| // | |||
| // 2: the value of axis_input contains a value less than 0, | |||
| // the new tensor of the new value node should be "shape.size() + the_old_value_less_0", | |||
| // the shape is the first input's shape of ReduceSum; | |||
| @@ -57,7 +57,7 @@ | |||
| #include "backend/optimizer/gpu/concat_outputs_for_all_gather.h" | |||
| #include "backend/optimizer/pass/getitem_tuple.h" | |||
| #include "backend/optimizer/pass/optimize_updatestate.h" | |||
| #include "backend/optimizer/gpu/adjust_depend_for_parallel_optimizer_recompute_all_gather_fusion.h" | |||
| #include "backend/optimizer/pass/adjust_depend_for_parallel_optimizer_recompute_all_gather.h" | |||
| #include "utils/ms_device_shape_transfer.h" | |||
| #include "debug/anf_ir_dump.h" | |||
| #include "debug/dump_proto.h" | |||
| @@ -200,8 +200,7 @@ void GPUSession::HardwareOptimize(const std::shared_ptr<KernelGraph> &kernel_gra | |||
| pm->AddPass(std::make_shared<opt::AddReluV2Fusion>()); | |||
| pm->AddPass(std::make_shared<opt::AddReluGradV2Fusion>()); | |||
| pm->AddPass(std::make_shared<opt::AllReduceFusion>()); | |||
| pm->AddPass(std::make_shared<opt::AdjustDependForParallelOptimizerRecomputeAllGatherFusion>( | |||
| "adjust_depend_for_parallel_optimizer_recompute_all_gather_fusion")); | |||
| pm->AddPass(std::make_shared<opt::AdjustDependForParallelOptimizerRecomputeAllGather>()); | |||
| pm->AddPass(std::make_shared<opt::AllGatherFusion>()); | |||
| pm->AddPass(std::make_shared<opt::ConcatOutputsForAllGather>()); | |||
| pm->AddPass(std::make_shared<opt::GetitemTuple>()); | |||
| @@ -292,8 +292,7 @@ void GPUDeviceContext::OptimizeGraphWithDeviceInfo(const KernelGraphPtr &graph) | |||
| pm->AddPass(std::make_shared<opt::AddReluV2Fusion>()); | |||
| pm->AddPass(std::make_shared<opt::AddReluGradV2Fusion>()); | |||
| pm->AddPass(std::make_shared<opt::AllReduceFusion>()); | |||
| pm->AddPass(std::make_shared<opt::AdjustDependForParallelOptimizerRecomputeAllGatherFusion>( | |||
| "adjust_depend_for_parallel_optimizer_recompute_all_gather_fusion")); | |||
| pm->AddPass(std::make_shared<opt::AdjustDependForParallelOptimizerRecomputeAllGather>()); | |||
| pm->AddPass(std::make_shared<opt::AllGatherFusion>()); | |||
| pm->AddPass(std::make_shared<opt::ConcatOutputsForAllGather>()); | |||
| pm->AddPass(std::make_shared<opt::GetitemTuple>()); | |||
| @@ -21,6 +21,7 @@ | |||
| #include "backend/optimizer/common/optimizer.h" | |||
| #include "backend/optimizer/common/pass_manager.h" | |||
| #include "backend/optimizer/common/common_backend_optimization.h" | |||
| #include "backend/optimizer/pass/adjust_depend_for_parallel_optimizer_recompute_all_gather.h" | |||
| #include "backend/optimizer/gpu/adam_weight_decay_fusion.h" | |||
| #include "backend/optimizer/gpu/adam_fusion.h" | |||
| #include "backend/optimizer/gpu/alltoall_fusion.h" | |||
| @@ -52,7 +53,6 @@ | |||
| #include "backend/optimizer/gpu/matmul_biasadd_fusion.h" | |||
| #include "backend/optimizer/gpu/bce_with_logits_loss_fusion.h" | |||
| #include "backend/optimizer/gpu/insert_cast_gpu.h" | |||
| #include "backend/optimizer/gpu/adjust_depend_for_parallel_optimizer_recompute_all_gather_fusion.h" | |||
| #include "backend/optimizer/gpu/neighbor_exchange_v2_fusion.h" | |||
| #endif // MINDSPORE_CCSRC_RUNTIME_HARDWARE_GPU_OPTIMIZER_H_ | |||