Browse Source

warning clean

tags/v1.6.0
lby 4 years ago
parent
commit
86162ebcc0
11 changed files with 7 additions and 228 deletions
  1. +1
    -3
      mindspore/ccsrc/backend/optimizer/ascend/ir_fission/transdata_split.cc
  2. +0
    -6
      mindspore/ccsrc/backend/optimizer/ascend/mindir/bn_grad_unify_mindir.cc
  3. +0
    -6
      mindspore/ccsrc/backend/optimizer/ascend/mindir/dynamic_reshape_unify_mindir.cc
  4. +0
    -149
      mindspore/ccsrc/backend/optimizer/gpu/adjust_depend_for_parallel_optimizer_recompute_all_gather_fusion.cc
  5. +0
    -44
      mindspore/ccsrc/backend/optimizer/gpu/adjust_depend_for_parallel_optimizer_recompute_all_gather_fusion.h
  6. +1
    -1
      mindspore/ccsrc/backend/optimizer/pass/adjust_depend_for_parallel_optimizer_recompute_all_gather.cc
  7. +1
    -4
      mindspore/ccsrc/backend/optimizer/pass/adjust_depend_for_parallel_optimizer_recompute_all_gather.h
  8. +0
    -9
      mindspore/ccsrc/backend/optimizer/pass/reduce_sum_optimizer.cc
  9. +2
    -3
      mindspore/ccsrc/backend/session/gpu_session.cc
  10. +1
    -2
      mindspore/ccsrc/runtime/hardware/gpu/gpu_device_context.cc
  11. +1
    -1
      mindspore/ccsrc/runtime/hardware/gpu/optimizer.h

+ 1
- 3
mindspore/ccsrc/backend/optimizer/ascend/ir_fission/transdata_split.cc View File

@@ -14,10 +14,8 @@
* limitations under the License.
*/
#include "backend/optimizer/ascend/ir_fission/transdata_split.h"
#include "backend/optimizer/ascend/ascend_helper.h"
#include "backend/session/anf_runtime_algorithm.h"
#include <set>
#include "debug/anf_ir_dump.h"
#include "utils/trace_base.h"

namespace mindspore {
namespace opt {


+ 0
- 6
mindspore/ccsrc/backend/optimizer/ascend/mindir/bn_grad_unify_mindir.cc View File

@@ -14,15 +14,9 @@
* limitations under the License.
*/
#include "backend/optimizer/ascend/mindir/bn_grad_unify_mindir.h"

#include <vector>
#include <memory>

#include "utils/utils.h"
#include "utils/ms_context.h"
#include "backend/optimizer/common/helper.h"
#include "backend/session/anf_runtime_algorithm.h"
#include "utils/trace_base.h"

namespace mindspore {
namespace opt {


+ 0
- 6
mindspore/ccsrc/backend/optimizer/ascend/mindir/dynamic_reshape_unify_mindir.cc View File

@@ -14,15 +14,9 @@
* limitations under the License.
*/
#include "backend/optimizer/ascend/mindir/dynamic_reshape_unify_mindir.h"

#include <vector>
#include <memory>

#include "utils/utils.h"
#include "utils/ms_context.h"
#include "backend/optimizer/common/helper.h"
#include "backend/session/anf_runtime_algorithm.h"
#include "utils/trace_base.h"

namespace mindspore {
namespace opt {


+ 0
- 149
mindspore/ccsrc/backend/optimizer/gpu/adjust_depend_for_parallel_optimizer_recompute_all_gather_fusion.cc View File

@@ -1,149 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "backend/optimizer/gpu/adjust_depend_for_parallel_optimizer_recompute_all_gather_fusion.h"

#include <algorithm>

#include "utils/hash_map.h"
#include "backend/session/anf_runtime_algorithm.h"
#include "utils/utils.h"

namespace mindspore {
namespace opt {
bool AdjustDependForParallelOptimizerRecomputeAllGatherFusion::Run(const FuncGraphPtr &graph) {
MS_EXCEPTION_IF_NULL(graph);
mindspore::HashMap<int64_t, bool> forward_allgather_recompute_value_in_fusion_group;
std::vector<AnfNodePtr> node_list = TopoSort(graph->get_return());
std::vector<int64_t> parallel_optimizer_recompute_allgather_fusion_ids;
std::vector<AnfNodePtr> parallel_optimizer_recompute_allgathers;
std::vector<AnfNodePtr> parallel_optimizer_recompute_first_fusion_allgathers;
int64_t unrecompute_max_fusion_id = -1;
int64_t recompute_min_fusion_id = 0;
for (auto &node : node_list) {
MS_EXCEPTION_IF_NULL(node);
if (!node->cast<CNodePtr>() || !AnfUtils::IsRealKernel(node)) {
continue;
}
auto cnode = node->cast<CNodePtr>();
if (!AnfAlgo::IsAllgather(cnode) || !AnfAlgo::IsFusion(cnode) || !AnfAlgo::IsFromParallelOptimizer(cnode)) {
continue;
}
if (AnfAlgo::IsRecompute(cnode)) {
int64_t fusion_id = AnfAlgo::GetNodeAttr<int64_t>(cnode, kAttrFusion);
if (std::find(parallel_optimizer_recompute_allgather_fusion_ids.begin(),
parallel_optimizer_recompute_allgather_fusion_ids.end(),
fusion_id) == parallel_optimizer_recompute_allgather_fusion_ids.end()) {
parallel_optimizer_recompute_allgather_fusion_ids.push_back(fusion_id);
if (recompute_min_fusion_id == 0 || fusion_id < recompute_min_fusion_id) {
recompute_min_fusion_id = fusion_id;
}
parallel_optimizer_recompute_first_fusion_allgathers.push_back(node);
} else {
parallel_optimizer_recompute_allgathers.push_back(node);
}
} else {
int64_t unrecompute_fusion_id = AnfAlgo::GetNodeAttr<int64_t>(cnode, kAttrFusion);
unrecompute_max_fusion_id = std::max(unrecompute_fusion_id, unrecompute_max_fusion_id);
bool would_be_recomputed =
AnfAlgo::HasNodeAttr(kAttrRecompute, cnode) && AnfAlgo::GetNodeAttr<bool>(cnode, kAttrRecompute);
auto [iter, inserted] =
forward_allgather_recompute_value_in_fusion_group.emplace(unrecompute_fusion_id, would_be_recomputed);
if (!inserted && iter->second != would_be_recomputed) {
MS_LOG(EXCEPTION) << "In same fusion group, the allgather recompute attribute should be equal. "
"The normal node is:"
<< cnode->fullname_with_scope();
}
}
}
IncreaseAllgatherFusionId(parallel_optimizer_recompute_allgathers,
parallel_optimizer_recompute_first_fusion_allgathers, unrecompute_max_fusion_id,
recompute_min_fusion_id);
return AdjustAllgatherDepend(graph, parallel_optimizer_recompute_allgathers);
}

void AdjustDependForParallelOptimizerRecomputeAllGatherFusion::IncreaseAllgatherFusionId(
const std::vector<AnfNodePtr> &parallel_optimizer_recompute_allgathers,
const std::vector<AnfNodePtr> &parallel_optimizer_recompute_first_fusion_allgathers,
int64_t unrecompute_max_fusion_id, int64_t recompute_min_fusion_id) {
// means that there may some forward allgather and duplicated allgather would be fused.
if (recompute_min_fusion_id <= unrecompute_max_fusion_id) {
MS_LOG(WARNING) << "Increase the duplicated allgather fusion id";
for (auto &adjust_node : parallel_optimizer_recompute_first_fusion_allgathers) {
int64_t current_fusion_id = AnfAlgo::GetNodeAttr<int64_t>(adjust_node, kAttrFusion);
int64_t destination_fusion_id = current_fusion_id + unrecompute_max_fusion_id - recompute_min_fusion_id + 2;
AnfAlgo::SetNodeAttr(kAttrFusion, MakeValue(destination_fusion_id), adjust_node);
}
for (auto &adjust_node : parallel_optimizer_recompute_allgathers) {
int64_t current_fusion_id = AnfAlgo::GetNodeAttr<int64_t>(adjust_node, kAttrFusion);
int64_t destination_fusion_id = current_fusion_id + unrecompute_max_fusion_id - recompute_min_fusion_id + 2;
AnfAlgo::SetNodeAttr(kAttrFusion, MakeValue(destination_fusion_id), adjust_node);
}
}
}

// Re-wires the control edge around each duplicated (recompute) allgather: the allgather is
// connected directly to the real input of the Depend that was feeding it, and a new Depend
// carrying the original control input is placed on the allgather's users instead, so the
// control dependency no longer delays the allgather itself.
// Returns true if any edge in the graph was changed.
bool AdjustDependForParallelOptimizerRecomputeAllGatherFusion::AdjustAllgatherDepend(
const FuncGraphPtr &graph, const std::vector<AnfNodePtr> &parallel_optimizer_recompute_allgathers) {
// NOTE(review): `manager` is assumed non-null here — TODO confirm callers always run this
// pass on a managed graph.
FuncGraphManagerPtr manager = graph->manager();
bool changed = false;
for (auto &node : parallel_optimizer_recompute_allgathers) {
auto cnode = node->cast<CNodePtr>();
auto depend_node = AnfAlgo::GetInputNode(cnode, 0);
// Case 1: AllGather(Depend(real, control)) — bypass the Depend on the allgather's input
// and hang an equivalent Depend onto every primitive user of the allgather.
if (IsPrimitiveCNode(depend_node, prim::kPrimDepend)) {
auto depend_cnode = depend_node->cast<CNodePtr>();
AnfNodeIndexSet allgather_node_set = manager->node_users()[cnode];
for (auto &node_pair : allgather_node_set) {
auto allgather_next_node = node_pair.first;
CNodePtr allgather_next_cnode = node_pair.first->cast<CNodePtr>();
// Skip users that are not plain primitive CNodes.
if (allgather_next_cnode == nullptr || !IsValueNode<Primitive>(allgather_next_cnode->input(0))) {
continue;
}
// New Depend: (user, original control input of the old Depend).
std::vector<AnfNodePtr> inputs = {NewValueNode(std::make_shared<Primitive>(prim::kPrimDepend->name())),
allgather_next_node, AnfAlgo::GetInputNode(depend_cnode, 1)};
auto new_depend = graph->NewCNode(inputs);
new_depend->set_abstract(depend_node->abstract());
// Feed the allgather from the Depend's real input, then splice the new Depend
// in place of the user node. Order matters: SetEdge before Replace.
manager->SetEdge(node, 1, AnfAlgo::GetInputNode(depend_cnode, 0));
(void)manager->Replace(allgather_next_node, new_depend);
changed = true;
}
// Case 2: AllGather(Cast(Depend(real, control))) — same re-wiring, but the Depend is
// bypassed on the Cast's input so the Cast stays between Depend-input and allgather.
} else if (IsPrimitiveCNode(depend_node, prim::kPrimCast) &&
IsPrimitiveCNode(AnfAlgo::GetInputNode(depend_node->cast<CNodePtr>(), 0), prim::kPrimDepend)) {
auto cast_cnode = depend_node->cast<CNodePtr>();
auto cast_depend_node = AnfAlgo::GetInputNode(cast_cnode, 0);
auto cast_depend_cnode = cast_depend_node->cast<CNodePtr>();
AnfNodeIndexSet allgather_node_set = manager->node_users()[cnode];
for (auto &node_pair : allgather_node_set) {
auto allgather_next_node = node_pair.first;
CNodePtr allgather_next_cnode = node_pair.first->cast<CNodePtr>();
if (allgather_next_cnode == nullptr || !IsValueNode<Primitive>(allgather_next_cnode->input(0))) {
continue;
}
std::vector<AnfNodePtr> inputs = {NewValueNode(std::make_shared<Primitive>(prim::kPrimDepend->name())),
allgather_next_node, AnfAlgo::GetInputNode(cast_depend_cnode, 1)};
auto new_depend = graph->NewCNode(inputs);
new_depend->set_abstract(cast_depend_node->abstract());
// Re-point the Cast (not the allgather) at the Depend's real input.
manager->SetEdge(depend_node, 1, AnfAlgo::GetInputNode(cast_depend_cnode, 0));
(void)manager->Replace(allgather_next_node, new_depend);
changed = true;
}
} else {
// No Depend to adjust; leave the node untouched but note it for debugging.
MS_LOG(WARNING) << "The parallel optimizer recompute allgather has no depend edge";
}
}
return changed;
}
} // namespace opt
} // namespace mindspore

+ 0
- 44
mindspore/ccsrc/backend/optimizer/gpu/adjust_depend_for_parallel_optimizer_recompute_all_gather_fusion.h View File

@@ -1,44 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_BACKEND_OPTIMIZER_PASS_ADJUST_DEPEND_FOR_PARALLEL_OPTIMIZER_RECOMPUTE_ALL_GATHER_H_
#define MINDSPORE_CCSRC_BACKEND_OPTIMIZER_PASS_ADJUST_DEPEND_FOR_PARALLEL_OPTIMIZER_RECOMPUTE_ALL_GATHER_H_
#include <vector>
#include <string>
#include <utility>
#include <memory>

#include "backend/optimizer/common/optimizer.h"

namespace mindspore {
namespace opt {
// Backend pass: keeps the fusion ids of parallel-optimizer recompute allgathers disjoint
// from the forward allgather ids, and moves Depend control edges off the duplicated
// allgathers so they are not needlessly delayed.
class AdjustDependForParallelOptimizerRecomputeAllGatherFusion : public Pass {
 public:
  // `name` is the registered pass name. It was previously ignored (triggering an
  // unused-parameter warning) in favor of a hard-coded name that also lacked the
  // "_fusion" suffix the call sites pass; forward it to the base class instead.
  explicit AdjustDependForParallelOptimizerRecomputeAllGatherFusion(const std::string &name) : Pass(name) {}
  ~AdjustDependForParallelOptimizerRecomputeAllGatherFusion() override = default;
  // Runs the pass on `graph`; returns true if the graph was mutated.
  bool Run(const FuncGraphPtr &graph) override;

 private:
  // Re-wires the Depend edges around the duplicated allgathers; returns true on change.
  bool AdjustAllgatherDepend(const FuncGraphPtr &graph,
                             const std::vector<AnfNodePtr> &parallel_optimizer_recompute_allgathers);
  // Shifts the recompute allgather fusion ids strictly above the forward allgather ids.
  void IncreaseAllgatherFusionId(const std::vector<AnfNodePtr> &parallel_optimizer_recompute_allgathers,
                                 const std::vector<AnfNodePtr> &parallel_optimizer_recompute_first_fusion_allgathers,
                                 int64_t unrecompute_max_fusion_id, int64_t recompute_min_fusion_id);
};
} // namespace opt
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_OPTIMIZER_PASS_ADJUST_DEPEND_FOR_PARALLEL_OPTIMIZER_RECOMPUTE_ALL_GATHER_H_

+ 1
- 1
mindspore/ccsrc/backend/optimizer/pass/adjust_depend_for_parallel_optimizer_recompute_all_gather.cc View File

@@ -15,7 +15,7 @@
*/

#include "backend/optimizer/pass/adjust_depend_for_parallel_optimizer_recompute_all_gather.h"
#include "utils/utils.h"
#include <algorithm>
#include "backend/session/anf_runtime_algorithm.h"

namespace mindspore {


+ 1
- 4
mindspore/ccsrc/backend/optimizer/pass/adjust_depend_for_parallel_optimizer_recompute_all_gather.h View File

@@ -26,20 +26,17 @@
#include "ir/anf.h"
#include "backend/optimizer/common/helper.h"
#include "backend/optimizer/common/optimizer.h"
#include "backend/optimizer/ascend/ascend_helper.h"

namespace mindspore {
namespace opt {
class AdjustDependForParallelOptimizerRecomputeAllGather : public Pass {
public:
AdjustDependForParallelOptimizerRecomputeAllGather()
: Pass("adjust_depend_for_parallel_optimizer_recompute_all_gather"),
kernel_select_(std::make_shared<KernelSelect>()) {}
: Pass("adjust_depend_for_parallel_optimizer_recompute_all_gather") {}
~AdjustDependForParallelOptimizerRecomputeAllGather() override = default;
bool Run(const FuncGraphPtr &graph) override;

private:
KernelSelectPtr kernel_select_;
bool AdjustAllgatherDepend(const FuncGraphPtr &graph,
const std::vector<AnfNodePtr> &parallel_optimizer_recompute_allgathers);
void IncreaseAllgatherFusionId(const std::vector<AnfNodePtr> &parallel_optimizer_recompute_allgathers,


+ 0
- 9
mindspore/ccsrc/backend/optimizer/pass/reduce_sum_optimizer.cc View File

@@ -16,15 +16,7 @@

#include "backend/optimizer/pass/reduce_sum_optimizer.h"
#include <vector>
#include "backend/optimizer/common/helper.h"
#include "backend/session/anf_runtime_algorithm.h"
#include "utils/utils.h"
#include "abstract/abstract_value.h"
#include "base/core_ops.h"
#include "ir/anf.h"
#include "ir/dtype.h"
#include "ir/scalar.h"
#include "utils/anf_utils.h"

namespace mindspore {
namespace opt {
@@ -119,7 +111,6 @@ AnfNodePtr ReduceSumOptimizer::InsertAssistNode(const CNodePtr &cnode, const Ker
// create a new assist value node to deal with the following two cases:
// 1: the axis_input is empty; the new tensor of the new value node should be 'range(shape.size())',
// where the shape is the first input's shape of ReduceSum;
//
// 2: the value of axis_input contains values less than 0;
// the new tensor of the new value node should be "shape.size() + the_old_value_less_than_0",
// where the shape is the first input's shape of ReduceSum;


+ 2
- 3
mindspore/ccsrc/backend/session/gpu_session.cc View File

@@ -57,7 +57,7 @@
#include "backend/optimizer/gpu/concat_outputs_for_all_gather.h"
#include "backend/optimizer/pass/getitem_tuple.h"
#include "backend/optimizer/pass/optimize_updatestate.h"
#include "backend/optimizer/gpu/adjust_depend_for_parallel_optimizer_recompute_all_gather_fusion.h"
#include "backend/optimizer/pass/adjust_depend_for_parallel_optimizer_recompute_all_gather.h"
#include "utils/ms_device_shape_transfer.h"
#include "debug/anf_ir_dump.h"
#include "debug/dump_proto.h"
@@ -200,8 +200,7 @@ void GPUSession::HardwareOptimize(const std::shared_ptr<KernelGraph> &kernel_gra
pm->AddPass(std::make_shared<opt::AddReluV2Fusion>());
pm->AddPass(std::make_shared<opt::AddReluGradV2Fusion>());
pm->AddPass(std::make_shared<opt::AllReduceFusion>());
pm->AddPass(std::make_shared<opt::AdjustDependForParallelOptimizerRecomputeAllGatherFusion>(
"adjust_depend_for_parallel_optimizer_recompute_all_gather_fusion"));
pm->AddPass(std::make_shared<opt::AdjustDependForParallelOptimizerRecomputeAllGather>());
pm->AddPass(std::make_shared<opt::AllGatherFusion>());
pm->AddPass(std::make_shared<opt::ConcatOutputsForAllGather>());
pm->AddPass(std::make_shared<opt::GetitemTuple>());


+ 1
- 2
mindspore/ccsrc/runtime/hardware/gpu/gpu_device_context.cc View File

@@ -292,8 +292,7 @@ void GPUDeviceContext::OptimizeGraphWithDeviceInfo(const KernelGraphPtr &graph)
pm->AddPass(std::make_shared<opt::AddReluV2Fusion>());
pm->AddPass(std::make_shared<opt::AddReluGradV2Fusion>());
pm->AddPass(std::make_shared<opt::AllReduceFusion>());
pm->AddPass(std::make_shared<opt::AdjustDependForParallelOptimizerRecomputeAllGatherFusion>(
"adjust_depend_for_parallel_optimizer_recompute_all_gather_fusion"));
pm->AddPass(std::make_shared<opt::AdjustDependForParallelOptimizerRecomputeAllGather>());
pm->AddPass(std::make_shared<opt::AllGatherFusion>());
pm->AddPass(std::make_shared<opt::ConcatOutputsForAllGather>());
pm->AddPass(std::make_shared<opt::GetitemTuple>());


+ 1
- 1
mindspore/ccsrc/runtime/hardware/gpu/optimizer.h View File

@@ -21,6 +21,7 @@
#include "backend/optimizer/common/optimizer.h"
#include "backend/optimizer/common/pass_manager.h"
#include "backend/optimizer/common/common_backend_optimization.h"
#include "backend/optimizer/pass/adjust_depend_for_parallel_optimizer_recompute_all_gather.h"
#include "backend/optimizer/gpu/adam_weight_decay_fusion.h"
#include "backend/optimizer/gpu/adam_fusion.h"
#include "backend/optimizer/gpu/alltoall_fusion.h"
@@ -52,7 +53,6 @@
#include "backend/optimizer/gpu/matmul_biasadd_fusion.h"
#include "backend/optimizer/gpu/bce_with_logits_loss_fusion.h"
#include "backend/optimizer/gpu/insert_cast_gpu.h"
#include "backend/optimizer/gpu/adjust_depend_for_parallel_optimizer_recompute_all_gather_fusion.h"
#include "backend/optimizer/gpu/neighbor_exchange_v2_fusion.h"

#endif // MINDSPORE_CCSRC_RUNTIME_HARDWARE_GPU_OPTIMIZER_H_

Loading…
Cancel
Save