Browse Source

change cast and weight format

tags/v1.1.0
LianLiguang 5 years ago
parent
commit
68fa73f06a
5 changed files with 175 additions and 54 deletions
  1. +11
    -22
      mindspore/ccsrc/backend/optimizer/ascend/ascend_backend_optimization.cc
  2. +69
    -0
      mindspore/ccsrc/backend/optimizer/ascend/format_type/convert_cast_format.cc
  3. +35
    -0
      mindspore/ccsrc/backend/optimizer/ascend/format_type/convert_cast_format.h
  4. +1
    -1
      mindspore/ccsrc/backend/session/ascend_session.cc
  5. +59
    -31
      mindspore/ccsrc/runtime/device/ascend/kernel_select_ascend.cc

+ 11
- 22
mindspore/ccsrc/backend/optimizer/ascend/ascend_backend_optimization.cc View File

@@ -72,6 +72,7 @@
#include "backend/optimizer/ascend/format_type/chang_axis_of_reduce_kernel.h"
#include "backend/optimizer/ascend/format_type/split_unsupported_transdata.h"
#include "backend/optimizer/ascend/format_type/insert_reshape_for_extract_image_patches_op.h"
#include "backend/optimizer/ascend/format_type/convert_cast_format.h"
#include "backend/optimizer/pass/getitem_tuple.h"
#include "backend/optimizer/pass/optimize_dependence.h"
#include "backend/optimizer/pass/erase_visit_attr.h"
@@ -188,27 +189,6 @@ void AddAscendIRFusionPass(PassManager *ir_fusion_pm) {
ir_fusion_pm->AddPass(std::make_shared<GatherV2DsFission>());
}
} // namespace

void RunOpAscendDataLayout(const std::shared_ptr<session::KernelGraph> &kernel_graph) {
  // Runs the pynative (single-op) data-layout optimization pipeline on the
  // given kernel graph and then rebuilds its execution order.
  MS_EXCEPTION_IF_NULL(kernel_graph);
  auto graph_optimizer = std::make_shared<GraphOptimizer>();
  auto layout_pm = std::make_shared<PassManager>("pynative_transop_pm");
  // NOTE: pass registration order is significant; keep this sequence intact.
  layout_pm->AddPass(std::make_shared<ChangeAxisOfReduceKernel>());
  layout_pm->AddPass(std::make_shared<RectifyDoMaskKernelInfo>());
  layout_pm->AddPass(std::make_shared<DynamicRNNGradReformat>());
  layout_pm->AddPass(std::make_shared<RunOpInsertTransData>());
  layout_pm->AddPass(std::make_shared<GetitemTuple>());
  layout_pm->AddPass(std::make_shared<CommonSubexpressionElimination>());
  layout_pm->AddPass(std::make_shared<EliminateRedundantOp>());
  layout_pm->AddPass(std::make_shared<InsertTransposeForDynamicGRUV2>());
  layout_pm->AddPass(std::make_shared<OptimizeDependence>());
  layout_pm->AddPass(std::make_shared<TransDataSplit>());
  layout_pm->AddPass(std::make_shared<EraseVisitAttr>());
  graph_optimizer->AddPassManager(layout_pm);
  (void)graph_optimizer->Optimize(kernel_graph);
  // Layout passes may have inserted/removed nodes; recompute execution order.
  kernel_graph->SetExecOrderByDefault();
}

void AscendGraphKernelCommonProcess(const std::shared_ptr<session::KernelGraph> &kernel_graph) {
MS_EXCEPTION_IF_NULL(kernel_graph);
auto optimizer = std::make_shared<GraphOptimizer>();
@@ -228,8 +208,17 @@ void AscendDataLayout(const std::shared_ptr<session::KernelGraph> &kernel_graph)
auto data_layout_pm = std::make_shared<PassManager>("transop_pm");
data_layout_pm->AddPass(std::make_shared<RectifyDoMaskKernelInfo>());
data_layout_pm->AddPass(std::make_shared<DynamicRNNGradReformat>());
data_layout_pm->AddPass(std::make_shared<ChangeAxisOfReduceKernel>());
data_layout_pm->AddPass(std::make_shared<AddIoFormatAttrFor3DGraph>());
data_layout_pm->AddPass(std::make_shared<InsertTransOp>());
auto ms_context = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(ms_context);
if (ms_context->get_param<int>(MS_CTX_EXECUTION_MODE) == kPynativeMode) {
data_layout_pm->AddPass(std::make_shared<RunOpInsertTransData>());
} else {
data_layout_pm->AddPass(std::make_shared<MergeCastToOp>());
data_layout_pm->AddPass(std::make_shared<ConvertCastFormat>());
data_layout_pm->AddPass(std::make_shared<InsertTransOp>());
}
data_layout_pm->AddPass(std::make_shared<GetitemTuple>());
data_layout_pm->AddPass(std::make_shared<CommonSubexpressionElimination>());
data_layout_pm->AddPass(std::make_shared<RemoveReshapePair>());


+ 69
- 0
mindspore/ccsrc/backend/optimizer/ascend/format_type/convert_cast_format.cc View File

@@ -0,0 +1,69 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/optimizer/ascend/format_type/convert_cast_format.h"

#include <memory>

#include "backend/session/anf_runtime_algorithm.h"
namespace mindspore {
namespace opt {
const BaseRef ConvertCastFormat::DefinePattern() const {
  // Match any CNode shape: one head var plus an arbitrary input sequence.
  // The real filtering (real kernels only, skip Cast itself) is in Process().
  VarPtr head_var = std::make_shared<Var>();
  VarPtr inputs_var = std::make_shared<SeqVar>();
  return VectorRef({head_var, inputs_var});
}

const AnfNodePtr ConvertCastFormat::Process(const mindspore::FuncGraphPtr &, const mindspore::AnfNodePtr &node,
                                            const mindspore::EquivPtr &) const {
  // For every Cast node feeding `node`: when the Cast's input format and the
  // consumer's expected input format disagree, and at least one side is the
  // default format, rebuild the Cast's kernel build info to use the default
  // format on both input and output.
  // Per the original author's note, casting in default format is faster than
  // casting a hardware-specific format.
  // Always returns nullptr: the pass mutates kernel build info in place and
  // never replaces the matched node.
  if (node == nullptr || !node->isa<CNode>() || !AnfAlgo::IsRealCNodeKernel(node)) {
    return nullptr;
  }
  // Cast nodes themselves are skipped; only consumers of a Cast are visited.
  if (AnfAlgo::GetCNodeName(node) == prim::kPrimCast->name()) {
    return nullptr;
  }
  auto cnode = node->cast<CNodePtr>();
  MS_EXCEPTION_IF_NULL(cnode);
  // Hoisted out of the loop condition: the input count does not change here.
  size_t input_num = AnfAlgo::GetInputTensorNum(cnode);
  for (size_t input_index = 0; input_index < input_num; ++input_index) {
    auto input_node = AnfAlgo::VisitKernelWithReturnType(AnfAlgo::GetInputNode(cnode, input_index), 0).first;
    MS_EXCEPTION_IF_NULL(input_node);
    if (!input_node->isa<CNode>()) {
      continue;
    }
    auto cast_node = input_node->cast<CNodePtr>();
    MS_EXCEPTION_IF_NULL(cast_node);
    if (AnfAlgo::GetCNodeName(cast_node) != prim::kPrimCast->name()) {
      continue;
    }
    auto format = AnfAlgo::GetInputFormat(node, input_index);
    auto cast_input_node = AnfAlgo::VisitKernelWithReturnType(AnfAlgo::GetInputNode(cast_node, 0), 0).first;
    // Null check was missing: GetOutputFormat dereferences this node.
    MS_EXCEPTION_IF_NULL(cast_input_node);
    auto cast_input_format = AnfAlgo::GetOutputFormat(cast_input_node, 0);
    // Rewrite only when the formats disagree and one side is already the
    // default format; other mismatches are left for TransData insertion.
    if (cast_input_format == format) {
      continue;
    }
    if (cast_input_format != kOpFormat_DEFAULT && format != kOpFormat_DEFAULT) {
      continue;
    }
    auto info_builder =
      std::make_shared<kernel::KernelBuildInfo::KernelBuildInfoBuilder>(AnfAlgo::GetSelectKernelBuildInfo(cast_node));
    info_builder->SetInputsFormat({kOpFormat_DEFAULT});
    info_builder->SetOutputsFormat({kOpFormat_DEFAULT});
    AnfAlgo::SetSelectKernelBuildInfo(info_builder->Build(), cast_node.get());
  }
  return nullptr;
}
} // namespace opt
} // namespace mindspore

+ 35
- 0
mindspore/ccsrc/backend/optimizer/ascend/format_type/convert_cast_format.h View File

@@ -0,0 +1,35 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_OPTIMIZER_ASCEND_FORMAT_TYPE_CONVERT_CAST_FORMAT_H_
#define MINDSPORE_CCSRC_BACKEND_OPTIMIZER_ASCEND_FORMAT_TYPE_CONVERT_CAST_FORMAT_H_

#include "backend/optimizer/common/optimizer.h"

namespace mindspore {
namespace opt {
// Pass that rewrites the selected kernel build info of Cast nodes: when a
// Cast's input format and its consumer's expected format disagree and one of
// them is the default format, the Cast is switched to default format on both
// sides (see convert_cast_format.cc).
class ConvertCastFormat : public PatternProcessPass {
 public:
  explicit ConvertCastFormat(bool multigraph = true) : PatternProcessPass("convert_cast_format", multigraph) {}
  ~ConvertCastFormat() override = default;
  const BaseRef DefinePattern() const override;
  const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override;
  // Removed: `bool NeedChangeCastFormat();` — declared but never defined or
  // referenced in the implementation file; calling it would only link-error.
};
} // namespace opt
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_OPTIMIZER_ASCEND_FORMAT_TYPE_CONVERT_CAST_FORMAT_H_

+ 1
- 1
mindspore/ccsrc/backend/session/ascend_session.cc View File

@@ -669,7 +669,7 @@ void AscendSession::RunGraphImpl(const GraphId &graph_id, const std::vector<tens
void AscendSession::RunOpHardwareOptimize(const std::shared_ptr<session::KernelGraph> &kernel_graph) const {
MS_LOG(INFO) << "Start";
// data layout optimization
opt::RunOpAscendDataLayout(kernel_graph);
opt::AscendDataLayout(kernel_graph);
// mixed precision optimization
opt::AscendMixPrecision(kernel_graph);
MS_LOG(INFO) << "Finish";


+ 59
- 31
mindspore/ccsrc/runtime/device/ascend/kernel_select_ascend.cc View File

@@ -39,8 +39,9 @@ namespace mindspore {
namespace device {
namespace ascend {
namespace {
const float kWegihtBaseScore = 1;
const float kFeatureMapBaseScore = 10;
const int kWeightUnInitScore = 1;
const int kWeightInitScore = 2;
const int kFeatureMapBaseScore = 10;
constexpr auto kPriChoosenFormat = "pri_format";
enum MatchCountPriority : int {
MATCH_COUNT_PRIORITY_BEGIN = 0,
@@ -140,18 +141,17 @@ void UpdateCurMatchCounts(const kernel::KernelBuildInfo &kernel_build_info, cons
MS_EXCEPTION_IF_NULL(kernel_node);
MS_EXCEPTION_IF_NULL(cur_kernelinfo_match_counts);
if (cur_kernelinfo_match_counts->size() < MATCH_COUNT_PRIORITY_END) {
MS_LOG(EXCEPTION) << "Out of range cur_kernelinfo_match_counts " << MATCH_COUNT_PRIORITY_END;
MS_LOG(EXCEPTION) << "Out of range cur_kernel info_match_counts " << MATCH_COUNT_PRIORITY_END;
}
auto pri_match_format = GetPriorityMatchFormat(kernel_node);
for (size_t input_index = 0; input_index < AnfAlgo::GetInputTensorNum(kernel_node); ++input_index) {
auto input_anf_node = kernel_node->input(input_index + 1);
auto input_anf_node = AnfAlgo::VisitKernelWithReturnType(AnfAlgo::GetInputNode(kernel_node, input_index), 0).first;
MS_EXCEPTION_IF_NULL(input_anf_node);
// we do not take ValueNode into consideration in graph kernel.
if (kernel_build_info.kernel_type() == KernelType::AKG_KERNEL) {
if (input_anf_node->isa<ValueNode>() && AnfAlgo::GetOutputDeviceDataType(input_anf_node, 0) == kTypeUnknown) {
continue;
}
auto base_score = AnfAlgo::IsFeatureMapInput(kernel_node, input_index) ? kFeatureMapBaseScore : kWeightInitScore;
if (AnfAlgo::GetOutputDeviceDataType(input_anf_node, 0) == kTypeUnknown) {
base_score = kWeightUnInitScore;
}
auto base_score = AnfAlgo::IsFeatureMapInput(kernel_node, input_index) ? kFeatureMapBaseScore : kWegihtBaseScore;
if (kernel_build_info.GetInputFormat(input_index) == AnfAlgo::GetPrevNodeOutputFormat(kernel_node, input_index)) {
(*cur_kernelinfo_match_counts)[MATCH_FORMAT_COUNT] += base_score;
}
@@ -356,6 +356,54 @@ void SetCastAndWeightFormat(const CNodePtr &kernel_node) {
info_builder->SetOutputsFormat({format});
AnfAlgo::SetSelectKernelBuildInfo(info_builder->Build(), kernel_node.get());
}

void SetWeightFormat(const AnfNodePtr &real_input_node, const std::vector<string> &output_format,
const CNodePtr &kernel_node, size_t input_index) {
auto builder = std::make_shared<kernel::KernelBuildInfo::KernelBuildInfoBuilder>();
// we set special device info of a input tensor.
bool is_ref = false;
auto op_info = kernel::tbe::TbeDynamicShapeUtil::FindOp(AnfAlgo::GetCNodeName(kernel_node), kernel_node);
if (op_info != nullptr) {
is_ref = op_info->is_ref();
}
auto selected_kernel_info = AnfAlgo::GetSelectKernelBuildInfo(kernel_node);
if (IsValueNode<tensor::Tensor>(real_input_node) &&
AnfAlgo::GetOutputDeviceDataType(real_input_node, 0) == kTypeUnknown) {
builder->SetOutputsFormat(output_format);
std::vector<TypeId> output_type = {selected_kernel_info->GetInputDeviceType(input_index)};
builder->SetOutputsDeviceType(output_type);
AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), real_input_node.get());
return;
}
if (AnfAlgo::GetOutputDeviceDataType(real_input_node, 0) == kTypeUnknown || is_ref) {
builder->SetOutputsFormat(output_format);
std::vector<TypeId> output_type = {selected_kernel_info->GetInputDeviceType(input_index)};
builder->SetOutputsDeviceType(output_type);
AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), real_input_node.get());
}
}

// If `input_node` is a Cast CNode whose output is a weight (not a feature
// map), force the Cast's input/output format to `format` and then refresh the
// format of the weight feeding that Cast.
// Return value is a "handled" flag for the caller: true for every CNode input
// (Cast or not), false only for non-CNode inputs, which the caller should
// process via SetWeightFormat instead.
bool RefreshCastAndParamWeightFormat(const AnfNodePtr &input_node, const string &format) {
MS_EXCEPTION_IF_NULL(input_node);
// Parameters and value nodes are not handled here.
if (!input_node->isa<CNode>()) {
return false;
}
auto cast_node = input_node->cast<CNodePtr>();
// A non-Cast CNode needs no refresh, but is still "handled" (skip weight path).
if (AnfAlgo::GetCNodeName(cast_node) != prim::kPrimCast->name()) {
return true;
}
// Feature-map-producing Casts are not weight casts; leave their format alone.
if (AnfAlgo::IsFeatureMapOutput(cast_node)) {
return true;
}
// Rebuild the Cast's kernel info with the consumer's format on both sides.
auto info_builder =
std::make_shared<kernel::KernelBuildInfo::KernelBuildInfoBuilder>(AnfAlgo::GetSelectKernelBuildInfo(input_node));
info_builder->SetInputsFormat({format});
info_builder->SetOutputsFormat({format});
AnfAlgo::SetSelectKernelBuildInfo(info_builder->Build(), cast_node.get());
// Propagate the same format down to the weight that feeds the Cast.
auto cast_input_node = AnfAlgo::VisitKernel(AnfAlgo::GetInputNode(cast_node, 0), 0);
SetWeightFormat(cast_input_node.first, {format}, cast_node, 0);
return true;
}
} // namespace
void SetTensorDeviceInfo(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
@@ -367,22 +415,15 @@ void SetTensorDeviceInfo(const CNodePtr &kernel_node) {
auto input_with_index = AnfAlgo::VisitKernel(input_kernel_node, 0);
MS_EXCEPTION_IF_NULL(input_with_index.first);
auto real_input_node = input_with_index.first;
if (real_input_node->isa<CNode>()) {
if (RefreshCastAndParamWeightFormat(real_input_node, selected_kernel_info->GetInputFormat(input_index))) {
continue;
}
if (real_input_node->isa<Parameter>() && !AnfAlgo::IsParameterWeight(real_input_node->cast<ParameterPtr>())) {
continue;
}
// we set special device info of a input tensor.
bool is_ref = false;
auto op_info = kernel::tbe::TbeDynamicShapeUtil::FindOp(AnfAlgo::GetCNodeName(kernel_node), kernel_node);
if (op_info != nullptr) {
is_ref = op_info->is_ref();
}
if (AnfAlgo::OutputAddrExist(real_input_node, 0)) {
continue;
}
auto builder = std::make_shared<kernel::KernelBuildInfo::KernelBuildInfoBuilder>();
auto refresh_format = selected_kernel_info->GetInputFormat(input_index);
std::vector<std::string> output_format = {refresh_format};
// if not find in host convert format map means the host has not registered the convert function of this format
@@ -390,20 +431,7 @@ void SetTensorDeviceInfo(const CNodePtr &kernel_node) {
refresh_format != kOpFormat_DEFAULT) {
output_format = {AnfAlgo::GetOutputFormat(real_input_node, 0)};
}
if (IsValueNode<tensor::Tensor>(input_kernel_node) &&
AnfAlgo::GetOutputDeviceDataType(input_kernel_node, 0) == kTypeUnknown) {
builder->SetOutputsFormat(output_format);
std::vector<TypeId> output_type = {selected_kernel_info->GetInputDeviceType(input_index)};
builder->SetOutputsDeviceType(output_type);
AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), input_kernel_node.get());
continue;
}
if (AnfAlgo::GetOutputDeviceDataType(real_input_node, 0) == kTypeUnknown || is_ref) {
builder->SetOutputsFormat(output_format);
std::vector<TypeId> output_type = {selected_kernel_info->GetInputDeviceType(input_index)};
builder->SetOutputsDeviceType(output_type);
AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), real_input_node.get());
}
SetWeightFormat(real_input_node, output_format, kernel_node, input_index);
}
}



Loading…
Cancel
Save