From 68fa73f06a594f5b7bd1fe0846bfe27c4acbd1e6 Mon Sep 17 00:00:00 2001 From: LianLiguang Date: Thu, 10 Dec 2020 22:26:44 +0800 Subject: [PATCH] change cast and weight format --- .../ascend/ascend_backend_optimization.cc | 33 +++---- .../ascend/format_type/convert_cast_format.cc | 69 ++++++++++++++ .../ascend/format_type/convert_cast_format.h | 35 ++++++++ .../ccsrc/backend/session/ascend_session.cc | 2 +- .../device/ascend/kernel_select_ascend.cc | 90 ++++++++++++------- 5 files changed, 175 insertions(+), 54 deletions(-) create mode 100644 mindspore/ccsrc/backend/optimizer/ascend/format_type/convert_cast_format.cc create mode 100644 mindspore/ccsrc/backend/optimizer/ascend/format_type/convert_cast_format.h diff --git a/mindspore/ccsrc/backend/optimizer/ascend/ascend_backend_optimization.cc b/mindspore/ccsrc/backend/optimizer/ascend/ascend_backend_optimization.cc index 04869ff153..5fbe4e62bf 100644 --- a/mindspore/ccsrc/backend/optimizer/ascend/ascend_backend_optimization.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/ascend_backend_optimization.cc @@ -72,6 +72,7 @@ #include "backend/optimizer/ascend/format_type/chang_axis_of_reduce_kernel.h" #include "backend/optimizer/ascend/format_type/split_unsupported_transdata.h" #include "backend/optimizer/ascend/format_type/insert_reshape_for_extract_image_patches_op.h" +#include "backend/optimizer/ascend/format_type/convert_cast_format.h" #include "backend/optimizer/pass/getitem_tuple.h" #include "backend/optimizer/pass/optimize_dependence.h" #include "backend/optimizer/pass/erase_visit_attr.h" @@ -188,27 +189,6 @@ void AddAscendIRFusionPass(PassManager *ir_fusion_pm) { ir_fusion_pm->AddPass(std::make_shared()); } } // namespace - -void RunOpAscendDataLayout(const std::shared_ptr &kernel_graph) { - MS_EXCEPTION_IF_NULL(kernel_graph); - auto optimizer = std::make_shared(); - auto data_layout_pm = std::make_shared("pynative_transop_pm"); - data_layout_pm->AddPass(std::make_shared()); - 
data_layout_pm->AddPass(std::make_shared()); - data_layout_pm->AddPass(std::make_shared()); - data_layout_pm->AddPass(std::make_shared()); - data_layout_pm->AddPass(std::make_shared()); - data_layout_pm->AddPass(std::make_shared()); - data_layout_pm->AddPass(std::make_shared()); - data_layout_pm->AddPass(std::make_shared()); - data_layout_pm->AddPass(std::make_shared()); - data_layout_pm->AddPass(std::make_shared()); - data_layout_pm->AddPass(std::make_shared()); - optimizer->AddPassManager(data_layout_pm); - (void)optimizer->Optimize(kernel_graph); - kernel_graph->SetExecOrderByDefault(); -} - void AscendGraphKernelCommonProcess(const std::shared_ptr &kernel_graph) { MS_EXCEPTION_IF_NULL(kernel_graph); auto optimizer = std::make_shared(); @@ -228,8 +208,17 @@ void AscendDataLayout(const std::shared_ptr &kernel_graph) auto data_layout_pm = std::make_shared("transop_pm"); data_layout_pm->AddPass(std::make_shared()); data_layout_pm->AddPass(std::make_shared()); + data_layout_pm->AddPass(std::make_shared()); data_layout_pm->AddPass(std::make_shared()); - data_layout_pm->AddPass(std::make_shared()); + auto ms_context = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(ms_context); + if (ms_context->get_param(MS_CTX_EXECUTION_MODE) == kPynativeMode) { + data_layout_pm->AddPass(std::make_shared()); + } else { + data_layout_pm->AddPass(std::make_shared()); + data_layout_pm->AddPass(std::make_shared()); + data_layout_pm->AddPass(std::make_shared()); + } data_layout_pm->AddPass(std::make_shared()); data_layout_pm->AddPass(std::make_shared()); data_layout_pm->AddPass(std::make_shared()); diff --git a/mindspore/ccsrc/backend/optimizer/ascend/format_type/convert_cast_format.cc b/mindspore/ccsrc/backend/optimizer/ascend/format_type/convert_cast_format.cc new file mode 100644 index 0000000000..763f0da805 --- /dev/null +++ b/mindspore/ccsrc/backend/optimizer/ascend/format_type/convert_cast_format.cc @@ -0,0 +1,69 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * 
Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "backend/optimizer/ascend/format_type/convert_cast_format.h" + +#include + +#include "backend/session/anf_runtime_algorithm.h" +namespace mindspore { +namespace opt { +const BaseRef ConvertCastFormat::DefinePattern() const { + VarPtr X = std::make_shared(); + VarPtr Xs = std::make_shared(); + return VectorRef({X, Xs}); +} + +const AnfNodePtr ConvertCastFormat::Process(const mindspore::FuncGraphPtr &, const mindspore::AnfNodePtr &node, + const mindspore::EquivPtr &) const { + if (node == nullptr || !node->isa() || !AnfAlgo::IsRealCNodeKernel(node)) { + return nullptr; + } + auto node_name = AnfAlgo::GetCNodeName(node); + if (node_name == prim::kPrimCast->name()) { + return nullptr; + } + auto cnode = node->cast(); + MS_EXCEPTION_IF_NULL(cnode); + for (size_t input_index = 0; input_index < AnfAlgo::GetInputTensorNum(cnode); ++input_index) { + auto input_node = AnfAlgo::VisitKernelWithReturnType(AnfAlgo::GetInputNode(cnode, input_index), 0).first; + MS_EXCEPTION_IF_NULL(input_node); + if (!input_node->isa()) { + continue; + } + auto cast_node = input_node->cast(); + MS_EXCEPTION_IF_NULL(cast_node); + auto input_node_name = AnfAlgo::GetCNodeName(cast_node); + if (input_node_name != prim::kPrimCast->name()) { + continue; + } + auto format = AnfAlgo::GetInputFormat(node, input_index); + auto cast_input_node = AnfAlgo::VisitKernelWithReturnType(AnfAlgo::GetInputNode(cast_node, 0), 0).first; + auto 
cast_input_format = AnfAlgo::GetOutputFormat(cast_input_node, 0); + // change the cast format to default, which can be faster when casting between other hw formats + if (cast_input_format != format) { + if (cast_input_format == kOpFormat_DEFAULT || format == kOpFormat_DEFAULT) { + auto info_builder = std::make_shared( + AnfAlgo::GetSelectKernelBuildInfo(cast_node)); + info_builder->SetInputsFormat({kOpFormat_DEFAULT}); + info_builder->SetOutputsFormat({kOpFormat_DEFAULT}); + AnfAlgo::SetSelectKernelBuildInfo(info_builder->Build(), cast_node.get()); + } + } + } + return nullptr; +} +} // namespace opt +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/optimizer/ascend/format_type/convert_cast_format.h b/mindspore/ccsrc/backend/optimizer/ascend/format_type/convert_cast_format.h new file mode 100644 index 0000000000..5c7f5a2a0d --- /dev/null +++ b/mindspore/ccsrc/backend/optimizer/ascend/format_type/convert_cast_format.h @@ -0,0 +1,35 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef MINDSPORE_CCSRC_BACKEND_OPTIMIZER_ASCEND_FORMAT_TYPE_CONVERT_CAST_FORMAT_H_ +#define MINDSPORE_CCSRC_BACKEND_OPTIMIZER_ASCEND_FORMAT_TYPE_CONVERT_CAST_FORMAT_H_ + +#include "backend/optimizer/common/optimizer.h" + +namespace mindspore { +namespace opt { +class ConvertCastFormat : public PatternProcessPass { + public: + explicit ConvertCastFormat(bool multigraph = true) : PatternProcessPass("convert_cast_format", multigraph) {} + ~ConvertCastFormat() override = default; + const BaseRef DefinePattern() const override; + const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override; + + private: + bool NeedChangeCastFormat(); +}; +} // namespace opt +} // namespace mindspore +#endif // MINDSPORE_CCSRC_BACKEND_OPTIMIZER_ASCEND_FORMAT_TYPE_CONVERT_CAST_FORMAT_H_ diff --git a/mindspore/ccsrc/backend/session/ascend_session.cc b/mindspore/ccsrc/backend/session/ascend_session.cc index 5881c5f68a..16abae939e 100644 --- a/mindspore/ccsrc/backend/session/ascend_session.cc +++ b/mindspore/ccsrc/backend/session/ascend_session.cc @@ -669,7 +669,7 @@ void AscendSession::RunGraphImpl(const GraphId &graph_id, const std::vector &kernel_graph) const { MS_LOG(INFO) << "Start"; // data layout optimization - opt::RunOpAscendDataLayout(kernel_graph); + opt::AscendDataLayout(kernel_graph); // mixed precision optimization opt::AscendMixPrecision(kernel_graph); MS_LOG(INFO) << "Finish"; diff --git a/mindspore/ccsrc/runtime/device/ascend/kernel_select_ascend.cc b/mindspore/ccsrc/runtime/device/ascend/kernel_select_ascend.cc index a134a6165c..b33957c10a 100644 --- a/mindspore/ccsrc/runtime/device/ascend/kernel_select_ascend.cc +++ b/mindspore/ccsrc/runtime/device/ascend/kernel_select_ascend.cc @@ -39,8 +39,9 @@ namespace mindspore { namespace device { namespace ascend { namespace { -const float kWegihtBaseScore = 1; -const float kFeatureMapBaseScore = 10; +const int kWeightUnInitScore = 1; +const int kWeightInitScore = 2; +const int kFeatureMapBaseScore = 10; 
constexpr auto kPriChoosenFormat = "pri_format"; enum MatchCountPriority : int { MATCH_COUNT_PRIORITY_BEGIN = 0, @@ -140,18 +141,17 @@ void UpdateCurMatchCounts(const kernel::KernelBuildInfo &kernel_build_info, cons MS_EXCEPTION_IF_NULL(kernel_node); MS_EXCEPTION_IF_NULL(cur_kernelinfo_match_counts); if (cur_kernelinfo_match_counts->size() < MATCH_COUNT_PRIORITY_END) { - MS_LOG(EXCEPTION) << "Out of range cur_kernelinfo_match_counts " << MATCH_COUNT_PRIORITY_END; + MS_LOG(EXCEPTION) << "Out of range cur_kernelinfo_match_counts " << MATCH_COUNT_PRIORITY_END; } auto pri_match_format = GetPriorityMatchFormat(kernel_node); for (size_t input_index = 0; input_index < AnfAlgo::GetInputTensorNum(kernel_node); ++input_index) { - auto input_anf_node = kernel_node->input(input_index + 1); + auto input_anf_node = AnfAlgo::VisitKernelWithReturnType(AnfAlgo::GetInputNode(kernel_node, input_index), 0).first; + MS_EXCEPTION_IF_NULL(input_anf_node); // we do not take ValueNode into consideration in graph kernel. - if (kernel_build_info.kernel_type() == KernelType::AKG_KERNEL) { - if (input_anf_node->isa() && AnfAlgo::GetOutputDeviceDataType(input_anf_node, 0) == kTypeUnknown) { - continue; - } + auto base_score = AnfAlgo::IsFeatureMapInput(kernel_node, input_index) ? kFeatureMapBaseScore : kWeightInitScore; + if (AnfAlgo::GetOutputDeviceDataType(input_anf_node, 0) == kTypeUnknown) { + base_score = kWeightUnInitScore; } - auto base_score = AnfAlgo::IsFeatureMapInput(kernel_node, input_index) ? 
kFeatureMapBaseScore : kWegihtBaseScore; if (kernel_build_info.GetInputFormat(input_index) == AnfAlgo::GetPrevNodeOutputFormat(kernel_node, input_index)) { (*cur_kernelinfo_match_counts)[MATCH_FORMAT_COUNT] += base_score; } @@ -356,6 +356,54 @@ void SetCastAndWeightFormat(const CNodePtr &kernel_node) { info_builder->SetOutputsFormat({format}); AnfAlgo::SetSelectKernelBuildInfo(info_builder->Build(), kernel_node.get()); } + +void SetWeightFormat(const AnfNodePtr &real_input_node, const std::vector &output_format, + const CNodePtr &kernel_node, size_t input_index) { + auto builder = std::make_shared(); + // we set special device info of a input tensor. + bool is_ref = false; + auto op_info = kernel::tbe::TbeDynamicShapeUtil::FindOp(AnfAlgo::GetCNodeName(kernel_node), kernel_node); + if (op_info != nullptr) { + is_ref = op_info->is_ref(); + } + auto selected_kernel_info = AnfAlgo::GetSelectKernelBuildInfo(kernel_node); + if (IsValueNode(real_input_node) && + AnfAlgo::GetOutputDeviceDataType(real_input_node, 0) == kTypeUnknown) { + builder->SetOutputsFormat(output_format); + std::vector output_type = {selected_kernel_info->GetInputDeviceType(input_index)}; + builder->SetOutputsDeviceType(output_type); + AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), real_input_node.get()); + return; + } + if (AnfAlgo::GetOutputDeviceDataType(real_input_node, 0) == kTypeUnknown || is_ref) { + builder->SetOutputsFormat(output_format); + std::vector output_type = {selected_kernel_info->GetInputDeviceType(input_index)}; + builder->SetOutputsDeviceType(output_type); + AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), real_input_node.get()); + } +} + +bool RefreshCastAndParamWeightFormat(const AnfNodePtr &input_node, const string &format) { + MS_EXCEPTION_IF_NULL(input_node); + if (!input_node->isa()) { + return false; + } + auto cast_node = input_node->cast(); + if (AnfAlgo::GetCNodeName(cast_node) != prim::kPrimCast->name()) { + return true; + } + if 
(AnfAlgo::IsFeatureMapOutput(cast_node)) { + return true; + } + auto info_builder = + std::make_shared(AnfAlgo::GetSelectKernelBuildInfo(input_node)); + info_builder->SetInputsFormat({format}); + info_builder->SetOutputsFormat({format}); + AnfAlgo::SetSelectKernelBuildInfo(info_builder->Build(), cast_node.get()); + auto cast_input_node = AnfAlgo::VisitKernel(AnfAlgo::GetInputNode(cast_node, 0), 0); + SetWeightFormat(cast_input_node.first, {format}, cast_node, 0); + return true; +} } // namespace void SetTensorDeviceInfo(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node); @@ -367,22 +415,15 @@ void SetTensorDeviceInfo(const CNodePtr &kernel_node) { auto input_with_index = AnfAlgo::VisitKernel(input_kernel_node, 0); MS_EXCEPTION_IF_NULL(input_with_index.first); auto real_input_node = input_with_index.first; - if (real_input_node->isa()) { + if (RefreshCastAndParamWeightFormat(real_input_node, selected_kernel_info->GetInputFormat(input_index))) { continue; } if (real_input_node->isa() && !AnfAlgo::IsParameterWeight(real_input_node->cast())) { continue; } - // we set special device info of a input tensor. 
- bool is_ref = false; - auto op_info = kernel::tbe::TbeDynamicShapeUtil::FindOp(AnfAlgo::GetCNodeName(kernel_node), kernel_node); - if (op_info != nullptr) { - is_ref = op_info->is_ref(); - } if (AnfAlgo::OutputAddrExist(real_input_node, 0)) { continue; } - auto builder = std::make_shared(); auto refresh_format = selected_kernel_info->GetInputFormat(input_index); std::vector output_format = {refresh_format}; // if not find in host convert format map means the host has not registered the convert function of this format @@ -390,20 +431,7 @@ void SetTensorDeviceInfo(const CNodePtr &kernel_node) { refresh_format != kOpFormat_DEFAULT) { output_format = {AnfAlgo::GetOutputFormat(real_input_node, 0)}; } - if (IsValueNode(input_kernel_node) && - AnfAlgo::GetOutputDeviceDataType(input_kernel_node, 0) == kTypeUnknown) { - builder->SetOutputsFormat(output_format); - std::vector output_type = {selected_kernel_info->GetInputDeviceType(input_index)}; - builder->SetOutputsDeviceType(output_type); - AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), input_kernel_node.get()); - continue; - } - if (AnfAlgo::GetOutputDeviceDataType(real_input_node, 0) == kTypeUnknown || is_ref) { - builder->SetOutputsFormat(output_format); - std::vector output_type = {selected_kernel_info->GetInputDeviceType(input_index)}; - builder->SetOutputsDeviceType(output_type); - AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), real_input_node.get()); - } + SetWeightFormat(real_input_node, output_format, kernel_node, input_index); } }