| @@ -72,6 +72,7 @@ | |||||
| #include "backend/optimizer/ascend/format_type/chang_axis_of_reduce_kernel.h" | #include "backend/optimizer/ascend/format_type/chang_axis_of_reduce_kernel.h" | ||||
| #include "backend/optimizer/ascend/format_type/split_unsupported_transdata.h" | #include "backend/optimizer/ascend/format_type/split_unsupported_transdata.h" | ||||
| #include "backend/optimizer/ascend/format_type/insert_reshape_for_extract_image_patches_op.h" | #include "backend/optimizer/ascend/format_type/insert_reshape_for_extract_image_patches_op.h" | ||||
| #include "backend/optimizer/ascend/format_type/convert_cast_format.h" | |||||
| #include "backend/optimizer/pass/getitem_tuple.h" | #include "backend/optimizer/pass/getitem_tuple.h" | ||||
| #include "backend/optimizer/pass/optimize_dependence.h" | #include "backend/optimizer/pass/optimize_dependence.h" | ||||
| #include "backend/optimizer/pass/erase_visit_attr.h" | #include "backend/optimizer/pass/erase_visit_attr.h" | ||||
| @@ -188,27 +189,6 @@ void AddAscendIRFusionPass(PassManager *ir_fusion_pm) { | |||||
| ir_fusion_pm->AddPass(std::make_shared<GatherV2DsFission>()); | ir_fusion_pm->AddPass(std::make_shared<GatherV2DsFission>()); | ||||
| } | } | ||||
| } // namespace | } // namespace | ||||
| void RunOpAscendDataLayout(const std::shared_ptr<session::KernelGraph> &kernel_graph) { | |||||
| MS_EXCEPTION_IF_NULL(kernel_graph); | |||||
| auto optimizer = std::make_shared<GraphOptimizer>(); | |||||
| auto data_layout_pm = std::make_shared<PassManager>("pynative_transop_pm"); | |||||
| data_layout_pm->AddPass(std::make_shared<ChangeAxisOfReduceKernel>()); | |||||
| data_layout_pm->AddPass(std::make_shared<RectifyDoMaskKernelInfo>()); | |||||
| data_layout_pm->AddPass(std::make_shared<DynamicRNNGradReformat>()); | |||||
| data_layout_pm->AddPass(std::make_shared<RunOpInsertTransData>()); | |||||
| data_layout_pm->AddPass(std::make_shared<GetitemTuple>()); | |||||
| data_layout_pm->AddPass(std::make_shared<CommonSubexpressionElimination>()); | |||||
| data_layout_pm->AddPass(std::make_shared<EliminateRedundantOp>()); | |||||
| data_layout_pm->AddPass(std::make_shared<InsertTransposeForDynamicGRUV2>()); | |||||
| data_layout_pm->AddPass(std::make_shared<OptimizeDependence>()); | |||||
| data_layout_pm->AddPass(std::make_shared<TransDataSplit>()); | |||||
| data_layout_pm->AddPass(std::make_shared<EraseVisitAttr>()); | |||||
| optimizer->AddPassManager(data_layout_pm); | |||||
| (void)optimizer->Optimize(kernel_graph); | |||||
| kernel_graph->SetExecOrderByDefault(); | |||||
| } | |||||
| void AscendGraphKernelCommonProcess(const std::shared_ptr<session::KernelGraph> &kernel_graph) { | void AscendGraphKernelCommonProcess(const std::shared_ptr<session::KernelGraph> &kernel_graph) { | ||||
| MS_EXCEPTION_IF_NULL(kernel_graph); | MS_EXCEPTION_IF_NULL(kernel_graph); | ||||
| auto optimizer = std::make_shared<GraphOptimizer>(); | auto optimizer = std::make_shared<GraphOptimizer>(); | ||||
| @@ -228,8 +208,17 @@ void AscendDataLayout(const std::shared_ptr<session::KernelGraph> &kernel_graph) | |||||
| auto data_layout_pm = std::make_shared<PassManager>("transop_pm"); | auto data_layout_pm = std::make_shared<PassManager>("transop_pm"); | ||||
| data_layout_pm->AddPass(std::make_shared<RectifyDoMaskKernelInfo>()); | data_layout_pm->AddPass(std::make_shared<RectifyDoMaskKernelInfo>()); | ||||
| data_layout_pm->AddPass(std::make_shared<DynamicRNNGradReformat>()); | data_layout_pm->AddPass(std::make_shared<DynamicRNNGradReformat>()); | ||||
| data_layout_pm->AddPass(std::make_shared<ChangeAxisOfReduceKernel>()); | |||||
| data_layout_pm->AddPass(std::make_shared<AddIoFormatAttrFor3DGraph>()); | data_layout_pm->AddPass(std::make_shared<AddIoFormatAttrFor3DGraph>()); | ||||
| data_layout_pm->AddPass(std::make_shared<InsertTransOp>()); | |||||
| auto ms_context = MsContext::GetInstance(); | |||||
| MS_EXCEPTION_IF_NULL(ms_context); | |||||
| if (ms_context->get_param<int>(MS_CTX_EXECUTION_MODE) == kPynativeMode) { | |||||
| data_layout_pm->AddPass(std::make_shared<RunOpInsertTransData>()); | |||||
| } else { | |||||
| data_layout_pm->AddPass(std::make_shared<MergeCastToOp>()); | |||||
| data_layout_pm->AddPass(std::make_shared<ConvertCastFormat>()); | |||||
| data_layout_pm->AddPass(std::make_shared<InsertTransOp>()); | |||||
| } | |||||
| data_layout_pm->AddPass(std::make_shared<GetitemTuple>()); | data_layout_pm->AddPass(std::make_shared<GetitemTuple>()); | ||||
| data_layout_pm->AddPass(std::make_shared<CommonSubexpressionElimination>()); | data_layout_pm->AddPass(std::make_shared<CommonSubexpressionElimination>()); | ||||
| data_layout_pm->AddPass(std::make_shared<RemoveReshapePair>()); | data_layout_pm->AddPass(std::make_shared<RemoveReshapePair>()); | ||||
| @@ -0,0 +1,69 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "backend/optimizer/ascend/format_type/convert_cast_format.h" | |||||
| #include <memory> | |||||
| #include "backend/session/anf_runtime_algorithm.h" | |||||
| namespace mindspore { | |||||
| namespace opt { | |||||
| const BaseRef ConvertCastFormat::DefinePattern() const { | |||||
| VarPtr X = std::make_shared<Var>(); | |||||
| VarPtr Xs = std::make_shared<SeqVar>(); | |||||
| return VectorRef({X, Xs}); | |||||
| } | |||||
| const AnfNodePtr ConvertCastFormat::Process(const mindspore::FuncGraphPtr &, const mindspore::AnfNodePtr &node, | |||||
| const mindspore::EquivPtr &) const { | |||||
| if (node == nullptr || !node->isa<CNode>() || !AnfAlgo::IsRealCNodeKernel(node)) { | |||||
| return nullptr; | |||||
| } | |||||
| auto node_name = AnfAlgo::GetCNodeName(node); | |||||
| if (node_name == prim::kPrimCast->name()) { | |||||
| return nullptr; | |||||
| } | |||||
| auto cnode = node->cast<CNodePtr>(); | |||||
| MS_EXCEPTION_IF_NULL(cnode); | |||||
| for (size_t input_index = 0; input_index < AnfAlgo::GetInputTensorNum(cnode); ++input_index) { | |||||
| auto input_node = AnfAlgo::VisitKernelWithReturnType(AnfAlgo::GetInputNode(cnode, input_index), 0).first; | |||||
| MS_EXCEPTION_IF_NULL(input_node); | |||||
| if (!input_node->isa<CNode>()) { | |||||
| continue; | |||||
| } | |||||
| auto cast_node = input_node->cast<CNodePtr>(); | |||||
| MS_EXCEPTION_IF_NULL(cast_node); | |||||
| auto input_node_name = AnfAlgo::GetCNodeName(cast_node); | |||||
| if (input_node_name != prim::kPrimCast->name()) { | |||||
| continue; | |||||
| } | |||||
| auto format = AnfAlgo::GetInputFormat(node, input_index); | |||||
| auto cast_input_node = AnfAlgo::VisitKernelWithReturnType(AnfAlgo::GetInputNode(cast_node, 0), 0).first; | |||||
| auto cast_input_format = AnfAlgo::GetOutputFormat(cast_input_node, 0); | |||||
| // Set the Cast's input/output formats to default; this can be faster than casting in another hardware format. | |||||
| if (cast_input_format != format) { | |||||
| if (cast_input_format == kOpFormat_DEFAULT || format == kOpFormat_DEFAULT) { | |||||
| auto info_builder = std::make_shared<kernel::KernelBuildInfo::KernelBuildInfoBuilder>( | |||||
| AnfAlgo::GetSelectKernelBuildInfo(cast_node)); | |||||
| info_builder->SetInputsFormat({kOpFormat_DEFAULT}); | |||||
| info_builder->SetOutputsFormat({kOpFormat_DEFAULT}); | |||||
| AnfAlgo::SetSelectKernelBuildInfo(info_builder->Build(), cast_node.get()); | |||||
| } | |||||
| } | |||||
| } | |||||
| return nullptr; | |||||
| } | |||||
| } // namespace opt | |||||
| } // namespace mindspore | |||||
| @@ -0,0 +1,35 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_CCSRC_BACKEND_OPTIMIZER_ASCEND_FORMAT_TYPE_CONVERT_CAST_FORMAT_H_ | |||||
| #define MINDSPORE_CCSRC_BACKEND_OPTIMIZER_ASCEND_FORMAT_TYPE_CONVERT_CAST_FORMAT_H_ | |||||
| #include "backend/optimizer/common/optimizer.h" | |||||
| namespace mindspore { | |||||
| namespace opt { | |||||
| class ConvertCastFormat : public PatternProcessPass { | |||||
| public: | |||||
| explicit ConvertCastFormat(bool multigraph = true) : PatternProcessPass("convert_cast_format", multigraph) {} | |||||
| ~ConvertCastFormat() override = default; | |||||
| const BaseRef DefinePattern() const override; | |||||
| const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override; | |||||
| private: | |||||
| bool NeedChangeCastFormat(); | |||||
| }; | |||||
| } // namespace opt | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_CCSRC_BACKEND_OPTIMIZER_ASCEND_FORMAT_TYPE_CONVERT_CAST_FORMAT_H_ | |||||
| @@ -669,7 +669,7 @@ void AscendSession::RunGraphImpl(const GraphId &graph_id, const std::vector<tens | |||||
| void AscendSession::RunOpHardwareOptimize(const std::shared_ptr<session::KernelGraph> &kernel_graph) const { | void AscendSession::RunOpHardwareOptimize(const std::shared_ptr<session::KernelGraph> &kernel_graph) const { | ||||
| MS_LOG(INFO) << "Start"; | MS_LOG(INFO) << "Start"; | ||||
| // data layout optimization | // data layout optimization | ||||
| opt::RunOpAscendDataLayout(kernel_graph); | |||||
| opt::AscendDataLayout(kernel_graph); | |||||
| // mixed precision optimization | // mixed precision optimization | ||||
| opt::AscendMixPrecision(kernel_graph); | opt::AscendMixPrecision(kernel_graph); | ||||
| MS_LOG(INFO) << "Finish"; | MS_LOG(INFO) << "Finish"; | ||||
| @@ -39,8 +39,9 @@ namespace mindspore { | |||||
| namespace device { | namespace device { | ||||
| namespace ascend { | namespace ascend { | ||||
| namespace { | namespace { | ||||
| const float kWegihtBaseScore = 1; | |||||
| const float kFeatureMapBaseScore = 10; | |||||
| const int kWeightUnInitScore = 1; | |||||
| const int kWeightInitScore = 2; | |||||
| const int kFeatureMapBaseScore = 10; | |||||
| constexpr auto kPriChoosenFormat = "pri_format"; | constexpr auto kPriChoosenFormat = "pri_format"; | ||||
| enum MatchCountPriority : int { | enum MatchCountPriority : int { | ||||
| MATCH_COUNT_PRIORITY_BEGIN = 0, | MATCH_COUNT_PRIORITY_BEGIN = 0, | ||||
| @@ -140,18 +141,17 @@ void UpdateCurMatchCounts(const kernel::KernelBuildInfo &kernel_build_info, cons | |||||
| MS_EXCEPTION_IF_NULL(kernel_node); | MS_EXCEPTION_IF_NULL(kernel_node); | ||||
| MS_EXCEPTION_IF_NULL(cur_kernelinfo_match_counts); | MS_EXCEPTION_IF_NULL(cur_kernelinfo_match_counts); | ||||
| if (cur_kernelinfo_match_counts->size() < MATCH_COUNT_PRIORITY_END) { | if (cur_kernelinfo_match_counts->size() < MATCH_COUNT_PRIORITY_END) { | ||||
| MS_LOG(EXCEPTION) << "Out of range cur_kernelinfo_match_counts " << MATCH_COUNT_PRIORITY_END; | |||||
| MS_LOG(EXCEPTION) << "Out of range cur_kernel info_match_counts " << MATCH_COUNT_PRIORITY_END; | |||||
| } | } | ||||
| auto pri_match_format = GetPriorityMatchFormat(kernel_node); | auto pri_match_format = GetPriorityMatchFormat(kernel_node); | ||||
| for (size_t input_index = 0; input_index < AnfAlgo::GetInputTensorNum(kernel_node); ++input_index) { | for (size_t input_index = 0; input_index < AnfAlgo::GetInputTensorNum(kernel_node); ++input_index) { | ||||
| auto input_anf_node = kernel_node->input(input_index + 1); | |||||
| auto input_anf_node = AnfAlgo::VisitKernelWithReturnType(AnfAlgo::GetInputNode(kernel_node, input_index), 0).first; | |||||
| MS_EXCEPTION_IF_NULL(input_anf_node); | |||||
| // we do not take ValueNode into consideration in graph kernel. | // we do not take ValueNode into consideration in graph kernel. | ||||
| if (kernel_build_info.kernel_type() == KernelType::AKG_KERNEL) { | |||||
| if (input_anf_node->isa<ValueNode>() && AnfAlgo::GetOutputDeviceDataType(input_anf_node, 0) == kTypeUnknown) { | |||||
| continue; | |||||
| } | |||||
| auto base_score = AnfAlgo::IsFeatureMapInput(kernel_node, input_index) ? kFeatureMapBaseScore : kWeightInitScore; | |||||
| if (AnfAlgo::GetOutputDeviceDataType(input_anf_node, 0) == kTypeUnknown) { | |||||
| base_score = kWeightUnInitScore; | |||||
| } | } | ||||
| auto base_score = AnfAlgo::IsFeatureMapInput(kernel_node, input_index) ? kFeatureMapBaseScore : kWegihtBaseScore; | |||||
| if (kernel_build_info.GetInputFormat(input_index) == AnfAlgo::GetPrevNodeOutputFormat(kernel_node, input_index)) { | if (kernel_build_info.GetInputFormat(input_index) == AnfAlgo::GetPrevNodeOutputFormat(kernel_node, input_index)) { | ||||
| (*cur_kernelinfo_match_counts)[MATCH_FORMAT_COUNT] += base_score; | (*cur_kernelinfo_match_counts)[MATCH_FORMAT_COUNT] += base_score; | ||||
| } | } | ||||
| @@ -356,6 +356,54 @@ void SetCastAndWeightFormat(const CNodePtr &kernel_node) { | |||||
| info_builder->SetOutputsFormat({format}); | info_builder->SetOutputsFormat({format}); | ||||
| AnfAlgo::SetSelectKernelBuildInfo(info_builder->Build(), kernel_node.get()); | AnfAlgo::SetSelectKernelBuildInfo(info_builder->Build(), kernel_node.get()); | ||||
| } | } | ||||
| void SetWeightFormat(const AnfNodePtr &real_input_node, const std::vector<string> &output_format, | |||||
| const CNodePtr &kernel_node, size_t input_index) { | |||||
| auto builder = std::make_shared<kernel::KernelBuildInfo::KernelBuildInfoBuilder>(); | |||||
| // Set the special device info of an input tensor. | |||||
| bool is_ref = false; | |||||
| auto op_info = kernel::tbe::TbeDynamicShapeUtil::FindOp(AnfAlgo::GetCNodeName(kernel_node), kernel_node); | |||||
| if (op_info != nullptr) { | |||||
| is_ref = op_info->is_ref(); | |||||
| } | |||||
| auto selected_kernel_info = AnfAlgo::GetSelectKernelBuildInfo(kernel_node); | |||||
| if (IsValueNode<tensor::Tensor>(real_input_node) && | |||||
| AnfAlgo::GetOutputDeviceDataType(real_input_node, 0) == kTypeUnknown) { | |||||
| builder->SetOutputsFormat(output_format); | |||||
| std::vector<TypeId> output_type = {selected_kernel_info->GetInputDeviceType(input_index)}; | |||||
| builder->SetOutputsDeviceType(output_type); | |||||
| AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), real_input_node.get()); | |||||
| return; | |||||
| } | |||||
| if (AnfAlgo::GetOutputDeviceDataType(real_input_node, 0) == kTypeUnknown || is_ref) { | |||||
| builder->SetOutputsFormat(output_format); | |||||
| std::vector<TypeId> output_type = {selected_kernel_info->GetInputDeviceType(input_index)}; | |||||
| builder->SetOutputsDeviceType(output_type); | |||||
| AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), real_input_node.get()); | |||||
| } | |||||
| } | |||||
| bool RefreshCastAndParamWeightFormat(const AnfNodePtr &input_node, const string &format) { | |||||
| MS_EXCEPTION_IF_NULL(input_node); | |||||
| if (!input_node->isa<CNode>()) { | |||||
| return false; | |||||
| } | |||||
| auto cast_node = input_node->cast<CNodePtr>(); | |||||
| if (AnfAlgo::GetCNodeName(cast_node) != prim::kPrimCast->name()) { | |||||
| return true; | |||||
| } | |||||
| if (AnfAlgo::IsFeatureMapOutput(cast_node)) { | |||||
| return true; | |||||
| } | |||||
| auto info_builder = | |||||
| std::make_shared<kernel::KernelBuildInfo::KernelBuildInfoBuilder>(AnfAlgo::GetSelectKernelBuildInfo(input_node)); | |||||
| info_builder->SetInputsFormat({format}); | |||||
| info_builder->SetOutputsFormat({format}); | |||||
| AnfAlgo::SetSelectKernelBuildInfo(info_builder->Build(), cast_node.get()); | |||||
| auto cast_input_node = AnfAlgo::VisitKernel(AnfAlgo::GetInputNode(cast_node, 0), 0); | |||||
| SetWeightFormat(cast_input_node.first, {format}, cast_node, 0); | |||||
| return true; | |||||
| } | |||||
| } // namespace | } // namespace | ||||
| void SetTensorDeviceInfo(const CNodePtr &kernel_node) { | void SetTensorDeviceInfo(const CNodePtr &kernel_node) { | ||||
| MS_EXCEPTION_IF_NULL(kernel_node); | MS_EXCEPTION_IF_NULL(kernel_node); | ||||
| @@ -367,22 +415,15 @@ void SetTensorDeviceInfo(const CNodePtr &kernel_node) { | |||||
| auto input_with_index = AnfAlgo::VisitKernel(input_kernel_node, 0); | auto input_with_index = AnfAlgo::VisitKernel(input_kernel_node, 0); | ||||
| MS_EXCEPTION_IF_NULL(input_with_index.first); | MS_EXCEPTION_IF_NULL(input_with_index.first); | ||||
| auto real_input_node = input_with_index.first; | auto real_input_node = input_with_index.first; | ||||
| if (real_input_node->isa<CNode>()) { | |||||
| if (RefreshCastAndParamWeightFormat(real_input_node, selected_kernel_info->GetInputFormat(input_index))) { | |||||
| continue; | continue; | ||||
| } | } | ||||
| if (real_input_node->isa<Parameter>() && !AnfAlgo::IsParameterWeight(real_input_node->cast<ParameterPtr>())) { | if (real_input_node->isa<Parameter>() && !AnfAlgo::IsParameterWeight(real_input_node->cast<ParameterPtr>())) { | ||||
| continue; | continue; | ||||
| } | } | ||||
| // we set special device info of a input tensor. | |||||
| bool is_ref = false; | |||||
| auto op_info = kernel::tbe::TbeDynamicShapeUtil::FindOp(AnfAlgo::GetCNodeName(kernel_node), kernel_node); | |||||
| if (op_info != nullptr) { | |||||
| is_ref = op_info->is_ref(); | |||||
| } | |||||
| if (AnfAlgo::OutputAddrExist(real_input_node, 0)) { | if (AnfAlgo::OutputAddrExist(real_input_node, 0)) { | ||||
| continue; | continue; | ||||
| } | } | ||||
| auto builder = std::make_shared<kernel::KernelBuildInfo::KernelBuildInfoBuilder>(); | |||||
| auto refresh_format = selected_kernel_info->GetInputFormat(input_index); | auto refresh_format = selected_kernel_info->GetInputFormat(input_index); | ||||
| std::vector<std::string> output_format = {refresh_format}; | std::vector<std::string> output_format = {refresh_format}; | ||||
| // If the format is not found in the host convert format map, the host has not registered a convert function for this format. | // If the format is not found in the host convert format map, the host has not registered a convert function for this format. | ||||
| @@ -390,20 +431,7 @@ void SetTensorDeviceInfo(const CNodePtr &kernel_node) { | |||||
| refresh_format != kOpFormat_DEFAULT) { | refresh_format != kOpFormat_DEFAULT) { | ||||
| output_format = {AnfAlgo::GetOutputFormat(real_input_node, 0)}; | output_format = {AnfAlgo::GetOutputFormat(real_input_node, 0)}; | ||||
| } | } | ||||
| if (IsValueNode<tensor::Tensor>(input_kernel_node) && | |||||
| AnfAlgo::GetOutputDeviceDataType(input_kernel_node, 0) == kTypeUnknown) { | |||||
| builder->SetOutputsFormat(output_format); | |||||
| std::vector<TypeId> output_type = {selected_kernel_info->GetInputDeviceType(input_index)}; | |||||
| builder->SetOutputsDeviceType(output_type); | |||||
| AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), input_kernel_node.get()); | |||||
| continue; | |||||
| } | |||||
| if (AnfAlgo::GetOutputDeviceDataType(real_input_node, 0) == kTypeUnknown || is_ref) { | |||||
| builder->SetOutputsFormat(output_format); | |||||
| std::vector<TypeId> output_type = {selected_kernel_info->GetInputDeviceType(input_index)}; | |||||
| builder->SetOutputsDeviceType(output_type); | |||||
| AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), real_input_node.get()); | |||||
| } | |||||
| SetWeightFormat(real_input_node, output_format, kernel_node, input_index); | |||||
| } | } | ||||
| } | } | ||||