| @@ -72,6 +72,7 @@ | |||
| #include "backend/optimizer/ascend/format_type/chang_axis_of_reduce_kernel.h" | |||
| #include "backend/optimizer/ascend/format_type/split_unsupported_transdata.h" | |||
| #include "backend/optimizer/ascend/format_type/insert_reshape_for_extract_image_patches_op.h" | |||
| #include "backend/optimizer/ascend/format_type/convert_cast_format.h" | |||
| #include "backend/optimizer/pass/getitem_tuple.h" | |||
| #include "backend/optimizer/pass/optimize_dependence.h" | |||
| #include "backend/optimizer/pass/erase_visit_attr.h" | |||
| @@ -188,27 +189,6 @@ void AddAscendIRFusionPass(PassManager *ir_fusion_pm) { | |||
| ir_fusion_pm->AddPass(std::make_shared<GatherV2DsFission>()); | |||
| } | |||
| } // namespace | |||
| void RunOpAscendDataLayout(const std::shared_ptr<session::KernelGraph> &kernel_graph) { | |||
| MS_EXCEPTION_IF_NULL(kernel_graph); | |||
| auto optimizer = std::make_shared<GraphOptimizer>(); | |||
| auto data_layout_pm = std::make_shared<PassManager>("pynative_transop_pm"); | |||
| data_layout_pm->AddPass(std::make_shared<ChangeAxisOfReduceKernel>()); | |||
| data_layout_pm->AddPass(std::make_shared<RectifyDoMaskKernelInfo>()); | |||
| data_layout_pm->AddPass(std::make_shared<DynamicRNNGradReformat>()); | |||
| data_layout_pm->AddPass(std::make_shared<RunOpInsertTransData>()); | |||
| data_layout_pm->AddPass(std::make_shared<GetitemTuple>()); | |||
| data_layout_pm->AddPass(std::make_shared<CommonSubexpressionElimination>()); | |||
| data_layout_pm->AddPass(std::make_shared<EliminateRedundantOp>()); | |||
| data_layout_pm->AddPass(std::make_shared<InsertTransposeForDynamicGRUV2>()); | |||
| data_layout_pm->AddPass(std::make_shared<OptimizeDependence>()); | |||
| data_layout_pm->AddPass(std::make_shared<TransDataSplit>()); | |||
| data_layout_pm->AddPass(std::make_shared<EraseVisitAttr>()); | |||
| optimizer->AddPassManager(data_layout_pm); | |||
| (void)optimizer->Optimize(kernel_graph); | |||
| kernel_graph->SetExecOrderByDefault(); | |||
| } | |||
| void AscendGraphKernelCommonProcess(const std::shared_ptr<session::KernelGraph> &kernel_graph) { | |||
| MS_EXCEPTION_IF_NULL(kernel_graph); | |||
| auto optimizer = std::make_shared<GraphOptimizer>(); | |||
| @@ -228,8 +208,17 @@ void AscendDataLayout(const std::shared_ptr<session::KernelGraph> &kernel_graph) | |||
| auto data_layout_pm = std::make_shared<PassManager>("transop_pm"); | |||
| data_layout_pm->AddPass(std::make_shared<RectifyDoMaskKernelInfo>()); | |||
| data_layout_pm->AddPass(std::make_shared<DynamicRNNGradReformat>()); | |||
| data_layout_pm->AddPass(std::make_shared<ChangeAxisOfReduceKernel>()); | |||
| data_layout_pm->AddPass(std::make_shared<AddIoFormatAttrFor3DGraph>()); | |||
| data_layout_pm->AddPass(std::make_shared<InsertTransOp>()); | |||
| auto ms_context = MsContext::GetInstance(); | |||
| MS_EXCEPTION_IF_NULL(ms_context); | |||
| if (ms_context->get_param<int>(MS_CTX_EXECUTION_MODE) == kPynativeMode) { | |||
| data_layout_pm->AddPass(std::make_shared<RunOpInsertTransData>()); | |||
| } else { | |||
| data_layout_pm->AddPass(std::make_shared<MergeCastToOp>()); | |||
| data_layout_pm->AddPass(std::make_shared<ConvertCastFormat>()); | |||
| data_layout_pm->AddPass(std::make_shared<InsertTransOp>()); | |||
| } | |||
| data_layout_pm->AddPass(std::make_shared<GetitemTuple>()); | |||
| data_layout_pm->AddPass(std::make_shared<CommonSubexpressionElimination>()); | |||
| data_layout_pm->AddPass(std::make_shared<RemoveReshapePair>()); | |||
| @@ -0,0 +1,69 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/optimizer/ascend/format_type/convert_cast_format.h" | |||
| #include <memory> | |||
| #include "backend/session/anf_runtime_algorithm.h" | |||
| namespace mindspore { | |||
| namespace opt { | |||
| const BaseRef ConvertCastFormat::DefinePattern() const { | |||
| VarPtr X = std::make_shared<Var>(); | |||
| VarPtr Xs = std::make_shared<SeqVar>(); | |||
| return VectorRef({X, Xs}); | |||
| } | |||
| const AnfNodePtr ConvertCastFormat::Process(const mindspore::FuncGraphPtr &, const mindspore::AnfNodePtr &node, | |||
| const mindspore::EquivPtr &) const { | |||
| if (node == nullptr || !node->isa<CNode>() || !AnfAlgo::IsRealCNodeKernel(node)) { | |||
| return nullptr; | |||
| } | |||
| auto node_name = AnfAlgo::GetCNodeName(node); | |||
| if (node_name == prim::kPrimCast->name()) { | |||
| return nullptr; | |||
| } | |||
| auto cnode = node->cast<CNodePtr>(); | |||
| MS_EXCEPTION_IF_NULL(cnode); | |||
| for (size_t input_index = 0; input_index < AnfAlgo::GetInputTensorNum(cnode); ++input_index) { | |||
| auto input_node = AnfAlgo::VisitKernelWithReturnType(AnfAlgo::GetInputNode(cnode, input_index), 0).first; | |||
| MS_EXCEPTION_IF_NULL(input_node); | |||
| if (!input_node->isa<CNode>()) { | |||
| continue; | |||
| } | |||
| auto cast_node = input_node->cast<CNodePtr>(); | |||
| MS_EXCEPTION_IF_NULL(cast_node); | |||
| auto input_node_name = AnfAlgo::GetCNodeName(cast_node); | |||
| if (input_node_name != prim::kPrimCast->name()) { | |||
| continue; | |||
| } | |||
| auto format = AnfAlgo::GetInputFormat(node, input_index); | |||
| auto cast_input_node = AnfAlgo::VisitKernelWithReturnType(AnfAlgo::GetInputNode(cast_node, 0), 0).first; | |||
| auto cast_input_format = AnfAlgo::GetOutputFormat(cast_input_node, 0); | |||
| // change cast to default that can be more faster when it cast other hw format | |||
| if (cast_input_format != format) { | |||
| if (cast_input_format == kOpFormat_DEFAULT || format == kOpFormat_DEFAULT) { | |||
| auto info_builder = std::make_shared<kernel::KernelBuildInfo::KernelBuildInfoBuilder>( | |||
| AnfAlgo::GetSelectKernelBuildInfo(cast_node)); | |||
| info_builder->SetInputsFormat({kOpFormat_DEFAULT}); | |||
| info_builder->SetOutputsFormat({kOpFormat_DEFAULT}); | |||
| AnfAlgo::SetSelectKernelBuildInfo(info_builder->Build(), cast_node.get()); | |||
| } | |||
| } | |||
| } | |||
| return nullptr; | |||
| } | |||
| } // namespace opt | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,35 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_OPTIMIZER_ASCEND_FORMAT_TYPE_CONVERT_CAST_FORMAT_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_OPTIMIZER_ASCEND_FORMAT_TYPE_CONVERT_CAST_FORMAT_H_ | |||
| #include "backend/optimizer/common/optimizer.h" | |||
| namespace mindspore { | |||
| namespace opt { | |||
| class ConvertCastFormat : public PatternProcessPass { | |||
| public: | |||
| explicit ConvertCastFormat(bool multigraph = true) : PatternProcessPass("convert_cast_format", multigraph) {} | |||
| ~ConvertCastFormat() override = default; | |||
| const BaseRef DefinePattern() const override; | |||
| const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override; | |||
| private: | |||
| bool NeedChangeCastFormat(); | |||
| }; | |||
| } // namespace opt | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_BACKEND_OPTIMIZER_ASCEND_FORMAT_TYPE_CONVERT_CAST_FORMAT_H_ | |||
| @@ -669,7 +669,7 @@ void AscendSession::RunGraphImpl(const GraphId &graph_id, const std::vector<tens | |||
| void AscendSession::RunOpHardwareOptimize(const std::shared_ptr<session::KernelGraph> &kernel_graph) const { | |||
| MS_LOG(INFO) << "Start"; | |||
| // data layout optimization | |||
| opt::RunOpAscendDataLayout(kernel_graph); | |||
| opt::AscendDataLayout(kernel_graph); | |||
| // mixed precision optimization | |||
| opt::AscendMixPrecision(kernel_graph); | |||
| MS_LOG(INFO) << "Finish"; | |||
| @@ -39,8 +39,9 @@ namespace mindspore { | |||
| namespace device { | |||
| namespace ascend { | |||
| namespace { | |||
| const float kWegihtBaseScore = 1; | |||
| const float kFeatureMapBaseScore = 10; | |||
| const int kWeightUnInitScore = 1; | |||
| const int kWeightInitScore = 2; | |||
| const int kFeatureMapBaseScore = 10; | |||
| constexpr auto kPriChoosenFormat = "pri_format"; | |||
| enum MatchCountPriority : int { | |||
| MATCH_COUNT_PRIORITY_BEGIN = 0, | |||
| @@ -140,18 +141,17 @@ void UpdateCurMatchCounts(const kernel::KernelBuildInfo &kernel_build_info, cons | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| MS_EXCEPTION_IF_NULL(cur_kernelinfo_match_counts); | |||
| if (cur_kernelinfo_match_counts->size() < MATCH_COUNT_PRIORITY_END) { | |||
| MS_LOG(EXCEPTION) << "Out of range cur_kernelinfo_match_counts " << MATCH_COUNT_PRIORITY_END; | |||
| MS_LOG(EXCEPTION) << "Out of range cur_kernel info_match_counts " << MATCH_COUNT_PRIORITY_END; | |||
| } | |||
| auto pri_match_format = GetPriorityMatchFormat(kernel_node); | |||
| for (size_t input_index = 0; input_index < AnfAlgo::GetInputTensorNum(kernel_node); ++input_index) { | |||
| auto input_anf_node = kernel_node->input(input_index + 1); | |||
| auto input_anf_node = AnfAlgo::VisitKernelWithReturnType(AnfAlgo::GetInputNode(kernel_node, input_index), 0).first; | |||
| MS_EXCEPTION_IF_NULL(input_anf_node); | |||
| // we do not take ValueNode into consideration in graph kernel. | |||
| if (kernel_build_info.kernel_type() == KernelType::AKG_KERNEL) { | |||
| if (input_anf_node->isa<ValueNode>() && AnfAlgo::GetOutputDeviceDataType(input_anf_node, 0) == kTypeUnknown) { | |||
| continue; | |||
| } | |||
| auto base_score = AnfAlgo::IsFeatureMapInput(kernel_node, input_index) ? kFeatureMapBaseScore : kWeightInitScore; | |||
| if (AnfAlgo::GetOutputDeviceDataType(input_anf_node, 0) == kTypeUnknown) { | |||
| base_score = kWeightUnInitScore; | |||
| } | |||
| auto base_score = AnfAlgo::IsFeatureMapInput(kernel_node, input_index) ? kFeatureMapBaseScore : kWegihtBaseScore; | |||
| if (kernel_build_info.GetInputFormat(input_index) == AnfAlgo::GetPrevNodeOutputFormat(kernel_node, input_index)) { | |||
| (*cur_kernelinfo_match_counts)[MATCH_FORMAT_COUNT] += base_score; | |||
| } | |||
| @@ -356,6 +356,54 @@ void SetCastAndWeightFormat(const CNodePtr &kernel_node) { | |||
| info_builder->SetOutputsFormat({format}); | |||
| AnfAlgo::SetSelectKernelBuildInfo(info_builder->Build(), kernel_node.get()); | |||
| } | |||
| void SetWeightFormat(const AnfNodePtr &real_input_node, const std::vector<string> &output_format, | |||
| const CNodePtr &kernel_node, size_t input_index) { | |||
| auto builder = std::make_shared<kernel::KernelBuildInfo::KernelBuildInfoBuilder>(); | |||
| // we set special device info of a input tensor. | |||
| bool is_ref = false; | |||
| auto op_info = kernel::tbe::TbeDynamicShapeUtil::FindOp(AnfAlgo::GetCNodeName(kernel_node), kernel_node); | |||
| if (op_info != nullptr) { | |||
| is_ref = op_info->is_ref(); | |||
| } | |||
| auto selected_kernel_info = AnfAlgo::GetSelectKernelBuildInfo(kernel_node); | |||
| if (IsValueNode<tensor::Tensor>(real_input_node) && | |||
| AnfAlgo::GetOutputDeviceDataType(real_input_node, 0) == kTypeUnknown) { | |||
| builder->SetOutputsFormat(output_format); | |||
| std::vector<TypeId> output_type = {selected_kernel_info->GetInputDeviceType(input_index)}; | |||
| builder->SetOutputsDeviceType(output_type); | |||
| AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), real_input_node.get()); | |||
| return; | |||
| } | |||
| if (AnfAlgo::GetOutputDeviceDataType(real_input_node, 0) == kTypeUnknown || is_ref) { | |||
| builder->SetOutputsFormat(output_format); | |||
| std::vector<TypeId> output_type = {selected_kernel_info->GetInputDeviceType(input_index)}; | |||
| builder->SetOutputsDeviceType(output_type); | |||
| AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), real_input_node.get()); | |||
| } | |||
| } | |||
| bool RefreshCastAndParamWeightFormat(const AnfNodePtr &input_node, const string &format) { | |||
| MS_EXCEPTION_IF_NULL(input_node); | |||
| if (!input_node->isa<CNode>()) { | |||
| return false; | |||
| } | |||
| auto cast_node = input_node->cast<CNodePtr>(); | |||
| if (AnfAlgo::GetCNodeName(cast_node) != prim::kPrimCast->name()) { | |||
| return true; | |||
| } | |||
| if (AnfAlgo::IsFeatureMapOutput(cast_node)) { | |||
| return true; | |||
| } | |||
| auto info_builder = | |||
| std::make_shared<kernel::KernelBuildInfo::KernelBuildInfoBuilder>(AnfAlgo::GetSelectKernelBuildInfo(input_node)); | |||
| info_builder->SetInputsFormat({format}); | |||
| info_builder->SetOutputsFormat({format}); | |||
| AnfAlgo::SetSelectKernelBuildInfo(info_builder->Build(), cast_node.get()); | |||
| auto cast_input_node = AnfAlgo::VisitKernel(AnfAlgo::GetInputNode(cast_node, 0), 0); | |||
| SetWeightFormat(cast_input_node.first, {format}, cast_node, 0); | |||
| return true; | |||
| } | |||
| } // namespace | |||
| void SetTensorDeviceInfo(const CNodePtr &kernel_node) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| @@ -367,22 +415,15 @@ void SetTensorDeviceInfo(const CNodePtr &kernel_node) { | |||
| auto input_with_index = AnfAlgo::VisitKernel(input_kernel_node, 0); | |||
| MS_EXCEPTION_IF_NULL(input_with_index.first); | |||
| auto real_input_node = input_with_index.first; | |||
| if (real_input_node->isa<CNode>()) { | |||
| if (RefreshCastAndParamWeightFormat(real_input_node, selected_kernel_info->GetInputFormat(input_index))) { | |||
| continue; | |||
| } | |||
| if (real_input_node->isa<Parameter>() && !AnfAlgo::IsParameterWeight(real_input_node->cast<ParameterPtr>())) { | |||
| continue; | |||
| } | |||
| // we set special device info of a input tensor. | |||
| bool is_ref = false; | |||
| auto op_info = kernel::tbe::TbeDynamicShapeUtil::FindOp(AnfAlgo::GetCNodeName(kernel_node), kernel_node); | |||
| if (op_info != nullptr) { | |||
| is_ref = op_info->is_ref(); | |||
| } | |||
| if (AnfAlgo::OutputAddrExist(real_input_node, 0)) { | |||
| continue; | |||
| } | |||
| auto builder = std::make_shared<kernel::KernelBuildInfo::KernelBuildInfoBuilder>(); | |||
| auto refresh_format = selected_kernel_info->GetInputFormat(input_index); | |||
| std::vector<std::string> output_format = {refresh_format}; | |||
| // if not find in host convert format map means the host has not registered the convert function of this format | |||
| @@ -390,20 +431,7 @@ void SetTensorDeviceInfo(const CNodePtr &kernel_node) { | |||
| refresh_format != kOpFormat_DEFAULT) { | |||
| output_format = {AnfAlgo::GetOutputFormat(real_input_node, 0)}; | |||
| } | |||
| if (IsValueNode<tensor::Tensor>(input_kernel_node) && | |||
| AnfAlgo::GetOutputDeviceDataType(input_kernel_node, 0) == kTypeUnknown) { | |||
| builder->SetOutputsFormat(output_format); | |||
| std::vector<TypeId> output_type = {selected_kernel_info->GetInputDeviceType(input_index)}; | |||
| builder->SetOutputsDeviceType(output_type); | |||
| AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), input_kernel_node.get()); | |||
| continue; | |||
| } | |||
| if (AnfAlgo::GetOutputDeviceDataType(real_input_node, 0) == kTypeUnknown || is_ref) { | |||
| builder->SetOutputsFormat(output_format); | |||
| std::vector<TypeId> output_type = {selected_kernel_info->GetInputDeviceType(input_index)}; | |||
| builder->SetOutputsDeviceType(output_type); | |||
| AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), real_input_node.get()); | |||
| } | |||
| SetWeightFormat(real_input_node, output_format, kernel_node, input_index); | |||
| } | |||
| } | |||