| @@ -55,8 +55,8 @@ void FilterInvalidKernelInfo(const CNodePtr &kernel_node, | |||
| buffer << "Kernel node's output size [" << output_tensor_num << "]" | |||
| << " cannot match the kernel's output size [" << kernel_info->GetOutputNum() << "]"; | |||
| } else { | |||
| buffer << "Kernel node's output size [" << input_tensor_num << "]" | |||
| << " cannot match the kernel's output size [" << kernel_info->GetInputNum() << "]"; | |||
| buffer << "Kernel node's input size [" << input_tensor_num << "]" | |||
| << " cannot match the kernel's input size [" << kernel_info->GetInputNum() << "]"; | |||
| } | |||
| MS_LOG(INFO) << "Kernel [ " << index << " ] :" << kernel_info->ToString() << buffer.str(); | |||
| } | |||
| @@ -33,23 +33,16 @@ namespace mindspore { | |||
| namespace kernel { | |||
| enum kCreaterType : int { SINGLE_BUILD = 0, OP_SELECT_FORMAT, CHECK_SUPPORTED, OP_PRE_COMPILE }; | |||
| namespace tbe { | |||
| const std::map<std::string, std::string> opTypeAdapter = {{"ReLUV2", "ReluV2"}, | |||
| {"ReLU6", "Relu6"}, | |||
| {"ReLU6Grad", "Relu6Grad"}, | |||
| {"ReLUGrad", "ReluGrad"}, | |||
| {"ReLU", "Relu"}, | |||
| {"Pad", "PadD"}, | |||
| {"Gather", "GatherV2"}, | |||
| {"SparseApplyFtrl", "SparseApplyFtrlD"}, | |||
| {"Concat", "ConcatD"}, | |||
| {"DepthwiseConv2dNative", "DepthwiseConv2D"}, | |||
| {"FastGeLU", "FastGelu"}, | |||
| {"FastGeLUGrad", "FastGeluGrad"}, | |||
| {"GeLU", "Gelu"}, | |||
| {"GeLUGrad", "GeluGrad"}, | |||
| {"PReLU", "PRelu"}, | |||
| {"PReLUGrad", "PReluGrad"}, | |||
| {"SeLU", "Selu"}}; | |||
// Adapter table from front-end IR op names to the op type names expected by
// the TBE kernel library (e.g. "ReLU" -> "Relu", "Transpose" input-variant
// "TransposeNOD" -> TBE "Transpose").
const std::map<std::string, std::string> opTypeAdapter = {
  {"ReLUV2", "ReluV2"},     {"ReLU6", "Relu6"},
  {"ReLU6Grad", "Relu6Grad"},   {"ReLUGrad", "ReluGrad"},
  {"ReLU", "Relu"},       {"Pad", "PadD"},
  {"Gather", "GatherV2"},    {"SparseApplyFtrl", "SparseApplyFtrlD"},
  {"Concat", "ConcatD"},     {"DepthwiseConv2dNative", "DepthwiseConv2D"},
  {"FastGeLU", "FastGelu"},   {"FastGeLUGrad", "FastGeluGrad"},
  {"GeLU", "Gelu"},       {"GeLUGrad", "GeluGrad"},
  {"PReLU", "PRelu"},      {"PReLUGrad", "PReluGrad"},
  {"SeLU", "Selu"},       {"TransposeNOD", "Transpose"}};
| enum FusionDataType { kFusionNormal = 0, kFusionAddN, kFusionReLUGradV2, kFusionAdd }; | |||
| using FAttrsPass = void (*)(const AnfNodePtr &anf_node, const std::vector<std::shared_ptr<OpAttr>> &op_info_attrs, | |||
| @@ -74,6 +74,7 @@ | |||
| #include "backend/optimizer/ascend/ir_fusion/softmax_grad_ext_fusion.h" | |||
| #include "backend/optimizer/ascend/ir_fusion/set_fracz_group_attr.h" | |||
| #include "backend/optimizer/ascend/ir_fusion/bn_reduce_grad_conv2d_backprop_filter_fusion.h" | |||
| #include "backend/optimizer/ascend/ir_fusion/transposed_update_fusion.h" | |||
| #include "backend/optimizer/ascend/format_type/insert_trans_op.h" | |||
| #include "backend/optimizer/ascend/format_type/trans_op_format_refine.h" | |||
| #include "backend/optimizer/ascend/format_type/dynamic_rnn_grad_reformat.h" | |||
| @@ -265,6 +266,7 @@ void AscendMixPrecision(const std::shared_ptr<session::KernelGraph> &kernel_grap | |||
| mixed_precision_pm->AddPass(std::make_shared<EraseVisitAttr>()); | |||
| mixed_precision_pm->AddPass(std::make_shared<TransOpFormatRefine>()); | |||
| mixed_precision_pm->AddPass(std::make_shared<EraseVisitAttr>()); | |||
| mixed_precision_pm->AddPass(std::make_shared<TransposedUpdateFusion>()); | |||
| mixed_precision_pm->AddPass(std::make_shared<ConvertUnSupportNodeToAICPU>()); | |||
| mixed_precision_pm->AddPass(std::make_shared<RemoveInternalOutputCast>()); | |||
| optimizer->AddPassManager(mixed_precision_pm); | |||
| @@ -25,6 +25,7 @@ | |||
| #include "backend/kernel_compiler/oplib/oplib.h" | |||
| #include "backend/session/anf_runtime_algorithm.h" | |||
| #include "backend/kernel_compiler/tbe/tbe_dynaminc_shape_util.h" | |||
| #include "backend/kernel_compiler/tbe/tbe_kernel_select/tbe_kernel_select.h" | |||
| namespace mindspore { | |||
| namespace opt { | |||
| @@ -73,6 +74,17 @@ class KernelQuery { | |||
| }; | |||
| using KernelQueryPtr = std::shared_ptr<KernelQuery>; | |||
// Thin wrapper around kernel::TbeMetadataInfo that exposes the TBE kernel
// metadata query behind a virtual method, so callers hold a TbeKernelQueryPtr
// and the query can be overridden (e.g. stubbed in tests) — mirrors the
// KernelQuery class above.
class TbeKernelQuery {
 public:
  TbeKernelQuery() = default;
  virtual ~TbeKernelQuery() = default;
  // Collects the candidate TBE KernelBuildInfo entries for kernel_node into
  // *kernel_info_list by delegating to kernel::TbeMetadataInfo.
  virtual void GetTbeKernelMetaInfo(const CNodePtr &kernel_node,
                                    std::vector<std::shared_ptr<kernel::KernelBuildInfo>> *kernel_info_list) {
    kernel::TbeMetadataInfo(kernel_node, kernel_info_list);
  }
};
using TbeKernelQueryPtr = std::shared_ptr<TbeKernelQuery>;
| class OpFinder { | |||
| public: | |||
| OpFinder() = default; | |||
| @@ -0,0 +1,96 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/optimizer/ascend/ir_fusion/transposed_update_fusion.h" | |||
| #include <set> | |||
| #include "backend/optimizer/ascend/ascend_helper.h" | |||
| #include "backend/session/anf_runtime_algorithm.h" | |||
| #include "debug/anf_ir_dump.h" | |||
| #include "utils/trace_base.h" | |||
| namespace mindspore { | |||
| namespace opt { | |||
| namespace { | |||
| constexpr size_t kInt64Len = 8; | |||
| tensor::TensorPtr CreatePermTensor(const CNodePtr &transposed) { | |||
| auto perm = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(transposed, kAttrPerm); | |||
| std::vector<int64_t> perm_shape = {SizeToLong(perm.size())}; | |||
| TensorTypePtr tensor_type = std::make_shared<TensorType>(kInt64); | |||
| tensor::DeviceInfo device_info{kOpFormat_DEFAULT, tensor_type}; | |||
| auto perm_tensor = std::make_shared<tensor::Tensor>(kNumberTypeInt64, perm_shape); | |||
| perm_tensor->set_device_info(device_info); | |||
| MS_EXCEPTION_IF_NULL(perm_tensor); | |||
| auto data_ptr = perm_tensor->data_c(); | |||
| MS_EXCEPTION_IF_NULL(data_ptr); | |||
| auto elem_num = perm.size() * kInt64Len; | |||
| auto ret_code = memcpy_s(data_ptr, static_cast<size_t>(perm_tensor->data().nbytes()), | |||
| reinterpret_cast<void *>(perm.data()), elem_num); | |||
| if (ret_code != 0) { | |||
| MS_LOG(ERROR) << "Failed to copy data into Tensor."; | |||
| return nullptr; | |||
| } | |||
| return perm_tensor; | |||
| } | |||
// Wraps the perm tensor of `transposed` in a ValueNode with abstract, kernel
// info, and a default-format int64 build info attached, so it can be used
// directly as a graph input of the new TransposeNOD node.
ValueNodePtr CreatePermValueNode(const CNodePtr &transposed) {
  tensor::TensorPtr perm_tensor = CreatePermTensor(transposed);
  // CreatePermTensor returns nullptr on copy failure; treat that as fatal here.
  MS_EXCEPTION_IF_NULL(perm_tensor);
  auto perm_const = std::make_shared<ValueNode>(perm_tensor);
  MS_EXCEPTION_IF_NULL(perm_const);
  auto perm_abstract = perm_tensor->ToAbstract();
  perm_const->set_abstract(perm_abstract);
  auto perm_kernel_info = std::make_shared<device::KernelInfo>();
  MS_EXCEPTION_IF_NULL(perm_kernel_info);
  perm_const->set_kernel_info(perm_kernel_info);
  // The perm input lives on host in default format as int64 (matches the
  // tensor created above).
  kernel::KernelBuildInfo::KernelBuildInfoBuilder op_builder;
  op_builder.SetOutputsFormat({kOpFormat_DEFAULT});
  op_builder.SetOutputsDeviceType({kNumberTypeInt64});
  AnfAlgo::SetSelectKernelBuildInfo(op_builder.Build(), perm_const.get());
  return perm_const;
}
| } // namespace | |||
// Matches any single-input Transpose node (the permutation is carried as the
// kAttrPerm attribute, not as an input, at this point in the pipeline).
const BaseRef TransposedUpdateFusion::DefinePattern() const {
  VarPtr X = std::make_shared<Var>();
  return VectorRef({prim::kPrimTranspose, X});
}
| const AnfNodePtr TransposedUpdateFusion::Process(const FuncGraphPtr &func_graph, const AnfNodePtr &node, | |||
| const EquivPtr &) const { | |||
| MS_EXCEPTION_IF_NULL(func_graph); | |||
| MS_EXCEPTION_IF_NULL(node); | |||
| auto transposed = node->cast<CNodePtr>(); | |||
| MS_EXCEPTION_IF_NULL(transposed); | |||
| auto kernel_graph = func_graph->cast<KernelGraphPtr>(); | |||
| MS_EXCEPTION_IF_NULL(kernel_graph); | |||
| auto perm_vnode = CreatePermValueNode(transposed); | |||
| std::vector<AnfNodePtr> transpose_inputs = {NewValueNode(std::make_shared<Primitive>(kTransposeNODOpName)), | |||
| transposed->input(1), perm_vnode}; | |||
| auto transpose = kernel_graph->NewCNode(transpose_inputs); | |||
| transpose->set_scope(transposed->scope()); | |||
| transpose->set_abstract(transposed->abstract()); | |||
| std::vector<std::shared_ptr<kernel::KernelBuildInfo>> kernel_info_list; | |||
| tbe_kernel_query_->GetTbeKernelMetaInfo(transpose, &kernel_info_list); | |||
| if (kernel_info_list.empty()) { | |||
| return nullptr; | |||
| } | |||
| kernel_select_->SelectKernel(transpose); | |||
| kernel_graph->AddValueNodeToGraph(perm_vnode); | |||
| return transpose; | |||
| } | |||
| } // namespace opt | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,50 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_OPTIMIZER_ASCEND_IR_FUSION_TRANSPOSED_UPDATE_FUSION_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_OPTIMIZER_ASCEND_IR_FUSION_TRANSPOSED_UPDATE_FUSION_H_ | |||
| #include <vector> | |||
| #include <string> | |||
| #include <utility> | |||
| #include <memory> | |||
| #include "backend/optimizer/common/pass.h" | |||
| #include "ir/func_graph.h" | |||
| #include "ir/anf.h" | |||
| #include "backend/optimizer/common/helper.h" | |||
| #include "backend/optimizer/common/optimizer.h" | |||
| #include "backend/optimizer/ascend/ascend_helper.h" | |||
| namespace mindspore { | |||
| namespace opt { | |||
| class TransposedUpdateFusion : public PatternProcessPass { | |||
| public: | |||
| explicit TransposedUpdateFusion(bool multigraph = true, const string &name = "transposed_update_fusion") | |||
| : PatternProcessPass(name, multigraph), | |||
| kernel_select_(std::make_shared<KernelSelect>()), | |||
| tbe_kernel_query_(std::make_shared<TbeKernelQuery>()) {} | |||
| ~TransposedUpdateFusion() override = default; | |||
| const BaseRef DefinePattern() const override; | |||
| const AnfNodePtr Process(const FuncGraphPtr &func_graph, const AnfNodePtr &node, const EquivPtr &) const override; | |||
| protected: | |||
| CNodePtr DoSplit(const FuncGraphPtr &func_graph, const AnfNodePtr &node) const; | |||
| bool IsFormatInvaild(const AnfNodePtr &node) const; | |||
| KernelSelectPtr kernel_select_; | |||
| TbeKernelQueryPtr tbe_kernel_query_; | |||
| }; | |||
| } // namespace opt | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_BACKEND_OPTIMIZER_ASCEND_IR_FUSION_TRANSPOSED_UPDATE_FUSION_H_ | |||
| @@ -1,5 +1,5 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * Copyright 2020-2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| @@ -37,7 +37,6 @@ constexpr auto kAttrOffsetA = "offset_a"; | |||
| constexpr auto kAttrPadList = "pad_list"; | |||
| constexpr auto kAttrMode = "mode"; | |||
| constexpr auto kAttrChannelMultiplier = "channel_multiplier"; | |||
| constexpr auto kAttrPerm = "perm"; | |||
| constexpr auto kAttrInputSizes = "input_sizes"; | |||
| constexpr auto kAttrInputSize = "input_size"; | |||
| constexpr auto kIndex2 = 2; | |||
| @@ -2311,6 +2311,12 @@ bool AnfRuntimeAlgorithm::IsNodeInputContainMonad(const AnfNodePtr &node) { | |||
| void AnfRuntimeAlgorithm::CacheAddrForGraph(const KernelGraphPtr &kernel_graph) { | |||
| MS_EXCEPTION_IF_NULL(kernel_graph); | |||
| auto ms_context = MsContext::GetInstance(); | |||
| MS_EXCEPTION_IF_NULL(ms_context); | |||
| if (ms_context->get_param<int>(MS_CTX_EXECUTION_MODE) == kGraphMode && | |||
| ms_context->get_param<bool>(MS_CTX_ENABLE_TASK_SINK) == true) { | |||
| return; | |||
| } | |||
| auto nodes = kernel_graph->execution_order(); | |||
| for (auto &kernel : nodes) { | |||
| // Skip transpose kernel with "nop_op" attr which is not hidden or removed in PyNative infer scenario. Transpose | |||
| @@ -96,6 +96,7 @@ constexpr auto kFlattenGradOpName = "FlattenGrad"; | |||
| constexpr auto kExpandDimsOpName = "ExpandDims"; | |||
| constexpr auto kReshapeOpName = "Reshape"; | |||
| constexpr auto kTransposeOpName = "Transpose"; | |||
| constexpr auto kTransposeNODOpName = "TransposeNOD"; | |||
| constexpr auto kSplitOpName = "Split"; | |||
| constexpr auto kSplitVOpName = "SplitV"; | |||
| constexpr auto kSparseApplyAdagradOpName = "SparseApplyAdagrad"; | |||
| @@ -175,6 +175,7 @@ from .sparse_apply_ftrl_d_ds import _sparse_apply_ftrl_d_ds | |||
| from .sparse_apply_proximal_adagrad import _sparse_apply_proximal_adagrad | |||
| from .sparse_apply_proximal_adagrad_ds import _sparse_apply_proximal_adagrad_ds | |||
| from .apply_proximal_adagrad import _apply_proximal_adagrad | |||
| from .transpose import _transpose_tbe | |||
| from .transpose_d import _transpose_d_tbe | |||
| from .truncate_div import _truncate_div_tbe | |||
| from .truncate_mod import _truncate_mod_tbe | |||
| @@ -0,0 +1,59 @@ | |||
| # Copyright 2021 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """Transpose op""" | |||
| from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType | |||
# TBE registration for "TransposeNOD": a Transpose variant whose permutation is
# supplied as a second (const) input tensor rather than as an attribute. It
# reuses the stock TBE "transpose" kernel (binfile/kernel_name below).
transpose_op_info = TBERegOp("TransposeNOD") \
    .fusion_type("OPAQUE") \
    .async_flag(False) \
    .binfile_name("transpose.so") \
    .compute_cost(10) \
    .kernel_name("transpose") \
    .partial_flag(True) \
    .dynamic_compile_static(True) \
    .input(0, "x", False, "required", "all") \
    .input(1, "perm", False, "required", "all", "optional") \
    .output(0, "y", False, "required", "all") \
    .need_check_supported(True) \
    .dtype_format(DataType.BOOL_Default, DataType.I32_Default, DataType.BOOL_Default) \
    .dtype_format(DataType.I8_Default, DataType.I32_Default, DataType.I8_Default) \
    .dtype_format(DataType.U8_Default, DataType.I32_Default, DataType.U8_Default) \
    .dtype_format(DataType.I16_Default, DataType.I32_Default, DataType.I16_Default) \
    .dtype_format(DataType.U16_Default, DataType.I32_Default, DataType.U16_Default) \
    .dtype_format(DataType.I32_Default, DataType.I32_Default, DataType.I32_Default) \
    .dtype_format(DataType.U32_Default, DataType.I32_Default, DataType.U32_Default) \
    .dtype_format(DataType.I64_Default, DataType.I32_Default, DataType.I64_Default) \
    .dtype_format(DataType.U64_Default, DataType.I32_Default, DataType.U64_Default) \
    .dtype_format(DataType.F16_Default, DataType.I32_Default, DataType.F16_Default) \
    .dtype_format(DataType.F32_Default, DataType.I32_Default, DataType.F32_Default) \
    .dtype_format(DataType.BOOL_Default, DataType.I64_Default, DataType.BOOL_Default) \
    .dtype_format(DataType.I8_Default, DataType.I64_Default, DataType.I8_Default) \
    .dtype_format(DataType.U8_Default, DataType.I64_Default, DataType.U8_Default) \
    .dtype_format(DataType.I16_Default, DataType.I64_Default, DataType.I16_Default) \
    .dtype_format(DataType.U16_Default, DataType.I64_Default, DataType.U16_Default) \
    .dtype_format(DataType.I32_Default, DataType.I64_Default, DataType.I32_Default) \
    .dtype_format(DataType.U32_Default, DataType.I64_Default, DataType.U32_Default) \
    .dtype_format(DataType.I64_Default, DataType.I64_Default, DataType.I64_Default) \
    .dtype_format(DataType.U64_Default, DataType.I64_Default, DataType.U64_Default) \
    .dtype_format(DataType.F16_Default, DataType.I64_Default, DataType.F16_Default) \
    .dtype_format(DataType.F32_Default, DataType.I64_Default, DataType.F32_Default) \
    .get_op_info()


# Registration hook: the decorator records transpose_op_info with the op info
# registry; the function body is intentionally empty.
@op_info_register(transpose_op_info)
def _transpose_tbe():
    """Transpose TBE register"""
    return