
add TransposedUpdateFusion

tags/v1.5.0-rc1
yuchaojie committed 4 years ago
commit 6cd16d61c4
11 changed files with 240 additions and 21 deletions:

  1. +2  -2   mindspore/ccsrc/backend/kernel_compiler/kernel_query.cc
  2. +10 -17  mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_adapter.h
  3. +2  -0   mindspore/ccsrc/backend/optimizer/ascend/ascend_backend_optimization.cc
  4. +12 -0   mindspore/ccsrc/backend/optimizer/ascend/ascend_helper.h
  5. +96 -0   mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/transposed_update_fusion.cc
  6. +50 -0   mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/transposed_update_fusion.h
  7. +1  -2   mindspore/ccsrc/backend/optimizer/ascend/mindir/conv2d_unify_mindir.cc
  8. +6  -0   mindspore/ccsrc/backend/session/anf_runtime_algorithm.cc
  9. +1  -0   mindspore/ccsrc/utils/utils.h
 10. +1  -0   mindspore/ops/_op_impl/tbe/__init__.py
 11. +59 -0   mindspore/ops/_op_impl/tbe/transpose.py
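
In brief (a summary inferred from the diff below, not an official changelog entry): the new TransposedUpdateFusion pass rewrites a Transpose node whose permutation is stored in a "perm" attribute into a TransposeNOD node that takes the permutation as an int64 constant input, and keeps the rewrite only when TBE can supply a kernel for it. A minimal NumPy sketch (illustrative only, not MindSpore code) of the equivalence the rewrite relies on:

import numpy as np

def transpose_attr_style(x, perm=(1, 0)):
    """Permutation baked into the op instance as an attribute (old Transpose)."""
    return np.transpose(x, axes=perm)

def transpose_input_style(x, perm_tensor):
    """Permutation passed as an int64 tensor input (new TransposeNOD)."""
    return np.transpose(x, axes=tuple(int(p) for p in perm_tensor))

x = np.arange(24).reshape(2, 3, 4)
perm = np.array([2, 0, 1], dtype=np.int64)
assert np.array_equal(transpose_attr_style(x, (2, 0, 1)), transpose_input_style(x, perm))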

+2 -2  mindspore/ccsrc/backend/kernel_compiler/kernel_query.cc

@@ -55,8 +55,8 @@ void FilterInvalidKernelInfo(const CNodePtr &kernel_node,
       buffer << "Kernel node's output size [" << output_tensor_num << "]"
              << " cannot match the kernel's output size [" << kernel_info->GetOutputNum() << "]";
     } else {
-      buffer << "Kernel node's output size [" << input_tensor_num << "]"
-             << " cannot match the kernel's output size [" << kernel_info->GetInputNum() << "]";
+      buffer << "Kernel node's input size [" << input_tensor_num << "]"
+             << " cannot match the kernel's input size [" << kernel_info->GetInputNum() << "]";
     }
     MS_LOG(INFO) << "Kernel [ " << index << " ] :" << kernel_info->ToString() << buffer.str();
   }


+10 -17  mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_adapter.h

@@ -33,23 +33,16 @@ namespace mindspore {
 namespace kernel {
 enum kCreaterType : int { SINGLE_BUILD = 0, OP_SELECT_FORMAT, CHECK_SUPPORTED, OP_PRE_COMPILE };
 namespace tbe {
-const std::map<std::string, std::string> opTypeAdapter = {{"ReLUV2", "ReluV2"},
-                                                          {"ReLU6", "Relu6"},
-                                                          {"ReLU6Grad", "Relu6Grad"},
-                                                          {"ReLUGrad", "ReluGrad"},
-                                                          {"ReLU", "Relu"},
-                                                          {"Pad", "PadD"},
-                                                          {"Gather", "GatherV2"},
-                                                          {"SparseApplyFtrl", "SparseApplyFtrlD"},
-                                                          {"Concat", "ConcatD"},
-                                                          {"DepthwiseConv2dNative", "DepthwiseConv2D"},
-                                                          {"FastGeLU", "FastGelu"},
-                                                          {"FastGeLUGrad", "FastGeluGrad"},
-                                                          {"GeLU", "Gelu"},
-                                                          {"GeLUGrad", "GeluGrad"},
-                                                          {"PReLU", "PRelu"},
-                                                          {"PReLUGrad", "PReluGrad"},
-                                                          {"SeLU", "Selu"}};
+const std::map<std::string, std::string> opTypeAdapter = {
+  {"ReLUV2", "ReluV2"},       {"ReLU6", "Relu6"},
+  {"ReLU6Grad", "Relu6Grad"}, {"ReLUGrad", "ReluGrad"},
+  {"ReLU", "Relu"},           {"Pad", "PadD"},
+  {"Gather", "GatherV2"},     {"SparseApplyFtrl", "SparseApplyFtrlD"},
+  {"Concat", "ConcatD"},      {"DepthwiseConv2dNative", "DepthwiseConv2D"},
+  {"FastGeLU", "FastGelu"},   {"FastGeLUGrad", "FastGeluGrad"},
+  {"GeLU", "Gelu"},           {"GeLUGrad", "GeluGrad"},
+  {"PReLU", "PRelu"},         {"PReLUGrad", "PReluGrad"},
+  {"SeLU", "Selu"},           {"TransposeNOD", "Transpose"}};

 enum FusionDataType { kFusionNormal = 0, kFusionAddN, kFusionReLUGradV2, kFusionAdd };
 using FAttrsPass = void (*)(const AnfNodePtr &anf_node, const std::vector<std::shared_ptr<OpAttr>> &op_info_attrs,
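
A minimal Python sketch (illustrative only; the real lookup site is in the C++ TBE adapter, and adapt_op_name is a hypothetical helper) of how a name-adapter table like opTypeAdapter is consulted: map the graph-level op name to the TBE kernel name, falling back to the original name when there is no entry.

OP_TYPE_ADAPTER = {"TransposeNOD": "Transpose", "GeLU": "Gelu", "Pad": "PadD"}  # excerpt

def adapt_op_name(name: str) -> str:
    # Fall back to the original name when the op has no TBE alias.
    return OP_TYPE_ADAPTER.get(name, name)

assert adapt_op_name("TransposeNOD") == "Transpose"  # entry added by this commit
assert adapt_op_name("Conv2D") == "Conv2D"           # unmapped names pass through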


+2 -0  mindspore/ccsrc/backend/optimizer/ascend/ascend_backend_optimization.cc

@@ -74,6 +74,7 @@
 #include "backend/optimizer/ascend/ir_fusion/softmax_grad_ext_fusion.h"
 #include "backend/optimizer/ascend/ir_fusion/set_fracz_group_attr.h"
 #include "backend/optimizer/ascend/ir_fusion/bn_reduce_grad_conv2d_backprop_filter_fusion.h"
+#include "backend/optimizer/ascend/ir_fusion/transposed_update_fusion.h"
 #include "backend/optimizer/ascend/format_type/insert_trans_op.h"
 #include "backend/optimizer/ascend/format_type/trans_op_format_refine.h"
 #include "backend/optimizer/ascend/format_type/dynamic_rnn_grad_reformat.h"
@@ -265,6 +266,7 @@ void AscendMixPrecision(const std::shared_ptr<session::KernelGraph> &kernel_grap
   mixed_precision_pm->AddPass(std::make_shared<EraseVisitAttr>());
   mixed_precision_pm->AddPass(std::make_shared<TransOpFormatRefine>());
   mixed_precision_pm->AddPass(std::make_shared<EraseVisitAttr>());
+  mixed_precision_pm->AddPass(std::make_shared<TransposedUpdateFusion>());
   mixed_precision_pm->AddPass(std::make_shared<ConvertUnSupportNodeToAICPU>());
   mixed_precision_pm->AddPass(std::make_shared<RemoveInternalOutputCast>());
   optimizer->AddPassManager(mixed_precision_pm);
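
For context, a minimal sketch (illustrative only; hypothetical Python names, not the MindSpore PassManager API) of the pattern above: passes run in registration order, so TransposedUpdateFusion fires after the trans-op format refinement and before the unsupported-node fallback to AICPU.

class PassManager:
    def __init__(self):
        self._passes = []

    def add_pass(self, pass_fn):
        self._passes.append(pass_fn)

    def run(self, graph):
        for pass_fn in self._passes:  # strict registration order matters here
            graph = pass_fn(graph)
        return graph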


+12 -0  mindspore/ccsrc/backend/optimizer/ascend/ascend_helper.h

@@ -25,6 +25,7 @@
 #include "backend/kernel_compiler/oplib/oplib.h"
 #include "backend/session/anf_runtime_algorithm.h"
 #include "backend/kernel_compiler/tbe/tbe_dynaminc_shape_util.h"
+#include "backend/kernel_compiler/tbe/tbe_kernel_select/tbe_kernel_select.h"

 namespace mindspore {
 namespace opt {
@@ -73,6 +74,17 @@ class KernelQuery {
 };
 using KernelQueryPtr = std::shared_ptr<KernelQuery>;

+class TbeKernelQuery {
+ public:
+  TbeKernelQuery() = default;
+  virtual ~TbeKernelQuery() = default;
+  virtual void GetTbeKernelMetaInfo(const CNodePtr &kernel_node,
+                                    std::vector<std::shared_ptr<kernel::KernelBuildInfo>> *kernel_info_list) {
+    kernel::TbeMetadataInfo(kernel_node, kernel_info_list);
+  }
+};
+using TbeKernelQueryPtr = std::shared_ptr<TbeKernelQuery>;
+
 class OpFinder {
  public:
   OpFinder() = default;
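
Why a one-method virtual wrapper? It gives TransposedUpdateFusion a seam: the production object delegates to kernel::TbeMetadataInfo, while a test can inject a stub that returns a canned list. A Python sketch of the same pattern (hypothetical names, not MindSpore API):

class TbeKernelQuery:
    """Production query: would delegate to the real TBE metadata lookup."""
    def get_tbe_kernel_meta_info(self, kernel_node):
        raise NotImplementedError("delegates to kernel::TbeMetadataInfo in C++")

class StubTbeKernelQuery(TbeKernelQuery):
    """Test double: returns a canned kernel-info list without touching TBE."""
    def __init__(self, canned_infos):
        self._canned = list(canned_infos)
    def get_tbe_kernel_meta_info(self, kernel_node):
        return list(self._canned)

A pass holding a TbeKernelQuery reference can then be exercised with StubTbeKernelQuery([]) to drive the "no TBE kernel available" branch.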


+96 -0  mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/transposed_update_fusion.cc

@@ -0,0 +1,96 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "backend/optimizer/ascend/ir_fusion/transposed_update_fusion.h"
#include <set>
#include "backend/optimizer/ascend/ascend_helper.h"
#include "backend/session/anf_runtime_algorithm.h"
#include "debug/anf_ir_dump.h"
#include "utils/trace_base.h"

namespace mindspore {
namespace opt {
namespace {
constexpr size_t kInt64Len = 8;

// Build an int64 tensor holding the Transpose node's "perm" attribute values.
tensor::TensorPtr CreatePermTensor(const CNodePtr &transposed) {
  auto perm = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(transposed, kAttrPerm);
  std::vector<int64_t> perm_shape = {SizeToLong(perm.size())};
  TensorTypePtr tensor_type = std::make_shared<TensorType>(kInt64);
  tensor::DeviceInfo device_info{kOpFormat_DEFAULT, tensor_type};
  auto perm_tensor = std::make_shared<tensor::Tensor>(kNumberTypeInt64, perm_shape);
  MS_EXCEPTION_IF_NULL(perm_tensor);
  perm_tensor->set_device_info(device_info);
  auto data_ptr = perm_tensor->data_c();
  MS_EXCEPTION_IF_NULL(data_ptr);
  auto elem_num = perm.size() * kInt64Len;
  auto ret_code = memcpy_s(data_ptr, static_cast<size_t>(perm_tensor->data().nbytes()),
                           reinterpret_cast<void *>(perm.data()), elem_num);
  if (ret_code != 0) {
    MS_LOG(ERROR) << "Failed to copy data into Tensor.";
    return nullptr;
  }
  return perm_tensor;
}

// Wrap the perm tensor in a ValueNode with a default-format int64 build info so it
// can be fed to TransposeNOD as a real input.
ValueNodePtr CreatePermValueNode(const CNodePtr &transposed) {
  tensor::TensorPtr perm_tensor = CreatePermTensor(transposed);
  MS_EXCEPTION_IF_NULL(perm_tensor);
  auto perm_const = std::make_shared<ValueNode>(perm_tensor);
  MS_EXCEPTION_IF_NULL(perm_const);
  auto perm_abstract = perm_tensor->ToAbstract();
  perm_const->set_abstract(perm_abstract);
  auto perm_kernel_info = std::make_shared<device::KernelInfo>();
  MS_EXCEPTION_IF_NULL(perm_kernel_info);
  perm_const->set_kernel_info(perm_kernel_info);
  kernel::KernelBuildInfo::KernelBuildInfoBuilder op_builder;
  op_builder.SetOutputsFormat({kOpFormat_DEFAULT});
  op_builder.SetOutputsDeviceType({kNumberTypeInt64});
  AnfAlgo::SetSelectKernelBuildInfo(op_builder.Build(), perm_const.get());
  return perm_const;
}
}  // namespace

const BaseRef TransposedUpdateFusion::DefinePattern() const {
  VarPtr X = std::make_shared<Var>();
  return VectorRef({prim::kPrimTranspose, X});
}

const AnfNodePtr TransposedUpdateFusion::Process(const FuncGraphPtr &func_graph, const AnfNodePtr &node,
                                                 const EquivPtr &) const {
  MS_EXCEPTION_IF_NULL(func_graph);
  MS_EXCEPTION_IF_NULL(node);
  auto transposed = node->cast<CNodePtr>();
  MS_EXCEPTION_IF_NULL(transposed);
  auto kernel_graph = func_graph->cast<KernelGraphPtr>();
  MS_EXCEPTION_IF_NULL(kernel_graph);
  auto perm_vnode = CreatePermValueNode(transposed);
  // Rebuild the node as TransposeNOD(x, perm) so the permutation becomes an input.
  std::vector<AnfNodePtr> transpose_inputs = {NewValueNode(std::make_shared<Primitive>(kTransposeNODOpName)),
                                              transposed->input(1), perm_vnode};
  auto transpose = kernel_graph->NewCNode(transpose_inputs);
  transpose->set_scope(transposed->scope());
  transpose->set_abstract(transposed->abstract());

  // Bail out (keeping the original Transpose) if TBE offers no kernel for TransposeNOD.
  std::vector<std::shared_ptr<kernel::KernelBuildInfo>> kernel_info_list;
  tbe_kernel_query_->GetTbeKernelMetaInfo(transpose, &kernel_info_list);
  if (kernel_info_list.empty()) {
    return nullptr;
  }
  kernel_select_->SelectKernel(transpose);
  kernel_graph->AddValueNodeToGraph(perm_vnode);
  return transpose;
}
}  // namespace opt
}  // namespace mindspore
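
A quick sanity sketch (illustrative only) of the size arithmetic in CreatePermTensor above: the memcpy_s length is perm.size() * kInt64Len bytes, which must match the int64 tensor's buffer size.

import numpy as np

K_INT64_LEN = 8                                # bytes per int64, as in the C++ above
perm = np.array([0, 2, 3, 1], dtype=np.int64)  # e.g. an NCHW -> NHWC permutation
copy_len = perm.size * K_INT64_LEN
assert copy_len == perm.nbytes                 # 4 elements * 8 bytes == 32 bytes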

+50 -0  mindspore/ccsrc/backend/optimizer/ascend/ir_fusion/transposed_update_fusion.h

@@ -0,0 +1,50 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_CCSRC_BACKEND_OPTIMIZER_ASCEND_IR_FUSION_TRANSPOSED_UPDATE_FUSION_H_
#define MINDSPORE_CCSRC_BACKEND_OPTIMIZER_ASCEND_IR_FUSION_TRANSPOSED_UPDATE_FUSION_H_
#include <vector>
#include <string>
#include <utility>
#include <memory>
#include "backend/optimizer/common/pass.h"
#include "ir/func_graph.h"
#include "ir/anf.h"
#include "backend/optimizer/common/helper.h"
#include "backend/optimizer/common/optimizer.h"
#include "backend/optimizer/ascend/ascend_helper.h"

namespace mindspore {
namespace opt {
class TransposedUpdateFusion : public PatternProcessPass {
 public:
  explicit TransposedUpdateFusion(bool multigraph = true, const string &name = "transposed_update_fusion")
      : PatternProcessPass(name, multigraph),
        kernel_select_(std::make_shared<KernelSelect>()),
        tbe_kernel_query_(std::make_shared<TbeKernelQuery>()) {}
  ~TransposedUpdateFusion() override = default;
  const BaseRef DefinePattern() const override;
  const AnfNodePtr Process(const FuncGraphPtr &func_graph, const AnfNodePtr &node, const EquivPtr &) const override;

 protected:
  CNodePtr DoSplit(const FuncGraphPtr &func_graph, const AnfNodePtr &node) const;
  bool IsFormatInvaild(const AnfNodePtr &node) const;
  KernelSelectPtr kernel_select_;
  TbeKernelQueryPtr tbe_kernel_query_;
};
}  // namespace opt
}  // namespace mindspore
#endif  // MINDSPORE_CCSRC_BACKEND_OPTIMIZER_ASCEND_IR_FUSION_TRANSPOSED_UPDATE_FUSION_H_

+1 -2  mindspore/ccsrc/backend/optimizer/ascend/mindir/conv2d_unify_mindir.cc

@@ -1,5 +1,5 @@
 /**
- * Copyright 2020 Huawei Technologies Co., Ltd
+ * Copyright 2020-2021 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -37,7 +37,6 @@ constexpr auto kAttrOffsetA = "offset_a";
 constexpr auto kAttrPadList = "pad_list";
 constexpr auto kAttrMode = "mode";
 constexpr auto kAttrChannelMultiplier = "channel_multiplier";
-constexpr auto kAttrPerm = "perm";
 constexpr auto kAttrInputSizes = "input_sizes";
 constexpr auto kAttrInputSize = "input_size";
 constexpr auto kIndex2 = 2;

+6 -0  mindspore/ccsrc/backend/session/anf_runtime_algorithm.cc

@@ -2311,6 +2311,12 @@ bool AnfRuntimeAlgorithm::IsNodeInputContainMonad(const AnfNodePtr &node) {

 void AnfRuntimeAlgorithm::CacheAddrForGraph(const KernelGraphPtr &kernel_graph) {
   MS_EXCEPTION_IF_NULL(kernel_graph);
+  auto ms_context = MsContext::GetInstance();
+  MS_EXCEPTION_IF_NULL(ms_context);
+  if (ms_context->get_param<int>(MS_CTX_EXECUTION_MODE) == kGraphMode &&
+      ms_context->get_param<bool>(MS_CTX_ENABLE_TASK_SINK) == true) {
+    return;
+  }
   auto nodes = kernel_graph->execution_order();
   for (auto &kernel : nodes) {
     // Skip transpose kernel with "nop_op" attr which is not hidden or removed in PyNative infer scenario. Transpose


+1 -0  mindspore/ccsrc/utils/utils.h

@@ -96,6 +96,7 @@ constexpr auto kFlattenGradOpName = "FlattenGrad";
 constexpr auto kExpandDimsOpName = "ExpandDims";
 constexpr auto kReshapeOpName = "Reshape";
 constexpr auto kTransposeOpName = "Transpose";
+constexpr auto kTransposeNODOpName = "TransposeNOD";
 constexpr auto kSplitOpName = "Split";
 constexpr auto kSplitVOpName = "SplitV";
 constexpr auto kSparseApplyAdagradOpName = "SparseApplyAdagrad";


+1 -0  mindspore/ops/_op_impl/tbe/__init__.py

@@ -175,6 +175,7 @@ from .sparse_apply_ftrl_d_ds import _sparse_apply_ftrl_d_ds
 from .sparse_apply_proximal_adagrad import _sparse_apply_proximal_adagrad
 from .sparse_apply_proximal_adagrad_ds import _sparse_apply_proximal_adagrad_ds
 from .apply_proximal_adagrad import _apply_proximal_adagrad
+from .transpose import _transpose_tbe
 from .transpose_d import _transpose_d_tbe
 from .truncate_div import _truncate_div_tbe
 from .truncate_mod import _truncate_mod_tbe


+59 -0  mindspore/ops/_op_impl/tbe/transpose.py

@@ -0,0 +1,59 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

"""Transpose op"""
from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType

transpose_op_info = TBERegOp("TransposeNOD") \
    .fusion_type("OPAQUE") \
    .async_flag(False) \
    .binfile_name("transpose.so") \
    .compute_cost(10) \
    .kernel_name("transpose") \
    .partial_flag(True) \
    .dynamic_compile_static(True) \
    .input(0, "x", False, "required", "all") \
    .input(1, "perm", False, "required", "all", "optional") \
    .output(0, "y", False, "required", "all") \
    .need_check_supported(True) \
    .dtype_format(DataType.BOOL_Default, DataType.I32_Default, DataType.BOOL_Default) \
    .dtype_format(DataType.I8_Default, DataType.I32_Default, DataType.I8_Default) \
    .dtype_format(DataType.U8_Default, DataType.I32_Default, DataType.U8_Default) \
    .dtype_format(DataType.I16_Default, DataType.I32_Default, DataType.I16_Default) \
    .dtype_format(DataType.U16_Default, DataType.I32_Default, DataType.U16_Default) \
    .dtype_format(DataType.I32_Default, DataType.I32_Default, DataType.I32_Default) \
    .dtype_format(DataType.U32_Default, DataType.I32_Default, DataType.U32_Default) \
    .dtype_format(DataType.I64_Default, DataType.I32_Default, DataType.I64_Default) \
    .dtype_format(DataType.U64_Default, DataType.I32_Default, DataType.U64_Default) \
    .dtype_format(DataType.F16_Default, DataType.I32_Default, DataType.F16_Default) \
    .dtype_format(DataType.F32_Default, DataType.I32_Default, DataType.F32_Default) \
    .dtype_format(DataType.BOOL_Default, DataType.I64_Default, DataType.BOOL_Default) \
    .dtype_format(DataType.I8_Default, DataType.I64_Default, DataType.I8_Default) \
    .dtype_format(DataType.U8_Default, DataType.I64_Default, DataType.U8_Default) \
    .dtype_format(DataType.I16_Default, DataType.I64_Default, DataType.I16_Default) \
    .dtype_format(DataType.U16_Default, DataType.I64_Default, DataType.U16_Default) \
    .dtype_format(DataType.I32_Default, DataType.I64_Default, DataType.I32_Default) \
    .dtype_format(DataType.U32_Default, DataType.I64_Default, DataType.U32_Default) \
    .dtype_format(DataType.I64_Default, DataType.I64_Default, DataType.I64_Default) \
    .dtype_format(DataType.U64_Default, DataType.I64_Default, DataType.U64_Default) \
    .dtype_format(DataType.F16_Default, DataType.I64_Default, DataType.F16_Default) \
    .dtype_format(DataType.F32_Default, DataType.I64_Default, DataType.F32_Default) \
    .get_op_info()


@op_info_register(transpose_op_info)
def _transpose_tbe():
    """Transpose TBE register"""
    return
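
The dtype_format table above is mechanical: it is the cross product of the eleven supported data dtypes with the two allowed perm index dtypes (int32 and int64). A small sketch (illustrative only) that reproduces the 22 rows:

data_dtypes = ["BOOL", "I8", "U8", "I16", "U16", "I32", "U32", "I64", "U64", "F16", "F32"]
perm_dtypes = ["I32", "I64"]

# Each row is (x dtype, perm dtype, y dtype); y always matches x.
rows = [(x, p, x) for p in perm_dtypes for x in data_dtypes]
assert len(rows) == 22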
