From 2edc7cc193cb5a6d4956ee8085fdd753c07d6432 Mon Sep 17 00:00:00 2001
From: hwjiaorui
Date: Mon, 14 Dec 2020 12:22:03 +0800
Subject: [PATCH] sparse pass format format format

---
 ..._cross_entropy_with_logits_unify_mindir.cc | 503 ++++++++++++++++++
 ...x_cross_entropy_with_logits_unify_mindir.h |  57 +++
 .../ccsrc/backend/session/ascend_session.cc   |   4 +
 mindspore/nn/loss/loss.py                     |  11 +-
 4 files changed, 568 insertions(+), 7 deletions(-)
 create mode 100644 mindspore/ccsrc/backend/optimizer/ascend/mindir/sparse_softmax_cross_entropy_with_logits_unify_mindir.cc
 create mode 100644 mindspore/ccsrc/backend/optimizer/ascend/mindir/sparse_softmax_cross_entropy_with_logits_unify_mindir.h

diff --git a/mindspore/ccsrc/backend/optimizer/ascend/mindir/sparse_softmax_cross_entropy_with_logits_unify_mindir.cc b/mindspore/ccsrc/backend/optimizer/ascend/mindir/sparse_softmax_cross_entropy_with_logits_unify_mindir.cc
new file mode 100644
index 0000000000..df9d59fbeb
--- /dev/null
+++ b/mindspore/ccsrc/backend/optimizer/ascend/mindir/sparse_softmax_cross_entropy_with_logits_unify_mindir.cc
@@ -0,0 +1,503 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "backend/optimizer/ascend/mindir/sparse_softmax_cross_entropy_with_logits_unify_mindir.h"
+#include <algorithm>
+#include <iterator>
+#include <memory>
+#include <string>
+#include <vector>
+#include "backend/session/anf_runtime_algorithm.h"
+#include "backend/optimizer/common/helper.h"
+#include "utils/utils.h"
+#include "ir/primitive.h"
+#include "ir/tensor.h"
+#include "ir/dtype/type_id.h"
+#include "ir/dtype/type.h"
+
+// Rank that CreateMul requires of the SoftmaxCrossEntropyWithLogits output it scales.
+constexpr size_t softmax_output_shape_size = 2;
+namespace mindspore {
+namespace opt {
+namespace {
+// Wraps value_ptr in a new ValueNode whose abstract comes from value_ptr->ToAbstract() and whose kernel build info
+// declares a single output in the default format with device type output_type.
+ValueNodePtr CreateValueNode(const ValuePtr &value_ptr, TypeId output_type) {
+  MS_EXCEPTION_IF_NULL(value_ptr);
+  auto new_node = std::make_shared<ValueNode>(value_ptr);
+  MS_EXCEPTION_IF_NULL(new_node);
+  auto value_abstract = value_ptr->ToAbstract();
+  new_node->set_abstract(value_abstract);
+
+  auto kernel_info = std::make_shared<device::KernelInfo>();
+  MS_EXCEPTION_IF_NULL(kernel_info);
+  new_node->set_kernel_info(kernel_info);
+  kernel::KernelBuildInfo::KernelBuildInfoBuilder builder1;
+  builder1.SetOutputsFormat({kOpFormat_DEFAULT});
+  builder1.SetOutputsDeviceType({output_type});
+  AnfAlgo::SetSelectKernelBuildInfo(builder1.Build(), new_node.get());
+  return new_node;
+}
+
+// Builds OneHot(labels, depth, 1.0, 0.0) with axis -1. labels is input 2 of sparse_softmax_node and depth is the last
+// dimension of the shape reported for its input index 0 (logits). Inferred output: float32, labels shape + [depth].
+CNodePtr CreateOneHot(const FuncGraphPtr &graph, const CNodePtr &sparse_softmax_node) {
+  MS_EXCEPTION_IF_NULL(graph);
+  MS_EXCEPTION_IF_NULL(sparse_softmax_node);
+
+  std::vector<size_t> logits_shape = AnfAlgo::GetPrevNodeOutputInferShape(sparse_softmax_node, 0);
+  int64_t depth;
+  if (logits_shape.size() >= 1) {
+    size_t index = logits_shape.size() - 1;
+    depth = logits_shape[index];
+  } else {
+    MS_LOG(EXCEPTION) << "logits's shape of sparse_softmax_cross_entropy_with_logits is empty.";
+  }
+
+  auto value_on = std::make_shared<tensor::Tensor>(1.0, kFloat32);
+  auto value_on_node = CreateValueNode(value_on, kNumberTypeFloat32);
+  MS_EXCEPTION_IF_NULL(value_on_node);
+  auto value_off = std::make_shared<tensor::Tensor>(0.0, kFloat32);
+  auto value_off_node = CreateValueNode(value_off, kNumberTypeFloat32);
+  MS_EXCEPTION_IF_NULL(value_off_node);
+
+  auto kernel_graph = graph->cast<KernelGraphPtr>();
+  MS_EXCEPTION_IF_NULL(kernel_graph);
+  kernel_graph->AddValueNodeToGraph(value_on_node);
+  kernel_graph->AddValueNodeToGraph(value_off_node);
+
+  auto depth_node = NewValueNode(depth);
+  MS_EXCEPTION_IF_NULL(depth_node);
+
+  auto depth_abstract = std::make_shared<abstract::AbstractScalar>();
+  depth_abstract->set_type(kInt64);
+  depth_node->set_abstract(depth_abstract);
+
+  auto one_hot_primitive = std::make_shared<Primitive>(kOneHotOpName);
+  std::vector<std::string> input_names = {"indices", "depth", "on_value", "off_value"};
+  std::vector<std::string> output_names = {"output"};
+  one_hot_primitive->set_attr(kAttrInputNames, MakeValue(input_names));
+  one_hot_primitive->set_attr(kAttrOutputNames, MakeValue(output_names));
+  std::vector<AnfNodePtr> one_hot_inputs = {NewValueNode(one_hot_primitive), sparse_softmax_node->input(2), depth_node,
+                                            value_on_node, value_off_node};
+  auto one_hot_node = graph->NewCNode(one_hot_inputs);
+  MS_EXCEPTION_IF_NULL(one_hot_node);
+
+  one_hot_node->set_scope(sparse_softmax_node->scope());
+  std::vector<size_t> labels_shape = AnfAlgo::GetPrevNodeOutputInferShape(sparse_softmax_node, 1);
+  labels_shape.emplace_back(depth);
+  AnfAlgo::SetOutputInferTypeAndShape({kNumberTypeFloat32}, {labels_shape}, one_hot_node.get());
+  AnfAlgo::SetNodeAttr(kAttrAxis, MakeValue<int64_t>(-1), one_hot_node);
+  return one_hot_node;
+}
+
+// Builds SoftmaxCrossEntropyWithLogits(logits, one_hot_node), taking logits from input 1 of sparse_softmax_node.
+// Two outputs are inferred: one of shape [one_hot_shape[0]] and one shaped like one_hot_node's output, both with the
+// data type of one_hot_node's output.
+CNodePtr CreateSoftmaxCrossEntropyWithLogits(const FuncGraphPtr &graph, const CNodePtr &sparse_softmax_node,
+                                             const CNodePtr &one_hot_node) {
+  MS_EXCEPTION_IF_NULL(graph);
+  MS_EXCEPTION_IF_NULL(sparse_softmax_node);
+  MS_EXCEPTION_IF_NULL(one_hot_node);
+
+  if (sparse_softmax_node->size() != kSparseSoftmaxCrossEntropyWithLogitsInputNum) {
+    MS_LOG(EXCEPTION) << "sparse_softmax_cross_entropy_with_logits's input size not equal "
+                      << kSparseSoftmaxCrossEntropyWithLogitsInputNum;
+  }
+  if (one_hot_node->size() != kOneHotInputNum) {
+    MS_LOG(EXCEPTION) << "one_hot's input size not equal " << kOneHotInputNum;
+  }
+
+  std::vector<AnfNodePtr> inputs = {NewValueNode(std::make_shared<Primitive>(kSoftmaxCrossEntropyWithLogitsOpName)),
+                                    sparse_softmax_node->input(1), one_hot_node};
+  auto softmax_node = graph->NewCNode(inputs);
+  MS_EXCEPTION_IF_NULL(softmax_node);
+  softmax_node->set_scope(sparse_softmax_node->scope());
+
+  std::vector<size_t> labels_shape = AnfAlgo::GetOutputInferShape(one_hot_node, 0);
+  std::vector<size_t> loss_shape;
+  if (labels_shape.size() > 0) {
+    loss_shape.emplace_back(labels_shape[0]);
+  } else {
+    MS_LOG(EXCEPTION) << "one_hot output's shape is empty.";
+  }
+
+  auto shapes = {loss_shape, AnfAlgo::GetOutputInferShape(one_hot_node, 0)};
+  auto data_types = AnfAlgo::GetOutputInferDataType(one_hot_node, 0);
+  auto types = {data_types, data_types};
+  AnfAlgo::SetOutputInferTypeAndShape(types, shapes, softmax_node.get());
+  return softmax_node;
+}
+
+// Returns a ValueNode (device type int64) holding the axes 0 .. rank-1 of node's first output; throws on empty shape.
+ValueNodePtr GetAxis(const AnfNodePtr &node) {
+  MS_EXCEPTION_IF_NULL(node);
+  std::vector<size_t> output_shape = AnfAlgo::GetOutputInferShape(node, 0);
+  if (output_shape.empty()) {
+    MS_LOG(EXCEPTION) << node->fullname_with_scope() << "'s output shape is empty";
+  }
+  std::vector<int64_t> range;
+  for (size_t i = 0; i < output_shape.size(); i++) {
+    range.emplace_back(i);
+  }
+  auto axis_node = CreateValueNode(MakeValue(range), kNumberTypeInt64);
+  MS_EXCEPTION_IF_NULL(axis_node);
+  return axis_node;
+}
+
+// Builds ReduceMean(softmax_output_node, all axes). The result abstract is a copy of the input's abstract with an
+// empty shape; the copy is taken first so that softmax_output_node keeps its own inferred shape.
+CNodePtr CreateReduceMean(const FuncGraphPtr &graph, const CNodePtr &sparse_softmax_node,
+                          const AnfNodePtr &softmax_output_node) {
+  MS_EXCEPTION_IF_NULL(graph);
+  MS_EXCEPTION_IF_NULL(sparse_softmax_node);
+  MS_EXCEPTION_IF_NULL(softmax_output_node);
+  if (sparse_softmax_node->size() != kSparseSoftmaxCrossEntropyWithLogitsInputNum) {
+    MS_LOG(EXCEPTION) << "sparse_softmax_cross_entropy_with_logits's input size not equal "
+                      << kSparseSoftmaxCrossEntropyWithLogitsInputNum;
+  }
+  auto axis_node = GetAxis(softmax_output_node);
+  MS_EXCEPTION_IF_NULL(axis_node);
+  auto kernel_graph = graph->cast<KernelGraphPtr>();
+  MS_EXCEPTION_IF_NULL(kernel_graph);
+  kernel_graph->AddValueNodeToGraph(axis_node);
+
+  auto reduce_primitive = std::make_shared<Primitive>(kReduceMeanOpName);
+  std::vector<std::string> input_names = {"x", "axis"};
+  std::vector<std::string> output_names = {"y"};
+  reduce_primitive->set_attr(kAttrInputNames, MakeValue(input_names));
+  reduce_primitive->set_attr(kAttrOutputNames, MakeValue(output_names));
+
+  std::vector<AnfNodePtr> inputs = {NewValueNode(reduce_primitive), softmax_output_node, axis_node};
+  auto reduce_node = graph->NewCNode(inputs);
+  MS_EXCEPTION_IF_NULL(reduce_node);
+
+  reduce_node->set_scope(sparse_softmax_node->scope());
+  auto softmax_abstract = softmax_output_node->abstract();
+  MS_EXCEPTION_IF_NULL(softmax_abstract);
+  // abstract() hands back a shared pointer; reshaping it in place would also rewrite softmax_output_node's shape.
+  auto reduce_abstract = softmax_abstract->Clone();
+  MS_EXCEPTION_IF_NULL(reduce_abstract);
+  reduce_abstract->set_shape(std::make_shared<abstract::Shape>());
+  reduce_node->set_abstract(reduce_abstract);
+  return reduce_node;
+}
+
+// Builds ExpandDims(real_div_node, -1); the inferred output shape is real_div_node's shape with a trailing 1.
+CNodePtr CreateExpandDims(const FuncGraphPtr &graph, const CNodePtr &real_div_node) {
+  MS_EXCEPTION_IF_NULL(graph);
+  MS_EXCEPTION_IF_NULL(real_div_node);
+  if (real_div_node->size() != kRealDivInputNum) {
+    MS_LOG(EXCEPTION) << "Op real_div's input num not equal " << kRealDivInputNum;
+  }
+
+  int64_t axis = -1;
+  auto axis_node = NewValueNode(axis);
+  MS_EXCEPTION_IF_NULL(axis_node);
+  auto axis_abstract = std::make_shared<abstract::AbstractScalar>();
+  axis_abstract->set_type(kInt64);
+  axis_node->set_abstract(axis_abstract);
+
+  auto expand_dims_primitive = std::make_shared<Primitive>(kExpandDimsOpName);
+  std::vector<std::string> input_names = {"x", "axis"};
+  std::vector<std::string> output_names = {"output"};
+  expand_dims_primitive->set_attr(kAttrInputNames, MakeValue(input_names));
+  expand_dims_primitive->set_attr(kAttrOutputNames, MakeValue(output_names));
+  std::vector<AnfNodePtr> expand_dims_inputs = {NewValueNode(expand_dims_primitive), real_div_node, axis_node};
+  auto expand_dims_node = graph->NewCNode(expand_dims_inputs);
+  MS_EXCEPTION_IF_NULL(expand_dims_node);
+
+  expand_dims_node->set_scope(real_div_node->scope());
+  std::vector<size_t> y_shape = AnfAlgo::GetOutputInferShape(real_div_node, 0);
+  y_shape.emplace_back(1);
+  AnfAlgo::SetOutputInferTypeAndShape({AnfAlgo::GetOutputInferDataType(real_div_node, 0)}, {y_shape},
+                                      expand_dims_node.get());
+  return expand_dims_node;
+}
+
+// Builds Tile(x, multiples), where x is input 2 of mul_node and multiples is the labels shape (input index 1 of
+// sparse_softmax_node). Inferred output: the labels shape, with the data type reported for mul_node's input index 1.
+CNodePtr CreateTile(const FuncGraphPtr &graph, const CNodePtr &sparse_softmax_node, const CNodePtr &mul_node) {
+  MS_EXCEPTION_IF_NULL(graph);
+  MS_EXCEPTION_IF_NULL(sparse_softmax_node);
+  MS_EXCEPTION_IF_NULL(mul_node);
+  if (sparse_softmax_node->size() != kSparseSoftmaxCrossEntropyWithLogitsInputNum) {
+    MS_LOG(EXCEPTION) << "sparse_softmax_cross_entropy_with_logits's input size not equal "
+                      << kSparseSoftmaxCrossEntropyWithLogitsInputNum;
+  }
+  if (mul_node->size() != kMulInputNum) {
+    MS_LOG(EXCEPTION) << "Op Mul's input not equal " << kMulInputNum;
+  }
+
+  auto labels_shape = AnfAlgo::GetPrevNodeOutputInferShape(sparse_softmax_node, 1);
+  std::vector<int64_t> multiple_value;
+  std::transform(labels_shape.begin(), labels_shape.end(), std::back_inserter(multiple_value),
+                 [](size_t label) { return static_cast<int64_t>(label); });
+  auto multiples = MakeValue(multiple_value);
+  auto multiples_node = CreateValueNode(multiples, kNumberTypeInt64);
+  MS_EXCEPTION_IF_NULL(multiples_node);
+  auto kernel_graph = graph->cast<KernelGraphPtr>();
+  MS_EXCEPTION_IF_NULL(kernel_graph);
+  kernel_graph->AddValueNodeToGraph(multiples_node);
+
+  auto tile_primitive = std::make_shared<Primitive>(kTileOpName);
+  std::vector<std::string> input_names = {"x", "multiples"};
+  std::vector<std::string> output_names = {"output"};
+  tile_primitive->set_attr(kAttrInputNames, MakeValue(input_names));
+  tile_primitive->set_attr(kAttrOutputNames, MakeValue(output_names));
+  std::vector<AnfNodePtr> tile_inputs = {NewValueNode(tile_primitive), mul_node->input(2), multiples_node};
+  auto tile_node = graph->NewCNode(tile_inputs);
+  MS_EXCEPTION_IF_NULL(tile_node);
+
+  tile_node->set_scope(mul_node->scope());
+  AnfAlgo::SetOutputInferTypeAndShape({AnfAlgo::GetPrevNodeOutputInferDataType(mul_node, 1)}, {labels_shape},
+                                      tile_node.get());
+  return tile_node;
+}
+
+// Builds RealDiv(tile_node, y), where y is a float32 tensor built from the single labels dimension (input index 1 of
+// sparse_softmax_node, which must be 1-D). The result reuses tile_node's abstract.
+CNodePtr CreateRealDiv(const FuncGraphPtr &graph, const CNodePtr &sparse_softmax_node, const CNodePtr &tile_node) {
+  MS_EXCEPTION_IF_NULL(graph);
+  MS_EXCEPTION_IF_NULL(sparse_softmax_node);
+  MS_EXCEPTION_IF_NULL(tile_node);
+
+  if (sparse_softmax_node->size() != kSparseSoftmaxCrossEntropyWithLogitsInputNum) {
+    MS_LOG(EXCEPTION) << "sparse_softmax_cross_entropy_with_logits's input size not equal "
+                      << kSparseSoftmaxCrossEntropyWithLogitsInputNum;
+  }
+
+  std::vector<size_t> labels_shape = AnfAlgo::GetPrevNodeOutputInferShape(sparse_softmax_node, 1);
+  if (labels_shape.size() != 1) {
+    MS_LOG(EXCEPTION) << "label's shape should be 1-D.";
+  }
+  float y_value = static_cast<float>(labels_shape[0]);
+  auto y = std::make_shared<tensor::Tensor>(y_value, kFloat32);
+  auto y_node = CreateValueNode(y, kNumberTypeFloat32);
+  MS_EXCEPTION_IF_NULL(y_node);
+  auto kernel_graph = graph->cast<KernelGraphPtr>();
+  MS_EXCEPTION_IF_NULL(kernel_graph);
+  kernel_graph->AddValueNodeToGraph(y_node);
+
+  auto real_div_primitive = std::make_shared<Primitive>(kRealDivOpName);
+  std::vector<std::string> input_names = {"x", "y"};
+  std::vector<std::string> output_names = {"output"};
+  real_div_primitive->set_attr(kAttrInputNames, MakeValue(input_names));
+  real_div_primitive->set_attr(kAttrOutputNames, MakeValue(output_names));
+  std::vector<AnfNodePtr> real_div_inputs = {NewValueNode(real_div_primitive), tile_node, y_node};
+  auto real_div_node = graph->NewCNode(real_div_inputs);
+  MS_EXCEPTION_IF_NULL(real_div_node);
+
+  real_div_node->set_scope(sparse_softmax_node->scope());
+  real_div_node->set_abstract(tile_node->abstract());
+  return real_div_node;
+}
+
+// Returns input `index` of depend_node cast to CNodePtr; the cast result is not checked here.
+CNodePtr GetSparseNode(const CNodePtr &depend_node, size_t index) {
+  MS_EXCEPTION_IF_NULL(depend_node);
+  if (depend_node->size() != kDependInputNum) {
+    MS_LOG(EXCEPTION) << "Op Depend's input not equal " << kDependInputNum;
+  }
+  auto sparse_node = depend_node->input(index);
+  MS_EXCEPTION_IF_NULL(sparse_node);
+  return sparse_node->cast<CNodePtr>();
+}
+
+// Returns input 1 of mul_node cast to CNodePtr; the cast result is not checked here.
+CNodePtr GetDependNode(const CNodePtr &mul_node) {
+  MS_EXCEPTION_IF_NULL(mul_node);
+  if (mul_node->size() != kMulInputNum) {
+    MS_LOG(EXCEPTION) << "Op Mul's input not equal " << kMulInputNum;
+  }
+  auto depend_node = mul_node->input(1);
+  MS_EXCEPTION_IF_NULL(depend_node);
+  return depend_node->cast<CNodePtr>();
+}
+
+// Builds Mul(softmax_output_node, y), where y is a float32 tensor of shape [N, 1] filled with 1/N and N is the first
+// dimension of the rank-2 softmax_output_node. The result reuses softmax_output_node's abstract.
+CNodePtr CreateMul(const FuncGraphPtr &graph, const CNodePtr &sparse_softmax_node,
+                   const AnfNodePtr &softmax_output_node) {
+  MS_EXCEPTION_IF_NULL(graph);
+  MS_EXCEPTION_IF_NULL(sparse_softmax_node);
+  MS_EXCEPTION_IF_NULL(softmax_output_node);
+  auto softmax_output_shape = AnfAlgo::GetOutputInferShape(softmax_output_node, 0);
+  if (softmax_output_shape.size() != softmax_output_shape_size) {
+    MS_LOG(EXCEPTION) << "SoftmaxCrossEntropyWithLogits the second output shape size should be "
+                      << softmax_output_shape_size << ", but got " << softmax_output_shape.size();
+  }
+  ShapeVector tensor_shape;
+  tensor_shape.emplace_back(softmax_output_shape[0]);
+  tensor_shape.emplace_back(1);
+  std::vector<float> tensor_value(softmax_output_shape[0], 1.0 / softmax_output_shape[0]);
+  auto buf_size = sizeof(float) * tensor_value.size();
+  auto tensor_y = std::make_shared<tensor::Tensor>(kNumberTypeFloat32, tensor_shape, tensor_value.data(), buf_size);
+  auto y_node = CreateValueNode(tensor_y, kNumberTypeFloat32);
+  MS_EXCEPTION_IF_NULL(y_node);
+
+  auto kernel_graph = graph->cast<KernelGraphPtr>();
+  MS_EXCEPTION_IF_NULL(kernel_graph);
+  kernel_graph->AddValueNodeToGraph(y_node);
+
+  auto mul_primitive = std::make_shared<Primitive>(kMulOpName);
+  std::vector<std::string> input_names = {"x", "y"};
+  std::vector<std::string> output_names = {"output"};
+  mul_primitive->set_attr(kAttrInputNames, MakeValue(input_names));
+  mul_primitive->set_attr(kAttrOutputNames, MakeValue(output_names));
+
+  std::vector<AnfNodePtr> mul_input = {NewValueNode(mul_primitive), softmax_output_node, y_node};
+  auto mul_node = graph->NewCNode(mul_input);
+  MS_EXCEPTION_IF_NULL(mul_node);
+
+  mul_node->set_scope(sparse_softmax_node->scope());
+  mul_node->set_abstract(softmax_output_node->abstract());
+  return mul_node;
+}
+}  // namespace
+
+// Matches SparseSoftmaxCrossEntropyWithLogits(x1, x2).
+const BaseRef SparseSoftmaxCrossEntropyWithLogitsUnifyMindIR::DefinePattern() const {
+  VarPtr x1 = std::make_shared<Var>();
+  VarPtr x2 = std::make_shared<Var>();
+  return VectorRef({prim::kPrimSparseSoftmaxCrossEntropyWithLogits, x1, x2});
+}
+
+// Returns ReduceMean over the first output of SoftmaxCrossEntropyWithLogits(logits, OneHot(labels)) built for the
+// matched node, or nullptr when the node carries kAttrIsGrad set to true.
+const AnfNodePtr SparseSoftmaxCrossEntropyWithLogitsUnifyMindIR::Process(const FuncGraphPtr &graph,
+                                                                         const AnfNodePtr &node,
+                                                                         const EquivPtr &) const {
+  MS_EXCEPTION_IF_NULL(graph);
+  MS_EXCEPTION_IF_NULL(node);
+
+  auto sparse_softmax_node = node->cast<CNodePtr>();
+  MS_EXCEPTION_IF_NULL(sparse_softmax_node);
+  if (sparse_softmax_node->size() != kSparseSoftmaxCrossEntropyWithLogitsInputNum) {
+    MS_LOG(EXCEPTION) << "Op SparseSoftmaxCrossEntropyWithLogits's input not equal "
+                      << kSparseSoftmaxCrossEntropyWithLogitsInputNum;
+  }
+  if (AnfAlgo::HasNodeAttr(kAttrIsGrad, sparse_softmax_node) &&
+      AnfAlgo::GetNodeAttr<bool>(sparse_softmax_node, kAttrIsGrad)) {
+    return nullptr;
+  }
+
+  CNodePtr softmax_node;
+  auto one_hot_node = CreateOneHot(graph, sparse_softmax_node);
+  softmax_node = CreateSoftmaxCrossEntropyWithLogits(graph, sparse_softmax_node, one_hot_node);
+
+  std::vector<AnfNodePtr> softmax_node_outputs;
+  CreateMultipleOutputsOfAnfNode(graph, softmax_node, kSoftmaxCrossEntropyWithLogitsOutputNum, &softmax_node_outputs);
+  auto reduce_node = CreateReduceMean(graph, sparse_softmax_node, softmax_node_outputs[0]);
+
+  return reduce_node;
+}
+
+// Matches Mul(Depend(SparseSoftmaxCrossEntropyWithLogits(x1, x2), x3), x4).
+const BaseRef GradSparseSoftmaxCrossEntropyWithLogitsUnifyMindIR::DefinePattern() const {
+  VarPtr x1 = std::make_shared<Var>();
+  VarPtr x2 = std::make_shared<Var>();
+  VarPtr x3 = std::make_shared<Var>();
+  VarPtr x4 = std::make_shared<Var>();
+  VectorRef sparse_softmax_cross_entropy_with_logits({prim::kPrimSparseSoftmaxCrossEntropyWithLogits, x1, x2});
+  VectorRef depend({prim::kPrimDepend, sparse_softmax_cross_entropy_with_logits, x3});
+  return VectorRef({prim::kPrimMul, depend, x4});
+}
+
+// Rewires the matched Mul in place: input 1 becomes the second SoftmaxCrossEntropyWithLogits output and input 2
+// becomes ExpandDims(RealDiv(Tile(previous input 2, labels shape), labels dimension), -1). The sparse node is then
+// replaced by that same softmax output through the graph manager, and the Mul is returned.
+const AnfNodePtr GradSparseSoftmaxCrossEntropyWithLogitsUnifyMindIR::Process(const FuncGraphPtr &graph,
+                                                                             const AnfNodePtr &node,
+                                                                             const EquivPtr &) const {
+  MS_EXCEPTION_IF_NULL(graph);
+  MS_EXCEPTION_IF_NULL(node);
+
+  auto mul_node = node->cast<CNodePtr>();
+  MS_EXCEPTION_IF_NULL(mul_node);
+  if (mul_node->size() != kMulInputNum) {
+    MS_LOG(EXCEPTION) << "Op Mul's input not equal " << kMulInputNum;
+  }
+
+  auto depend_node = GetDependNode(mul_node);
+  auto sparse_softmax_node_grad = GetSparseNode(depend_node, 1);
+  // GetSparseNode returns an unchecked cast, so guard before the first dereference.
+  MS_EXCEPTION_IF_NULL(sparse_softmax_node_grad);
+  if (sparse_softmax_node_grad->size() != kSparseSoftmaxCrossEntropyWithLogitsInputNum) {
+    MS_LOG(EXCEPTION) << "Op SparseSoftmaxCrossEntropyWithLogits's input not equal "
+                      << kSparseSoftmaxCrossEntropyWithLogitsInputNum;
+  }
+
+  CNodePtr softmax_node;
+  auto one_hot_node = CreateOneHot(graph, sparse_softmax_node_grad);
+  softmax_node = CreateSoftmaxCrossEntropyWithLogits(graph, sparse_softmax_node_grad, one_hot_node);
+
+  std::vector<AnfNodePtr> softmax_node_outputs;
+  CreateMultipleOutputsOfAnfNode(graph, softmax_node, kSoftmaxCrossEntropyWithLogitsOutputNum, &softmax_node_outputs);
+  auto tile_node = CreateTile(graph, sparse_softmax_node_grad, mul_node);
+  auto real_div_node = CreateRealDiv(graph, sparse_softmax_node_grad, tile_node);
+  auto expand_dims_node = CreateExpandDims(graph, real_div_node);
+
+  mul_node->set_input(1, softmax_node_outputs[1]);
+  mul_node->set_input(2, expand_dims_node);
+
+  auto manager = graph->manager();
+  MS_EXCEPTION_IF_NULL(manager);
+  manager->Replace(sparse_softmax_node_grad, softmax_node_outputs[1]);
+  return mul_node;
+}
+
+// Matches Depend(SparseSoftmaxCrossEntropyWithLogits(x1, x2), x3).
+const BaseRef GradSparseSoftmaxCrossEntropyWithLogitsUnifyMindIRV2::DefinePattern() const {
+  VarPtr x1 = std::make_shared<Var>();
+  VarPtr x2 = std::make_shared<Var>();
+  VarPtr x3 = std::make_shared<Var>();
+  VectorRef sparse_softmax_cross_entropy_with_logits({prim::kPrimSparseSoftmaxCrossEntropyWithLogits, x1, x2});
+  return VectorRef({prim::kPrimDepend, sparse_softmax_cross_entropy_with_logits, x3});
+}
+
+// Returns Mul(second SoftmaxCrossEntropyWithLogits output, 1/N tensor) for the matched Depend and replaces the sparse
+// node with that softmax output through the graph manager.
+const AnfNodePtr GradSparseSoftmaxCrossEntropyWithLogitsUnifyMindIRV2::Process(const FuncGraphPtr &graph,
+                                                                               const AnfNodePtr &node,
+                                                                               const EquivPtr &) const {
+  MS_EXCEPTION_IF_NULL(graph);
+  MS_EXCEPTION_IF_NULL(node);
+
+  auto depend_node = node->cast<CNodePtr>();
+  auto sparse_softmax_node_grad = GetSparseNode(depend_node, 1);
+  // Same guards as the Mul-rooted grad pass: the helpers below read inputs 1 and 2 of the sparse node.
+  MS_EXCEPTION_IF_NULL(sparse_softmax_node_grad);
+  if (sparse_softmax_node_grad->size() != kSparseSoftmaxCrossEntropyWithLogitsInputNum) {
+    MS_LOG(EXCEPTION) << "Op SparseSoftmaxCrossEntropyWithLogits's input not equal "
+                      << kSparseSoftmaxCrossEntropyWithLogitsInputNum;
+  }
+
+  CNodePtr softmax_node;
+  auto one_hot_node = CreateOneHot(graph, sparse_softmax_node_grad);
+  softmax_node = CreateSoftmaxCrossEntropyWithLogits(graph, sparse_softmax_node_grad, one_hot_node);
+
+  std::vector<AnfNodePtr> softmax_node_outputs;
+  CreateMultipleOutputsOfAnfNode(graph, softmax_node, kSoftmaxCrossEntropyWithLogitsOutputNum, &softmax_node_outputs);
+  auto mul_node = CreateMul(graph, sparse_softmax_node_grad, softmax_node_outputs[1]);
+
+  auto manager = graph->manager();
+  MS_EXCEPTION_IF_NULL(manager);
+  manager->Replace(sparse_softmax_node_grad, softmax_node_outputs[1]);
+  return mul_node;
+}
+}  // namespace opt
+}  // namespace mindspore
diff --git a/mindspore/ccsrc/backend/optimizer/ascend/mindir/sparse_softmax_cross_entropy_with_logits_unify_mindir.h b/mindspore/ccsrc/backend/optimizer/ascend/mindir/sparse_softmax_cross_entropy_with_logits_unify_mindir.h
new file mode 100644
index 0000000000..27e2837ff9
--- /dev/null
+++ b/mindspore/ccsrc/backend/optimizer/ascend/mindir/sparse_softmax_cross_entropy_with_logits_unify_mindir.h
@@ -0,0 +1,57 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_SPARSE_SOFTMAX_CROSS_ENTROPY_WITH_LOGITS_UNIFY_MINDIR_H
+#define MINDSPORE_SPARSE_SOFTMAX_CROSS_ENTROPY_WITH_LOGITS_UNIFY_MINDIR_H
+
+#include <memory>
+#include "backend/optimizer/common/optimizer.h"
+
+namespace mindspore {
+namespace opt {
+// Pattern pass registered as "sparse_softmax_cross_entropy_with_logits_unify_mindir".
+class SparseSoftmaxCrossEntropyWithLogitsUnifyMindIR : public PatternProcessPass {
+ public:
+  explicit SparseSoftmaxCrossEntropyWithLogitsUnifyMindIR(bool multigraph = true)
+      : PatternProcessPass("sparse_softmax_cross_entropy_with_logits_unify_mindir", multigraph) {}
+  ~SparseSoftmaxCrossEntropyWithLogitsUnifyMindIR() override = default;
+  const BaseRef DefinePattern() const override;
+  const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override;
+};
+
+// Pattern pass registered as "grad_sparse_softmax_cross_entropy_with_logits_unify_mindir".
+class GradSparseSoftmaxCrossEntropyWithLogitsUnifyMindIR : public PatternProcessPass {
+ public:
+  explicit GradSparseSoftmaxCrossEntropyWithLogitsUnifyMindIR(bool multigraph = true)
+      : PatternProcessPass("grad_sparse_softmax_cross_entropy_with_logits_unify_mindir", multigraph) {}
+  ~GradSparseSoftmaxCrossEntropyWithLogitsUnifyMindIR() override = default;
+  const BaseRef DefinePattern() const override;
+  const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override;
+};
+
+// Pattern pass registered as "grad_sparse_softmax_cross_entropy_with_logits_unify_mindir_v2".
+class GradSparseSoftmaxCrossEntropyWithLogitsUnifyMindIRV2 : public PatternProcessPass {
+ public:
+  explicit GradSparseSoftmaxCrossEntropyWithLogitsUnifyMindIRV2(bool multigraph = true)
+      : PatternProcessPass("grad_sparse_softmax_cross_entropy_with_logits_unify_mindir_v2", multigraph) {}
+  ~GradSparseSoftmaxCrossEntropyWithLogitsUnifyMindIRV2() override = default;
+  const BaseRef DefinePattern() const override;
+  const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override;
+};
+
+}  // namespace opt
+}  // namespace mindspore
+#endif  // MINDSPORE_SPARSE_SOFTMAX_CROSS_ENTROPY_WITH_LOGITS_UNIFY_MINDIR_H
diff --git a/mindspore/ccsrc/backend/session/ascend_session.cc
b/mindspore/ccsrc/backend/session/ascend_session.cc index 69f8d1f28b..c4619c7e03 100644 --- a/mindspore/ccsrc/backend/session/ascend_session.cc +++ b/mindspore/ccsrc/backend/session/ascend_session.cc @@ -36,6 +36,7 @@ #include "backend/optimizer/ascend/mindir/maxpool_to_maxpool_with_argmax.h" #include "backend/optimizer/ascend/mindir/maxpool_with_argmax_unify_mindir.h" #include "backend/optimizer/ascend/mindir/conv2d_unify_mindir.h" +#include "backend/optimizer/ascend/mindir/sparse_softmax_cross_entropy_with_logits_unify_mindir.h" #include "runtime/device/kernel_adjust.h" #include "runtime/device/ascend/ascend_stream_assign.h" #include "backend/session/anf_runtime_algorithm.h" @@ -446,6 +447,9 @@ void AscendSession::UnifyMindIR(const KernelGraphPtr &graph) { unify_mindir_pm->AddPass(std::make_shared()); unify_mindir_pm->AddPass(std::make_shared()); unify_mindir_pm->AddPass(std::make_shared()); + unify_mindir_pm->AddPass(std::make_shared()); + unify_mindir_pm->AddPass(std::make_shared()); + unify_mindir_pm->AddPass(std::make_shared()); optimizer->AddPassManager(unify_mindir_pm); (void)optimizer->Optimize(graph); diff --git a/mindspore/nn/loss/loss.py b/mindspore/nn/loss/loss.py index 9ae7c4a748..2a9bc9a696 100644 --- a/mindspore/nn/loss/loss.py +++ b/mindspore/nn/loss/loss.py @@ -267,16 +267,13 @@ class SoftmaxCrossEntropyWithLogits(_Loss): self.on_value = Tensor(1.0, mstype.float32) self.off_value = Tensor(0., mstype.float32) self.is_cpugpu = context.get_context('device_target') in ["CPU", "GPU"] - - if self.is_cpugpu: - self.sparse_softmax_cross_entropy = P.SparseSoftmaxCrossEntropyWithLogits() + self.sparse_softmax_cross_entropy = P.SparseSoftmaxCrossEntropyWithLogits() def construct(self, logits, labels): - if self.is_cpugpu and self.sparse and self.reduction == 'mean': - x = self.sparse_softmax_cross_entropy(logits, labels) - return x - if self.sparse: + if self.reduction == 'mean': + x = self.sparse_softmax_cross_entropy(logits, labels) + return x labels = 
self.one_hot(labels, F.shape(logits)[-1], self.on_value, self.off_value) x = self.softmax_cross_entropy(logits, labels)[0] return self.get_loss(x)