Merge pull request !3090 from VectorSL/batchnorm-casttags/v0.6.0-beta
| @@ -13,8 +13,8 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_GPU_IR_FUSION_ADAM_FUSION_H_ | |||
| #define MINDSPORE_CCSRC_PRE_ACTIVATE_GPU_IR_FUSION_ADAM_FUSION_H_ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GPU_ADAM_FUSION_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GPU_ADAM_FUSION_H_ | |||
| #include <memory> | |||
| #include "backend/optimizer/common/optimizer.h" | |||
| @@ -53,4 +53,4 @@ class AdamFusion : public PatternProcessPass { | |||
| }; | |||
| } // namespace opt | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_PRE_ACTIVATE_GPU_IR_FUSION_ADAM_FUSION_H_ | |||
| #endif // MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GPU_ADAM_FUSION_H_ | |||
| @@ -13,8 +13,8 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_GPU_IR_FUSION_ADAM_WEIGHT_DECAY_FUSION_H_ | |||
| #define MINDSPORE_CCSRC_PRE_ACTIVATE_GPU_IR_FUSION_ADAM_WEIGHT_DECAY_FUSION_H_ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GPU_ADAM_WEIGHT_DECAY_FUSION_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GPU_ADAM_WEIGHT_DECAY_FUSION_H_ | |||
| #include <memory> | |||
| #include "backend/optimizer/common/optimizer.h" | |||
| @@ -55,4 +55,4 @@ class AdamWeightDecayFusion : public PatternProcessPass { | |||
| }; | |||
| } // namespace opt | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_PRE_ACTIVATE_GPU_IR_FUSION_ADAM_WEIGHT_DECAY_FUSION_H_ | |||
| #endif // MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GPU_ADAM_WEIGHT_DECAY_FUSION_H_ | |||
| @@ -0,0 +1,65 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/optimizer/gpu/replace_addn_fusion.h" | |||
| #include <memory> | |||
| #include <vector> | |||
| #include <string> | |||
| #include "backend/session/anf_runtime_algorithm.h" | |||
| #include "ir/primitive.h" | |||
| #include "utils/utils.h" | |||
| #include "backend/optimizer/common/helper.h" | |||
| namespace mindspore { | |||
| namespace opt { | |||
| const BaseRef ReplaceAddNFusion::DefinePattern() const { | |||
| VectorRef addn = VectorRef({prim::kPrimAddN, A, B}); | |||
| return addn; | |||
| } | |||
| const AnfNodePtr ReplaceAddNFusion::Process(const FuncGraphPtr &graph, const AnfNodePtr &node, | |||
| const EquivPtr &equiv) const { | |||
| MS_EXCEPTION_IF_NULL(graph); | |||
| MS_EXCEPTION_IF_NULL(node); | |||
| MS_EXCEPTION_IF_NULL(equiv); | |||
| auto A = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(node), 0); | |||
| auto B = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(node), 1); | |||
| MS_EXCEPTION_IF_NULL(A); | |||
| MS_EXCEPTION_IF_NULL(B); | |||
| int num_input = AnfAlgo::GetNodeAttr<int>(node, "n"); | |||
| if (num_input == 2) { | |||
| auto prim = std::make_shared<Primitive>(prim::kPrimTensorAdd->name()); | |||
| MS_EXCEPTION_IF_NULL(prim); | |||
| std::vector<AnfNodePtr> inputs = {NewValueNode(prim), A, B}; | |||
| auto add_new = graph->NewCNode(inputs); | |||
| std::vector<TypeId> outputs_type; | |||
| std::vector<std::vector<size_t>> outputs_shape; | |||
| outputs_type.push_back(AnfAlgo::GetOutputInferDataType(A, 0)); | |||
| outputs_shape.push_back(AnfAlgo::GetOutputInferShape(A, 0)); | |||
| AnfAlgo::SetOutputInferTypeAndShape(outputs_type, outputs_shape, add_new.get()); | |||
| auto manager = graph->manager(); | |||
| MS_EXCEPTION_IF_NULL(manager); | |||
| manager->Replace(utils::cast<CNodePtr>(node), utils::cast<CNodePtr>(add_new)); | |||
| return add_new; | |||
| } else { | |||
| return nullptr; | |||
| } | |||
| } | |||
| } // namespace opt | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,40 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GPU_REPLACE_ADDN_FUSION_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GPU_REPLACE_ADDN_FUSION_H_ | |||
| #include <memory> | |||
| #include "backend/optimizer/common/optimizer.h" | |||
| namespace mindspore { | |||
| namespace opt { | |||
| class ReplaceAddNFusion : public PatternProcessPass { | |||
| public: | |||
| explicit ReplaceAddNFusion(bool multigraph = true) : PatternProcessPass("replace_addn", multigraph) { | |||
| A = std::make_shared<Var>(); | |||
| B = std::make_shared<Var>(); | |||
| } | |||
| ~ReplaceAddNFusion() override = default; | |||
| const BaseRef DefinePattern() const override; | |||
| const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override; | |||
| private: | |||
| VarPtr A; | |||
| VarPtr B; | |||
| }; | |||
| } // namespace opt | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GPU_REPLACE_ADDN_FUSION_H_ | |||
| @@ -0,0 +1,92 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/optimizer/gpu/replace_bn_cast_fusion.h" | |||
| #include <memory> | |||
| #include <vector> | |||
| #include <string> | |||
| #include "backend/session/anf_runtime_algorithm.h" | |||
| #include "ir/primitive.h" | |||
| #include "utils/utils.h" | |||
| #include "backend/optimizer/common/helper.h" | |||
| namespace mindspore { | |||
| namespace opt { | |||
| const BaseRef ReplaceBNCastFusion::DefinePattern() const { | |||
| VectorRef in_cast = VectorRef({prim::kPrimCast, x_}); | |||
| VectorRef fbn2 = VectorRef({prim::kPrimFusedBatchNorm, in_cast, scale_, bias_, mean_, var_}); | |||
| VectorRef tupleget = VectorRef({prim::kPrimTupleGetItem, fbn2, index_}); | |||
| VectorRef out_cast = VectorRef({prim::kPrimCast, tupleget}); | |||
| return out_cast; | |||
| } | |||
| const AnfNodePtr ReplaceBNCastFusion::Process(const FuncGraphPtr &graph, const AnfNodePtr &node, | |||
| const EquivPtr &equiv) const { | |||
| MS_EXCEPTION_IF_NULL(graph); | |||
| MS_EXCEPTION_IF_NULL(node); | |||
| MS_EXCEPTION_IF_NULL(equiv); | |||
| auto tuple = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(node), 0); | |||
| auto index_node = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(tuple), 1); | |||
| MS_EXCEPTION_IF_NULL(index_node); | |||
| auto value_node = index_node->cast<ValueNodePtr>(); | |||
| MS_EXCEPTION_IF_NULL(value_node); | |||
| int item_idx = GetValue<int>(value_node->value()); | |||
| auto fbn2 = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(tuple), 0); | |||
| auto x_after = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(fbn2), 0); | |||
| auto x_before = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(x_after), 0); | |||
| if (item_idx != 0) { | |||
| return nullptr; | |||
| } | |||
| auto scale = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(fbn2), 1); | |||
| auto bias = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(fbn2), 2); | |||
| auto mean = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(fbn2), 3); | |||
| auto var = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(fbn2), 4); | |||
| MS_EXCEPTION_IF_NULL(fbn2); | |||
| MS_EXCEPTION_IF_NULL(x_after); | |||
| MS_EXCEPTION_IF_NULL(x_before); | |||
| MS_EXCEPTION_IF_NULL(scale); | |||
| MS_EXCEPTION_IF_NULL(bias); | |||
| MS_EXCEPTION_IF_NULL(mean); | |||
| MS_EXCEPTION_IF_NULL(var); | |||
| auto manager = graph->manager(); | |||
| MS_EXCEPTION_IF_NULL(manager); | |||
| manager->Replace(utils::cast<CNodePtr>(x_after), utils::cast<CNodePtr>(x_before)); | |||
| manager->Replace(utils::cast<CNodePtr>(node), utils::cast<CNodePtr>(tuple)); | |||
| std::vector<TypeId> outputs_type; | |||
| std::vector<std::vector<size_t>> outputs_shape; | |||
| auto output_num = AnfAlgo::GetOutputTensorNum(fbn2); | |||
| for (size_t i = 0; i < output_num; i++) { | |||
| outputs_type.push_back(AnfAlgo::GetOutputInferDataType(fbn2, i)); | |||
| outputs_shape.push_back(AnfAlgo::GetOutputInferShape(fbn2, i)); | |||
| } | |||
| outputs_type[0] = kNumberTypeFloat16; | |||
| AnfAlgo::SetOutputInferTypeAndShape(outputs_type, outputs_shape, fbn2.get()); | |||
| outputs_type.clear(); | |||
| outputs_shape.clear(); | |||
| outputs_type.push_back(kNumberTypeFloat16); | |||
| outputs_shape.push_back(AnfAlgo::GetOutputInferShape(tuple, 0)); | |||
| AnfAlgo::SetOutputInferTypeAndShape(outputs_type, outputs_shape, tuple.get()); | |||
| return tuple; | |||
| } | |||
| } // namespace opt | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,58 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GPU_REPLACE_BN_CAST_FUSION_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GPU_REPLACE_BN_CAST_FUSION_H_ | |||
| #include <memory> | |||
| #include "backend/optimizer/common/optimizer.h" | |||
| namespace mindspore { | |||
| namespace opt { | |||
| class ReplaceBNCastFusion : public PatternProcessPass { | |||
| public: | |||
| explicit ReplaceBNCastFusion(bool multigraph = true) : PatternProcessPass("replace_bn_cast", multigraph) { | |||
| x_ = std::make_shared<Var>(); | |||
| scale_ = std::make_shared<Var>(); | |||
| bias_ = std::make_shared<Var>(); | |||
| mean_ = std::make_shared<Var>(); | |||
| var_ = std::make_shared<Var>(); | |||
| y_ = std::make_shared<Var>(); | |||
| running_mean_ = std::make_shared<Var>(); | |||
| running_var_ = std::make_shared<Var>(); | |||
| save_mean_ = std::make_shared<Var>(); | |||
| save_var_ = std::make_shared<Var>(); | |||
| index_ = std::make_shared<Var>(); | |||
| } | |||
| ~ReplaceBNCastFusion() override = default; | |||
| const BaseRef DefinePattern() const override; | |||
| const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override; | |||
| private: | |||
| VarPtr x_; | |||
| VarPtr scale_; | |||
| VarPtr bias_; | |||
| VarPtr mean_; | |||
| VarPtr var_; | |||
| VarPtr y_; | |||
| VarPtr running_mean_; | |||
| VarPtr running_var_; | |||
| VarPtr save_mean_; | |||
| VarPtr save_var_; | |||
| VarPtr index_; | |||
| }; | |||
| } // namespace opt | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GPU_REPLACE_BN_CAST_FUSION_H_ | |||