| @@ -0,0 +1,89 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/optimizer/gpu/batch_norm_add_relu_fusion.h" | |||
| #include <memory> | |||
| #include <vector> | |||
| #include <string> | |||
| #include "backend/session/anf_runtime_algorithm.h" | |||
| #include "ir/primitive.h" | |||
| #include "utils/utils.h" | |||
| #include "backend/optimizer/common/helper.h" | |||
| namespace mindspore { | |||
| namespace opt { | |||
| const BaseRef BatchNormAddReluFusion::DefinePattern() const { | |||
| VectorRef batch_norm_ex = VectorRef({prim::kPrimFusedBatchNormEx, x_, scale_, bias_, mean_, var_}); | |||
| VectorRef tuple_get_item = VectorRef({prim::kPrimTupleGetItem, batch_norm_ex, index_}); | |||
| VectorRef tensor_add = VectorRef({prim::kPrimTensorAdd, tuple_get_item, z_}); | |||
| VectorRef relu = VectorRef({prim::kPrimRelu, tensor_add}); | |||
| return relu; | |||
| } | |||
// Rewrites a matched Relu(TensorAdd(TupleGetItem(FusedBatchNormEx, i), z)) subgraph into a single
// FusedBatchNormExWithAddAndActivation node. Returns the TupleGetItem node (which, after the
// manager->Replace below, selects from the fused node), or nullptr when the fusion does not apply.
const AnfNodePtr BatchNormAddReluFusion::Process(const FuncGraphPtr &graph, const AnfNodePtr &node,
                                                 const EquivPtr &equiv) const {
  // NOTE(review): `equiv` is unused; pattern operands are re-derived by walking inputs instead.
  MS_EXCEPTION_IF_NULL(graph);
  MS_EXCEPTION_IF_NULL(node);
  // `node` is the matched Relu; walk up the chain: Relu -> TensorAdd -> TupleGetItem -> FusedBatchNormEx.
  auto tensor_add = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(node), 0);
  MS_EXCEPTION_IF_NULL(tensor_add);
  auto tuple_get_item = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(tensor_add), 0);
  MS_EXCEPTION_IF_NULL(tuple_get_item);
  auto batch_norm_ex = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(tuple_get_item), 0);
  MS_EXCEPTION_IF_NULL(batch_norm_ex);
  // Only fuse the float16 variant — presumably the fused GPU kernel is registered for half
  // precision only; confirm against the kernel registration.
  if (AnfAlgo::GetOutputInferDataType(batch_norm_ex, 0) != kNumberTypeFloat16) {
    return nullptr;
  }
  // Collect the original FusedBatchNormEx inputs plus the add operand `z`.
  auto x = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(batch_norm_ex), 0);
  auto scale = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(batch_norm_ex), 1);
  auto bias = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(batch_norm_ex), 2);
  auto mean = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(batch_norm_ex), 3);
  auto var = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(batch_norm_ex), 4);
  auto z = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(tensor_add), 1);
  MS_EXCEPTION_IF_NULL(x);
  MS_EXCEPTION_IF_NULL(scale);
  MS_EXCEPTION_IF_NULL(bias);
  MS_EXCEPTION_IF_NULL(mean);
  MS_EXCEPTION_IF_NULL(var);
  MS_EXCEPTION_IF_NULL(z);
  auto prim = std::make_shared<Primitive>(kFusedBatchNormExWithAddAndActivation);
  MS_EXCEPTION_IF_NULL(prim);
  std::vector<AnfNodePtr> inputs = {NewValueNode(prim), x, scale, bias, mean, var, z};
  auto fused_batch_norm_with_add_relu = graph->NewCNode(inputs);
  MS_EXCEPTION_IF_NULL(fused_batch_norm_with_add_relu);
  // The fused node keeps the same multi-output signature as FusedBatchNormEx, so copy its
  // inferred output types/shapes one-for-one.
  std::vector<TypeId> outputs_type;
  std::vector<std::vector<size_t>> outputs_shape;
  auto output_num = AnfAlgo::GetOutputTensorNum(batch_norm_ex);
  for (size_t i = 0; i < output_num; i++) {
    outputs_type.push_back(AnfAlgo::GetOutputInferDataType(batch_norm_ex, i));
    outputs_shape.push_back(AnfAlgo::GetOutputInferShape(batch_norm_ex, i));
  }
  AnfAlgo::SetOutputInferTypeAndShape(outputs_type, outputs_shape, fused_batch_norm_with_add_relu.get());
  AnfAlgo::CopyNodeAttrs(batch_norm_ex, fused_batch_norm_with_add_relu);
  auto manager = graph->manager();
  MS_EXCEPTION_IF_NULL(manager);
  // Redirect all users of the old FusedBatchNormEx to the fused node; returning tuple_get_item
  // then splices the fused result in place of the Relu output.
  // NOTE(review): there is no check that tuple_get_item/tensor_add have no consumers outside this
  // pattern; if they do, those consumers would be bypassed — consider a GetRealNodeUsedList guard
  // as done in BatchNormReluGradFusion.
  manager->Replace(batch_norm_ex, fused_batch_norm_with_add_relu);
  return tuple_get_item;
}
| } // namespace opt | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,51 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GPU_BATCH_NORM_ADD_RELU_FUSION_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GPU_BATCH_NORM_ADD_RELU_FUSION_H_ | |||
| #include <memory> | |||
| #include "backend/optimizer/common/optimizer.h" | |||
| namespace mindspore { | |||
| namespace opt { | |||
| class BatchNormAddReluFusion : public PatternProcessPass { | |||
| public: | |||
| explicit BatchNormAddReluFusion(bool multigraph = true) | |||
| : PatternProcessPass("batch_norm_add_relu_fusion", multigraph) { | |||
| x_ = std::make_shared<Var>(); | |||
| scale_ = std::make_shared<Var>(); | |||
| bias_ = std::make_shared<Var>(); | |||
| mean_ = std::make_shared<Var>(); | |||
| var_ = std::make_shared<Var>(); | |||
| index_ = std::make_shared<Var>(); | |||
| z_ = std::make_shared<Var>(); | |||
| } | |||
| ~BatchNormAddReluFusion() override = default; | |||
| const BaseRef DefinePattern() const override; | |||
| const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override; | |||
| private: | |||
| VarPtr x_; | |||
| VarPtr scale_; | |||
| VarPtr bias_; | |||
| VarPtr mean_; | |||
| VarPtr var_; | |||
| VarPtr index_; | |||
| VarPtr z_; | |||
| }; | |||
| } // namespace opt | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GPU_BATCH_NORM_ADD_RELU_FUSION_H_ | |||
| @@ -0,0 +1,84 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/optimizer/gpu/batch_norm_relu_fusion.h" | |||
| #include <memory> | |||
| #include <vector> | |||
| #include <string> | |||
| #include "backend/session/anf_runtime_algorithm.h" | |||
| #include "ir/primitive.h" | |||
| #include "utils/utils.h" | |||
| #include "backend/optimizer/common/helper.h" | |||
| namespace mindspore { | |||
| namespace opt { | |||
| const BaseRef BatchNormReluFusion::DefinePattern() const { | |||
| VectorRef batch_norm_ex = VectorRef({prim::kPrimFusedBatchNormEx, x_, scale_, bias_, mean_, var_}); | |||
| VectorRef tuple_get = VectorRef({prim::kPrimTupleGetItem, batch_norm_ex, index_}); | |||
| VectorRef relu = VectorRef({prim::kPrimRelu, tuple_get}); | |||
| return relu; | |||
| } | |||
// Rewrites a matched Relu(TupleGetItem(FusedBatchNormEx, i)) subgraph into a single
// FusedBatchNormExWithActivation node. Returns the TupleGetItem node (which, after the
// manager->Replace below, selects from the fused node), or nullptr when the fusion does not apply.
const AnfNodePtr BatchNormReluFusion::Process(const FuncGraphPtr &graph, const AnfNodePtr &node,
                                              const EquivPtr &equiv) const {
  // NOTE(review): `equiv` is unused; operands are re-derived by walking inputs instead.
  MS_EXCEPTION_IF_NULL(graph);
  MS_EXCEPTION_IF_NULL(node);
  // `node` is the matched Relu; walk up: Relu -> TupleGetItem -> FusedBatchNormEx.
  auto tuple_get_item = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(node), 0);
  MS_EXCEPTION_IF_NULL(tuple_get_item);
  auto batch_norm_ex = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(tuple_get_item), 0);
  MS_EXCEPTION_IF_NULL(batch_norm_ex);
  // Only fuse the float16 variant — presumably the fused GPU kernel is registered for half
  // precision only; confirm against the kernel registration.
  if (AnfAlgo::GetOutputInferDataType(batch_norm_ex, 0) != kNumberTypeFloat16) {
    return nullptr;
  }
  // Reuse the original FusedBatchNormEx inputs for the fused node.
  auto x = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(batch_norm_ex), 0);
  auto scale = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(batch_norm_ex), 1);
  auto bias = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(batch_norm_ex), 2);
  auto mean = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(batch_norm_ex), 3);
  auto var = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(batch_norm_ex), 4);
  MS_EXCEPTION_IF_NULL(x);
  MS_EXCEPTION_IF_NULL(scale);
  MS_EXCEPTION_IF_NULL(bias);
  MS_EXCEPTION_IF_NULL(mean);
  MS_EXCEPTION_IF_NULL(var);
  auto prim = std::make_shared<Primitive>(kFusedBatchNormExWithActivation);
  MS_EXCEPTION_IF_NULL(prim);
  std::vector<AnfNodePtr> inputs = {NewValueNode(prim), x, scale, bias, mean, var};
  auto fused_batch_norm_with_relu = graph->NewCNode(inputs);
  MS_EXCEPTION_IF_NULL(fused_batch_norm_with_relu);
  // Copy the multi-output infer types/shapes from the original FusedBatchNormEx one-for-one.
  std::vector<TypeId> outputs_type;
  std::vector<std::vector<size_t>> outputs_shape;
  auto output_num = AnfAlgo::GetOutputTensorNum(batch_norm_ex);
  for (size_t i = 0; i < output_num; i++) {
    outputs_type.push_back(AnfAlgo::GetOutputInferDataType(batch_norm_ex, i));
    outputs_shape.push_back(AnfAlgo::GetOutputInferShape(batch_norm_ex, i));
  }
  AnfAlgo::SetOutputInferTypeAndShape(outputs_type, outputs_shape, fused_batch_norm_with_relu.get());
  AnfAlgo::CopyNodeAttrs(batch_norm_ex, fused_batch_norm_with_relu);
  auto manager = graph->manager();
  MS_EXCEPTION_IF_NULL(manager);
  // Redirect all users of the old FusedBatchNormEx to the fused node.
  // NOTE(review): no guard that tuple_get_item's only consumer is this Relu — other consumers
  // of the un-activated output would silently receive the activated one; verify.
  manager->Replace(batch_norm_ex, fused_batch_norm_with_relu);
  return tuple_get_item;
}
| } // namespace opt | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,48 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GPU_BATCH_NORM_RELU_FUSION_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GPU_BATCH_NORM_RELU_FUSION_H_ | |||
| #include <memory> | |||
| #include "backend/optimizer/common/optimizer.h" | |||
| namespace mindspore { | |||
| namespace opt { | |||
| class BatchNormReluFusion : public PatternProcessPass { | |||
| public: | |||
| explicit BatchNormReluFusion(bool multigraph = true) : PatternProcessPass("batch_norm_relu_fusion", multigraph) { | |||
| x_ = std::make_shared<Var>(); | |||
| scale_ = std::make_shared<Var>(); | |||
| bias_ = std::make_shared<Var>(); | |||
| mean_ = std::make_shared<Var>(); | |||
| var_ = std::make_shared<Var>(); | |||
| index_ = std::make_shared<Var>(); | |||
| } | |||
| ~BatchNormReluFusion() override = default; | |||
| const BaseRef DefinePattern() const override; | |||
| const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override; | |||
| private: | |||
| VarPtr x_; | |||
| VarPtr scale_; | |||
| VarPtr bias_; | |||
| VarPtr mean_; | |||
| VarPtr var_; | |||
| VarPtr index_; | |||
| }; | |||
| } // namespace opt | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GPU_BATCH_NORM_RELU_FUSION_H_ | |||
| @@ -0,0 +1,90 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "backend/optimizer/gpu/batch_norm_relu_grad_fusion.h" | |||
| #include <memory> | |||
| #include <vector> | |||
| #include <string> | |||
| #include "backend/session/anf_runtime_algorithm.h" | |||
| #include "ir/primitive.h" | |||
| #include "utils/utils.h" | |||
| #include "backend/optimizer/common/helper.h" | |||
| namespace mindspore { | |||
| namespace opt { | |||
| const BaseRef BatchNormReluGradFusion::DefinePattern() const { | |||
| VectorRef relu_grad = VectorRef({prim::kPrimReluGrad, dy_, y_}); | |||
| VectorRef batch_norm_grad = | |||
| VectorRef({prim::kPrimFusedBatchNormGradEx, relu_grad, x_, scale_, save_mean_, save_var_, reserve_}); | |||
| return batch_norm_grad; | |||
| } | |||
// Rewrites a matched FusedBatchNormGradEx(ReluGrad(dy, y), ...) subgraph into a single
// FusedBatchNormGradExWithActivation node. Returns the fused node, or nullptr when the
// fusion does not apply.
const AnfNodePtr BatchNormReluGradFusion::Process(const FuncGraphPtr &graph, const AnfNodePtr &node,
                                                  const EquivPtr &equiv) const {
  // NOTE(review): `equiv` is unused; operands are re-derived by walking inputs instead.
  MS_EXCEPTION_IF_NULL(graph);
  MS_EXCEPTION_IF_NULL(node);
  // Only fuse the float16 variant — presumably the fused GPU kernel is registered for half
  // precision only; confirm against the kernel registration.
  if (AnfAlgo::GetOutputInferDataType(node, 0) != kNumberTypeFloat16) {
    return nullptr;
  }
  auto relu_grad = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(node), 0);
  MS_EXCEPTION_IF_NULL(relu_grad);
  // Bail out when ReluGrad feeds more than one consumer: fusing would remove a node whose
  // output other users still need.
  auto outlist = GetRealNodeUsedList(graph, relu_grad);
  if (outlist->size() >= 2) {
    return nullptr;
  }
  // Gather the ReluGrad operands and the original FusedBatchNormGradEx inputs.
  auto dy = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(relu_grad), 0);
  MS_EXCEPTION_IF_NULL(dy);
  auto y = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(relu_grad), 1);
  MS_EXCEPTION_IF_NULL(y);
  auto x = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(node), 1);
  MS_EXCEPTION_IF_NULL(x);
  auto scale = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(node), 2);
  MS_EXCEPTION_IF_NULL(scale);
  auto save_mean = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(node), 3);
  MS_EXCEPTION_IF_NULL(save_mean);
  auto save_var = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(node), 4);
  MS_EXCEPTION_IF_NULL(save_var);
  auto reserve = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(node), 5);
  MS_EXCEPTION_IF_NULL(reserve);
  // Recover the forward batch-norm's bias by walking save_mean's producer: input 0 of
  // save_mean's defining CNode is taken to be the forward batch-norm, and its input 2 the bias.
  // NOTE(review): this assumes save_mean is produced by a TupleGetItem-like node whose first
  // input is the forward batch-norm — confirm the producer shape holds for all matched graphs.
  auto batch_norm = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(save_mean), 0);
  MS_EXCEPTION_IF_NULL(batch_norm);
  auto bias = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(batch_norm), 2);
  MS_EXCEPTION_IF_NULL(bias);
  auto prim = std::make_shared<Primitive>(kFusedBatchNormGradExWithActivation);
  MS_EXCEPTION_IF_NULL(prim);
  // The fused kernel additionally needs bias and the forward activation output y.
  std::vector<AnfNodePtr> inputs = {NewValueNode(prim), dy, x, scale, save_mean, save_var, reserve, bias, y};
  auto fused_batch_norm_grad_with_relu = graph->NewCNode(inputs);
  MS_EXCEPTION_IF_NULL(fused_batch_norm_grad_with_relu);
  // Copy the multi-output infer types/shapes from the original grad node one-for-one.
  std::vector<TypeId> outputs_type;
  std::vector<std::vector<size_t>> outputs_shape;
  auto output_num = AnfAlgo::GetOutputTensorNum(node);
  for (size_t i = 0; i < output_num; i++) {
    outputs_type.push_back(AnfAlgo::GetOutputInferDataType(node, i));
    outputs_shape.push_back(AnfAlgo::GetOutputInferShape(node, i));
  }
  AnfAlgo::SetOutputInferTypeAndShape(outputs_type, outputs_shape, fused_batch_norm_grad_with_relu.get());
  AnfAlgo::CopyNodeAttrs(node, fused_batch_norm_grad_with_relu);
  return fused_batch_norm_grad_with_relu;
}
| } // namespace opt | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,51 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GPU_BATCH_NORM_RELU_GRAD_FUSION_H_ | |||
| #define MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GPU_BATCH_NORM_RELU_GRAD_FUSION_H_ | |||
| #include <memory> | |||
| #include "backend/optimizer/common/optimizer.h" | |||
| namespace mindspore { | |||
| namespace opt { | |||
| class BatchNormReluGradFusion : public PatternProcessPass { | |||
| public: | |||
| explicit BatchNormReluGradFusion(bool multigraph = true) | |||
| : PatternProcessPass("batch_norm_relu_grad_fusion", multigraph) { | |||
| dy_ = std::make_shared<Var>(); | |||
| y_ = std::make_shared<Var>(); | |||
| x_ = std::make_shared<Var>(); | |||
| scale_ = std::make_shared<Var>(); | |||
| save_mean_ = std::make_shared<Var>(); | |||
| save_var_ = std::make_shared<Var>(); | |||
| reserve_ = std::make_shared<Var>(); | |||
| } | |||
| ~BatchNormReluGradFusion() override = default; | |||
| const BaseRef DefinePattern() const override; | |||
| const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override; | |||
| private: | |||
| VarPtr dy_; | |||
| VarPtr y_; | |||
| VarPtr x_; | |||
| VarPtr scale_; | |||
| VarPtr save_mean_; | |||
| VarPtr save_var_; | |||
| VarPtr reserve_; | |||
| }; | |||
| } // namespace opt | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GPU_BATCH_NORM_RELU_GRAD_FUSION_H_ | |||
| @@ -117,8 +117,10 @@ inline const PrimitivePtr kPrimApplyCenteredRMSProp = std::make_shared<Primitive | |||
inline const PrimitivePtr kPrimAvgPoolGrad = std::make_shared<Primitive>("AvgPoolGrad");
inline const PrimitivePtr kPrimAvgPoolGradVm = std::make_shared<Primitive>("AvgPoolGradVm");
inline const PrimitivePtr kPrimFusedBatchNorm = std::make_shared<Primitive>("FusedBatchNorm");
// Extended batch-norm forward primitive; matched by the GPU batch-norm (+add) +relu fusion passes.
inline const PrimitivePtr kPrimFusedBatchNormEx = std::make_shared<Primitive>("FusedBatchNormEx");
inline const PrimitivePtr kPrimConv2D = std::make_shared<Primitive>("Conv2D");
inline const PrimitivePtr kPrimFusedBatchNormGrad = std::make_shared<Primitive>("FusedBatchNormGrad");
// Extended batch-norm backward primitive; matched by the GPU batch-norm relu-grad fusion pass.
inline const PrimitivePtr kPrimFusedBatchNormGradEx = std::make_shared<Primitive>("FusedBatchNormGradEx");
inline const PrimitivePtr kPrimBatchNorm = std::make_shared<Primitive>("BatchNorm");
inline const PrimitivePtr kPrimBatchNormGrad = std::make_shared<Primitive>("BatchNormGrad");
inline const PrimitivePtr kPrimReluGrad = std::make_shared<Primitive>("ReluGrad");