From 50dc89332c5aacb61ffff03ede9bc08cf524e0a4 Mon Sep 17 00:00:00 2001 From: VectorSL Date: Sat, 19 Sep 2020 10:03:29 +0800 Subject: [PATCH] fix bn cast --- .../backend/kernel_compiler/gpu/nn/pooling_gpu_kernel.h | 2 +- .../backend/kernel_compiler/gpu/nn/pooling_grad_gpu_kernel.h | 2 +- .../ccsrc/backend/optimizer/gpu/replace_bn_cast_fusion.cc | 5 ++++- .../backend/optimizer/gpu/replace_bn_grad_cast_fusion.cc | 5 +++-- 4 files changed, 9 insertions(+), 5 deletions(-) diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/pooling_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/pooling_gpu_kernel.h index 6bebca4e81..8e4e506d98 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/pooling_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/pooling_gpu_kernel.h @@ -144,7 +144,7 @@ class PoolingGpuFwdKernel : public GpuKernel { void SetPoolingMode(const CNodePtr &kernel_node) { mode_ = AnfAlgo::GetCNodeName(kernel_node); if (mode_ == "AvgPool") { - pooling_mode_ = CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING; + pooling_mode_ = CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING; pad_value_ = 0.0; } else { pooling_mode_ = CUDNN_POOLING_MAX; diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/pooling_grad_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/pooling_grad_gpu_kernel.h index ac78d2cd2a..185686a5ae 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/pooling_grad_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/pooling_grad_gpu_kernel.h @@ -207,7 +207,7 @@ class PoolingGradGpuKernel : public GpuKernel { cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0))); mode_ = AnfAlgo::GetCNodeName(kernel_node); if (mode_ == "AvgPoolGradGpu") { - pooling_mode_ = CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING; + pooling_mode_ = CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING; pad_value_ = 0.0; } else { pooling_mode_ = CUDNN_POOLING_MAX; diff --git 
a/mindspore/ccsrc/backend/optimizer/gpu/replace_bn_cast_fusion.cc b/mindspore/ccsrc/backend/optimizer/gpu/replace_bn_cast_fusion.cc index f328e625e4..d67d20adf2 100644 --- a/mindspore/ccsrc/backend/optimizer/gpu/replace_bn_cast_fusion.cc +++ b/mindspore/ccsrc/backend/optimizer/gpu/replace_bn_cast_fusion.cc @@ -37,13 +37,16 @@ const AnfNodePtr ReplaceBNCastFusion::Process(const FuncGraphPtr &graph, const A const EquivPtr &equiv) const { MS_EXCEPTION_IF_NULL(graph); MS_EXCEPTION_IF_NULL(node); - MS_EXCEPTION_IF_NULL(equiv); auto fbn2 = AnfAlgo::GetInputNode(utils::cast(node), 0); auto x_after = AnfAlgo::GetInputNode(utils::cast(fbn2), 0); auto x_before = AnfAlgo::GetInputNode(utils::cast(x_after), 0); MS_EXCEPTION_IF_NULL(fbn2); MS_EXCEPTION_IF_NULL(x_after); MS_EXCEPTION_IF_NULL(x_before); + // only deal with x_after with fp32: x 16->32->bn->16->32 + if (AnfAlgo::GetOutputInferDataType(x_after, 0) == kNumberTypeFloat16) { + return nullptr; + } std::vector outputs_type; std::vector> outputs_shape; auto manager = graph->manager(); diff --git a/mindspore/ccsrc/backend/optimizer/gpu/replace_bn_grad_cast_fusion.cc b/mindspore/ccsrc/backend/optimizer/gpu/replace_bn_grad_cast_fusion.cc index ef5c698a13..01c915142b 100644 --- a/mindspore/ccsrc/backend/optimizer/gpu/replace_bn_grad_cast_fusion.cc +++ b/mindspore/ccsrc/backend/optimizer/gpu/replace_bn_grad_cast_fusion.cc @@ -68,8 +68,9 @@ const AnfNodePtr ReplaceBNGradCastFusion::Process(const FuncGraphPtr &graph, con auto dy_before = AnfAlgo::GetInputNode(utils::cast(dy_after), 0); auto x_ = AnfAlgo::GetInputNode(utils::cast(fbn2g), 1); MS_EXCEPTION_IF_NULL(x_); - // if x_type is fp32, the cast is necessary. - if (AnfAlgo::GetOutputInferDataType(x_, 0) == kNumberTypeFloat32) { + // if x_type is fp32, the cast is necessary; also only deal with dy_after with fp32: dy 16->32->bng->16->32. 
+ if (AnfAlgo::GetOutputInferDataType(x_, 0) == kNumberTypeFloat32 || + AnfAlgo::GetOutputInferDataType(dy_after, 0) == kNumberTypeFloat16) { return nullptr; } MS_EXCEPTION_IF_NULL(fbn2g);