@@ -23,6 +23,11 @@ MS_REG_GPU_KERNEL_ONE(ReLU, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOut
 MS_REG_GPU_KERNEL_ONE(ReLU, KernelAttr().AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
                       ActivationGpuFwdKernel, half)
+MS_REG_GPU_KERNEL_ONE(ReLU6, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
+                      ActivationGpuFwdKernel, float)
+MS_REG_GPU_KERNEL_ONE(ReLU6, KernelAttr().AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
+                      ActivationGpuFwdKernel, half)
 MS_REG_GPU_KERNEL_ONE(Tanh, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
                       ActivationGpuFwdKernel, float)
 MS_REG_GPU_KERNEL_ONE(Tanh, KernelAttr().AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
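These registrations route the new ReLU6 op through the existing ActivationGpuFwdKernel template for float32 and float16. For reference, ReLU6 clamps its input to [0, 6]; a minimal standalone sketch of the expected semantics (plain C++, not MindSpore code):

```cpp
#include <algorithm>
#include <cstdio>

// Reference semantics of ReLU6: y = min(max(x, 0), 6).
float Relu6Ref(float x) { return std::min(std::max(x, 0.0f), 6.0f); }

int main() {
  for (float x : {-1.0f, 3.5f, 6.0f, 7.2f}) {
    std::printf("relu6(%4.1f) = %.1f\n", x, Relu6Ref(x));  // 0.0, 3.5, 6.0, 6.0
  }
  return 0;
}
```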
@@ -83,7 +83,8 @@ class ActivationGpuFwdKernel : public GpuKernel {
       return true;
     }
     std::vector<int> shape;
-    CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetActivationDescriptor(activation_desc_, mode_, CUDNN_NOT_PROPAGATE_NAN, 0.0),
+    double coef = (mode_ == CUDNN_ACTIVATION_CLIPPED_RELU) ? 6.0 : 0.0;
+    CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetActivationDescriptor(activation_desc_, mode_, CUDNN_NOT_PROPAGATE_NAN, coef),
                                 "cudnnSetActivationDescriptor failed");
     const int split_dim = 4;
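In cuDNN, the coef argument of cudnnSetActivationDescriptor is the clipping ceiling for CUDNN_ACTIVATION_CLIPPED_RELU, so coef = 6.0 makes the descriptor compute exactly min(max(x, 0), 6). A self-contained sketch of that descriptor in use outside MindSpore (a minimal sketch assuming cuDNN and a CUDA device are available; error handling reduced to asserts):

```cpp
#include <cudnn.h>
#include <cuda_runtime.h>
#include <cassert>
#include <cstdio>

int main() {
  cudnnHandle_t handle;
  assert(cudnnCreate(&handle) == CUDNN_STATUS_SUCCESS);

  // Clipped ReLU with ceiling 6.0 == ReLU6, mirroring the coef chosen above.
  cudnnActivationDescriptor_t act;
  assert(cudnnCreateActivationDescriptor(&act) == CUDNN_STATUS_SUCCESS);
  assert(cudnnSetActivationDescriptor(act, CUDNN_ACTIVATION_CLIPPED_RELU,
                                      CUDNN_NOT_PROPAGATE_NAN, 6.0) == CUDNN_STATUS_SUCCESS);

  // A 1x1x1x4 float tensor is enough to see the clamp at both ends.
  cudnnTensorDescriptor_t desc;
  assert(cudnnCreateTensorDescriptor(&desc) == CUDNN_STATUS_SUCCESS);
  assert(cudnnSetTensor4dDescriptor(desc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT,
                                    1, 1, 1, 4) == CUDNN_STATUS_SUCCESS);

  float host_x[4] = {-1.0f, 3.5f, 6.0f, 7.2f}, host_y[4];
  float *x = nullptr, *y = nullptr;
  cudaMalloc(&x, sizeof(host_x));
  cudaMalloc(&y, sizeof(host_y));
  cudaMemcpy(x, host_x, sizeof(host_x), cudaMemcpyHostToDevice);

  const float alpha = 1.0f, beta = 0.0f;
  assert(cudnnActivationForward(handle, act, &alpha, desc, x, &beta, desc, y) ==
         CUDNN_STATUS_SUCCESS);

  cudaMemcpy(host_y, y, sizeof(host_y), cudaMemcpyDeviceToHost);
  for (float v : host_y) std::printf("%.1f ", v);  // expected: 0.0 3.5 6.0 6.0
  std::printf("\n");

  cudaFree(x);
  cudaFree(y);
  cudnnDestroyTensorDescriptor(desc);
  cudnnDestroyActivationDescriptor(act);
  cudnnDestroy(handle);
  return 0;
}
```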
@@ -132,6 +133,7 @@ class ActivationGpuFwdKernel : public GpuKernel {
   }
   std::map<std::string, cudnnActivationMode_t> kernel_map = {{"ReLU", CUDNN_ACTIVATION_RELU},
+                                                             {"ReLU6", CUDNN_ACTIVATION_CLIPPED_RELU},
                                                              {"Tanh", CUDNN_ACTIVATION_TANH},
                                                              {"ELU", CUDNN_ACTIVATION_ELU},
                                                              {"Sigmoid", CUDNN_ACTIVATION_SIGMOID}};
@@ -27,6 +27,15 @@ MS_REG_GPU_KERNEL_ONE(
   KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
   ActivationGradGpuKernel, half)
+MS_REG_GPU_KERNEL_ONE(
+  ReLU6Grad,
+  KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
+  ActivationGradGpuKernel, float)
+MS_REG_GPU_KERNEL_ONE(
+  ReLU6Grad,
+  KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
+  ActivationGradGpuKernel, half)
 MS_REG_GPU_KERNEL_ONE(
   TanhGrad,
   KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
@@ -49,7 +49,7 @@ class ActivationGradGpuKernel : public GpuKernel {
     }
     T *dy = nullptr;
     T *y = nullptr;
-    if (mode_ == CUDNN_ACTIVATION_RELU || mode_ == CUDNN_ACTIVATION_ELU) {
+    if (mode_ == CUDNN_ACTIVATION_RELU || mode_ == CUDNN_ACTIVATION_ELU || mode_ == CUDNN_ACTIVATION_CLIPPED_RELU) {
      dy = GetDeviceAddress<T>(inputs, 0);
      y = GetDeviceAddress<T>(inputs, 1);
    } else {
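For the ReLU-family modes (now including clipped ReLU), the grad op passes the incoming gradient first and the forward output second, so the kernel reads (dy, y) from inputs 0 and 1; the remaining modes appear to take the opposite order. A hedged sketch of the backward call this feeds (illustrative helper, not MindSpore code):

```cpp
#include <cudnn.h>

// Illustrative only: the shape of the cudnnActivationBackward call the grad
// kernel ultimately makes.  For ReLU/ELU/clipped-ReLU the gradient is
// recoverable from the forward output, so y is also supplied where cuDNN asks
// for the forward input x -- an assumption about this kernel, but a common
// pattern for these modes.
cudnnStatus_t Relu6Backward(cudnnHandle_t handle, cudnnActivationDescriptor_t act,
                            cudnnTensorDescriptor_t desc, const float *y,
                            const float *dy, float *dx) {
  const float alpha = 1.0f, beta = 0.0f;
  return cudnnActivationBackward(handle, act, &alpha,
                                 desc, y,   // forward output
                                 desc, dy,  // gradient w.r.t. output
                                 desc, y,   // stand-in for forward input x
                                 &beta, desc, dx);
}
```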
@@ -90,7 +90,8 @@ class ActivationGradGpuKernel : public GpuKernel {
       return true;
     }
     std::vector<int> shape;
-    CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetActivationDescriptor(activation_desc_, mode_, CUDNN_PROPAGATE_NAN, 0.0),
+    double coef = (mode_ == CUDNN_ACTIVATION_CLIPPED_RELU) ? 5.999999 : 0.0;
+    CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetActivationDescriptor(activation_desc_, mode_, CUDNN_PROPAGATE_NAN, coef),
                                 "SetActivationDescriptor failed");
     const int split_dim = 4;
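Note the backward descriptor's ceiling is 5.999999 rather than 6.0, presumably so that elements the forward pass clipped to exactly 6.0 fall outside the pass-through band and receive zero gradient regardless of whether cuDNN's boundary comparison is strict; mathematically, d/dx ReLU6(x) = 1 for 0 < x < 6 and 0 elsewhere. A reference formulation in terms of the forward output y, as this kernel sees it (my interpretation, not code from the PR):

```cpp
// Reference ReLU6 gradient written against the forward output y.
// ceiling mirrors the coef above; 5.999999 keeps y == 6.0 (a clipped
// element) unambiguously in the zero-gradient region, at the cost of a
// ~1e-6 band just below the ceiling, negligible in practice.
inline float Relu6GradRef(float dy, float y, float ceiling = 5.999999f) {
  return (y > 0.0f && y < ceiling) ? dy : 0.0f;
}
```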
@@ -138,6 +139,7 @@ class ActivationGradGpuKernel : public GpuKernel {
   }
   std::map<std::string, cudnnActivationMode_t> kernel_map = {{"ReluGrad", CUDNN_ACTIVATION_RELU},
+                                                             {"ReLU6Grad", CUDNN_ACTIVATION_CLIPPED_RELU},
                                                              {"TanhGrad", CUDNN_ACTIVATION_TANH},
                                                              {"ELUGrad", CUDNN_ACTIVATION_ELU},
                                                              {"SigmoidGrad", CUDNN_ACTIVATION_SIGMOID}};
@@ -54,6 +54,8 @@ static std::map<std::string, std::pair<std::vector<size_t>, std::vector<size_t>>
   // Format insensitive.
   {prim::kPrimRelu->name(), {{0}, {0}}},
   {prim::kPrimReluGrad->name(), {{0, 1}, {0}}},
+  {prim::kPrimRelu6->name(), {{0}, {0}}},
+  {prim::kPrimRelu6Grad->name(), {{0, 1}, {0}}},
   {kSliceOpName, {{0}, {0}}},
   {kTensorAddOpName, {{0, 1}, {0}}},
   {prim::kPrimConcat->name(), {{}, {0}}},
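Each entry in this table appears to pair an op's format-insensitive input indices with its format-insensitive output indices; ReLU6 is elementwise, so its tensors compute the same values in any layout (NCHW/NHWC) and can inherit whatever format the surrounding graph selected. A small standalone snippet showing the table's shape and the reading of the new entries (interpretation, not code from the PR):

```cpp
#include <cstddef>
#include <map>
#include <string>
#include <utility>
#include <vector>

// Op name -> (format-insensitive input indices, format-insensitive output indices).
using InOutIdx = std::pair<std::vector<size_t>, std::vector<size_t>>;

std::map<std::string, InOutIdx> kFormatInsensitive = {
  {"ReLU6", {{0}, {0}}},         // elementwise: input 0 / output 0 accept any layout
  {"ReLU6Grad", {{0, 1}, {0}}},  // dy and y are both layout-agnostic
};
```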
@@ -127,6 +127,7 @@ inline const PrimitivePtr kPrimFusedBatchNormGradEx = std::make_shared<Primitive
 inline const PrimitivePtr kPrimBatchNorm = std::make_shared<Primitive>("BatchNorm");
 inline const PrimitivePtr kPrimBatchNormGrad = std::make_shared<Primitive>("BatchNormGrad");
 inline const PrimitivePtr kPrimReluGrad = std::make_shared<Primitive>("ReluGrad");
+inline const PrimitivePtr kPrimRelu6Grad = std::make_shared<Primitive>("ReLU6Grad");
 inline const PrimitivePtr kPrimConv2DBackpropInput = std::make_shared<Primitive>("Conv2DBackpropInput");
 inline const PrimitivePtr kPrimConv2DBackpropFilter = std::make_shared<Primitive>("Conv2DBackpropFilter");
 inline const PrimitivePtr kPrimDepthwiseConv2dNative = std::make_shared<Primitive>("DepthwiseConv2dNative");
@@ -151,6 +152,7 @@ inline const PrimitivePtr kPrimOneHot = std::make_shared<Primitive>("OneHot");
 inline const PrimitivePtr kPrimGelu = std::make_shared<Primitive>("Gelu");
 inline const PrimitivePtr kPrimGeluGrad = std::make_shared<Primitive>("GeluGrad");
 inline const PrimitivePtr kPrimRelu = std::make_shared<Primitive>("ReLU");
+inline const PrimitivePtr kPrimRelu6 = std::make_shared<Primitive>("ReLU6");
 inline const PrimitivePtr kPrimReluV2 = std::make_shared<Primitive>("ReLUV2");
 inline const PrimitivePtr kPrimZerosLike = std::make_shared<Primitive>("ZerosLike");
 inline const PrimitivePtr kPrimBpropCut = std::make_shared<Primitive>("bprop_cut");