depthwise
GitOrigin-RevId: 950d2f4889
master
| @@ -543,11 +543,11 @@ class RegionRestrictedConvolutionForward : public ConvolutionBase<param::Convolu | |||||
| public: | public: | ||||
| /** | /** | ||||
| * \param[in] src (n, ic, ih, iw) | |||||
| * \param[in] filter (oc, ic, fh, fw) | |||||
| * \param[in] src (n, ic, ih, iw) or (n, g*icpg, ih, iw) | |||||
| * \param[in] filter (oc, ic, fh, fw) or (g, ocpg, icpg, fh, fw) | |||||
| * \param[in] rin (n, ih, iw) | * \param[in] rin (n, ih, iw) | ||||
| * \param[in] rout (n, oh, ow) | * \param[in] rout (n, oh, ow) | ||||
| * \param[out] dst (n, oc, oh, ow) | |||||
| * \param[out] dst (n, oc, oh, ow) or (n, g*ocpg, oh, ow) | |||||
| */ | */ | ||||
| virtual void exec( | virtual void exec( | ||||
| _megdnn_tensor_in src, _megdnn_tensor_in filter, _megdnn_tensor_in rin, | _megdnn_tensor_in src, _megdnn_tensor_in filter, _megdnn_tensor_in rin, | ||||
| @@ -592,11 +592,11 @@ class RegionRestrictedConvolutionBackwardData | |||||
| public: | public: | ||||
| /** | /** | ||||
| * \param[in] filter (oc, ic, fh, fw) | |||||
| * \param[in] diff (n, oc, oh, ow) | |||||
| * \param[in] filter (oc, ic, fh, fw) or (g, ocpg, icpg, fh, fw) | |||||
| * \param[in] diff (n, oc, oh, ow) or (n, g*ocpg, oh, ow) | |||||
| * \param[in] rin (n, ih, iw) | * \param[in] rin (n, ih, iw) | ||||
| * \param[in] rout (n, oh, ow) | * \param[in] rout (n, oh, ow) | ||||
| * \param[out] grad (n, ic, ih, iw) | |||||
| * \param[out] grad (n, ic, ih, iw) or (n, g*icpg, ih, iw) | |||||
| */ | */ | ||||
| virtual void exec( | virtual void exec( | ||||
| _megdnn_tensor_in filter, _megdnn_tensor_in diff, _megdnn_tensor_in rin, | _megdnn_tensor_in filter, _megdnn_tensor_in diff, _megdnn_tensor_in rin, | ||||
| @@ -635,11 +635,11 @@ class RegionRestrictedConvolutionBackwardFilter | |||||
| public: | public: | ||||
| /** | /** | ||||
| * \param[in] src (n, ic, ih, iw) | |||||
| * \param[in] diff (n, oc, oh, ow) | |||||
| * \param[in] src (n, ic, ih, iw) or (n, g*icpg, ih, iw) | |||||
| * \param[in] diff (n, oc, oh, ow) or (n, g*ocpg, oh, ow) | |||||
| * \param[in] rin (n, ih, iw) | * \param[in] rin (n, ih, iw) | ||||
| * \param[in] rout (n, oh, ow) | * \param[in] rout (n, oh, ow) | ||||
| * \param[out] grad (oc, ic, fh, fw) | |||||
| * \param[out] grad (oc, ic, fh, fw) or (g, ocpg, icpg, fh, fw) | |||||
| */ | */ | ||||
| virtual void exec( | virtual void exec( | ||||
| _megdnn_tensor_in src, _megdnn_tensor_in diff, _megdnn_tensor_in rin, | _megdnn_tensor_in src, _megdnn_tensor_in diff, _megdnn_tensor_in rin, | ||||
| @@ -20,7 +20,7 @@ void RegionRestrictedConvolutionForwardImpl::exec( | |||||
| src.layout, dst.layout, fm, | src.layout, dst.layout, fm, | ||||
| param().compute_mode == Param::ComputeMode::DEFAULT); | param().compute_mode == Param::ComputeMode::DEFAULT); | ||||
| megdnn_assert( | megdnn_assert( | ||||
| fm.group > 1 && src.layout.dtype.category() == DTypeCategory::FLOAT && | |||||
| src.layout.dtype.category() == DTypeCategory::FLOAT && | |||||
| param().compute_mode == Param::ComputeMode::DEFAULT && | param().compute_mode == Param::ComputeMode::DEFAULT && | ||||
| fm.spatial_ndim == 2 && fm.icpg == 1 && fm.ocpg == 1 && | fm.spatial_ndim == 2 && fm.icpg == 1 && fm.ocpg == 1 && | ||||
| fm.dilation[0] == 1 && fm.dilation[1] == 1 && !fm.should_flip && | fm.dilation[0] == 1 && fm.dilation[1] == 1 && !fm.should_flip && | ||||
| @@ -76,7 +76,7 @@ void RegionRestrictedConvolutionBackwardDataImpl::exec( | |||||
| diff.layout, grad.layout, fm, | diff.layout, grad.layout, fm, | ||||
| param().compute_mode == Param::ComputeMode::DEFAULT); | param().compute_mode == Param::ComputeMode::DEFAULT); | ||||
| megdnn_assert( | megdnn_assert( | ||||
| fm.group > 1 && diff.layout.dtype.category() == DTypeCategory::FLOAT && | |||||
| diff.layout.dtype.category() == DTypeCategory::FLOAT && | |||||
| param().compute_mode == Param::ComputeMode::DEFAULT && | param().compute_mode == Param::ComputeMode::DEFAULT && | ||||
| fm.spatial_ndim == 2 && fm.icpg == 1 && fm.ocpg == 1 && | fm.spatial_ndim == 2 && fm.icpg == 1 && fm.ocpg == 1 && | ||||
| fm.dilation[0] == 1 && fm.dilation[1] == 1 && !fm.should_flip && | fm.dilation[0] == 1 && fm.dilation[1] == 1 && !fm.should_flip && | ||||
| @@ -120,7 +120,7 @@ void RegionRestrictedConvolutionBackwardFilterImpl::exec( | |||||
| workspace.size); | workspace.size); | ||||
| megdnn_assert( | megdnn_assert( | ||||
| fm.group > 1 && src.layout.dtype.category() == DTypeCategory::FLOAT && | |||||
| src.layout.dtype.category() == DTypeCategory::FLOAT && | |||||
| param().compute_mode == Param::ComputeMode::DEFAULT && | param().compute_mode == Param::ComputeMode::DEFAULT && | ||||
| fm.spatial_ndim == 2 && fm.icpg == 1 && fm.ocpg == 1 && | fm.spatial_ndim == 2 && fm.icpg == 1 && fm.ocpg == 1 && | ||||
| fm.dilation[0] == 1 && fm.dilation[1] == 1 && !fm.should_flip && | fm.dilation[0] == 1 && fm.dilation[1] == 1 && !fm.should_flip && | ||||
| @@ -53,6 +53,7 @@ TEST_F(CUDA, REGION_RESTRICTED_CONV_FORWARD_LARGE_FILTER) { | |||||
| run(4, 8, 32, 5, 5 / 2, 1); | run(4, 8, 32, 5, 5 / 2, 1); | ||||
| run(4, 8, 32, 7, 7 / 2, 1); | run(4, 8, 32, 7, 7 / 2, 1); | ||||
| run(1, 2, 32, 9, 9 / 2, 1); | run(1, 2, 32, 9, 9 / 2, 1); | ||||
| run(4, 1, 32, 9, 9 / 2, 1); | |||||
| run(4, 8, 32, 11, 11 / 2, 1); | run(4, 8, 32, 11, 11 / 2, 1); | ||||
| run(4, 8, 32, 13, 13 / 2, 1); | run(4, 8, 32, 13, 13 / 2, 1); | ||||
| run(4, 8, 32, 15, 15 / 2, 1); | run(4, 8, 32, 15, 15 / 2, 1); | ||||
| @@ -723,6 +724,7 @@ TEST_F(CUDA, REGION_RESTRICTED_CONV_BWD_DATA_FP32) { | |||||
| run(4, 8, 32, 25, 25 / 2, 1); | run(4, 8, 32, 25, 25 / 2, 1); | ||||
| run(4, 8, 32, 27, 27 / 2, 1); | run(4, 8, 32, 27, 27 / 2, 1); | ||||
| run(4, 8, 32, 29, 29 / 2, 1); | run(4, 8, 32, 29, 29 / 2, 1); | ||||
| run(4, 1, 32, 29, 29 / 2, 1); | |||||
| run(4, 8, 32, 31, 31 / 2, 1); | run(4, 8, 32, 31, 31 / 2, 1); | ||||
| } | } | ||||
| } | } | ||||
| @@ -779,6 +781,7 @@ TEST_F(CUDA, REGION_RESTRICTED_CONV_BWD_DATA_FP32_RIN_EQ_ROUT) { | |||||
| run(4, 8, 32, 21, 21 / 2, 1); | run(4, 8, 32, 21, 21 / 2, 1); | ||||
| run(4, 8, 32, 23, 23 / 2, 1); | run(4, 8, 32, 23, 23 / 2, 1); | ||||
| run(4, 8, 32, 25, 25 / 2, 1); | run(4, 8, 32, 25, 25 / 2, 1); | ||||
| run(4, 1, 32, 25, 25 / 2, 1); | |||||
| run(4, 8, 32, 27, 27 / 2, 1); | run(4, 8, 32, 27, 27 / 2, 1); | ||||
| run(4, 8, 32, 29, 29 / 2, 1); | run(4, 8, 32, 29, 29 / 2, 1); | ||||
| run(4, 8, 32, 31, 31 / 2, 1); | run(4, 8, 32, 31, 31 / 2, 1); | ||||
| @@ -841,6 +844,7 @@ TEST_F(CUDA, REGION_RESTRICTED_CONV_BWD_FILTER_FP32) { | |||||
| run(4, 8, 32, 23, 23 / 2, 1); | run(4, 8, 32, 23, 23 / 2, 1); | ||||
| run(4, 8, 32, 25, 25 / 2, 1); | run(4, 8, 32, 25, 25 / 2, 1); | ||||
| run(4, 8, 32, 27, 27 / 2, 1); | run(4, 8, 32, 27, 27 / 2, 1); | ||||
| run(4, 1, 32, 27, 27 / 2, 1); | |||||
| run(4, 8, 32, 29, 29 / 2, 1); | run(4, 8, 32, 29, 29 / 2, 1); | ||||
| run(4, 8, 32, 31, 31 / 2, 1); | run(4, 8, 32, 31, 31 / 2, 1); | ||||
| } | } | ||||
| @@ -899,6 +903,7 @@ TEST_F(CUDA, REGION_RESTRICTED_CONV_BWD_FILTER_FP32_RIN_EQ_ROUT) { | |||||
| run(4, 8, 32, 17, 17 / 2, 1); | run(4, 8, 32, 17, 17 / 2, 1); | ||||
| run(4, 8, 32, 19, 19 / 2, 1); | run(4, 8, 32, 19, 19 / 2, 1); | ||||
| run(4, 8, 32, 21, 21 / 2, 1); | run(4, 8, 32, 21, 21 / 2, 1); | ||||
| run(4, 1, 32, 21, 21 / 2, 1); | |||||
| run(4, 8, 32, 23, 23 / 2, 1); | run(4, 8, 32, 23, 23 / 2, 1); | ||||
| run(4, 8, 32, 25, 25 / 2, 1); | run(4, 8, 32, 25, 25 / 2, 1); | ||||
| run(4, 8, 32, 27, 27 / 2, 1); | run(4, 8, 32, 27, 27 / 2, 1); | ||||
| @@ -2016,7 +2016,12 @@ def region_restricted_conv( | |||||
| stride_h, stride_w = expand_hw(stride) | stride_h, stride_w = expand_hw(stride) | ||||
| dilate_h, dilate_w = expand_hw(dilation) | dilate_h, dilate_w = expand_hw(dilation) | ||||
| sparse_type = "dense" if groups == 1 else "group" | |||||
| sparse_type = "group" | |||||
| assert groups > 0, ( | |||||
| "RegionRestrictedConv expected grouped conv mode, \ | |||||
| which requires groups > 0, but got groups=%d" | |||||
| % (groups) | |||||
| ) | |||||
| op = builtin.RegionRestrictedConvolution( | op = builtin.RegionRestrictedConvolution( | ||||
| stride_h=stride_h, | stride_h=stride_h, | ||||
| stride_w=stride_w, | stride_w=stride_w, | ||||
| @@ -1050,8 +1050,8 @@ class RegionRestrictedConv(_ConvNd): | |||||
| Refer to :class:`~.module.padding.Pad` for more information. | Refer to :class:`~.module.padding.Pad` for more information. | ||||
| Note: | Note: | ||||
| * ``weight`` usually has shape ``(out_channels, in_channels, height, width)`` , | |||||
| if groups is not 1, shape will be ``(groups, out_channels // groups, in_channels // groups, height, width)`` | |||||
| * weight shape will be ``(groups, out_channels // groups, in_channels // groups, height, width)``, | |||||
| because RegionRestrictedConv supports grouped conv only. | |||||
| Examples: | Examples: | ||||
| >>> import numpy as np | >>> import numpy as np | ||||
| @@ -1071,7 +1071,7 @@ class RegionRestrictedConv(_ConvNd): | |||||
| in_channels: int, | in_channels: int, | ||||
| out_channels: int, | out_channels: int, | ||||
| kernel_size: Union[int, Tuple[int, int]], | kernel_size: Union[int, Tuple[int, int]], | ||||
| groups: int, | |||||
| groups: int = 1, | |||||
| bias: bool = True, | bias: bool = True, | ||||
| stride: Union[int, Tuple[int, int]] = 1, | stride: Union[int, Tuple[int, int]] = 1, | ||||
| padding: Union[int, Tuple[int, int]] = 0, | padding: Union[int, Tuple[int, int]] = 0, | ||||
| @@ -1111,9 +1111,6 @@ class RegionRestrictedConv(_ConvNd): | |||||
| ichl = self.in_channels | ichl = self.in_channels | ||||
| ochl = self.out_channels | ochl = self.out_channels | ||||
| kh, kw = self.kernel_size | kh, kw = self.kernel_size | ||||
| if group == 1: | |||||
| # Assume format is NCHW | |||||
| return (ochl, ichl, kh, kw) | |||||
| assert ( | assert ( | ||||
| ichl % group == 0 and ochl % group == 0 | ichl % group == 0 and ochl % group == 0 | ||||
| @@ -971,17 +971,16 @@ def test_region_restricted_conv_forward_backward_naive(bias): | |||||
| @pytest.mark.skipif( | @pytest.mark.skipif( | ||||
| not is_cuda_available(), reason="rrconv cuda kernel requires cuda available" | not is_cuda_available(), reason="rrconv cuda kernel requires cuda available" | ||||
| ) | ) | ||||
| @pytest.mark.parametrize("bias", [True, False]) | |||||
| def test_region_restricted_conv_forward_backward_cuda(bias): | |||||
| @pytest.mark.parametrize("bias, groups", [(True, 1), (True, 3), (False, 1), (False, 3)]) | |||||
| def test_region_restricted_conv_forward_backward_cuda(bias, groups): | |||||
| import megengine as mge | import megengine as mge | ||||
| import megengine.module as M | import megengine.module as M | ||||
| from megengine.autodiff import GradManager | from megengine.autodiff import GradManager | ||||
| import megengine.distributed as dist | |||||
| # params | # params | ||||
| handle = "gpu0" | handle = "gpu0" | ||||
| N = 1 | N = 1 | ||||
| GROUP = 3 | |||||
| GROUP = groups | |||||
| FH = FW = 2 | FH = FW = 2 | ||||
| IH = IW = 2 | IH = IW = 2 | ||||
| OH = OW = 1 | OH = OW = 1 | ||||
| @@ -1051,8 +1050,8 @@ def test_region_restricted_conv_forward_backward_cuda(bias): | |||||
| @pytest.mark.skipif( | @pytest.mark.skipif( | ||||
| not is_cuda_available(), reason="rrconv cuda kernel requires cuda available" | not is_cuda_available(), reason="rrconv cuda kernel requires cuda available" | ||||
| ) | ) | ||||
| @pytest.mark.parametrize("bias", [True, False]) | |||||
| def test_region_restricted_conv_forward_backward_uint8(bias): | |||||
| @pytest.mark.parametrize("bias, groups", [(True, 1), (True, 3), (False, 1), (False, 3)]) | |||||
| def test_region_restricted_conv_forward_backward_uint8(bias, groups): | |||||
| import megengine as mge | import megengine as mge | ||||
| import megengine.module as M | import megengine.module as M | ||||
| from megengine.autodiff import GradManager | from megengine.autodiff import GradManager | ||||
| @@ -1060,7 +1059,7 @@ def test_region_restricted_conv_forward_backward_uint8(bias): | |||||
| # params | # params | ||||
| handle = "gpu0" | handle = "gpu0" | ||||
| N = 1 | N = 1 | ||||
| GROUP = 2 | |||||
| GROUP = groups | |||||
| FH = FW = 1 | FH = FW = 1 | ||||
| IH = IW = 4 | IH = IW = 4 | ||||
| OH = OW = 4 | OH = OW = 4 | ||||