GitOrigin-RevId: 44c586f912
tags/v1.5.0
| @@ -1189,7 +1189,12 @@ ConvolutionBackwardFilter::check_exec(const TensorLayout& src, | |||||
| diff.dtype.category() == DTypeCategory::FLOAT && | diff.dtype.category() == DTypeCategory::FLOAT && | ||||
| grad.dtype.category() == DTypeCategory::FLOAT, | grad.dtype.category() == DTypeCategory::FLOAT, | ||||
| "only float type is supported for conv backward filter"); | "only float type is supported for conv backward filter"); | ||||
| auto ret = check_layout_fwd(src, grad, diff); | |||||
| auto src_fwd = src; | |||||
| auto diff_fwd = diff; | |||||
| src_fwd.init_contiguous_stride(); | |||||
| diff_fwd.init_contiguous_stride(); | |||||
| auto ret = check_layout_fwd(src_fwd, grad, diff_fwd); | |||||
| auto required_workspace_in_bytes = get_workspace_in_bytes(src, diff, grad); | auto required_workspace_in_bytes = get_workspace_in_bytes(src, diff, grad); | ||||
| megdnn_assert(workspace_in_bytes >= required_workspace_in_bytes); | megdnn_assert(workspace_in_bytes >= required_workspace_in_bytes); | ||||
| return ret; | return ret; | ||||
| @@ -20,6 +20,10 @@ using namespace convolution; | |||||
| bool ConvolutionBackwardDataImpl::AlgoChanwise::is_available( | bool ConvolutionBackwardDataImpl::AlgoChanwise::is_available( | ||||
| const SizeArgs& args) const { | const SizeArgs& args) const { | ||||
| if (!args.grad_layout->is_contiguous() || | |||||
| !args.diff_layout->is_contiguous()) { | |||||
| return false; | |||||
| } | |||||
| if ((args.diff_layout->dtype == args.filter_layout->dtype && | if ((args.diff_layout->dtype == args.filter_layout->dtype && | ||||
| args.diff_layout->dtype == dtype::BFloat16()) || | args.diff_layout->dtype == dtype::BFloat16()) || | ||||
| (args.diff_layout->dtype == args.filter_layout->dtype && | (args.diff_layout->dtype == args.filter_layout->dtype && | ||||
| @@ -30,6 +30,10 @@ inline bool is_available_small(const chanwise::Param& param) { | |||||
| bool ConvolutionBackwardDataImpl::AlgoChanwiseSmall::is_available( | bool ConvolutionBackwardDataImpl::AlgoChanwiseSmall::is_available( | ||||
| const SizeArgs& args) const { | const SizeArgs& args) const { | ||||
| if (!args.grad_layout->is_contiguous() || | |||||
| !args.diff_layout->is_contiguous()) { | |||||
| return false; | |||||
| } | |||||
| if ((args.diff_layout->dtype == args.filter_layout->dtype && | if ((args.diff_layout->dtype == args.filter_layout->dtype && | ||||
| args.diff_layout->dtype == dtype::BFloat16()) || | args.diff_layout->dtype == dtype::BFloat16()) || | ||||
| (args.diff_layout->dtype == args.filter_layout->dtype && | (args.diff_layout->dtype == args.filter_layout->dtype && | ||||
| @@ -71,7 +71,7 @@ ConvolutionBackwardFilterImpl::AlgoBase::SizeArgs::SizeArgs( | |||||
| ConvolutionBackwardFilterImpl *o, | ConvolutionBackwardFilterImpl *o, | ||||
| const TensorLayout &src, const TensorLayout &diff, | const TensorLayout &src, const TensorLayout &diff, | ||||
| const TensorLayout &grad): | const TensorLayout &grad): | ||||
| SizeArgs(o, src, diff, grad, o->check_layout_fwd(src, grad, diff)) | |||||
| SizeArgs(o, src, diff, grad, o->make_canonized_filter_meta(src.ndim, grad)) | |||||
| { | { | ||||
| } | } | ||||
| @@ -19,6 +19,10 @@ using namespace convolution; | |||||
| bool ConvolutionBackwardFilterImpl::AlgoChanwise::is_available( | bool ConvolutionBackwardFilterImpl::AlgoChanwise::is_available( | ||||
| const SizeArgs &args) const { | const SizeArgs &args) const { | ||||
| if (!args.grad_layout->is_contiguous() || | |||||
| !args.diff_layout->is_contiguous()) { | |||||
| return false; | |||||
| } | |||||
| if (args.src_layout->dtype == args.src_layout->dtype && | if (args.src_layout->dtype == args.src_layout->dtype && | ||||
| args.diff_layout->dtype == dtype::BFloat16()) { | args.diff_layout->dtype == dtype::BFloat16()) { | ||||
| return false; | return false; | ||||
| @@ -21,6 +21,13 @@ using namespace convolution; | |||||
| bool ConvolutionBackwardFilterImpl::AlgoCUDNN::is_available( | bool ConvolutionBackwardFilterImpl::AlgoCUDNN::is_available( | ||||
| const SizeArgs &args) const { | const SizeArgs &args) const { | ||||
| if (args.grad_filter_meta.format != Param::Format::NCHW && | |||||
| args.grad_filter_meta.format != Param::Format::NHWC) { | |||||
| if (!args.grad_layout->is_contiguous() || | |||||
| !args.diff_layout->is_contiguous()) { | |||||
| return false; | |||||
| } | |||||
| } | |||||
| auto& cudnn = args.handle->cudnn(); | auto& cudnn = args.handle->cudnn(); | ||||
| CUDNNBwdFilterDescs D; | CUDNNBwdFilterDescs D; | ||||
| @@ -64,8 +64,9 @@ ConvolutionBackwardFilterImpl::AlgoMatmul::get_subopr_list( | |||||
| const TensorLayoutArray& layouts, const OperatorBase* opr) const { | const TensorLayoutArray& layouts, const OperatorBase* opr) const { | ||||
| const ConvolutionBackwardFilterImpl* conv_backward_filter_opr = | const ConvolutionBackwardFilterImpl* conv_backward_filter_opr = | ||||
| static_cast<const ConvolutionBackwardFilterImpl*>(opr); | static_cast<const ConvolutionBackwardFilterImpl*>(opr); | ||||
| CanonizedFilterMeta fm = conv_backward_filter_opr->check_layout_fwd( | |||||
| layouts[0], layouts[2], layouts[1]); | |||||
| CanonizedFilterMeta fm = | |||||
| conv_backward_filter_opr->make_canonized_filter_meta( | |||||
| layouts[0].ndim, layouts[2]); | |||||
| auto&& config = sub_opr_config(fm, layouts[0], layouts[1], layouts[2], | auto&& config = sub_opr_config(fm, layouts[0], layouts[1], layouts[2], | ||||
| conv_backward_filter_opr); | conv_backward_filter_opr); | ||||
| @@ -519,6 +519,38 @@ TEST_F(CUDA, CONVOLUTION_BACKWARD_FILTER_MATMUL) { | |||||
| .set_param(arg.param) | .set_param(arg.param) | ||||
| .exec(TensorLayoutArray{src, dst, filter}); | .exec(TensorLayoutArray{src, dst, filter}); | ||||
| } | } | ||||
| //! noncontiguous case | |||||
| { | |||||
| NormalRNG default_rng; | |||||
| param::Convolution param; | |||||
| param.pad_h = param.pad_w = 1; | |||||
| checker.set_rng(0, &default_rng) | |||||
| .set_rng(1, &default_rng) | |||||
| .set_param(param) | |||||
| .execl(TensorLayoutArray{ | |||||
| {{2, 16, 7, 7}, {1568, 49, 7, 1}, dtype::Float32()}, | |||||
| {{2, 16, 7, 7}, {1568, 49, 7, 1}, dtype::Float32()}, | |||||
| {{16, 16, 3, 3}, {144, 9, 3, 1}, dtype::Float32()}}); | |||||
| } | |||||
| } | |||||
| TEST_F(CUDA, CONVOLUTION_BACKWARD_FILTER_CUDNN) { | |||||
| if (cuda::is_compute_capability_required(7, 0)) | |||||
| return; | |||||
| using namespace convolution; | |||||
| Checker<ConvolutionBackwardFilter> checker(handle_cuda()); | |||||
| checker.set_before_exec_callback(AlgoChecker<ConvolutionBackwardFilter>( | |||||
| "CUDNN_CONVOLUTION")); | |||||
| //! noncontiguous case | |||||
| { | |||||
| param::Convolution param; | |||||
| param.pad_h = param.pad_w = 1; | |||||
| checker.set_param(param).execl(TensorLayoutArray{ | |||||
| {{2, 16, 7, 7}, {1568, 49, 7, 1}, dtype::Float32()}, | |||||
| {{2, 16, 7, 7}, {1568, 49, 7, 1}, dtype::Float32()}, | |||||
| {{16, 16, 3, 3}, {144, 9, 3, 1}, dtype::Float32()} | |||||
| }); | |||||
| } | |||||
| } | } | ||||
| TEST_F(CUDA, CONV_CONFIG_COMBINATIONS) { | TEST_F(CUDA, CONV_CONFIG_COMBINATIONS) { | ||||