GitOrigin-RevId: 3d3c31b021
tags/v1.5.0
| @@ -122,8 +122,6 @@ Convolution3DBase::CanonizedFilterMeta Convolution3DBase::deduce_layout_fwd( | |||||
| TensorLayout& dst) const { | TensorLayout& dst) const { | ||||
| auto errmsg = [&]() { return get_errmsg(src, filter, dst, param()); }; | auto errmsg = [&]() { return get_errmsg(src, filter, dst, param()); }; | ||||
| MEGDNN_MARK_USED_VAR(errmsg); | MEGDNN_MARK_USED_VAR(errmsg); | ||||
| megdnn_assert_contiguous(src); | |||||
| megdnn_assert_contiguous(filter); | |||||
| megdnn_assert(src.ndim >= 5_z, "%s", errmsg().c_str()); | megdnn_assert(src.ndim >= 5_z, "%s", errmsg().c_str()); | ||||
| megdnn_assert(src.dtype == filter.dtype, "%s", errmsg().c_str()); | megdnn_assert(src.dtype == filter.dtype, "%s", errmsg().c_str()); | ||||
| if (param().data_type == Param::DataType::FLOAT) { | if (param().data_type == Param::DataType::FLOAT) { | ||||
| @@ -170,6 +168,8 @@ Convolution3DBase::CanonizedFilterMeta Convolution3DBase::deduce_layout_fwd( | |||||
| Convolution3DBase::CanonizedFilterMeta Convolution3DBase::check_layout_fwd( | Convolution3DBase::CanonizedFilterMeta Convolution3DBase::check_layout_fwd( | ||||
| const TensorLayout& src, const TensorLayout& filter, | const TensorLayout& src, const TensorLayout& filter, | ||||
| const TensorLayout& dst) const { | const TensorLayout& dst) const { | ||||
| megdnn_assert_contiguous(src); | |||||
| megdnn_assert_contiguous(filter); | |||||
| TensorLayout dst_expected; | TensorLayout dst_expected; | ||||
| auto ret = deduce_layout_fwd(src, filter, dst_expected); | auto ret = deduce_layout_fwd(src, filter, dst_expected); | ||||
| megdnn_assert_eq_layout(dst_expected, dst); | megdnn_assert_eq_layout(dst_expected, dst); | ||||
| @@ -185,7 +185,12 @@ void Convolution3DForward::deduce_layout(const TensorLayout& src, | |||||
| Convolution3DBase::CanonizedFilterMeta Convolution3DForward::check_exec( | Convolution3DBase::CanonizedFilterMeta Convolution3DForward::check_exec( | ||||
| const TensorLayout& src, const TensorLayout& filter, | const TensorLayout& src, const TensorLayout& filter, | ||||
| const TensorLayout& dst, size_t workspace_in_bytes) { | const TensorLayout& dst, size_t workspace_in_bytes) { | ||||
| auto ret = check_layout_fwd(src, filter, dst); | |||||
| auto src_fwd = src; | |||||
| auto dst_fwd = dst; | |||||
| src_fwd.init_contiguous_stride(); | |||||
| dst_fwd.init_contiguous_stride(); | |||||
| auto ret = check_layout_fwd(src_fwd, filter, dst_fwd); | |||||
| auto required_workspace_in_bytes = get_workspace_in_bytes(src, filter, dst); | auto required_workspace_in_bytes = get_workspace_in_bytes(src, filter, dst); | ||||
| megdnn_assert(workspace_in_bytes >= required_workspace_in_bytes); | megdnn_assert(workspace_in_bytes >= required_workspace_in_bytes); | ||||
| return ret; | return ret; | ||||
| @@ -196,7 +201,12 @@ Convolution3DBase::CanonizedFilterMeta Convolution3DBackwardData::check_exec( | |||||
| const TensorLayout& grad, size_t workspace_in_bytes) { | const TensorLayout& grad, size_t workspace_in_bytes) { | ||||
| megdnn_assert(param().data_type == Param::DataType::FLOAT, | megdnn_assert(param().data_type == Param::DataType::FLOAT, | ||||
| "only float type is supported for conv backward"); | "only float type is supported for conv backward"); | ||||
| auto ret = check_layout_fwd(grad, filter, diff); | |||||
| auto diff_fwd = diff; | |||||
| auto grad_fwd = grad; | |||||
| diff_fwd.init_contiguous_stride(); | |||||
| grad_fwd.init_contiguous_stride(); | |||||
| auto ret = check_layout_fwd(grad_fwd, filter, diff_fwd); | |||||
| auto required_workspace_in_bytes = | auto required_workspace_in_bytes = | ||||
| get_workspace_in_bytes(filter, diff, grad); | get_workspace_in_bytes(filter, diff, grad); | ||||
| megdnn_assert(workspace_in_bytes >= required_workspace_in_bytes); | megdnn_assert(workspace_in_bytes >= required_workspace_in_bytes); | ||||
| @@ -244,7 +254,12 @@ Convolution3DBase::CanonizedFilterMeta Convolution3DBackwardFilter::check_exec( | |||||
| const TensorLayout& grad, size_t workspace_in_bytes) { | const TensorLayout& grad, size_t workspace_in_bytes) { | ||||
| megdnn_assert(param().data_type == Param::DataType::FLOAT, | megdnn_assert(param().data_type == Param::DataType::FLOAT, | ||||
| "only float type is supported for conv backward"); | "only float type is supported for conv backward"); | ||||
| auto ret = check_layout_fwd(src, grad, diff); | |||||
| auto src_fwd = src; | |||||
| auto diff_fwd = diff; | |||||
| src_fwd.init_contiguous_stride(); | |||||
| diff_fwd.init_contiguous_stride(); | |||||
| auto ret = check_layout_fwd(src_fwd, grad, diff_fwd); | |||||
| auto required_workspace_in_bytes = get_workspace_in_bytes(src, diff, grad); | auto required_workspace_in_bytes = get_workspace_in_bytes(src, diff, grad); | ||||
| megdnn_assert(workspace_in_bytes >= required_workspace_in_bytes); | megdnn_assert(workspace_in_bytes >= required_workspace_in_bytes); | ||||
| return ret; | return ret; | ||||
| @@ -44,6 +44,8 @@ bool ConvolutionBackwardDataImpl::AlgoGroupConvGeneral::is_available( | |||||
| args.diff_layout->dtype == dtype::QuantizedS8())) { | args.diff_layout->dtype == dtype::QuantizedS8())) { | ||||
| return false; | return false; | ||||
| } | } | ||||
| if (args.filter_meta.group <= 1) | |||||
| return false; | |||||
| auto sub_args = args; | auto sub_args = args; | ||||
| TensorLayout diff_pg, grad_pg; | TensorLayout diff_pg, grad_pg; | ||||
| modify_size_args(sub_args, diff_pg, grad_pg); | modify_size_args(sub_args, diff_pg, grad_pg); | ||||
| @@ -19,7 +19,7 @@ using namespace convolution; | |||||
| bool ConvolutionBackwardFilterImpl::AlgoChanwise::is_available( | bool ConvolutionBackwardFilterImpl::AlgoChanwise::is_available( | ||||
| const SizeArgs &args) const { | const SizeArgs &args) const { | ||||
| if (!args.grad_layout->is_contiguous() || | |||||
| if (!args.src_layout->is_contiguous() || | |||||
| !args.diff_layout->is_contiguous()) { | !args.diff_layout->is_contiguous()) { | ||||
| return false; | return false; | ||||
| } | } | ||||
| @@ -42,6 +42,8 @@ bool ConvolutionBackwardFilterImpl::AlgoGroupConvGeneral::is_available( | |||||
| args.diff_layout->dtype == dtype::BFloat16()) { | args.diff_layout->dtype == dtype::BFloat16()) { | ||||
| return false; | return false; | ||||
| } | } | ||||
| if (args.grad_filter_meta.group <= 1) | |||||
| return false; | |||||
| auto sub_args = args; | auto sub_args = args; | ||||
| TensorLayout src_pg, diff_pg; | TensorLayout src_pg, diff_pg; | ||||
| modify_size_args(sub_args, src_pg, diff_pg); | modify_size_args(sub_args, src_pg, diff_pg); | ||||
| @@ -64,7 +64,7 @@ Convolution3DBackwardDataImpl::AlgoBase::SizeArgs::SizeArgs( | |||||
| Convolution3DBackwardDataImpl *o, | Convolution3DBackwardDataImpl *o, | ||||
| const TensorLayout &filter, const TensorLayout &diff, | const TensorLayout &filter, const TensorLayout &diff, | ||||
| const TensorLayout &grad): | const TensorLayout &grad): | ||||
| SizeArgs(o, o->check_layout_fwd(grad, filter, diff), diff, grad) | |||||
| SizeArgs(o, o->make_canonized_filter_meta(grad.ndim, filter), diff, grad) | |||||
| { | { | ||||
| } | } | ||||
| @@ -19,6 +19,10 @@ using namespace convolution3d; | |||||
| bool Convolution3DBackwardDataImpl::AlgoChanwise::is_available( | bool Convolution3DBackwardDataImpl::AlgoChanwise::is_available( | ||||
| const SizeArgs &args) const { | const SizeArgs &args) const { | ||||
| if (!args.grad_layout->is_contiguous() || | |||||
| !args.diff_layout->is_contiguous()) { | |||||
| return false; | |||||
| } | |||||
| auto &&fm = args.filter_meta; | auto &&fm = args.filter_meta; | ||||
| return args.filter_meta.format == Param::Format::NCDHW && | return args.filter_meta.format == Param::Format::NCDHW && | ||||
| args.diff_layout->dtype.category() == DTypeCategory::FLOAT && | args.diff_layout->dtype.category() == DTypeCategory::FLOAT && | ||||
| @@ -38,6 +38,8 @@ Convolution3DBackwardDataImpl::AlgoGroupConvGeneral::AlgoGroupConvGeneral( | |||||
| bool Convolution3DBackwardDataImpl::AlgoGroupConvGeneral::is_available( | bool Convolution3DBackwardDataImpl::AlgoGroupConvGeneral::is_available( | ||||
| const SizeArgs &args) const { | const SizeArgs &args) const { | ||||
| if (args.filter_meta.group <= 1) | |||||
| return false; | |||||
| auto sub_args = args; | auto sub_args = args; | ||||
| TensorLayout diff_pg, grad_pg; | TensorLayout diff_pg, grad_pg; | ||||
| modify_size_args(sub_args, diff_pg, grad_pg); | modify_size_args(sub_args, diff_pg, grad_pg); | ||||
| @@ -67,7 +67,7 @@ Convolution3DBackwardFilterImpl::AlgoBase::SizeArgs::SizeArgs( | |||||
| Convolution3DBackwardFilterImpl *o, | Convolution3DBackwardFilterImpl *o, | ||||
| const TensorLayout &src, const TensorLayout &diff, | const TensorLayout &src, const TensorLayout &diff, | ||||
| const TensorLayout &grad): | const TensorLayout &grad): | ||||
| SizeArgs(o, src, diff, o->check_layout_fwd(src, grad, diff)) | |||||
| SizeArgs(o, src, diff, o->make_canonized_filter_meta(src.ndim, grad)) | |||||
| { | { | ||||
| } | } | ||||
| @@ -19,6 +19,10 @@ using namespace convolution3d; | |||||
| bool Convolution3DBackwardFilterImpl::AlgoChanwise::is_available( | bool Convolution3DBackwardFilterImpl::AlgoChanwise::is_available( | ||||
| const SizeArgs &args) const { | const SizeArgs &args) const { | ||||
| if (!args.src_layout->is_contiguous() || | |||||
| !args.diff_layout->is_contiguous()) { | |||||
| return false; | |||||
| } | |||||
| auto &&fm = args.grad_filter_meta; | auto &&fm = args.grad_filter_meta; | ||||
| return fm.format == Param::Format::NCDHW && | return fm.format == Param::Format::NCDHW && | ||||
| args.diff_layout->dtype.category() == DTypeCategory::FLOAT && | args.diff_layout->dtype.category() == DTypeCategory::FLOAT && | ||||
| @@ -38,6 +38,8 @@ Convolution3DBackwardFilterImpl::AlgoGroupConvGeneral::AlgoGroupConvGeneral( | |||||
| bool Convolution3DBackwardFilterImpl::AlgoGroupConvGeneral::is_available( | bool Convolution3DBackwardFilterImpl::AlgoGroupConvGeneral::is_available( | ||||
| const SizeArgs &args) const { | const SizeArgs &args) const { | ||||
| if (args.grad_filter_meta.group <= 1) | |||||
| return false; | |||||
| auto sub_args = args; | auto sub_args = args; | ||||
| TensorLayout src_pg, diff_pg; | TensorLayout src_pg, diff_pg; | ||||
| modify_size_args(sub_args, src_pg, diff_pg); | modify_size_args(sub_args, src_pg, diff_pg); | ||||
| @@ -17,6 +17,10 @@ using namespace cuda; | |||||
| bool Convolution3DBackwardFilterImpl::AlgoInplaceMatmul::is_available( | bool Convolution3DBackwardFilterImpl::AlgoInplaceMatmul::is_available( | ||||
| const SizeArgs &args) const { | const SizeArgs &args) const { | ||||
| if (!args.src_layout->is_contiguous() || | |||||
| !args.diff_layout->is_contiguous()) { | |||||
| return false; | |||||
| } | |||||
| auto &&fm = args.grad_filter_meta; | auto &&fm = args.grad_filter_meta; | ||||
| return args.grad_filter_meta.format == Param::Format::NCDHW && | return args.grad_filter_meta.format == Param::Format::NCDHW && | ||||
| args.src_layout->dtype == dtype::Float32() && | args.src_layout->dtype == dtype::Float32() && | ||||
| @@ -69,7 +69,7 @@ Convolution3DForwardImpl::AlgoBase::SizeArgs::SizeArgs( | |||||
| Convolution3DForwardImpl *o, | Convolution3DForwardImpl *o, | ||||
| const TensorLayout &src, const TensorLayout &filter, | const TensorLayout &src, const TensorLayout &filter, | ||||
| const TensorLayout &dst): | const TensorLayout &dst): | ||||
| SizeArgs(o, src, o->check_layout_fwd(src, filter, dst), dst) | |||||
| SizeArgs(o, src, o->make_canonized_filter_meta(src.ndim, filter), dst) | |||||
| { | { | ||||
| } | } | ||||
| @@ -19,6 +19,10 @@ using namespace convolution3d; | |||||
| bool Convolution3DForwardImpl::AlgoChanwise::is_available( | bool Convolution3DForwardImpl::AlgoChanwise::is_available( | ||||
| const SizeArgs &args) const { | const SizeArgs &args) const { | ||||
| if (!args.src_layout->is_contiguous() || | |||||
| !args.dst_layout->is_contiguous()) { | |||||
| return false; | |||||
| } | |||||
| auto &&fm = args.filter_meta; | auto &&fm = args.filter_meta; | ||||
| return args.filter_meta.format == Param::Format::NCDHW && | return args.filter_meta.format == Param::Format::NCDHW && | ||||
| args.src_layout->dtype.category() == DTypeCategory::FLOAT && | args.src_layout->dtype.category() == DTypeCategory::FLOAT && | ||||
| @@ -45,6 +45,8 @@ Convolution3DForwardImpl::AlgoGroupConvGeneral::AlgoGroupConvGeneral( | |||||
| bool Convolution3DForwardImpl::AlgoGroupConvGeneral::is_available( | bool Convolution3DForwardImpl::AlgoGroupConvGeneral::is_available( | ||||
| const SizeArgs &args) const { | const SizeArgs &args) const { | ||||
| if (args.filter_meta.group <= 1) | |||||
| return false; | |||||
| auto sub_args = args; | auto sub_args = args; | ||||
| TensorLayout src_pg, dst_pg; | TensorLayout src_pg, dst_pg; | ||||
| modify_size_args(sub_args, src_pg, dst_pg); | modify_size_args(sub_args, src_pg, dst_pg); | ||||
| @@ -215,7 +215,6 @@ void backward_data(_megdnn_tensor_in filter, | |||||
| _megdnn_tensor_in diff, | _megdnn_tensor_in diff, | ||||
| _megdnn_tensor_out grad, | _megdnn_tensor_out grad, | ||||
| const Convolution3D::CanonizedFilterMeta &filter_meta) { | const Convolution3D::CanonizedFilterMeta &filter_meta) { | ||||
| megdnn_assert(grad.layout.is_contiguous()); | |||||
| memset(grad.raw_ptr, 0, grad.layout.span().dist_byte()); | memset(grad.raw_ptr, 0, grad.layout.span().dist_byte()); | ||||
| megdnn_assert(filter_meta.spatial_ndim == 3); | megdnn_assert(filter_meta.spatial_ndim == 3); | ||||
| compute3d<gtype, ftype, dtype, StrategyBwdData>( | compute3d<gtype, ftype, dtype, StrategyBwdData>( | ||||
| @@ -227,7 +226,6 @@ void backward_filter(_megdnn_tensor_in src, | |||||
| _megdnn_tensor_in diff, | _megdnn_tensor_in diff, | ||||
| _megdnn_tensor_out grad, | _megdnn_tensor_out grad, | ||||
| const Convolution3D::CanonizedFilterMeta &filter_meta) { | const Convolution3D::CanonizedFilterMeta &filter_meta) { | ||||
| megdnn_assert(grad.layout.is_contiguous()); | |||||
| memset(grad.raw_ptr, 0, grad.layout.span().dist_byte()); | memset(grad.raw_ptr, 0, grad.layout.span().dist_byte()); | ||||
| megdnn_assert(filter_meta.spatial_ndim == 3); | megdnn_assert(filter_meta.spatial_ndim == 3); | ||||
| compute3d<stype, gtype, dtype, StrategyBwdFlt>( | compute3d<stype, gtype, dtype, StrategyBwdFlt>( | ||||
| @@ -384,16 +384,6 @@ TEST_F(CUDA, CONVOLUTION_BACKWARD_DATA_INT8_NCHW_DP4A) { | |||||
| } | } | ||||
| checker.set_rng(0, &rng).set_rng(1, &rng).set_param(arg.param).exec( | checker.set_rng(0, &rng).set_rng(1, &rng).set_param(arg.param).exec( | ||||
| TensorLayoutArray{filter, dst, src}); | TensorLayoutArray{filter, dst, src}); | ||||
| //! noncontiguous case | |||||
| { | |||||
| param::Convolution param; | |||||
| param.pad_h = param.pad_w = 1; | |||||
| checker.set_param(param).execl(TensorLayoutArray{ | |||||
| {{16, 16, 3, 3}, {144, 9, 3, 1}, dtype::QuantizedS8{1.3f}}, | |||||
| {{2, 16, 7, 7}, {1568, 49, 7, 1}, dtype::QuantizedS8{1.2f}}, | |||||
| {{2, 16, 7, 7}, {1568, 49, 7, 1}, dtype::QuantizedS8{1.2f}} | |||||
| }); | |||||
| } | |||||
| } | } | ||||
| } | } | ||||
| @@ -150,6 +150,77 @@ TEST_F(CUDA, CONVOLUTION3D_MATMUL_FORWARD) { | |||||
| } | } | ||||
| } | } | ||||
// Runs Convolution3DForward on the CUDA handle with explicitly strided
// layouts, using an AlgoChecker constructed with the name "CUDNN" as the
// before-exec callback.
// NOTE(review): AlgoChecker is defined elsewhere; presumably it asserts that
// the executed algorithm matches the given name -- confirm.
| TEST_F(CUDA, CONVOLUTION3D_FORWARD_NONCONTIG_CUDNN) { | |||||
| using namespace convolution3d; | |||||
| Checker<Convolution3DForward> checker(handle_cuda()); | |||||
| checker.set_before_exec_callback(AlgoChecker<Convolution3DForward>( | |||||
| "CUDNN")); | |||||
| param::Convolution3D param; | |||||
// Padding of 1 on depth, height and width; all other fields keep the
// defaults of param::Convolution3D.
| param.pad_d = param.pad_h = param.pad_w = 1; | |||||
| checker.set_dtype(0, dtype::Float32()) | |||||
| .set_dtype(1, dtype::Float32()) | |||||
| .set_epsilon(1e-3); | |||||
| //! noncontiguous case | |||||
| { | |||||
// The first and third layouts have shape {4, 5, 16, 16, 16}. Packed strides
// would give a batch stride of 5 * 4096 = 20480, so the 40960 used here
// leaves a gap between batches. The second layout {5, 5, 3, 3, 3} uses its
// packed strides {135, 27, 9, 3, 1}.
// Presumably the array order is (src, filter, dst) -- TODO confirm against
// Checker::execl.
| checker.set_param(param).execl(TensorLayoutArray{ | |||||
| {{4, 5, 16, 16, 16}, | |||||
| {40960, 4096, 256, 16, 1}, | |||||
| dtype::Float32()}, | |||||
| {{5, 5, 3, 3, 3}, {135, 27, 9, 3, 1}, dtype::Float32()}, | |||||
| {{4, 5, 16, 16, 16}, | |||||
| {40960, 4096, 256, 16, 1}, | |||||
| dtype::Float32()}}); | |||||
| } | |||||
| } | |||||
// Runs Convolution3DForward on the CUDA handle with explicitly strided
// layouts, using an AlgoChecker constructed with the name "INPLACE_MATMUL"
// as the before-exec callback.
// NOTE(review): AlgoChecker is defined elsewhere; presumably it asserts that
// the executed algorithm matches the given name -- confirm.
| TEST_F(CUDA, CONVOLUTION3D_FORWARD_NONCONTIG_INPLACE_MATMUL) { | |||||
| using namespace convolution3d; | |||||
| Checker<Convolution3DForward> checker(handle_cuda()); | |||||
| checker.set_before_exec_callback(AlgoChecker<Convolution3DForward>( | |||||
| "INPLACE_MATMUL")); | |||||
| param::Convolution3D param; | |||||
// Padding of 1 on depth, height and width; all other fields keep the
// defaults of param::Convolution3D.
| param.pad_d = param.pad_h = param.pad_w = 1; | |||||
| checker.set_dtype(0, dtype::Float32()) | |||||
| .set_dtype(1, dtype::Float32()) | |||||
| .set_epsilon(1e-3); | |||||
| //! noncontiguous case | |||||
| { | |||||
// The first and third layouts have shape {4, 5, 16, 16, 16} with a batch
// stride of 40960, twice the packed value 5 * 4096 = 20480, so consecutive
// batches are not adjacent in memory. The second layout {5, 5, 3, 3, 3}
// uses its packed strides.
// Presumably the array order is (src, filter, dst) -- TODO confirm against
// Checker::execl.
| checker.set_param(param).execl(TensorLayoutArray{ | |||||
| {{4, 5, 16, 16, 16}, | |||||
| {40960, 4096, 256, 16, 1}, | |||||
| dtype::Float32()}, | |||||
| {{5, 5, 3, 3, 3}, {135, 27, 9, 3, 1}, dtype::Float32()}, | |||||
| {{4, 5, 16, 16, 16}, | |||||
| {40960, 4096, 256, 16, 1}, | |||||
| dtype::Float32()}}); | |||||
| } | |||||
| } | |||||
// Runs Convolution3DForward on the CUDA handle with explicitly strided
// layouts and a 1x1x1 filter, using an AlgoChecker constructed with the
// name "1x1x1" as the before-exec callback.
// NOTE(review): AlgoChecker is defined elsewhere; presumably it asserts that
// the executed algorithm matches the given name -- confirm.
| TEST_F(CUDA, CONVOLUTION3D_FORWARD_NONCONTIG_1x1x1) { | |||||
| using namespace convolution3d; | |||||
| Checker<Convolution3DForward> checker(handle_cuda()); | |||||
| checker.set_before_exec_callback(AlgoChecker<Convolution3DForward>( | |||||
| "1x1x1")); | |||||
// Unlike the sibling non-contiguous forward tests, no padding is set here:
// the param is used as default-constructed.
| param::Convolution3D param; | |||||
| checker.set_dtype(0, dtype::Float32()) | |||||
| .set_dtype(1, dtype::Float32()) | |||||
| .set_epsilon(1e-3); | |||||
| //! noncontiguous case | |||||
| { | |||||
// The first and third layouts have shape {4, 5, 16, 16, 16} with a batch
// stride of 40960, twice the packed value 5 * 4096 = 20480. The second
// layout {5, 5, 1, 1, 1} uses strides {5, 1, 1, 1, 1}.
// Presumably the array order is (src, filter, dst) -- TODO confirm against
// Checker::execl.
| checker.set_param(param).execl(TensorLayoutArray{ | |||||
| {{4, 5, 16, 16, 16}, | |||||
| {40960, 4096, 256, 16, 1}, | |||||
| dtype::Float32()}, | |||||
| {{5, 5, 1, 1, 1}, {5, 1, 1, 1, 1}, dtype::Float32()}, | |||||
| {{4, 5, 16, 16, 16}, | |||||
| {40960, 4096, 256, 16, 1}, | |||||
| dtype::Float32()}}); | |||||
| } | |||||
| } | |||||
| #if MEGDNN_WITH_BENCHMARK | #if MEGDNN_WITH_BENCHMARK | ||||
| TEST_F(CUDA, BENCHMARK_CONVOLUTION3D_MATMUL_BACKWARD_FILTER) { | TEST_F(CUDA, BENCHMARK_CONVOLUTION3D_MATMUL_BACKWARD_FILTER) { | ||||
| using namespace convolution3d; | using namespace convolution3d; | ||||
| @@ -343,6 +414,60 @@ TEST_F(CUDA, CONVOLUTION3D_MATMUL_BACKWARD_FILTER) { | |||||
| } | } | ||||
| } | } | ||||
// Runs Convolution3DBackwardData on the CUDA handle with explicitly strided
// layouts, using an AlgoChecker constructed with the name "CUDNN" as the
// before-exec callback.
// NOTE(review): AlgoChecker is defined elsewhere; presumably it asserts that
// the executed algorithm matches the given name -- confirm.
| TEST_F(CUDA, CONVOLUTION3D_BACKWARD_DATA_NONCONTIG_CUDNN) { | |||||
| using namespace convolution3d; | |||||
| Checker<Convolution3DBackwardData> checker(handle_cuda()); | |||||
| checker.set_before_exec_callback(AlgoChecker<Convolution3DBackwardData>( | |||||
| "CUDNN")); | |||||
| Convolution3DBackwardData::Param param; | |||||
// Padding of 1 on depth, height and width; other fields keep their defaults.
| param.pad_d = param.pad_h = param.pad_w = 1; | |||||
// The same NormalRNG instance is registered for tensor indices 0 and 1.
| NormalRNG default_rng; | |||||
| checker.set_dtype(0, dtype::Float32()) | |||||
| .set_dtype(1, dtype::Float32()) | |||||
| .set_rng(0, &default_rng) | |||||
| .set_rng(1, &default_rng) | |||||
| .set_epsilon(1e-3) | |||||
| .set_param(param); | |||||
| //! noncontiguous case | |||||
| { | |||||
// The first layout {5, 5, 3, 3, 3} uses its packed strides
// {135, 27, 9, 3, 1}. The second and third layouts have shape
// {4, 5, 16, 16, 16} with a batch stride of 40960, twice the packed value
// 5 * 4096 = 20480.
// Presumably the array order is (filter, diff, grad) -- TODO confirm
// against Checker::execl.
| checker.execl(TensorLayoutArray{ | |||||
| {{5, 5, 3, 3, 3}, {135, 27, 9, 3, 1}, dtype::Float32()}, | |||||
| {{4, 5, 16, 16, 16}, | |||||
| {40960, 4096, 256, 16, 1}, | |||||
| dtype::Float32()}, | |||||
| {{4, 5, 16, 16, 16}, | |||||
| {40960, 4096, 256, 16, 1}, | |||||
| dtype::Float32()}}); | |||||
| } | |||||
| } | |||||
// Runs Convolution3DBackwardFilter on the CUDA handle with explicitly
// strided layouts, using an AlgoChecker constructed with the name "CUDNN"
// as the before-exec callback.
// NOTE(review): AlgoChecker is defined elsewhere; presumably it asserts that
// the executed algorithm matches the given name -- confirm.
| TEST_F(CUDA, CONVOLUTION3D_BACKWARD_FILTER_NONCONTIG_CUDNN) { | |||||
| using namespace convolution3d; | |||||
| Checker<Convolution3DBackwardFilter> checker(handle_cuda()); | |||||
| checker.set_before_exec_callback(AlgoChecker<Convolution3DBackwardFilter>( | |||||
| "CUDNN")); | |||||
| Convolution3DBackwardFilter::Param param; | |||||
// Padding of 1 on depth, height and width; other fields keep their defaults.
| param.pad_d = param.pad_h = param.pad_w = 1; | |||||
// The same NormalRNG instance is registered for tensor indices 0 and 1.
| NormalRNG default_rng; | |||||
| checker.set_dtype(0, dtype::Float32()) | |||||
| .set_dtype(1, dtype::Float32()) | |||||
| .set_rng(0, &default_rng) | |||||
| .set_rng(1, &default_rng) | |||||
| .set_epsilon(1e-3) | |||||
| .set_param(param); | |||||
| //! noncontiguous case | |||||
| { | |||||
// The first and second layouts have shape {4, 5, 16, 16, 16} with a batch
// stride of 40960, twice the packed value 5 * 4096 = 20480. The third
// layout {5, 5, 3, 3, 3} uses its packed strides {135, 27, 9, 3, 1}.
// Presumably the array order is (src, diff, grad) -- TODO confirm against
// Checker::execl.
| checker.execl(TensorLayoutArray{ | |||||
| {{4, 5, 16, 16, 16}, | |||||
| {40960, 4096, 256, 16, 1}, | |||||
| dtype::Float32()}, | |||||
| {{4, 5, 16, 16, 16}, | |||||
| {40960, 4096, 256, 16, 1}, | |||||
| dtype::Float32()}, | |||||
| {{5, 5, 3, 3, 3}, {135, 27, 9, 3, 1}, dtype::Float32()}}); | |||||
| } | |||||
| } | |||||
| /* | /* | ||||
| TEST_F(CUDA, CONV_CONFIG_COMBINATIONS) { | TEST_F(CUDA, CONV_CONFIG_COMBINATIONS) { | ||||
| auto eps_getter = [](bool f16, int stage, const char *name) -> float { | auto eps_getter = [](bool f16, int stage, const char *name) -> float { | ||||