#include "megdnn/oprs.h"
#include "src/common/utils.h"

namespace megdnn {

/*!
 * \brief Deduce the pooling output layout from the input layout and params.
 *
 * The dimension order of \p src is selected by param.format, as read by the
 * indexing below:
 *   - NCHW:    {n, c, h, w}                       (4 dims)
 *   - NHWC:    {n, h, w, c}                       (4 dims)
 *   - NCHW4 / NCHW44 / NCHW88 / NCHW32 / NCHW64:
 *              {n, c / pack, h, w, pack}          (5 dims)
 *   - CHWN4:   {c / 4, h, w, n, 4}                (5 dims)
 *   - NHWCD4:  {n, h, c / 4, w, 4}                (5 dims)
 *
 * \param src   input layout; must be contiguous and have the number of
 *              dimensions required by the format
 * \param param pooling parameters (format, window, stride, padding, mode)
 * \param dst   output; overwritten with the deduced layout, which has the
 *              dtype of \p src
 *
 * An unsupported format or a dimension-count mismatch triggers megdnn_assert.
 */
void PoolingBase::deduce_layout_impl(
        const TensorLayout& src, const Param& param, TensorLayout& dst) {
    auto pformat = param.format;

    // the overhead of generating error message is about 18x of the other part of this
    // function so we use a function to wrap the error message and get it only when
    // need.
    auto get_errmsg = [&](void) -> std::string {
        std::string errmsg =
                megdnn_layout_msg(src) + ", " + megdnn_layout_msg(dst) + ", " +
                "pad_h=" + std::to_string(param.pad_h) + ", " +
                "pad_w=" + std::to_string(param.pad_w) + ", " +
                "stride_h=" + std::to_string(param.stride_h) + ", " +
                "stride_w=" + std::to_string(param.stride_w) + ", " +
                "window_h=" + std::to_string(param.window_h) + ", " +
                "window_w=" + std::to_string(param.window_w) + ", " +
                "is_max=" + std::to_string(param.mode == Mode::MAX) + ", " +
                "is_nhwc=" + std::to_string(pformat == Param::Format::NHWC) + ", " +
                "is_nhwcd4=" + std::to_string(pformat == Param::Format::NHWCD4);
        return errmsg;
    };
    // The lambda is only referenced from assertion arguments; mark it used so
    // that it is not reported as unused when those arguments are not evaluated.
    MEGDNN_MARK_USED_VAR(get_errmsg);

    megdnn_assert_contiguous(src);

    // Indices into src of the first spatial dim (height), the channel dim and
    // the batch dim. Every branch below assigns spatial_pos and c_pos.
    size_t spatial_pos = 0;
    size_t c_pos = 0;
    size_t batch_pos = 0;
    if (pformat == Param::Format::NCHW) {
        megdnn_assert(src.ndim == 4_z, "%s", get_errmsg().c_str());
        spatial_pos = 2;
        c_pos = 1;
    } else if (pformat == Param::Format::NHWC) {
        megdnn_assert(src.ndim == 4_z, "%s", get_errmsg().c_str());
        spatial_pos = 1;
        c_pos = 3;
    } else if (
            pformat == Param::Format::NCHW4 || pformat == Param::Format::NCHW44 ||
            pformat == Param::Format::NCHW88 || pformat == Param::Format::NCHW32 ||
            pformat == Param::Format::NCHW64) {
        megdnn_assert(src.ndim == 5_z, "%s", get_errmsg().c_str());
        spatial_pos = 2;
        c_pos = 1;
    } else if (pformat == Param::Format::CHWN4) {
        // CHWN4 is read as {c / 4, h, w, n, 4}: validate the rank like every
        // other format before indexing src[3] for the batch size.
        megdnn_assert(src.ndim == 5_z, "%s", get_errmsg().c_str());
        spatial_pos = 1;
        c_pos = 0;
        batch_pos = 3;
    } else {
        megdnn_assert(
                pformat == Param::Format::NHWCD4 && src.ndim == 5_z, "%s",
                get_errmsg().c_str());
        spatial_pos = 1;
        c_pos = 2;
    }

    size_t n = src[batch_pos];
    size_t c = src[c_pos];
    size_t ih = src[spatial_pos];
    size_t iw = src[spatial_pos + 1];

    // For packed formats src[c_pos] holds the number of channel groups; scale
    // it by the pack size to get the full channel count.
    if (pformat == Param::Format::NHWCD4) {
        c *= 4;
        // In NHWCD4 the channel-group dim sits between height and width.
        iw = src[spatial_pos + 2];
    }
    if (pformat == Param::Format::NCHW4 || pformat == Param::Format::NCHW44 ||
        pformat == Param::Format::CHWN4) {
        c *= 4;
    }
    if (pformat == Param::Format::NCHW88) {
        c *= 8;
    }
    if (pformat == Param::Format::NCHW32) {
        c *= 32;
    }
    if (pformat == Param::Format::NCHW64) {
        c *= 64;
    }

    size_t oh, ow;
    size_t fh = param.window_h;
    size_t fw = param.window_w;
    size_t sh = param.stride_h;
    size_t sw = param.stride_w;
    size_t ph = param.pad_h;
    size_t pw = param.pad_w;

    // moving some python assert to here
    // megdnn_assert()

    // Padding that reaches the window size is only warned about, not rejected.
    if (ph >= fh || pw >= fw) {
        megdnn_log_warn(
                "pooling padding size (%zu %zu) should not be bigger than "
                "window size (%zu %zu), it only can be used in CaffePooling",
                pw, ph, fw, fh);
    }
    // Fills oh and ow from the input extent, window, stride and padding.
    infer_conv_shape2d(ih, iw, fh, fw, sh, sw, ph, pw, oh, ow);

    // Rebuild dst in the same dimension order as src, with the pooled spatial
    // extent. Note that only the NCHW branch does not forward src.format.
    if (pformat == Param::Format::NCHW) {
        dst = TensorLayout(TensorShape({n, c, oh, ow}), src.dtype);
    } else if (pformat == Param::Format::NHWC) {
        dst = TensorLayout({n, oh, ow, c}, src.dtype, src.format);
    } else if (pformat == Param::Format::NCHW4 || pformat == Param::Format::NCHW44) {
        dst = TensorLayout{{n, c / 4, oh, ow, 4}, src.dtype, src.format};
    } else if (pformat == Param::Format::NCHW88) {
        dst = TensorLayout{{n, c / 8, oh, ow, 8}, src.dtype, src.format};
    } else if (pformat == Param::Format::NCHW32) {
        dst = TensorLayout{{n, c / 32, oh, ow, 32}, src.dtype, src.format};
    } else if (pformat == Param::Format::NCHW64) {
        dst = TensorLayout{{n, c / 64, oh, ow, 64}, src.dtype, src.format};
    } else if (pformat == Param::Format::CHWN4) {
        dst = TensorLayout{{c / 4, oh, ow, n, 4}, src.dtype, src.format};
    } else {
        megdnn_assert(pformat == Param::Format::NHWCD4, "invalid pooling format");
        dst = TensorLayout{{n, oh, c / 4, ow, 4}, src.dtype, src.format};
    }
}

//! Deduce the forward output layout \p dst from \p src using this operator's
//! own param().
void PoolingBase::deduce_layout_fwd(const TensorLayout& src, TensorLayout& dst) {
    deduce_layout_impl(src, param(), dst);
}

/*!
 * \brief Validate a forward (src, dst) layout pair.
 *
 * Asserts that src and dst have the same dtype, that dst equals the layout
 * deduced from src, and that the dtype is of FLOAT category, Int8, or of
 * QUANTIZED category.
 */
void PoolingBase::check_layout_fwd(const TensorLayout& src, const TensorLayout& dst) {
    TensorLayout dst_expected;
    megdnn_assert_eq_dtype(src, dst);
    deduce_layout_fwd(src, dst_expected);
    megdnn_assert_eq_layout(dst_expected, dst);
    megdnn_assert(
            src.dtype.category() == DTypeCategory::FLOAT ||
            src.dtype == dtype::Int8() ||
            src.dtype.category() == DTypeCategory::QUANTIZED);
}

//! Deduce the output layout \p dst of forward pooling from \p src.
void PoolingForward::deduce_layout(const TensorLayout& src, TensorLayout& dst) {
    deduce_layout_fwd(src, dst);
}

/*!
 * \brief Validate the arguments of a forward pooling execution.
 *
 * Checks the (src, dst) layouts and asserts that the provided workspace is at
 * least as large as get_workspace_in_bytes(src, dst) reports.
 */
void PoolingForward::check_exec(
        const TensorLayout& src, const TensorLayout& dst, size_t workspace_in_bytes) {
    check_layout_fwd(src, dst);
    auto required_workspace_in_bytes = get_workspace_in_bytes(src, dst);
    megdnn_assert(workspace_in_bytes >= required_workspace_in_bytes);
}

/*!
 * \brief Validate the arguments of a backward pooling execution.
 *
 * Checks the forward (src, dst) layouts, asserts that grad has the layout of
 * src and diff has the layout of dst, and asserts that the provided workspace
 * is at least as large as get_workspace_in_bytes(src, dst, diff, grad)
 * reports.
 */
void PoolingBackward::check_exec(
        const TensorLayout& src, const TensorLayout& dst, const TensorLayout& diff,
        const TensorLayout& grad, size_t workspace_in_bytes) {
    check_layout_fwd(src, dst);
    megdnn_assert_eq_layout(src, grad);
    megdnn_assert_eq_layout(dst, diff);
    auto required_workspace_in_bytes = get_workspace_in_bytes(src, dst, diff, grad);
    megdnn_assert(workspace_in_bytes >= required_workspace_in_bytes);
}

}  // namespace megdnn

// vim: syntax=cpp.doxygen