| @@ -1353,6 +1353,53 @@ public: | |||||
| protected: | protected: | ||||
| void check_exec(const TensorLayout& dst, size_t workspace_in_bytes); | void check_exec(const TensorLayout& dst, size_t workspace_in_bytes); | ||||
| }; | }; | ||||
| /*! | |||||
| * \brief standard padding operator | |||||
| * Inputs must have the same dtype, and the output tensor shape must greater or equal than | |||||
| * input tensor in every dimensions, the extra space will be fulled with m which default to | |||||
| * be 0. | |||||
| */ | |||||
| class PaddingBase: public OperatorBase { | |||||
| DEF_OPR_PARAM(Padding); | |||||
| DEF_OPR_IMPL(PaddingBase, OperatorBase, 1, 1); | |||||
| public: | |||||
| using Mode = Param::PaddingMode; | |||||
| protected: | |||||
| SmallVector<size_t> get_offsets(); | |||||
| void check_exec(const TensorLayout& src, const TensorLayout& dst); | |||||
| }; | |||||
| class PaddingForward: public PaddingBase { | |||||
| DEF_OPR_IMPL(PaddingForward, PaddingBase, 1, 1); | |||||
| public: | |||||
| virtual void exec(_megdnn_tensor_in src, _megdnn_tensor_out dst) = 0; | |||||
| void exec(_megdnn_tensor_in src, _megdnn_tensor_out dst, | |||||
| _megdnn_workspace) { | |||||
| return exec(src, dst); | |||||
| } | |||||
| virtual size_t get_workspace_in_bytes(const TensorLayout& src, const TensorLayout& dst) = 0; | |||||
| void deduce_layout(const TensorLayout &src, TensorLayout &dst); | |||||
| protected: | |||||
| void forward_check_exec(const TensorLayout& src, const TensorLayout& dst); | |||||
| }; | |||||
| using Padding = PaddingForward; | |||||
| class PaddingBackward: public PaddingBase { | |||||
| DEF_OPR_IMPL(PaddingBackward, PaddingBase, 1, 1); | |||||
| public: | |||||
| virtual void exec(_megdnn_tensor_in src, _megdnn_tensor_out dst) = 0; | |||||
| void exec(_megdnn_tensor_in src, _megdnn_tensor_out dst, | |||||
| _megdnn_workspace) { | |||||
| return exec(src, dst); | |||||
| } | |||||
| virtual size_t get_workspace_in_bytes(const TensorLayout& src, const TensorLayout& dst) = 0; | |||||
| protected: | |||||
| void backward_check_exec(const TensorLayout& src, const TensorLayout& dst); | |||||
| }; | |||||
| } // namespace megdnn | } // namespace megdnn | ||||
| #include "megdnn/internal/opr_header_epilogue.h" | #include "megdnn/internal/opr_header_epilogue.h" | ||||
| @@ -1171,3 +1171,29 @@ Note: NCHW_NCHW4_WEIGHT will auto pad oc and ic, you should remove oc in later o | |||||
| add_fields('int32', 'qmax', '2147483647') | add_fields('int32', 'qmax', '2147483647') | ||||
| ) | ) | ||||
| pdef('Fill').add_fields('float32', 'value', '0') | pdef('Fill').add_fields('float32', 'value', '0') | ||||
PADDING_MODES = [Doc('REPLICATE', 'aaaaaa|abcdefgh|hhhhhhh'),
                 Doc('REFLECT', 'fedcba|abcdefgh|hgfedcb'),
                 Doc('CONSTANT', 'iiiiii|abcdefgh|iiiiiii')]
# Padding supports up to 7 dims; each dim carries a front and a back offset.
_padding = pdef('Padding')
for _dim in range(7):
    _padding = _padding.add_fields(
        'uint32',
        Doc('front_offset_dim{}'.format(_dim), 'offset in dim {}'.format(_dim)),
        0)
for _dim in range(7):
    _padding = _padding.add_fields(
        'uint32',
        Doc('back_offset_dim{}'.format(_dim),
            'back offset in dim{}'.format(_dim)),
        0)
(_padding.
 add_fields('float32', Doc('padding_val', 'param of padding opr'), 0).
 add_enum('PaddingMode', *PADDING_MODES,
          name_field='padding_mode', default=2,
          member_alias=[(m, 'PADDING_{}'.format(m)) for m in PADDING_MODES]
          )
 )
| @@ -217,7 +217,9 @@ private: | |||||
| cb(CheckHasInf) \ | cb(CheckHasInf) \ | ||||
| cb(LSQForward) \ | cb(LSQForward) \ | ||||
| cb(LSQBackward) \ | cb(LSQBackward) \ | ||||
| cb(Fill) | |||||
| cb(Fill) \ | |||||
| cb(PaddingForward) \ | |||||
| cb(PaddingBackward) | |||||
| /*! | /*! | ||||
| * \brief specialize HandleImpl::create_operator for a single opr type; | * \brief specialize HandleImpl::create_operator for a single opr type; | ||||
| @@ -27,6 +27,8 @@ struct OprTrait {}; | |||||
| static const bool can_deduce_layout = CanDeduceLayout; \ | static const bool can_deduce_layout = CanDeduceLayout; \ | ||||
| } | } | ||||
| DEF(Padding, 2, false, true); | |||||
| DEF(PaddingBackward, 2, false, false); | |||||
| DEF(ConvolutionForward, 3, true, true); | DEF(ConvolutionForward, 3, true, true); | ||||
| DEF(Convolution3DForward, 3, true, true); | DEF(Convolution3DForward, 3, true, true); | ||||
| DEF(ConvolutionBackwardData, 3, true, false); | DEF(ConvolutionBackwardData, 3, true, false); | ||||
| @@ -0,0 +1,144 @@ | |||||
| /** | |||||
| * \file dnn/src/common/padding.cpp | |||||
| * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
| * | |||||
| * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, | |||||
| * software distributed under the License is distributed on an | |||||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
| * implied. | |||||
| */ | |||||
| #include "megdnn/oprs.h" | |||||
| #include "megdnn/oprs/general.h" | |||||
| #include "megdnn/thin/small_vector.h" | |||||
| #include "src/common/opr_param_defs_enumv.cuh" | |||||
| #include "src/common/utils.h" | |||||
| namespace megdnn { | |||||
| using padding_param = megdnn::param_enumv::Padding; | |||||
| void PaddingForward::forward_check_exec(const TensorLayout& src, | |||||
| const TensorLayout& dst) { | |||||
| check_exec(src, dst); | |||||
| megdnn_assert(src.dtype.enumv() != DTypeEnum::Bool && | |||||
| src.dtype.enumv() != DTypeEnum::IntB1 && | |||||
| src.dtype.enumv() != DTypeEnum::IntB2 && | |||||
| src.dtype.enumv() != DTypeEnum::IntB4, | |||||
| "unsupported %s dtype for forward padding opr", | |||||
| src.dtype.name()); | |||||
| } | |||||
| void PaddingForward::deduce_layout(const TensorLayout& src, TensorLayout& dst) { | |||||
| SmallVector<size_t> offsets(get_offsets()); | |||||
| TensorShape dst_shape; | |||||
| switch (src.ndim) { | |||||
| case 1: | |||||
| dst_shape = {src.shape[0] + offsets[0] + offsets[1]}; | |||||
| break; | |||||
| case 2: | |||||
| dst_shape = {src.shape[0] + offsets[0] + offsets[1], | |||||
| src.shape[1] + offsets[2] + offsets[3]}; | |||||
| break; | |||||
| case 3: | |||||
| dst_shape = {src.shape[0] + offsets[0] + offsets[1], | |||||
| src.shape[1] + offsets[2] + offsets[3], | |||||
| src.shape[2] + offsets[4] + offsets[5]}; | |||||
| break; | |||||
| case 4: | |||||
| dst_shape = {src.shape[0] + offsets[0] + offsets[1], | |||||
| src.shape[1] + offsets[2] + offsets[3], | |||||
| src.shape[2] + offsets[4] + offsets[5], | |||||
| src.shape[3] + offsets[6] + offsets[7]}; | |||||
| break; | |||||
| case 5: | |||||
| dst_shape = {src.shape[0] + offsets[0] + offsets[1], | |||||
| src.shape[1] + offsets[2] + offsets[3], | |||||
| src.shape[2] + offsets[4] + offsets[5], | |||||
| src.shape[3] + offsets[6] + offsets[7], | |||||
| src.shape[4] + offsets[8] + offsets[9]}; | |||||
| break; | |||||
| case 6: | |||||
| dst_shape = {src.shape[0] + offsets[0] + offsets[1], | |||||
| src.shape[1] + offsets[2] + offsets[3], | |||||
| src.shape[2] + offsets[4] + offsets[5], | |||||
| src.shape[3] + offsets[6] + offsets[7], | |||||
| src.shape[4] + offsets[8] + offsets[9], | |||||
| src.shape[5] + offsets[10] + offsets[11]}; | |||||
| break; | |||||
| case 7: | |||||
| dst_shape = {src.shape[0] + offsets[0] + offsets[1], | |||||
| src.shape[1] + offsets[2] + offsets[3], | |||||
| src.shape[2] + offsets[4] + offsets[5], | |||||
| src.shape[3] + offsets[6] + offsets[7], | |||||
| src.shape[4] + offsets[8] + offsets[9], | |||||
| src.shape[5] + offsets[10] + offsets[11], | |||||
| src.shape[6] + offsets[12] + offsets[13]}; | |||||
| break; | |||||
| default: | |||||
| megdnn_assert(false, "invalid tensor ndim %zu", src.ndim); | |||||
| break; | |||||
| } | |||||
| dst = TensorLayout(dst_shape, src.dtype); | |||||
| } | |||||
| void PaddingBackward::backward_check_exec(const TensorLayout& src, | |||||
| const TensorLayout& dst) { | |||||
| check_exec(dst, src); | |||||
| megdnn_assert(src.dtype.enumv() == | |||||
| DTypeEnum::Float32 DNN_INC_FLOAT16( | |||||
| || src.dtype.enumv() == DTypeEnum::Float16 || | |||||
| src.dtype.enumv() == DTypeEnum::BFloat16), | |||||
| "unsupported %s dtype for forward padding opr", | |||||
| src.dtype.name()); | |||||
| } | |||||
| SmallVector<size_t> PaddingBase::get_offsets() { | |||||
| SmallVector<size_t> offsets = { | |||||
| param().front_offset_dim0, param().back_offset_dim0, | |||||
| param().front_offset_dim1, param().back_offset_dim1, | |||||
| param().front_offset_dim2, param().back_offset_dim2, | |||||
| param().front_offset_dim3, param().back_offset_dim3, | |||||
| param().front_offset_dim4, param().back_offset_dim4, | |||||
| param().front_offset_dim5, param().back_offset_dim5, | |||||
| param().front_offset_dim6, param().back_offset_dim6}; | |||||
| return offsets; | |||||
| } | |||||
| void PaddingBase::check_exec(const TensorLayout& src, const TensorLayout& dst) { | |||||
| SmallVector<size_t> offsets(get_offsets()); | |||||
| // make sure the src and dst tensor not empty | |||||
| megdnn_assert(src.ndim != 0 && dst.ndim != 0); | |||||
| // make sure src and dst is same dtype | |||||
| megdnn_assert_eq_dtype(src, dst); | |||||
| // make sure src and dst is same ndim | |||||
| megdnn_assert(src.ndim == dst.ndim, "the src.ndim = %zu the dst.ndim = %zu", | |||||
| src.ndim, dst.ndim); | |||||
| // make sure in every dimension dst is equal or greater than src | |||||
| for (size_t i = 0; i < src.ndim; ++i) { | |||||
| megdnn_assert(dst.shape[i] == | |||||
| src.shape[i] + offsets[i * 2] + offsets[i * 2 + 1]); | |||||
| } | |||||
| // check the padding mode is valid | |||||
| megdnn_assert(static_cast<uint32_t>(param().padding_mode) == | |||||
| padding_param::PaddingMode::REFLECT || | |||||
| static_cast<uint32_t>(param().padding_mode) == | |||||
| padding_param::PaddingMode::REPLICATE || | |||||
| static_cast<uint32_t>(param().padding_mode) == | |||||
| padding_param::PaddingMode::CONSTANT, | |||||
| "unsupported padding mode"); | |||||
| // addition check for reflect padding, make sure the reflected index is | |||||
| // valid | |||||
| if (static_cast<uint32_t>(param().padding_mode) == | |||||
| padding_param::PaddingMode::REFLECT) { | |||||
| for (size_t i = 0; i < src.ndim; ++i) { | |||||
| megdnn_assert(offsets[i * 2] < src.shape[i] && | |||||
| dst.shape[i] - offsets[i * 2] - src.shape[i] < | |||||
| src.shape[i]); | |||||
| } | |||||
| } | |||||
| } | |||||
| } // namespace megdnn | |||||
| @@ -12,6 +12,7 @@ | |||||
| #include "src/common/handle_impl.h" | #include "src/common/handle_impl.h" | ||||
| #include "src/cuda/padding/opr_impl.h" | |||||
| #include "src/cuda/adaptive_pooling/opr_impl.h" | #include "src/cuda/adaptive_pooling/opr_impl.h" | ||||
| #include "src/cuda/add_update/opr_impl.h" | #include "src/cuda/add_update/opr_impl.h" | ||||
| #include "src/cuda/argmxx/opr_impl.h" | #include "src/cuda/argmxx/opr_impl.h" | ||||
| @@ -0,0 +1,71 @@ | |||||
| /** | |||||
| * \file dnn/src/cuda/padding/opr_impl.cpp | |||||
| * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
| * | |||||
| * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, | |||||
| * software distributed under the License is distributed on an | |||||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
| * implied. | |||||
| */ | |||||
| #include "src/cuda/padding/opr_impl.h" | |||||
| #include "src/common/utils.h" | |||||
| #include "src/cuda/handle.h" | |||||
| #include "src/cuda/padding/padding.cuh" | |||||
| #include "src/cuda/utils.h" | |||||
| namespace megdnn { | |||||
| namespace cuda { | |||||
| void PaddingForwardImpl::exec(_megdnn_tensor_in src, _megdnn_tensor_out dst) { | |||||
| forward_check_exec(src.layout, dst.layout); | |||||
| SmallVector<size_t> offsets(get_offsets()); | |||||
| // SamllVector can not be used as argument in cu file | |||||
| size_t param_offsets[MEGDNN_MAX_NDIM * 2] = { | |||||
| offsets[0], offsets[1], offsets[2], offsets[3], offsets[4], | |||||
| offsets[5], offsets[6], offsets[7], offsets[8], offsets[9], | |||||
| offsets[10], offsets[11], offsets[12], offsets[13]}; | |||||
| auto stream = cuda_stream(this->handle()); | |||||
| #define cb(DType) \ | |||||
| if (src.layout.dtype.enumv() == DTypeTrait<DType>::enumv) { \ | |||||
| using ctype = typename DTypeTrait<DType>::ctype; \ | |||||
| padding::padding_forward_proxy<ctype>(src, dst, param_offsets, \ | |||||
| uint32_t(param().padding_mode), \ | |||||
| param().padding_val, stream); \ | |||||
| } | |||||
| MEGDNN_FOREACH_COMPUTING_DTYPE(cb) | |||||
| #undef cb | |||||
| } | |||||
| void PaddingBackwardImpl::exec(_megdnn_tensor_in src, _megdnn_tensor_out dst) { | |||||
| backward_check_exec(src.layout, dst.layout); | |||||
| SmallVector<size_t> offsets(get_offsets()); | |||||
| // SamllVector can not be used as argument in cu file | |||||
| size_t param_offsets[MEGDNN_MAX_NDIM * 2] = { | |||||
| offsets[0], offsets[1], offsets[2], offsets[3], offsets[4], | |||||
| offsets[5], offsets[6], offsets[7], offsets[8], offsets[9], | |||||
| offsets[10], offsets[11], offsets[12], offsets[13]}; | |||||
| auto stream = cuda_stream(this->handle()); | |||||
| #define cb(DType) \ | |||||
| if (src.layout.dtype.enumv() == DTypeTrait<DType>::enumv) { \ | |||||
| using ctype = typename DTypeTrait<DType>::ctype; \ | |||||
| padding::padding_backward_proxy<ctype>(src, dst, param_offsets, \ | |||||
| uint32_t(param().padding_mode), \ | |||||
| stream); \ | |||||
| } | |||||
| MEGDNN_FOREACH_COMPUTING_DTYPE_FLOAT(cb) | |||||
| #undef cb | |||||
| } | |||||
// Forward padding writes directly into dst; no scratch memory is required.
size_t PaddingForwardImpl::get_workspace_in_bytes(const TensorLayout& src,
                                                  const TensorLayout& dst) {
    return 0;
}
// Backward padding accumulates directly into dst; no scratch memory is required.
size_t PaddingBackwardImpl::get_workspace_in_bytes(const TensorLayout& src,
                                                   const TensorLayout& dst) {
    return 0;
}
| } // namespace cuda | |||||
| } // namespace megdnn | |||||
| @@ -0,0 +1,35 @@ | |||||
| /** | |||||
| * \file dnn/src/cuda/padding/opr_impl.h | |||||
| * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
| * | |||||
| * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, | |||||
| * software distributed under the License is distributed on an | |||||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
| * implied. | |||||
| */ | |||||
| #pragma once | |||||
| #include "megdnn/oprs.h" | |||||
| namespace megdnn { | |||||
| namespace cuda { | |||||
| class PaddingForwardImpl : public PaddingForward { | |||||
| using PaddingForward::PaddingForward; | |||||
| public: | |||||
| void exec(_megdnn_tensor_in src, _megdnn_tensor_out dst) override; | |||||
| size_t get_workspace_in_bytes(const TensorLayout& src, | |||||
| const TensorLayout& dst) override; | |||||
| }; | |||||
| class PaddingBackwardImpl : public PaddingBackward { | |||||
| using PaddingBackward::PaddingBackward; | |||||
| public: | |||||
| void exec(_megdnn_tensor_in src, _megdnn_tensor_out dst) override; | |||||
| size_t get_workspace_in_bytes(const TensorLayout& src, | |||||
| const TensorLayout& dst) override; | |||||
| }; | |||||
| } // namespace cuda | |||||
| } // namespace megdnn | |||||
| @@ -0,0 +1,284 @@ | |||||
| /** | |||||
| * \file dnn/src/cuda/padding/padding.cu | |||||
| * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
| * | |||||
| * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, | |||||
| * software distributed under the License is distributed on an | |||||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
| * implied. | |||||
| */ | |||||
| #include <algorithm> | |||||
| #include <cstring> | |||||
| #include <iostream> | |||||
| #include "megdnn/basic_types.h" | |||||
| #include "padding.cuh" | |||||
| #include "src/cuda/int_fastdiv.cuh" | |||||
| #include "src/cuda/query_blocksize.cuh" | |||||
| namespace megdnn { | |||||
| namespace cuda { | |||||
| namespace padding { | |||||
// POD snapshot of the src/dst layouts and padding offsets, passed by value
// into the CUDA kernels (Fastdiv strides allow cheap index decomposition).
struct ShapeParams {
    size_t src_shape[MEGDNN_MAX_NDIM];
    size_t dst_shape[MEGDNN_MAX_NDIM];
    Uint32Fastdiv src_stride[MEGDNN_MAX_NDIM];
    Uint32Fastdiv dst_stride[MEGDNN_MAX_NDIM];
    // interleaved as {front0, back0, front1, back1, ...}
    size_t offsets[MEGDNN_MAX_NDIM * 2];
};
| template <typename T> | |||||
| __global__ void paddingConst_kernel(const size_t ndim, | |||||
| const size_t total_out_nr, | |||||
| const T* const src, T* const dst, | |||||
| ShapeParams params, | |||||
| const float_t padding_val) { | |||||
| KERN_FOR(out_index, total_out_nr) { | |||||
| bool in_src_valid_area = true; | |||||
| size_t in_index = 0; | |||||
| size_t out_index_tmp = out_index; | |||||
| for (size_t dim = 0; dim <= ndim - 1; ++dim) { | |||||
| Uint32Fastdiv dst_stride = params.dst_stride[dim], src_stride = params.src_stride[dim]; | |||||
| size_t src_shape = params.src_shape[dim]; | |||||
| size_t offset = params.offsets[dim*2]; | |||||
| size_t dim_index = out_index_tmp / dst_stride; | |||||
| in_src_valid_area &= (dim_index >= offset && dim_index < offset+src_shape); | |||||
| if(!in_src_valid_area) break; | |||||
| out_index_tmp -= dim_index * dst_stride.divisor(); | |||||
| in_index += (dim_index - offset)*src_stride.divisor(); | |||||
| /* | |||||
| size_t dim_index = out_index_tmp / params.dst_stride[dim]; | |||||
| out_index_tmp -= dim_index * params.dst_stride[dim].divisor(); | |||||
| in_src_valid_area &= (dim_index >= params.offsets[dim * 2] && | |||||
| dim_index < params.offsets[dim * 2] + | |||||
| params.src_shape[dim]); | |||||
| in_index += (dim_index - params.offsets[dim * 2]) * | |||||
| params.src_stride[dim].divisor(); | |||||
| */ | |||||
| } | |||||
| dst[out_index] = in_src_valid_area ? src[in_index] : padding_val; | |||||
| } | |||||
| } | |||||
| template <typename T> | |||||
| __global__ void paddingReplicate_kernel(const size_t ndim, | |||||
| const size_t total_out_nr, | |||||
| const T* const src, T* const dst, | |||||
| ShapeParams params, const float_t) { | |||||
| KERN_FOR(out_index, total_out_nr) { | |||||
| size_t in_index = 0; | |||||
| size_t out_index_tmp = out_index; | |||||
| for (size_t dim = 0; dim <= ndim - 1; ++dim) { | |||||
| size_t dim_index = out_index_tmp / params.dst_stride[dim]; | |||||
| out_index_tmp -= dim_index * params.dst_stride[dim].divisor(); | |||||
| dim_index = (size_t)llmin( | |||||
| (long long)params.src_shape[dim] - 1, | |||||
| llmax((long long)dim_index - | |||||
| (long long)params.offsets[dim * 2], | |||||
| (long long)0)); | |||||
| in_index += dim_index * params.src_stride[dim].divisor(); | |||||
| } | |||||
| dst[out_index] = src[in_index]; | |||||
| } | |||||
| } | |||||
| template <typename T> | |||||
| __global__ void paddingReflect_kernel(const size_t ndim, | |||||
| const size_t total_out_nr, | |||||
| const T* const src, T* const dst, | |||||
| ShapeParams params, const float_t) { | |||||
| KERN_FOR(out_index, total_out_nr) { | |||||
| size_t in_index = 0; | |||||
| size_t out_index_tmp = out_index; | |||||
| for (size_t dim = 0; dim <= ndim - 1; ++dim) { | |||||
| long long dim_index = out_index_tmp / params.dst_stride[dim]; | |||||
| out_index_tmp -= dim_index * params.dst_stride[dim].divisor(); | |||||
| dim_index -= (long long)params.offsets[dim * 2]; | |||||
| dim_index = llmax(dim_index, -dim_index); | |||||
| dim_index = llmin(dim_index, 2 * (long long)params.src_shape[dim] - | |||||
| dim_index - 2); | |||||
| in_index += size_t(dim_index) * | |||||
| (size_t)params.src_stride[dim].divisor(); | |||||
| } | |||||
| dst[out_index] = src[in_index]; | |||||
| } | |||||
| } | |||||
| template <typename T> | |||||
| __global__ void paddingConstBackward_kernel(const size_t ndim, | |||||
| const size_t total_in_nr, | |||||
| const T* const src, T* const dst, | |||||
| ShapeParams params) { | |||||
| KERN_FOR(in_index, total_in_nr) { | |||||
| bool in_dst_valid_area = true; | |||||
| size_t out_index = 0; | |||||
| size_t in_index_tmp = in_index; | |||||
| for (size_t dim = 0; dim <= ndim - 1; ++dim) { | |||||
| size_t dim_index = in_index_tmp / params.src_stride[dim]; | |||||
| in_index_tmp -= dim_index * params.src_stride[dim].divisor(); | |||||
| in_dst_valid_area &= (dim_index >= params.offsets[dim * 2] && | |||||
| dim_index < params.offsets[dim * 2] + | |||||
| params.dst_shape[dim]); | |||||
| out_index += (dim_index - params.offsets[dim * 2]) * | |||||
| params.dst_stride[dim].divisor(); | |||||
| } | |||||
| if (in_dst_valid_area) { | |||||
| dst[out_index] = src[in_index]; | |||||
| } | |||||
| } | |||||
| } | |||||
| template <typename T> | |||||
| __global__ void paddingReplicateBackward_kernel(const size_t ndim, | |||||
| const size_t total_in_nr, | |||||
| const T* const src, | |||||
| T* const dst, | |||||
| ShapeParams params) { | |||||
| KERN_FOR(in_index, total_in_nr) { | |||||
| size_t out_index = 0; | |||||
| size_t in_index_tmp = in_index; | |||||
| for (size_t dim = 0; dim <= ndim - 1; ++dim) { | |||||
| size_t dim_index = in_index_tmp / params.src_stride[dim]; | |||||
| in_index_tmp -= dim_index * params.src_stride[dim].divisor(); | |||||
| dim_index = (size_t)llmin( | |||||
| (long long)params.dst_shape[dim] - 1, | |||||
| llmax((long long)dim_index - | |||||
| (long long)params.offsets[dim * 2], | |||||
| (long long)0)); | |||||
| out_index += dim_index * params.dst_stride[dim].divisor(); | |||||
| } | |||||
| atomic_add(&dst[out_index], src[in_index]); | |||||
| } | |||||
| } | |||||
| template <typename T> | |||||
| __global__ void paddingReflectBackward_kernel(const size_t ndim, | |||||
| const size_t total_in_nr, | |||||
| const T* const src, T* const dst, | |||||
| ShapeParams params) { | |||||
| KERN_FOR(in_index, total_in_nr) { | |||||
| size_t out_index = 0; | |||||
| size_t in_index_tmp = in_index; | |||||
| for (size_t dim = 0; dim <= ndim - 1; ++dim) { | |||||
| long long dim_index = in_index_tmp / params.src_stride[dim]; | |||||
| in_index_tmp -= dim_index * params.src_stride[dim].divisor(); | |||||
| dim_index -= (long long)params.offsets[dim * 2]; | |||||
| dim_index = llmax(dim_index, -dim_index); | |||||
| dim_index = llmin(dim_index, 2 * (long long)params.dst_shape[dim] - | |||||
| dim_index - 2); | |||||
| out_index += size_t(dim_index) * | |||||
| (size_t)params.dst_stride[dim].divisor(); | |||||
| } | |||||
| atomic_add(&dst[out_index], src[in_index]); | |||||
| } | |||||
| } | |||||
| template <typename T> | |||||
| void padding_forward_proxy(const TensorND& src, const TensorND& dst, | |||||
| size_t offsets[MEGDNN_MAX_NDIM * 2], uint32_t mode, | |||||
| const float_t padding_val, cudaStream_t stream) { | |||||
| ShapeParams params; | |||||
| for (size_t i = 0; i < src.layout.ndim; ++i) { | |||||
| params.src_shape[i] = src.layout.shape[i]; | |||||
| params.dst_shape[i] = dst.layout.shape[i]; | |||||
| params.src_stride[i] = src.layout.stride[i]; | |||||
| params.dst_stride[i] = dst.layout.stride[i]; | |||||
| params.offsets[i * 2] = offsets[i * 2]; | |||||
| params.offsets[i * 2 + 1] = offsets[i * 2 + 1]; | |||||
| } | |||||
| void (*fwd_kern)(const size_t, const size_t, const T* const, T* const, | |||||
| ShapeParams, const float_t); | |||||
| switch (mode) { | |||||
| case param_enumv::Padding::PaddingMode::CONSTANT: | |||||
| fwd_kern = paddingConst_kernel<T>; | |||||
| break; | |||||
| case param_enumv::Padding::PaddingMode::REPLICATE: | |||||
| fwd_kern = paddingReplicate_kernel<T>; | |||||
| break; | |||||
| case param_enumv::Padding::PaddingMode::REFLECT: | |||||
| fwd_kern = paddingReflect_kernel<T>; | |||||
| break; | |||||
| default: | |||||
| megdnn_assert(false, "invalid padding mode"); | |||||
| } | |||||
| size_t total_nr = dst.layout.total_nr_elems(); | |||||
| uint32_t nr_threads = query_blocksize_for_kernel(fwd_kern); | |||||
| dim3 threads(nr_threads); | |||||
| dim3 blocks(DIVUP(total_nr, nr_threads)); | |||||
| fwd_kern<<<blocks, threads, 0, stream>>>(src.layout.ndim, total_nr, | |||||
| src.ptr<T>(), dst.ptr<T>(), params, | |||||
| padding_val); | |||||
| after_kernel_launch(); | |||||
| } | |||||
| template <typename T> | |||||
| void padding_backward_proxy(const TensorND& src, const TensorND& dst, | |||||
| size_t offsets[MEGDNN_MAX_NDIM * 2], uint32_t mode, | |||||
| cudaStream_t stream) { | |||||
| ShapeParams params; | |||||
| for (size_t i = 0; i < src.layout.ndim; ++i) { | |||||
| params.src_shape[i] = src.layout.shape[i]; | |||||
| params.dst_shape[i] = dst.layout.shape[i]; | |||||
| params.src_stride[i] = src.layout.stride[i]; | |||||
| params.dst_stride[i] = dst.layout.stride[i]; | |||||
| params.offsets[i * 2] = offsets[i * 2]; | |||||
| params.offsets[i * 2 + 1] = offsets[i * 2 + 1]; | |||||
| } | |||||
| cudaMemset(dst.raw_ptr, 0, dst.layout.access_bytes()); | |||||
| void (*bwd_kern)(const size_t, const size_t, const T* const, T* const, | |||||
| ShapeParams); | |||||
| switch (mode) { | |||||
| case param_enumv::Padding::PaddingMode::CONSTANT: | |||||
| bwd_kern = paddingConstBackward_kernel<T>; | |||||
| break; | |||||
| case param_enumv::Padding::PaddingMode::REPLICATE: | |||||
| bwd_kern = paddingReplicateBackward_kernel<T>; | |||||
| break; | |||||
| case param_enumv::Padding::PaddingMode::REFLECT: | |||||
| bwd_kern = paddingReflectBackward_kernel<T>; | |||||
| break; | |||||
| default: | |||||
| megdnn_assert(false, "invalid padding mode"); | |||||
| } | |||||
| size_t total_nr = src.layout.total_nr_elems(); | |||||
| uint32_t nr_threads = query_blocksize_for_kernel(bwd_kern); | |||||
| dim3 threads(nr_threads); | |||||
| dim3 blocks(DIVUP(total_nr, nr_threads)); | |||||
| bwd_kern<<<blocks, threads, 0, stream>>>( | |||||
| src.layout.ndim, total_nr, src.ptr<T>(), dst.ptr<T>(), params); | |||||
| after_kernel_launch(); | |||||
| } | |||||
| #define INST(T) \ | |||||
| template void padding_forward_proxy<T>( \ | |||||
| const TensorND& src, const TensorND& dst, \ | |||||
| size_t offsets[MEGDNN_MAX_NDIM * 2], uint32_t mode, \ | |||||
| const float_t padding_val, cudaStream_t stream); | |||||
| #define cb(DType) INST(typename DTypeTrait<DType>::ctype) | |||||
| MEGDNN_FOREACH_COMPUTING_DTYPE(cb) | |||||
| #undef cb | |||||
| #undef INST | |||||
| #define INST(T) \ | |||||
| template void padding_backward_proxy<T>( \ | |||||
| const TensorND& src, const TensorND& dst, \ | |||||
| size_t offsets[MEGDNN_MAX_NDIM * 2], uint32_t mode, \ | |||||
| cudaStream_t stream); | |||||
| #define cb(DType) INST(typename DTypeTrait<DType>::ctype) | |||||
| MEGDNN_FOREACH_COMPUTING_DTYPE_FLOAT(cb) | |||||
| #undef cb | |||||
| #undef INST | |||||
| } // namespace padding | |||||
| } // namespace cuda | |||||
| } // namespace megdnn | |||||
| @@ -0,0 +1,36 @@ | |||||
| /** | |||||
| * \file dnn/src/cuda/padding/padding.cuh | |||||
| * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
| * | |||||
| * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, | |||||
| * software distributed under the License is distributed on an | |||||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
| * implied. | |||||
| */ | |||||
| #pragma once | |||||
| #include "cuda_runtime.h" | |||||
| #include "megdnn/basic_types.h" | |||||
| #include "src/common/opr_param_defs_enumv.cuh" | |||||
| #include "src/cuda/utils.cuh" | |||||
| namespace megdnn { | |||||
| namespace cuda { | |||||
| namespace padding { | |||||
//! forward padding: fill dst from src on `stream` according to mode/offsets
template <typename T>
void padding_forward_proxy(const TensorND& src, const TensorND& dst,
                           size_t offsets[MEGDNN_MAX_NDIM * 2], uint32_t mode,
                           const float_t padding_val, cudaStream_t stream);

//! backward padding: scatter/accumulate the gradient `src` back into `dst`
template <typename T>
void padding_backward_proxy(const TensorND& src, const TensorND& dst,
                            size_t offsets[MEGDNN_MAX_NDIM * 2], uint32_t mode,
                            cudaStream_t stream);
| } // namespace padding | |||||
| } // namespace cuda | |||||
| } // namespace megdnn | |||||
| // vim: syntax=cuda.doxygen | |||||
| @@ -78,7 +78,7 @@ | |||||
| #if MEGDNN_THREADS_512 | #if MEGDNN_THREADS_512 | ||||
| #define NR_THREADS 512 | #define NR_THREADS 512 | ||||
| #define NR_THREADS_X 32 | #define NR_THREADS_X 32 | ||||
| #define NR_THREADS_Y 16 | |||||
| #define NR_THREADS_Y 16 | |||||
| #else | #else | ||||
| #define NR_THREADS 1024 | #define NR_THREADS 1024 | ||||
| #define NR_THREADS_X 32 | #define NR_THREADS_X 32 | ||||
| @@ -16,7 +16,7 @@ | |||||
| #include <numeric> | #include <numeric> | ||||
| namespace megdnn { | |||||
| namespace { | |||||
| using namespace megdnn; | using namespace megdnn; | ||||
| @@ -14,6 +14,7 @@ | |||||
| #include "src/common/handle_impl.h" | #include "src/common/handle_impl.h" | ||||
| #include "src/naive/padding/opr_impl.h" | |||||
| #include "src/naive/adaptive_pooling/opr_impl.h" | #include "src/naive/adaptive_pooling/opr_impl.h" | ||||
| #include "src/naive/add_update/opr_impl.h" | #include "src/naive/add_update/opr_impl.h" | ||||
| #include "src/naive/argmxx/opr_impl.h" | #include "src/naive/argmxx/opr_impl.h" | ||||
| @@ -0,0 +1,281 @@ | |||||
| /** | |||||
| * \file dnn/src/naive/padding/opr_impl.cpp | |||||
| * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
| * | |||||
| * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, | |||||
| * software distributed under the License is distributed on an | |||||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
| * implied. | |||||
| */ | |||||
| #include "src/naive/padding/opr_impl.h" | |||||
| #include <math.h> | |||||
| #include <stdio.h> | |||||
| #include "src/common/utils.h" | |||||
| #include "src/naive/handle.h" | |||||
| namespace megdnn { | |||||
| namespace naive { | |||||
// Plain snapshot of the src/dst layouts and padding offsets used by the
// naive (CPU) padding kernels.
struct ShapeParams {
    size_t src_shape[MEGDNN_MAX_NDIM];
    size_t dst_shape[MEGDNN_MAX_NDIM];
    ptrdiff_t src_stride[MEGDNN_MAX_NDIM];
    ptrdiff_t dst_stride[MEGDNN_MAX_NDIM];
    // interleaved as {front0, back0, front1, back1, ...}
    size_t offsets[MEGDNN_MAX_NDIM * 2];
};
| template <typename T> | |||||
| void exec_const_internal(const size_t ndim, const size_t total_out_nr, | |||||
| const T* const src, T* const dst, ShapeParams params, | |||||
| const T padding_val) MEGDNN_NOEXCEPT { | |||||
| rep(out_index, total_out_nr) { | |||||
| bool in_src_valid_area = true; | |||||
| size_t in_index = 0; | |||||
| size_t out_index_tmp = out_index; | |||||
| for (size_t dim = 0; dim <= ndim - 1; ++dim) { | |||||
| size_t dim_index = out_index_tmp / params.dst_stride[dim]; | |||||
| out_index_tmp -= dim_index * params.dst_stride[dim]; | |||||
| in_src_valid_area &= (dim_index >= params.offsets[dim * 2] && | |||||
| dim_index < params.offsets[dim * 2] + | |||||
| params.src_shape[dim]); | |||||
| in_index += (dim_index - params.offsets[dim * 2]) * | |||||
| params.src_stride[dim]; | |||||
| } | |||||
| if (in_src_valid_area) { | |||||
| dst[out_index] = src[in_index]; | |||||
| } else { | |||||
| dst[out_index] = padding_val; | |||||
| } | |||||
| } | |||||
| } | |||||
| template <typename T> | |||||
| void exec_replicate_internal(const size_t ndim, const size_t total_out_nr, | |||||
| const T* const src, T* const dst, | |||||
| ShapeParams params) MEGDNN_NOEXCEPT { | |||||
| rep(out_index, total_out_nr) { | |||||
| size_t in_index = 0; | |||||
| size_t out_index_tmp = out_index; | |||||
| for (size_t dim = 0; dim <= ndim - 1; ++dim) { | |||||
| size_t dim_index = out_index_tmp / params.dst_stride[dim]; | |||||
| out_index_tmp -= dim_index * params.dst_stride[dim]; | |||||
| dim_index = (size_t)std::min( | |||||
| (long long)params.src_shape[dim] - 1, | |||||
| std::max((long long)dim_index - | |||||
| (long long)params.offsets[dim * 2], | |||||
| (long long)0)); | |||||
| in_index += dim_index * params.src_stride[dim]; | |||||
| } | |||||
| dst[out_index] = src[in_index]; | |||||
| } | |||||
| } | |||||
| template <typename T> | |||||
| void exec_reflect_internal(const size_t ndim, const size_t total_out_nr, | |||||
| const T* const src, T* const dst, | |||||
| ShapeParams params) MEGDNN_NOEXCEPT { | |||||
| rep(out_index, total_out_nr) { | |||||
| size_t in_index = 0; | |||||
| size_t out_index_tmp = out_index; | |||||
| for (size_t dim = 0; dim <= ndim - 1; ++dim) { | |||||
| long long dim_index = out_index_tmp / params.dst_stride[dim]; | |||||
| out_index_tmp -= dim_index * params.dst_stride[dim]; | |||||
| dim_index -= (long long)params.offsets[dim * 2]; | |||||
| dim_index = std::max(dim_index, -dim_index); | |||||
| dim_index = | |||||
| std::min(dim_index, 2 * (long long)params.src_shape[dim] - | |||||
| dim_index - 2); | |||||
| in_index += size_t(dim_index) * (size_t)params.src_stride[dim]; | |||||
| } | |||||
| dst[out_index] = src[in_index]; | |||||
| } | |||||
| } | |||||
| template <typename T> | |||||
| void backward_exec_const_internal(const size_t ndim, const size_t total_in_nr, | |||||
| const T* const src, T* const dst, | |||||
| ShapeParams params) MEGDNN_NOEXCEPT { | |||||
| rep(in_index, total_in_nr) { | |||||
| bool in_dst_valid_area = true; | |||||
| size_t out_index = 0; | |||||
| size_t in_index_tmp = in_index; | |||||
| for (size_t dim = 0; dim <= ndim - 1; ++dim) { | |||||
| size_t dim_index = in_index_tmp / params.src_stride[dim]; | |||||
| in_index_tmp -= dim_index * params.src_stride[dim]; | |||||
| in_dst_valid_area &= (dim_index >= params.offsets[dim * 2] && | |||||
| dim_index < params.offsets[dim * 2] + | |||||
| params.dst_shape[dim]); | |||||
| out_index += (dim_index - params.offsets[dim * 2]) * | |||||
| params.dst_stride[dim]; | |||||
| } | |||||
| if (in_dst_valid_area) { | |||||
| dst[out_index] = src[in_index]; | |||||
| } | |||||
| } | |||||
| } | |||||
| template <typename T> | |||||
| void backward_exec_replicate_internal(const size_t ndim, | |||||
| const size_t total_in_nr, | |||||
| const T* const src, T* const dst, | |||||
| ShapeParams params) MEGDNN_NOEXCEPT { | |||||
| rep(in_index, total_in_nr) { | |||||
| size_t out_index = 0; | |||||
| size_t in_index_tmp = in_index; | |||||
| for (size_t dim = 0; dim <= ndim - 1; ++dim) { | |||||
| size_t dim_index = in_index_tmp / params.src_stride[dim]; | |||||
| in_index_tmp -= dim_index * params.src_stride[dim]; | |||||
| dim_index = (size_t)std::min( | |||||
| (long long)params.dst_shape[dim] - 1, | |||||
| std::max((long long)dim_index - | |||||
| (long long)params.offsets[dim * 2], | |||||
| (long long)0)); | |||||
| out_index += dim_index * params.dst_stride[dim]; | |||||
| } | |||||
| dst[out_index] += src[in_index]; | |||||
| } | |||||
| } | |||||
| template <typename T> | |||||
| void backward_exec_reflect_internal(const size_t ndim, const size_t total_in_nr, | |||||
| const T* const src, T* const dst, | |||||
| ShapeParams params) MEGDNN_NOEXCEPT { | |||||
| rep(in_index, total_in_nr) { | |||||
| size_t out_index = 0; | |||||
| size_t in_index_tmp = in_index; | |||||
| for (size_t dim = 0; dim <= ndim - 1; ++dim) { | |||||
| long long dim_index = in_index_tmp / params.src_stride[dim]; | |||||
| in_index_tmp -= dim_index * params.src_stride[dim]; | |||||
| dim_index -= (long long)params.offsets[dim * 2]; | |||||
| dim_index = std::max(dim_index, -dim_index); | |||||
| dim_index = | |||||
| std::min(dim_index, 2 * (long long)params.dst_shape[dim] - | |||||
| dim_index - 2); | |||||
| out_index += size_t(dim_index) * (size_t)params.dst_stride[dim]; | |||||
| } | |||||
| dst[out_index] += src[in_index]; | |||||
| } | |||||
| } | |||||
| void PaddingForwardImpl::exec(_megdnn_tensor_in src, _megdnn_tensor_out dst) { | |||||
| forward_check_exec(src.layout, dst.layout); | |||||
| SmallVector<size_t> offsets(get_offsets()); | |||||
| ShapeParams params; | |||||
| for (size_t i = 0; i < src.layout.ndim; ++i) { | |||||
| params.src_shape[i] = src.layout.shape[i]; | |||||
| params.dst_shape[i] = dst.layout.shape[i]; | |||||
| params.src_stride[i] = src.layout.stride[i]; | |||||
| params.dst_stride[i] = dst.layout.stride[i]; | |||||
| params.offsets[i * 2] = offsets[i * 2]; | |||||
| params.offsets[i * 2 + 1] = offsets[i * 2 + 1]; | |||||
| } | |||||
| size_t n = dst.layout.total_nr_elems(); | |||||
| switch (param().padding_mode) { | |||||
| case param::Padding::PaddingMode::CONSTANT: | |||||
| #define cb(DType) \ | |||||
| if (src.layout.dtype == DType()) { \ | |||||
| using T = typename DTypeTrait<DType>::ctype; \ | |||||
| MEGDNN_DISPATCH_CPU_KERN_OPR(exec_const_internal<T>( \ | |||||
| src.layout.ndim, n, src.ptr<T>(), dst.ptr<T>(), params, \ | |||||
| T(param().padding_val))); \ | |||||
| return; \ | |||||
| } | |||||
| MEGDNN_FOREACH_COMPUTING_DTYPE(cb) | |||||
| #undef cb | |||||
| break; | |||||
| case param::Padding::PaddingMode::REPLICATE: | |||||
| #define cb(DType) \ | |||||
| if (src.layout.dtype == DType()) { \ | |||||
| using T = typename DTypeTrait<DType>::ctype; \ | |||||
| MEGDNN_DISPATCH_CPU_KERN_OPR(exec_replicate_internal<T>( \ | |||||
| src.layout.ndim, n, src.ptr<T>(), dst.ptr<T>(), params)); \ | |||||
| return; \ | |||||
| } | |||||
| MEGDNN_FOREACH_COMPUTING_DTYPE(cb) | |||||
| #undef cb | |||||
| break; | |||||
| case param::Padding::PaddingMode::REFLECT: | |||||
| #define cb(DType) \ | |||||
| if (src.layout.dtype == DType()) { \ | |||||
| using T = typename DTypeTrait<DType>::ctype; \ | |||||
| MEGDNN_DISPATCH_CPU_KERN_OPR(exec_reflect_internal<T>( \ | |||||
| src.layout.ndim, n, src.ptr<T>(), dst.ptr<T>(), params)); \ | |||||
| return; \ | |||||
| } | |||||
| MEGDNN_FOREACH_COMPUTING_DTYPE(cb) | |||||
| #undef cb | |||||
| break; | |||||
| default: | |||||
| megdnn_assert(false, "unsupported padding mode!"); | |||||
| } | |||||
| } | |||||
| void PaddingBackwardImpl::exec(_megdnn_tensor_in src, _megdnn_tensor_out dst) { | |||||
| backward_check_exec(src.layout, dst.layout); | |||||
| SmallVector<size_t> offsets(get_offsets()); | |||||
| ShapeParams params; | |||||
| for (size_t i = 0; i < src.layout.ndim; ++i) { | |||||
| params.src_shape[i] = src.layout.shape[i]; | |||||
| params.dst_shape[i] = dst.layout.shape[i]; | |||||
| params.src_stride[i] = src.layout.stride[i]; | |||||
| params.dst_stride[i] = dst.layout.stride[i]; | |||||
| params.offsets[i * 2] = offsets[i * 2]; | |||||
| params.offsets[i * 2 + 1] = offsets[i * 2 + 1]; | |||||
| } | |||||
| size_t n = src.layout.total_nr_elems(); | |||||
| memset(dst.raw_ptr, 0, dst.layout.access_bytes()); | |||||
| switch (param().padding_mode) { | |||||
| case param::Padding::PaddingMode::CONSTANT: | |||||
| #define cb(DType) \ | |||||
| if (src.layout.dtype == DType()) { \ | |||||
| using T = typename DTypeTrait<DType>::ctype; \ | |||||
| MEGDNN_DISPATCH_CPU_KERN_OPR(backward_exec_const_internal<T>( \ | |||||
| src.layout.ndim, n, src.ptr<T>(), dst.ptr<T>(), params)); \ | |||||
| return; \ | |||||
| } | |||||
| MEGDNN_FOREACH_COMPUTING_DTYPE_FLOAT(cb) | |||||
| #undef cb | |||||
| break; | |||||
| case param::Padding::PaddingMode::REPLICATE: | |||||
| #define cb(DType) \ | |||||
| if (src.layout.dtype == DType()) { \ | |||||
| using T = typename DTypeTrait<DType>::ctype; \ | |||||
| MEGDNN_DISPATCH_CPU_KERN_OPR(backward_exec_replicate_internal<T>( \ | |||||
| src.layout.ndim, n, src.ptr<T>(), dst.ptr<T>(), params)); \ | |||||
| return; \ | |||||
| } | |||||
| MEGDNN_FOREACH_COMPUTING_DTYPE_FLOAT(cb) | |||||
| #undef cb | |||||
| break; | |||||
| case param::Padding::PaddingMode::REFLECT: | |||||
| #define cb(DType) \ | |||||
| if (src.layout.dtype == DType()) { \ | |||||
| using T = typename DTypeTrait<DType>::ctype; \ | |||||
| MEGDNN_DISPATCH_CPU_KERN_OPR(backward_exec_reflect_internal<T>( \ | |||||
| src.layout.ndim, n, src.ptr<T>(), dst.ptr<T>(), params)); \ | |||||
| return; \ | |||||
| } | |||||
| MEGDNN_FOREACH_COMPUTING_DTYPE_FLOAT(cb) | |||||
| #undef cb | |||||
| break; | |||||
| default: | |||||
| megdnn_assert(false, "unsupported padding mode!"); | |||||
| } | |||||
| } | |||||
//! the naive forward kernels write dst directly from src: no scratch needed
size_t PaddingForwardImpl::get_workspace_in_bytes(
        const TensorLayout& /* src */, const TensorLayout& /* dst */) {
    return 0;
}
//! the naive backward kernels accumulate into dst in place: no scratch needed
size_t PaddingBackwardImpl::get_workspace_in_bytes(
        const TensorLayout& /* src */, const TensorLayout& /* dst */) {
    return 0;
}
| } // namespace naive | |||||
| } // namespace megdnn | |||||
| @@ -0,0 +1,30 @@ | |||||
| /** | |||||
| * \file dnn/src/naive/padding/opr_impl.h | |||||
| * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
| * | |||||
| * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, | |||||
| * software distributed under the License is distributed on an | |||||
| * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| */ | |||||
| #pragma once | |||||
| #include "megdnn/oprs.h" | |||||
| namespace megdnn{ | |||||
| namespace naive{ | |||||
//! naive (CPU reference) implementation of forward padding
class PaddingForwardImpl: public PaddingForward{
    using PaddingForward::PaddingForward;
public:
    //! copy src into dst at the param-specified offsets, filling the border
    void exec(_megdnn_tensor_in src, _megdnn_tensor_out dst) override;
    //! always 0: the naive kernels need no workspace
    size_t get_workspace_in_bytes(const TensorLayout& src, const TensorLayout& dst) override;
};
//! naive (CPU reference) implementation of padding gradient
class PaddingBackwardImpl: public PaddingBackward{
    using PaddingBackward::PaddingBackward;
public:
    //! accumulate the padded-output gradient (src) into the input gradient (dst)
    void exec(_megdnn_tensor_in src, _megdnn_tensor_out dst) override;
    //! always 0: the naive kernels need no workspace
    size_t get_workspace_in_bytes(const TensorLayout& src, const TensorLayout& dst) override;
};
| } | |||||
| } | |||||
| @@ -0,0 +1,441 @@ | |||||
| /** | |||||
| * \file dnn/test/common/padding.h | |||||
| * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
| * | |||||
| * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, | |||||
| * software distributed under the License is distributed on an | |||||
| * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
| * implied. | |||||
| */ | |||||
| #pragma once | |||||
| #include <cstddef> | |||||
| #include <iostream> | |||||
| #include "megdnn/basic_types.h" | |||||
| #include "megdnn/opr_param_defs.h" | |||||
| namespace megdnn { | |||||
| namespace test { | |||||
| namespace padding { | |||||
//! one padding testcase: operator params plus the input/output tensor shapes
struct TestArg {
    param::Padding param;  // padding mode, per-dim front/back offsets, fill value
    TensorShape src;       // input shape
    TensorShape dst;       // expected output shape (src + offsets per dim)
    TestArg(param::Padding _param, TensorShape _src, TensorShape _dst)
            : param(_param), src(_src), dst(_dst) {}
};
| inline std::vector<TestArg> get_args() { | |||||
| size_t src_shape_dim0 = 5; | |||||
| size_t src_shape_dim1 = 5; | |||||
| size_t src_shape_dim2 = 5; | |||||
| size_t src_shape_dim3 = 5; | |||||
| size_t src_shape_dim4 = 5; | |||||
| size_t src_shape_dim5 = 5; | |||||
| size_t src_shape_dim6 = 5; | |||||
| size_t dst_shape_dim0 = 8; | |||||
| size_t dst_shape_dim1 = 8; | |||||
| size_t dst_shape_dim2 = 8; | |||||
| size_t dst_shape_dim3 = 8; | |||||
| size_t dst_shape_dim4 = 8; | |||||
| size_t dst_shape_dim5 = 8; | |||||
| size_t dst_shape_dim6 = 8; | |||||
| std::vector<TestArg> args; | |||||
| param::Padding cur_param; | |||||
| cur_param.front_offset_dim0 = 0; | |||||
| cur_param.front_offset_dim1 = 0; | |||||
| cur_param.front_offset_dim2 = 0; | |||||
| cur_param.front_offset_dim3 = 0; | |||||
| cur_param.front_offset_dim4 = 0; | |||||
| cur_param.front_offset_dim5 = 0; | |||||
| cur_param.front_offset_dim6 = 0; | |||||
| cur_param.back_offset_dim0 = 0; | |||||
| cur_param.back_offset_dim1 = 0; | |||||
| cur_param.back_offset_dim2 = 0; | |||||
| cur_param.back_offset_dim3 = 0; | |||||
| cur_param.back_offset_dim4 = 0; | |||||
| cur_param.back_offset_dim5 = 0; | |||||
| cur_param.back_offset_dim6 = 0; | |||||
| cur_param.padding_val = 2; | |||||
| cur_param.front_offset_dim0 = 1; | |||||
| cur_param.back_offset_dim0 = 2; | |||||
| cur_param.padding_mode = param::Padding::PaddingMode::CONSTANT; | |||||
| args.emplace_back(cur_param, TensorShape{src_shape_dim0}, | |||||
| TensorShape{dst_shape_dim0}); | |||||
| cur_param.padding_mode = param::Padding::PaddingMode::REPLICATE; | |||||
| args.emplace_back(cur_param, TensorShape{src_shape_dim0}, | |||||
| TensorShape{dst_shape_dim0}); | |||||
| cur_param.padding_mode = param::Padding::PaddingMode::REFLECT; | |||||
| args.emplace_back(cur_param, TensorShape{src_shape_dim0}, | |||||
| TensorShape{dst_shape_dim0}); | |||||
| cur_param.front_offset_dim1 = 2; | |||||
| cur_param.back_offset_dim1 = 1; | |||||
| cur_param.padding_mode = param::Padding::PaddingMode::CONSTANT; | |||||
| args.emplace_back(cur_param, TensorShape{src_shape_dim0, src_shape_dim1}, | |||||
| TensorShape{dst_shape_dim0, dst_shape_dim1}); | |||||
| cur_param.padding_mode = param::Padding::PaddingMode::REPLICATE; | |||||
| args.emplace_back(cur_param, TensorShape{src_shape_dim0, src_shape_dim1}, | |||||
| TensorShape{dst_shape_dim0, dst_shape_dim1}); | |||||
| cur_param.padding_mode = param::Padding::PaddingMode::REFLECT; | |||||
| args.emplace_back(cur_param, TensorShape{src_shape_dim0, src_shape_dim1}, | |||||
| TensorShape{dst_shape_dim0, dst_shape_dim1}); | |||||
| cur_param.front_offset_dim2 = 1; | |||||
| cur_param.back_offset_dim2 = 2; | |||||
| cur_param.padding_mode = param::Padding::PaddingMode::CONSTANT; | |||||
| args.emplace_back( | |||||
| cur_param, | |||||
| TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2}, | |||||
| TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2}); | |||||
| cur_param.padding_mode = param::Padding::PaddingMode::REPLICATE; | |||||
| args.emplace_back( | |||||
| cur_param, | |||||
| TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2}, | |||||
| TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2}); | |||||
| cur_param.padding_mode = param::Padding::PaddingMode::REFLECT; | |||||
| args.emplace_back( | |||||
| cur_param, | |||||
| TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2}, | |||||
| TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2}); | |||||
| cur_param.front_offset_dim3 = 0; | |||||
| cur_param.back_offset_dim3 = 3; | |||||
| cur_param.padding_mode = param::Padding::PaddingMode::CONSTANT; | |||||
| args.emplace_back(cur_param, | |||||
| TensorShape{src_shape_dim0, src_shape_dim1, | |||||
| src_shape_dim2, src_shape_dim3}, | |||||
| TensorShape{dst_shape_dim0, dst_shape_dim1, | |||||
| dst_shape_dim2, dst_shape_dim3}); | |||||
| cur_param.padding_mode = param::Padding::PaddingMode::REPLICATE; | |||||
| args.emplace_back(cur_param, | |||||
| TensorShape{src_shape_dim0, src_shape_dim1, | |||||
| src_shape_dim2, src_shape_dim3}, | |||||
| TensorShape{dst_shape_dim0, dst_shape_dim1, | |||||
| dst_shape_dim2, dst_shape_dim3}); | |||||
| cur_param.padding_mode = param::Padding::PaddingMode::REFLECT; | |||||
| args.emplace_back(cur_param, | |||||
| TensorShape{src_shape_dim0, src_shape_dim1, | |||||
| src_shape_dim2, src_shape_dim3}, | |||||
| TensorShape{dst_shape_dim0, dst_shape_dim1, | |||||
| dst_shape_dim2, dst_shape_dim3}); | |||||
| cur_param.front_offset_dim4 = 3; | |||||
| cur_param.back_offset_dim4 = 0; | |||||
| cur_param.padding_mode = param::Padding::PaddingMode::CONSTANT; | |||||
| args.emplace_back( | |||||
| cur_param, | |||||
| TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2, | |||||
| src_shape_dim3, src_shape_dim4}, | |||||
| TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2, | |||||
| dst_shape_dim3, dst_shape_dim4}); | |||||
| cur_param.padding_mode = param::Padding::PaddingMode::REPLICATE; | |||||
| args.emplace_back( | |||||
| cur_param, | |||||
| TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2, | |||||
| src_shape_dim3, src_shape_dim4}, | |||||
| TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2, | |||||
| dst_shape_dim3, dst_shape_dim4}); | |||||
| cur_param.padding_mode = param::Padding::PaddingMode::REFLECT; | |||||
| args.emplace_back( | |||||
| cur_param, | |||||
| TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2, | |||||
| src_shape_dim3, src_shape_dim4}, | |||||
| TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2, | |||||
| dst_shape_dim3, dst_shape_dim4}); | |||||
| cur_param.front_offset_dim5 = 1; | |||||
| cur_param.back_offset_dim5 = 2; | |||||
| cur_param.padding_mode = param::Padding::PaddingMode::CONSTANT; | |||||
| args.emplace_back( | |||||
| cur_param, | |||||
| TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2, | |||||
| src_shape_dim3, src_shape_dim4, src_shape_dim5}, | |||||
| TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2, | |||||
| dst_shape_dim3, dst_shape_dim4, dst_shape_dim5}); | |||||
| cur_param.padding_mode = param::Padding::PaddingMode::REPLICATE; | |||||
| args.emplace_back( | |||||
| cur_param, | |||||
| TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2, | |||||
| src_shape_dim3, src_shape_dim4, src_shape_dim5}, | |||||
| TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2, | |||||
| dst_shape_dim3, dst_shape_dim4, dst_shape_dim5}); | |||||
| cur_param.padding_mode = param::Padding::PaddingMode::REFLECT; | |||||
| args.emplace_back( | |||||
| cur_param, | |||||
| TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2, | |||||
| src_shape_dim3, src_shape_dim4, src_shape_dim5}, | |||||
| TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2, | |||||
| dst_shape_dim3, dst_shape_dim4, dst_shape_dim5}); | |||||
| cur_param.front_offset_dim6 = 0; | |||||
| cur_param.front_offset_dim6 = 3; | |||||
| cur_param.padding_mode = param::Padding::PaddingMode::CONSTANT; | |||||
| args.emplace_back( | |||||
| cur_param, | |||||
| TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2, | |||||
| src_shape_dim3, src_shape_dim4, src_shape_dim5, | |||||
| src_shape_dim6}, | |||||
| TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2, | |||||
| dst_shape_dim3, dst_shape_dim4, dst_shape_dim5, | |||||
| dst_shape_dim6}); | |||||
| cur_param.padding_mode = param::Padding::PaddingMode::REPLICATE; | |||||
| args.emplace_back( | |||||
| cur_param, | |||||
| TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2, | |||||
| src_shape_dim3, src_shape_dim4, src_shape_dim5, | |||||
| src_shape_dim6}, | |||||
| TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2, | |||||
| dst_shape_dim3, dst_shape_dim4, dst_shape_dim5, | |||||
| dst_shape_dim6}); | |||||
| cur_param.padding_mode = param::Padding::PaddingMode::REFLECT; | |||||
| args.emplace_back( | |||||
| cur_param, | |||||
| TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2, | |||||
| src_shape_dim3, src_shape_dim4, src_shape_dim5, | |||||
| src_shape_dim6}, | |||||
| TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2, | |||||
| dst_shape_dim3, dst_shape_dim4, dst_shape_dim5, | |||||
| dst_shape_dim6}); | |||||
| return args; | |||||
| } | |||||
| inline std::vector<TestArg> get_args_backward() { | |||||
| size_t src_shape_dim0 = 8; | |||||
| size_t src_shape_dim1 = 8; | |||||
| size_t src_shape_dim2 = 8; | |||||
| size_t src_shape_dim3 = 8; | |||||
| size_t src_shape_dim4 = 8; | |||||
| size_t src_shape_dim5 = 8; | |||||
| size_t src_shape_dim6 = 8; | |||||
| size_t dst_shape_dim0 = 5; | |||||
| size_t dst_shape_dim1 = 5; | |||||
| size_t dst_shape_dim2 = 5; | |||||
| size_t dst_shape_dim3 = 5; | |||||
| size_t dst_shape_dim4 = 5; | |||||
| size_t dst_shape_dim5 = 5; | |||||
| size_t dst_shape_dim6 = 5; | |||||
| std::vector<TestArg> args; | |||||
| param::Padding cur_param; | |||||
| cur_param.front_offset_dim0 = 0; | |||||
| cur_param.front_offset_dim1 = 0; | |||||
| cur_param.front_offset_dim2 = 0; | |||||
| cur_param.front_offset_dim3 = 0; | |||||
| cur_param.front_offset_dim4 = 0; | |||||
| cur_param.front_offset_dim5 = 0; | |||||
| cur_param.front_offset_dim6 = 0; | |||||
| cur_param.back_offset_dim0 = 0; | |||||
| cur_param.back_offset_dim1 = 0; | |||||
| cur_param.back_offset_dim2 = 0; | |||||
| cur_param.back_offset_dim3 = 0; | |||||
| cur_param.back_offset_dim4 = 0; | |||||
| cur_param.back_offset_dim5 = 0; | |||||
| cur_param.back_offset_dim6 = 0; | |||||
| cur_param.padding_val = 2; | |||||
| cur_param.front_offset_dim0 = 1; | |||||
| cur_param.back_offset_dim0 = 2; | |||||
| cur_param.padding_mode = param::Padding::PaddingMode::CONSTANT; | |||||
| args.emplace_back(cur_param, TensorShape{src_shape_dim0}, | |||||
| TensorShape{dst_shape_dim0}); | |||||
| cur_param.padding_mode = param::Padding::PaddingMode::REPLICATE; | |||||
| args.emplace_back(cur_param, TensorShape{src_shape_dim0}, | |||||
| TensorShape{dst_shape_dim0}); | |||||
| cur_param.padding_mode = param::Padding::PaddingMode::REFLECT; | |||||
| args.emplace_back(cur_param, TensorShape{src_shape_dim0}, | |||||
| TensorShape{dst_shape_dim0}); | |||||
| cur_param.front_offset_dim1 = 2; | |||||
| cur_param.back_offset_dim1 = 1; | |||||
| cur_param.padding_mode = param::Padding::PaddingMode::CONSTANT; | |||||
| args.emplace_back(cur_param, TensorShape{src_shape_dim0, src_shape_dim1}, | |||||
| TensorShape{dst_shape_dim0, dst_shape_dim1}); | |||||
| cur_param.padding_mode = param::Padding::PaddingMode::REPLICATE; | |||||
| args.emplace_back(cur_param, TensorShape{src_shape_dim0, src_shape_dim1}, | |||||
| TensorShape{dst_shape_dim0, dst_shape_dim1}); | |||||
| cur_param.padding_mode = param::Padding::PaddingMode::REFLECT; | |||||
| args.emplace_back(cur_param, TensorShape{src_shape_dim0, src_shape_dim1}, | |||||
| TensorShape{dst_shape_dim0, dst_shape_dim1}); | |||||
| cur_param.front_offset_dim2 = 1; | |||||
| cur_param.back_offset_dim2 = 2; | |||||
| cur_param.padding_mode = param::Padding::PaddingMode::CONSTANT; | |||||
| args.emplace_back( | |||||
| cur_param, | |||||
| TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2}, | |||||
| TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2}); | |||||
| cur_param.padding_mode = param::Padding::PaddingMode::REPLICATE; | |||||
| args.emplace_back( | |||||
| cur_param, | |||||
| TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2}, | |||||
| TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2}); | |||||
| cur_param.padding_mode = param::Padding::PaddingMode::REFLECT; | |||||
| args.emplace_back( | |||||
| cur_param, | |||||
| TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2}, | |||||
| TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2}); | |||||
| cur_param.front_offset_dim3 = 0; | |||||
| cur_param.back_offset_dim3 = 3; | |||||
| cur_param.padding_mode = param::Padding::PaddingMode::CONSTANT; | |||||
| args.emplace_back(cur_param, | |||||
| TensorShape{src_shape_dim0, src_shape_dim1, | |||||
| src_shape_dim2, src_shape_dim3}, | |||||
| TensorShape{dst_shape_dim0, dst_shape_dim1, | |||||
| dst_shape_dim2, dst_shape_dim3}); | |||||
| cur_param.padding_mode = param::Padding::PaddingMode::REPLICATE; | |||||
| args.emplace_back(cur_param, | |||||
| TensorShape{src_shape_dim0, src_shape_dim1, | |||||
| src_shape_dim2, src_shape_dim3}, | |||||
| TensorShape{dst_shape_dim0, dst_shape_dim1, | |||||
| dst_shape_dim2, dst_shape_dim3}); | |||||
| cur_param.padding_mode = param::Padding::PaddingMode::REFLECT; | |||||
| args.emplace_back(cur_param, | |||||
| TensorShape{src_shape_dim0, src_shape_dim1, | |||||
| src_shape_dim2, src_shape_dim3}, | |||||
| TensorShape{dst_shape_dim0, dst_shape_dim1, | |||||
| dst_shape_dim2, dst_shape_dim3}); | |||||
| cur_param.front_offset_dim4 = 3; | |||||
| cur_param.back_offset_dim4 =0; | |||||
| cur_param.padding_mode = param::Padding::PaddingMode::CONSTANT; | |||||
| args.emplace_back( | |||||
| cur_param, | |||||
| TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2, | |||||
| src_shape_dim3, src_shape_dim4}, | |||||
| TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2, | |||||
| dst_shape_dim3, dst_shape_dim4}); | |||||
| cur_param.padding_mode = param::Padding::PaddingMode::REPLICATE; | |||||
| args.emplace_back( | |||||
| cur_param, | |||||
| TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2, | |||||
| src_shape_dim3, src_shape_dim4}, | |||||
| TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2, | |||||
| dst_shape_dim3, dst_shape_dim4}); | |||||
| cur_param.padding_mode = param::Padding::PaddingMode::REFLECT; | |||||
| args.emplace_back( | |||||
| cur_param, | |||||
| TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2, | |||||
| src_shape_dim3, src_shape_dim4}, | |||||
| TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2, | |||||
| dst_shape_dim3, dst_shape_dim4}); | |||||
| cur_param.front_offset_dim5 = 1; | |||||
| cur_param.back_offset_dim5 = 2; | |||||
| cur_param.padding_mode = param::Padding::PaddingMode::CONSTANT; | |||||
| args.emplace_back( | |||||
| cur_param, | |||||
| TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2, | |||||
| src_shape_dim3, src_shape_dim4, src_shape_dim5}, | |||||
| TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2, | |||||
| dst_shape_dim3, dst_shape_dim4, dst_shape_dim5}); | |||||
| cur_param.padding_mode = param::Padding::PaddingMode::REPLICATE; | |||||
| args.emplace_back( | |||||
| cur_param, | |||||
| TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2, | |||||
| src_shape_dim3, src_shape_dim4, src_shape_dim5}, | |||||
| TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2, | |||||
| dst_shape_dim3, dst_shape_dim4, dst_shape_dim5}); | |||||
| cur_param.padding_mode = param::Padding::PaddingMode::REFLECT; | |||||
| args.emplace_back( | |||||
| cur_param, | |||||
| TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2, | |||||
| src_shape_dim3, src_shape_dim4, src_shape_dim5}, | |||||
| TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2, | |||||
| dst_shape_dim3, dst_shape_dim4, dst_shape_dim5}); | |||||
| cur_param.front_offset_dim6 = 0; | |||||
| cur_param.back_offset_dim6 = 3; | |||||
| cur_param.padding_mode = param::Padding::PaddingMode::CONSTANT; | |||||
| args.emplace_back( | |||||
| cur_param, | |||||
| TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2, | |||||
| src_shape_dim3, src_shape_dim4, src_shape_dim5, | |||||
| src_shape_dim6}, | |||||
| TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2, | |||||
| dst_shape_dim3, dst_shape_dim4, dst_shape_dim5, | |||||
| dst_shape_dim6}); | |||||
| cur_param.padding_mode = param::Padding::PaddingMode::REPLICATE; | |||||
| args.emplace_back( | |||||
| cur_param, | |||||
| TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2, | |||||
| src_shape_dim3, src_shape_dim4, src_shape_dim5, | |||||
| src_shape_dim6}, | |||||
| TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2, | |||||
| dst_shape_dim3, dst_shape_dim4, dst_shape_dim5, | |||||
| dst_shape_dim6}); | |||||
| cur_param.padding_mode = param::Padding::PaddingMode::REFLECT; | |||||
| args.emplace_back( | |||||
| cur_param, | |||||
| TensorShape{src_shape_dim0, src_shape_dim1, src_shape_dim2, | |||||
| src_shape_dim3, src_shape_dim4, src_shape_dim5, | |||||
| src_shape_dim6}, | |||||
| TensorShape{dst_shape_dim0, dst_shape_dim1, dst_shape_dim2, | |||||
| dst_shape_dim3, dst_shape_dim4, dst_shape_dim5, | |||||
| dst_shape_dim6}); | |||||
| return args; | |||||
| } | |||||
| } // namespace padding | |||||
| } // namespace test | |||||
| } // namespace megdnn | |||||
| @@ -0,0 +1,219 @@ | |||||
| /** | |||||
| * \file dnn/test/cuda/padding.cpp | |||||
| * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
| * | |||||
| * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, | |||||
| * software distributed under the License is distributed on an | |||||
| * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
| * implied. | |||||
| */ | |||||
| #include "test/cuda/benchmark.h" | |||||
| #include "test/common/padding.h" | |||||
| #include "megdnn/oprs.h" | |||||
| #include "test/common/checker.h" | |||||
| #include "test/common/rng.h" | |||||
| #include "test/cuda/fixture.h" | |||||
| using namespace megdnn; | |||||
| using namespace test; | |||||
// Run every generic forward testcase (ndim 1..7, all three modes) on CUDA
// with int8 data and random non-zero values through the Checker harness.
TEST_F(CUDA, PADDING) {
    std::vector<padding::TestArg> args = padding::get_args();
    Checker<Padding> checker(handle_cuda());
    UniformIntNonZeroRNG rng(1, 9);
    for (auto&& arg : args) {
        checker.set_param(arg.param)
                .set_rng(0, &rng)
                .set_dtype(0, dtype::Int8())
                .set_dtype(1, dtype::Int8())
                .execs({arg.src, arg.dst});
    }
}
// Run every generic backward testcase on CUDA with float32 data (the
// backward kernels are only dispatched for floating dtypes).
TEST_F(CUDA, PADDING_BACKWARD) {
    std::vector<padding::TestArg> args = padding::get_args_backward();
    Checker<PaddingBackward> checker(handle_cuda());
    UniformFloatRNG rng(1, 9);
    for (auto&& arg : args) {
        checker.set_param(arg.param)
                .set_rng(0, &rng)
                .set_dtype(0, dtype::Float32())
                .set_dtype(1, dtype::Float32())
                .execs({arg.src, arg.dst});
    }
}
// 1-D REFLECT padding with fixed values: {1,2,3,4,5} padded front=2/back=3
// mirrors about the first/last element (edge not repeated), giving
// {3,2, 1,2,3,4,5, 4,3,2}. padding_val is set but unused in REFLECT mode.
TEST_F(CUDA, PADDING_REFLECT) {
    Checker<Padding> checker(handle_cuda(), false);
    param::Padding param;
    param.padding_val = 10;
    param.padding_mode = param::Padding::PaddingMode::REFLECT;
    param.front_offset_dim0 = 2;
    param.front_offset_dim1 = 0;
    param.front_offset_dim2 = 0;
    param.front_offset_dim3 = 0;
    param.front_offset_dim4 = 0;
    param.front_offset_dim5 = 0;
    param.front_offset_dim6 = 0;
    param.back_offset_dim0 = 3;
    param.back_offset_dim1 = 0;
    param.back_offset_dim2 = 0;
    param.back_offset_dim3 = 0;
    param.back_offset_dim4 = 0;
    param.back_offset_dim5 = 0;
    param.back_offset_dim6 = 0;
    checker.set_param(param).exect(
            Testcase{TensorValue({5}, dtype::Int8(), {1, 2, 3, 4, 5}), {}},
            Testcase{{},
                     TensorValue({10}, dtype::Int8(),
                                 {3, 2, 1, 2, 3, 4, 5, 4, 3, 2})});
}
// 2-D REFLECT padding with fixed values: a 3x3 tensor padded by (1,1) rows
// and (2,2) cols to 5x7; rows map to source rows [1,0,1,2,1] and cols to
// source cols [2,1,0,1,2,1,0] after mirroring.
TEST_F(CUDA, PADDING_REFLECT2) {
    Checker<Padding> checker(handle_cuda(), false);
    param::Padding param;
    param.padding_val = 10;
    param.padding_mode = param::Padding::PaddingMode::REFLECT;
    param.front_offset_dim0 = 1;
    param.front_offset_dim1 = 2;
    param.front_offset_dim2 = 0;
    param.front_offset_dim3 = 0;
    param.front_offset_dim4 = 0;
    param.front_offset_dim5 = 0;
    param.front_offset_dim6 = 0;
    param.back_offset_dim0 = 1;
    param.back_offset_dim1 = 2;
    param.back_offset_dim2 = 0;
    param.back_offset_dim3 = 0;
    param.back_offset_dim4 = 0;
    param.back_offset_dim5 = 0;
    param.back_offset_dim6 = 0;
    checker.set_param(param).exect(
            Testcase{TensorValue({3, 3}, dtype::Int8(),
                                 {3, 5, 1, 3, 6, 1, 4, 7, 9}),
                     {}},
            Testcase{{},
                     TensorValue({5, 7}, dtype::Int8(),
                                 {1, 6, 3, 6, 1, 6, 3, 1, 5, 3, 5, 1,
                                  5, 3, 1, 6, 3, 6, 1, 6, 3, 9, 7, 4,
                                  7, 9, 7, 4, 1, 6, 3, 6, 1, 6, 3})});
}
| TEST_F(CUDA, PADDING_REPLICATE) { | |||||
| Checker<Padding> checker(handle_cuda(), false); | |||||
| param::Padding param; | |||||
| param.padding_val = 10; | |||||
| param.padding_mode = param::Padding::PaddingMode::REPLICATE; | |||||
| param.front_offset_dim0 = 1; | |||||
| param.front_offset_dim1 = 0; | |||||
| param.front_offset_dim2 = 0; | |||||
| param.front_offset_dim3 = 0; | |||||
| param.front_offset_dim4 = 0; | |||||
| param.front_offset_dim5 = 0; | |||||
| param.front_offset_dim6 = 0; | |||||
| param.back_offset_dim0 = 2; | |||||
| param.back_offset_dim1 = 0; | |||||
| param.back_offset_dim2 = 0; | |||||
| param.back_offset_dim3 = 0; | |||||
| param.back_offset_dim4 = 0; | |||||
| param.back_offset_dim5 = 0; | |||||
| param.back_offset_dim6 = 0; | |||||
| checker.set_param(param).exect( | |||||
| Testcase{TensorValue({9}, dtype::Int8(), | |||||
| {1, 2, 3, 4, 5, 6, 7, 8, 9}), | |||||
| {}}, | |||||
| Testcase{{}, | |||||
| TensorValue({12}, dtype::Int8(), | |||||
| {1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 9, 9})}); | |||||
| } | |||||
| TEST_F(CUDA, PADDING_REPLICATE2) { | |||||
| Checker<Padding> checker(handle_cuda(), false); | |||||
| param::Padding param; | |||||
| param.padding_val = 10; | |||||
| param.padding_mode = param::Padding::PaddingMode::REPLICATE; | |||||
| param.front_offset_dim0 = 2; | |||||
| param.front_offset_dim1 = 1; | |||||
| param.front_offset_dim2 = 0; | |||||
| param.front_offset_dim3 = 0; | |||||
| param.front_offset_dim4 = 0; | |||||
| param.front_offset_dim5 = 0; | |||||
| param.front_offset_dim6 = 0; | |||||
| param.back_offset_dim0 = 0; | |||||
| param.back_offset_dim1 = 3; | |||||
| param.back_offset_dim2 = 0; | |||||
| param.back_offset_dim3 = 0; | |||||
| param.back_offset_dim4 = 0; | |||||
| param.back_offset_dim5 = 0; | |||||
| param.back_offset_dim6 = 0; | |||||
| checker.set_param(param).exect( | |||||
| Testcase{TensorValue({3, 3}, dtype::Int8(), | |||||
| {1, 2, 3, 4, 5, 6, 7, 8, 9}), | |||||
| {}}, | |||||
| Testcase{{}, | |||||
| TensorValue({5, 7}, dtype::Int8(), | |||||
| {1, 1, 2, 3, 3, 3, 3, 1, 1, 2, 3, 3, | |||||
| 3, 3, 1, 1, 2, 3, 3, 3, 3, 4, 4, 5, | |||||
| 6, 6, 6, 6, 7, 7, 8, 9, 9, 9, 9})}); | |||||
| } | |||||
| // #if MEGDNN_WITH_BENCHMARK | |||||
| TEST_F(CUDA, BENCHMARK_PADDING_CONSTANT) { | |||||
| using Param = Padding::Param; | |||||
| auto run = [&](const TensorShapeArray& shapes, Param param) { | |||||
| CUBenchmarker<PaddingForward> benchmarker(handle_cuda()); | |||||
| benchmarker.set_param(param); | |||||
| benchmarker.set_dtype(0, dtype::Int8()) | |||||
| .set_dtype(1, dtype::Int8()); | |||||
| for (auto&& shape : shapes) { | |||||
| double memaccess = | |||||
| double(TensorLayout(shape, dtype::Int8()) | |||||
| .span() | |||||
| .dist_byte()) * | |||||
| 2e-6; | |||||
| auto time_ms = benchmarker.execs({shape, {}}); | |||||
| printf("execute %s, time %.4f ms, %.4f GB/s\n", | |||||
| shape.to_string().c_str(), time_ms, memaccess / time_ms); | |||||
| } | |||||
| }; | |||||
| printf("mode -> constant; dtype -> int8\n"); | |||||
| { | |||||
| TensorShapeArray shapes = { | |||||
| {16, 3, 736, 1280}, | |||||
| }; | |||||
| Param param; | |||||
| param.padding_mode = param::Padding::PaddingMode::CONSTANT; | |||||
| param.front_offset_dim1 = 1; | |||||
| run(shapes, param); | |||||
| } | |||||
| printf("mode -> replicate; dtype -> int8\n"); | |||||
| { | |||||
| TensorShapeArray shapes = { | |||||
| {16, 3, 736, 1280}, | |||||
| }; | |||||
| Param param; | |||||
| param.padding_mode = param::Padding::PaddingMode::REPLICATE; | |||||
| param.front_offset_dim1 = 1; | |||||
| run(shapes, param); | |||||
| } | |||||
| printf("mode -> reflect; dtype -> int8\n"); | |||||
| { | |||||
| TensorShapeArray shapes = { | |||||
| {16, 3, 736, 1280}, | |||||
| }; | |||||
| Param param; | |||||
| param.padding_mode = param::Padding::PaddingMode::REFLECT; | |||||
| param.front_offset_dim1 = 1; | |||||
| run(shapes, param); | |||||
| } | |||||
| } | |||||
| // #endif | |||||
| @@ -0,0 +1,132 @@ | |||||
| /** | |||||
| * \file dnn/test/naive/padding.cpp | |||||
| * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
| * | |||||
| * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, | |||||
| * software distributed under the License is distributed on an | |||||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| */ | |||||
| #include "test/common/padding.h" | |||||
| #include "megdnn/dtype.h" | |||||
| #include "megdnn/oprs.h" | |||||
| #include "test/common/checker.h" | |||||
| #include "test/naive/fixture.h" | |||||
| namespace megdnn{ | |||||
| namespace test{ | |||||
| TEST_F(NAIVE, PADDING) { | |||||
| std::vector<padding::TestArg> args = padding::get_args(); | |||||
| Checker<Padding> checker(handle()); | |||||
| for(auto&& arg : args){ | |||||
| checker.set_param(arg.param) | |||||
| .set_dtype(0, dtype::Float32()) | |||||
| .set_dtype(1, dtype::Float32()) | |||||
| .execs({arg.src, arg.dst}); | |||||
| } | |||||
| } | |||||
| TEST_F(NAIVE, PADDING_CONSTANT) { | |||||
| Checker<Padding> checker(handle(), false); | |||||
| param::Padding param; | |||||
| param.padding_val = 10; | |||||
| param.padding_mode = param::Padding::PaddingMode::CONSTANT; | |||||
| param.front_offset_dim0 = 2; | |||||
| param.front_offset_dim1 = 1; | |||||
| param.front_offset_dim2 = 0; | |||||
| param.front_offset_dim3 = 0; | |||||
| param.front_offset_dim4 = 0; | |||||
| param.front_offset_dim5 = 0; | |||||
| param.front_offset_dim6 = 0; | |||||
| param.back_offset_dim0 = 2; | |||||
| param.back_offset_dim1 = 3; | |||||
| param.back_offset_dim2 = 0; | |||||
| param.back_offset_dim3 = 0; | |||||
| param.back_offset_dim4 = 0; | |||||
| param.back_offset_dim5 = 0; | |||||
| param.back_offset_dim6 = 0; | |||||
| checker.set_param(param).exect( | |||||
| Testcase{TensorValue({1,1}, dtype::Float32(), {1}), {}}, | |||||
| Testcase{{},TensorValue({5,5}, dtype::Float32(), {10,10,10,10,10,10,10,10,10,10,10,1,10,10,10,10,10,10,10,10,10,10,10,10,10})} | |||||
| ); | |||||
| } | |||||
| TEST_F(NAIVE, PADDING_REFLECT) { | |||||
| Checker<Padding> checker(handle(), false); | |||||
| param::Padding param; | |||||
| param.padding_val = 10; | |||||
| param.padding_mode = param::Padding::PaddingMode::REFLECT; | |||||
| param.front_offset_dim0 = 2; | |||||
| param.front_offset_dim1 = 0; | |||||
| param.front_offset_dim2 = 0; | |||||
| param.front_offset_dim3 = 0; | |||||
| param.front_offset_dim4 = 0; | |||||
| param.front_offset_dim5 = 0; | |||||
| param.front_offset_dim6 = 0; | |||||
| param.back_offset_dim0 = 3; | |||||
| param.back_offset_dim1 = 0; | |||||
| param.back_offset_dim2 = 0; | |||||
| param.back_offset_dim3 = 0; | |||||
| param.back_offset_dim4 = 0; | |||||
| param.back_offset_dim5 = 0; | |||||
| param.back_offset_dim6 = 0; | |||||
| checker.set_param(param).exect( | |||||
| Testcase{TensorValue({5}, dtype::Float32(), {1,2,3,4,5}), {}}, | |||||
| Testcase{{},TensorValue({10}, dtype::Float32(), {3,2,1,2,3,4,5,4,3,2})} | |||||
| ); | |||||
| } | |||||
| TEST_F(NAIVE, PADDING_REPLICATE) { | |||||
| Checker<Padding> checker(handle(), false); | |||||
| param::Padding param; | |||||
| param.padding_val = 10; | |||||
| param.padding_mode = param::Padding::PaddingMode::REPLICATE; | |||||
| param.front_offset_dim0 = 1; | |||||
| param.front_offset_dim1 = 0; | |||||
| param.front_offset_dim2 = 0; | |||||
| param.front_offset_dim3 = 0; | |||||
| param.front_offset_dim4 = 0; | |||||
| param.front_offset_dim5 = 0; | |||||
| param.front_offset_dim6 = 0; | |||||
| param.back_offset_dim0 = 2; | |||||
| param.back_offset_dim1 = 0; | |||||
| param.back_offset_dim2 = 0; | |||||
| param.back_offset_dim3 = 0; | |||||
| param.back_offset_dim4 = 0; | |||||
| param.back_offset_dim5 = 0; | |||||
| param.back_offset_dim6 = 0; | |||||
| checker.set_param(param).exect( | |||||
| Testcase{TensorValue({9}, dtype::Float32(), {1,2,3,4,5,6,7,8,9}), {}}, | |||||
| Testcase{{},TensorValue({12}, dtype::Float32(), {1,1,2,3,4,5,6,7,8,9,9,9})} | |||||
| ); | |||||
| } | |||||
| TEST_F(NAIVE, PADDING_REPLICATE2) { | |||||
| Checker<Padding> checker(handle(), false); | |||||
| param::Padding param; | |||||
| param.padding_val = 10; | |||||
| param.padding_mode = param::Padding::PaddingMode::REPLICATE; | |||||
| param.front_offset_dim0 = 2; | |||||
| param.front_offset_dim1 = 1; | |||||
| param.front_offset_dim2 = 0; | |||||
| param.front_offset_dim3 = 0; | |||||
| param.front_offset_dim4 = 0; | |||||
| param.front_offset_dim5 = 0; | |||||
| param.front_offset_dim6 = 0; | |||||
| param.back_offset_dim0 = 0; | |||||
| param.back_offset_dim1 = 3; | |||||
| param.back_offset_dim2 = 0; | |||||
| param.back_offset_dim3 = 0; | |||||
| param.back_offset_dim4 = 0; | |||||
| param.back_offset_dim5 = 0; | |||||
| param.back_offset_dim6 = 0; | |||||
| checker.set_param(param).exect( | |||||
| Testcase{TensorValue({3,3}, dtype::Float32(), {1,2,3,4,5,6,7,8,9}), {}}, | |||||
| Testcase{{},TensorValue({5,7}, dtype::Float32(), {1,1,2,3,3,3,3,1,1,2,3,3,3,3,1,1,2,3,3,3,3,4,4,5,6,6,6,6,7,7,8,9,9,9,9})} | |||||
| ); | |||||
| } | |||||
| } | |||||
| } | |||||
| @@ -67,4 +67,9 @@ decl_opr('NvOf', | |||||
| inputs=['src'], params='NvOf', | inputs=['src'], params='NvOf', | ||||
| desc='opr Implements NVIDIA Optical Flow SDK.') | desc='opr Implements NVIDIA Optical Flow SDK.') | ||||
| decl_opr('Padding', | |||||
| inputs=['src'], | |||||
| params='Padding', | |||||
| desc='tensor padding opr.') | |||||
| # vim: ft=python | # vim: ft=python | ||||
| @@ -68,7 +68,7 @@ namespace opr { | |||||
| //! current cumsum version | //! current cumsum version | ||||
| using CumsumV1 = opr::Cumsum; | using CumsumV1 = opr::Cumsum; | ||||
| MGB_SEREG_OPR(CumsumV1, 1); | MGB_SEREG_OPR(CumsumV1, 1); | ||||
| #if MGB_CUDA | #if MGB_CUDA | ||||
| MGB_SEREG_OPR(NvOf, 1); | MGB_SEREG_OPR(NvOf, 1); | ||||
| #endif | #endif | ||||
| @@ -1608,4 +1608,30 @@ void RelayoutFormat::init_output_format() { | |||||
| // f}}} | // f}}} | ||||
| // | // | ||||
| /* f{{{ ======================= PaddingForward ======================= */ | |||||
| MGB_DYN_TYPE_OBJ_FINAL_IMPL(PaddingForward); | |||||
| MEGDNN_OPR_INIT1(PaddingForward, "padding") | |||||
| #if MGB_ENABLE_GRAD | |||||
| MGB_IMPL_OPR_GRAD(PaddingForward) { | |||||
| mgb_assert(opr.input().size() == 1); | |||||
| if (wrt_idx == 0) { | |||||
| SymbolVar grad = PaddingBackward::make(out_grad[0], opr.input(0), opr.param()); | |||||
| return grad.node(); | |||||
| } else | |||||
| return InvalidGrad::make(opr, wrt_idx); | |||||
| } | |||||
| #endif | |||||
| // f}}} | |||||
| /* f{{{ ======================= PaddingBackward ======================= */ | |||||
| MGB_DYN_TYPE_OBJ_FINAL_IMPL(PaddingBackward); | |||||
| MEGDNN_OPR_INIT2(PaddingBackward, "padding_backward", 1, false); | |||||
| // f}}} | |||||
| // vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} | // vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} | ||||
| @@ -24,6 +24,35 @@ MGB_SEREG_MODIFY_SUBTENSOR_OPR(IncrSubtensor); | |||||
| namespace mgb { | namespace mgb { | ||||
| namespace serialization { | namespace serialization { | ||||
| template<> | |||||
| struct OprMaker<opr::Padding, 1> { | |||||
| using Opr = opr::Padding; | |||||
| using Param = Opr::Param; | |||||
| static cg::OperatorNodeBase* make(const Param& param, const cg::VarNodeArray& inputs, ComputingGraph& graph, const OperatorNodeConfig& config) { | |||||
| MGB_MARK_USED_VAR(graph); | |||||
| if(inputs.size() == 1) { | |||||
| return Opr::make(inputs[0], param, config).node()->owner_opr(); | |||||
| }else{ | |||||
| return nullptr; | |||||
| } | |||||
| } | |||||
| }; | |||||
| template<> | |||||
| struct OprMaker<opr::PaddingBackward, 2> { | |||||
| using Opr = opr::PaddingBackward; | |||||
| using Param = Opr::Param; | |||||
| static cg::OperatorNodeBase* make(const Param& param, const cg::VarNodeArray& inputs, ComputingGraph& graph, const OperatorNodeConfig& config) { | |||||
| MGB_MARK_USED_VAR(graph); | |||||
| if(inputs.size() == 2) { | |||||
| return Opr::make(inputs[0], inputs[1], param, config).node()->owner_opr(); | |||||
| }else{ | |||||
| return nullptr; | |||||
| } | |||||
| } | |||||
| }; | |||||
| template<> | template<> | ||||
| struct OprMaker<opr::Concat, 0>: public OprMakerVariadic<opr::Concat>{}; | struct OprMaker<opr::Concat, 0>: public OprMakerVariadic<opr::Concat>{}; | ||||
| @@ -185,6 +214,10 @@ namespace opr { | |||||
| using RelayoutFormatV1 = opr::RelayoutFormat; | using RelayoutFormatV1 = opr::RelayoutFormat; | ||||
| MGB_SEREG_OPR(RelayoutFormatV1, 1); | MGB_SEREG_OPR(RelayoutFormatV1, 1); | ||||
| MGB_SEREG_OPR(Padding, 1); | |||||
| MGB_SEREG_OPR(PaddingBackward, 2); | |||||
| } // namespace opr | } // namespace opr | ||||
| } // namespace mgb | } // namespace mgb | ||||
| @@ -635,6 +635,26 @@ MGB_DEFINE_OPR_CLASS(RelayoutFormat, | |||||
| const OperatorNodeConfig &config = {}); | const OperatorNodeConfig &config = {}); | ||||
| void init_output_format() override final; | void init_output_format() override final; | ||||
| }; | }; | ||||
| /*! | |||||
| * \brief padding the src tensor to dst tensor | |||||
| */ | |||||
| MGB_DEFINE_OPR_CLASS(PaddingForward, intl::MegDNNOprWrapperFwd<megdnn::PaddingForward>) // { | |||||
| public: | |||||
| PaddingForward(VarNode* src, const Param& param, const OperatorNodeConfig& config); | |||||
| static SymbolVar make(SymbolVar src, const Param& param = {}, const OperatorNodeConfig &config = {}); | |||||
| }; | |||||
| using Padding = PaddingForward; | |||||
| /*! | |||||
| * \brief padding backward | |||||
| */ | |||||
| MGB_DEFINE_OPR_CLASS(PaddingBackward, intl::MegDNNOprWrapperBwd<megdnn::PaddingBackward>) // { | |||||
| public: | |||||
| PaddingBackward(VarNode* src, VarNode* in_for_shape, const Param& param, const OperatorNodeConfig& config); | |||||
| static SymbolVar make(SymbolVar src, SymbolVar in_for_shape, const Param &param = {}, const OperatorNodeConfig &config = {}); | |||||
| }; | |||||
| } // opr | } // opr | ||||
| } // mgb | } // mgb | ||||
| @@ -0,0 +1,68 @@ | |||||
| #include "./legacy_checker.h" | |||||
| #include "megbrain/comp_node_env.h" | |||||
| #include "megbrain/gopt/inference.h" | |||||
| #include "megbrain/opr/basic_arith.h" | |||||
| #include "megbrain/opr/dnn/convolution.h" | |||||
| #include "megbrain/opr/tensor_manip.h" | |||||
| #include "megbrain/serialization/serializer.h" | |||||
| #include "megbrain/test/autocheck.h" | |||||
| #include "megbrain/test/helper.h" | |||||
| #include "megbrain/test/megdnn_helper.h" | |||||
| #include "megdnn/dtype.h" | |||||
| #include "megdnn/oprs/base.h" | |||||
| #include <gmock/gmock.h> | |||||
| #include <cmath> | |||||
| #include <memory> | |||||
| #include <random> | |||||
| using namespace std; | |||||
| using namespace mgb; | |||||
| namespace { | |||||
| TEST(TestOprDNN, PaddingForwardSerialization) { | |||||
| using namespace serialization; | |||||
| auto fname = output_file("PaddingForwardTest"); | |||||
| auto dump = [&]() { | |||||
| opr::Padding::Param param; | |||||
| param.padding_mode = megdnn::param::Padding::PaddingMode(0); | |||||
| param.front_offset_dim0 = 3; | |||||
| param.front_offset_dim1 = 3; | |||||
| param.front_offset_dim2 = 3; | |||||
| param.front_offset_dim3 = 3; | |||||
| param.front_offset_dim4 = 0; | |||||
| param.front_offset_dim5 = 0; | |||||
| param.front_offset_dim6 = 0; | |||||
| param.back_offset_dim0 = 0; | |||||
| param.back_offset_dim1 = 0; | |||||
| param.back_offset_dim2 = 0; | |||||
| param.back_offset_dim3 = 0; | |||||
| param.back_offset_dim4 = 0; | |||||
| param.back_offset_dim5 = 0; | |||||
| param.back_offset_dim6 = 0; | |||||
| param.padding_val = 0; | |||||
| auto cn = CompNode::load("xpu"); | |||||
| auto graph = ComputingGraph::make(); | |||||
| HostTensorND inp_host{cn, {32, 4, 24, 24}, dtype::Float32()}; | |||||
| auto inp = opr::ImmutableTensor::make(*graph, inp_host); | |||||
| auto opr = opr::PaddingForward::make(inp, param, {}); | |||||
| auto dumper = GraphDumper::make(OutputFile::make_fs(fname.c_str())); | |||||
| auto rst = dumper->dump({opr}); | |||||
| ASSERT_EQ(rst.outputs.size(), 1u); | |||||
| }; | |||||
| auto load = [&]() { | |||||
| auto loader = GraphLoader::make(InputFile::make_fs(fname.c_str())); | |||||
| auto rst = loader->load(); | |||||
| ASSERT_EQ(rst.output_var_list.size(), 1u); | |||||
| }; | |||||
| dump(); | |||||
| load(); | |||||
| } | |||||
| } // namespace | |||||
| @@ -10,6 +10,7 @@ | |||||
| */ | */ | ||||
| #include "megbrain/test/helper.h" | #include "megbrain/test/helper.h" | ||||
| #include "megbrain/test/megdnn_helper.h" | |||||
| #include "megbrain/test/autocheck.h" | #include "megbrain/test/autocheck.h" | ||||
| #include "megbrain/opr/tensor_manip.h" | #include "megbrain/opr/tensor_manip.h" | ||||
| #include "megbrain/opr/tensor_gen.h" | #include "megbrain/opr/tensor_gen.h" | ||||
| @@ -2162,4 +2163,47 @@ TEST(TestParamPack, Split) { | |||||
| test_param_pack_split<3>({{2, 9}, {123}, {5, 3}}); | test_param_pack_split<3>({{2, 9}, {123}, {5, 3}}); | ||||
| } | } | ||||
| TEST(TestTensorManip, Padding_random) { | |||||
| opr::Padding::Param param; | |||||
| param.padding_mode = megdnn::param::Padding::PaddingMode(0); | |||||
| param.front_offset_dim0 = 3; | |||||
| param.front_offset_dim1 = 3; | |||||
| param.front_offset_dim2 = 3; | |||||
| param.front_offset_dim3 = 3; | |||||
| param.front_offset_dim4 = 0; | |||||
| param.front_offset_dim5 = 0; | |||||
| param.front_offset_dim6 = 0; | |||||
| param.back_offset_dim0 = 0; | |||||
| param.back_offset_dim1 = 0; | |||||
| param.back_offset_dim2 = 0; | |||||
| param.back_offset_dim3 = 0; | |||||
| param.back_offset_dim4 = 0; | |||||
| param.back_offset_dim5 = 0; | |||||
| param.back_offset_dim6 = 0; | |||||
| param.padding_val = 0; | |||||
| using Checker = AutoOprChecker<1,1>; | |||||
| auto make_graph = [&](const Checker::SymInpArray& inputs) -> Checker::SymOutArray { | |||||
| return {opr::Padding::make(inputs[0], param)}; | |||||
| }; | |||||
| auto fwd = [&](Checker::NumOutArray& dest, Checker::NumInpArray inp) { | |||||
| auto opr = megdnn_naive_handle()->create_operator<megdnn::Padding>(); | |||||
| TensorShape out_shp{inp[0]->as_megdnn().layout.shape[0]+param.front_offset_dim0+param.back_offset_dim0, | |||||
| inp[0]->as_megdnn().layout.shape[1]+param.front_offset_dim1+param.back_offset_dim1, | |||||
| inp[0]->as_megdnn().layout.shape[2]+param.front_offset_dim2+param.back_offset_dim2, | |||||
| inp[0]->as_megdnn().layout.shape[3]+param.front_offset_dim3+param.back_offset_dim3}; | |||||
| opr->param() = param; | |||||
| dest[0].resize(out_shp); | |||||
| opr->exec(inp[0]->as_megdnn(), dest[0].as_megdnn(), {}); | |||||
| }; | |||||
| Checker::RunOptions opt; | |||||
| opt.numdiff_max_err = 2e-3; | |||||
| Checker(make_graph, fwd, CompNode::load("xpu0")) | |||||
| .run({TensorShape{5, 5, 5, 5}}, opt) | |||||
| .run({TensorShape{4, 3, 4, 5}}, opt) | |||||
| .run({TensorShape{5, 4, 4, 5}}, opt); | |||||
| } | |||||
| // vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} | // vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} | ||||
| @@ -113,6 +113,7 @@ union OperatorParam { | |||||
| param.PermutationRNG = 79, | param.PermutationRNG = 79, | ||||
| param.BetaRNG = 80, | param.BetaRNG = 80, | ||||
| param.SlidingWindowTranspose = 81, | param.SlidingWindowTranspose = 81, | ||||
| param.Padding = 82, | |||||
| } | } | ||||
| table Operator { | table Operator { | ||||