GitOrigin-RevId: b4b494bb69
tags/v1.0.0-rc1
| @@ -105,15 +105,32 @@ class WarpPerspectiveBackwardData: public WarpPerspectiveBase { | |||
| * \param[out] grad the backpropagated gradient wrt. src | |||
| * \param[out] workspace temporary workspace to perform backward | |||
| */ | |||
| void exec(_megdnn_tensor_in mat, | |||
| _megdnn_tensor_in diff, | |||
| _megdnn_tensor_out grad, | |||
| _megdnn_workspace workspace) { | |||
| exec(mat, {}, diff, grad, workspace); | |||
| } | |||
| virtual void exec(_megdnn_tensor_in mat, | |||
| _megdnn_tensor_in mat_idx, | |||
| _megdnn_tensor_in diff, | |||
| _megdnn_tensor_out grad, | |||
| _megdnn_workspace workspace) = 0; | |||
| size_t get_workspace_in_bytes(const TensorLayout &mat, | |||
| const TensorLayout &diff, | |||
| const TensorLayout &grad) { | |||
| return get_workspace_in_bytes(mat, {}, diff, grad); | |||
| } | |||
| virtual size_t get_workspace_in_bytes(const TensorLayout &mat, | |||
| const TensorLayout &mat_idx, | |||
| const TensorLayout &diff, | |||
| const TensorLayout &grad) = 0; | |||
| protected: | |||
| void check_exec(const TensorLayout &mat, | |||
| const TensorLayout &mat_idx, | |||
| const TensorLayout &diff, | |||
| const TensorLayout &grad, | |||
| size_t workspace_in_bytes); | |||
| @@ -129,18 +146,37 @@ class WarpPerspectiveBackwardMat: public WarpPerspectiveBase { | |||
| * \param[out] grad the backpropagated gradient wrt. mat | |||
| * \param[out] workspace temporary workspace to perform backward | |||
| */ | |||
| void exec(_megdnn_tensor_in src, | |||
| _megdnn_tensor_in mat, | |||
| _megdnn_tensor_in diff, | |||
| _megdnn_tensor_out grad, | |||
| _megdnn_workspace workspace) { | |||
| exec(src, mat, {}, diff, grad, workspace); | |||
| } | |||
| virtual void exec(_megdnn_tensor_in src, | |||
| _megdnn_tensor_in mat, | |||
| _megdnn_tensor_in mat_idx, | |||
| _megdnn_tensor_in diff, | |||
| _megdnn_tensor_out grad, | |||
| _megdnn_workspace workspace) = 0; | |||
| size_t get_workspace_in_bytes(const TensorLayout &src, | |||
| const TensorLayout &mat, | |||
| const TensorLayout &diff, | |||
| const TensorLayout &grad) { | |||
| return get_workspace_in_bytes(src, mat, {}, diff, grad); | |||
| } | |||
| virtual size_t get_workspace_in_bytes(const TensorLayout &src, | |||
| const TensorLayout &mat, | |||
| const TensorLayout &mat_idx, | |||
| const TensorLayout &diff, | |||
| const TensorLayout &grad) = 0; | |||
| protected: | |||
| void check_exec(const TensorLayout &src, | |||
| const TensorLayout &mat, | |||
| const TensorLayout &mat_idx, | |||
| const TensorLayout &diff, | |||
| const TensorLayout &grad, | |||
| size_t workspace_in_bytes); | |||
| @@ -255,29 +255,31 @@ void WarpPerspectiveForward::check_exec_allow_nhwc_mat_idx( | |||
| } | |||
| void WarpPerspectiveBackwardData::check_exec(const TensorLayout& mat, | |||
| const TensorLayout& mat_idx, | |||
| const TensorLayout& diff, | |||
| const TensorLayout& grad, | |||
| size_t workspace_in_bytes) { | |||
| check_layout_fwd(grad, mat, diff); | |||
| check_layout_fwd(grad, mat, mat_idx, diff); | |||
| megdnn_assert(grad.dtype == dtype::Float32() MEGDNN_INC_FLOAT16( | |||
| || grad.dtype == dtype::BFloat16()), | |||
| "Backward WarpPerspective only supports Float32/BFloat16."); | |||
| auto required_workspace_in_bytes = get_workspace_in_bytes(mat, diff, grad); | |||
| auto required_workspace_in_bytes = get_workspace_in_bytes(mat, mat_idx, diff, grad); | |||
| megdnn_assert(workspace_in_bytes >= required_workspace_in_bytes); | |||
| } | |||
| void WarpPerspectiveBackwardMat::check_exec(const TensorLayout& src, | |||
| const TensorLayout& mat, | |||
| const TensorLayout& mat_idx, | |||
| const TensorLayout& diff, | |||
| const TensorLayout& grad, | |||
| size_t workspace_in_bytes) { | |||
| check_layout_fwd(src, mat, diff); | |||
| check_layout_fwd(src, mat, mat_idx, diff); | |||
| megdnn_assert_eq_layout(mat, grad); | |||
| megdnn_assert(grad.dtype == dtype::Float32() MEGDNN_INC_FLOAT16( | |||
| || grad.dtype == dtype::BFloat16()), | |||
| "Backward WarpPerspective only supports Float32/BFloat16."); | |||
| auto required_workspace_in_bytes = | |||
| get_workspace_in_bytes(src, mat, diff, grad); | |||
| get_workspace_in_bytes(src, mat, mat_idx, diff, grad); | |||
| megdnn_assert(workspace_in_bytes >= required_workspace_in_bytes); | |||
| } | |||
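With `mat_idx` present, the batch dimension splits into two sizes: `grad` keeps the source batch `N_SRC`, while `mat` and `diff` share the warp count `N_MAT`, and each `mat_idx[i]` names the source image the i-th gradient map is scattered into. A hedged sketch of that contract (hypothetical helper, not part of megdnn):

```cpp
#include <cassert>
#include <cstddef>

// Validate the index range of a mat_idx vector against the source batch;
// every index must address an existing image in grad/src.
void check_mat_idx_range(const int* mat_idx, size_t n_mat, size_t n_src) {
    for (size_t i = 0; i < n_mat; ++i) {
        assert(mat_idx[i] >= 0 && static_cast<size_t>(mat_idx[i]) < n_src);
    }
}
```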
| @@ -6,7 +6,8 @@ | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, | |||
| * software distributed under the License is distributed on an | |||
| * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||
| * implied. | |||
| */ | |||
| #include "src/cuda/warp_perspective/opr_impl.h" | |||
| @@ -18,8 +19,8 @@ namespace megdnn { | |||
| namespace cuda { | |||
| WorkspaceBundle WarpPerspectiveBackwardDataImpl::get_workspace_bundle( | |||
| void* ptr, const TensorLayout& mat, const TensorLayout& diff, | |||
| const TensorLayout& grad) const { | |||
| void* ptr, const TensorLayout& mat, const TensorLayout& mat_idx, | |||
| const TensorLayout& diff, const TensorLayout& grad) const { | |||
| SmallVector<size_t> sizes; | |||
| TensorLayout fmat = mat; | |||
| TensorLayout fdiff = diff; | |||
| @@ -33,20 +34,24 @@ WorkspaceBundle WarpPerspectiveBackwardDataImpl::get_workspace_bundle( | |||
| get_workspace(fmat); | |||
| get_workspace(fdiff); | |||
| get_workspace(fgrad); | |||
| sizes.push_back(get_float32_workspace_in_bytes(fmat, fdiff, fgrad)); | |||
| sizes.push_back( | |||
| get_float32_workspace_in_bytes(fmat, mat_idx, fdiff, fgrad)); | |||
| return {ptr, std::move(sizes)}; | |||
| } | |||
| void WarpPerspectiveBackwardDataImpl::exec(_megdnn_tensor_in smat, | |||
| _megdnn_tensor_in mat_idx, | |||
| _megdnn_tensor_in sdiff, | |||
| _megdnn_tensor_out sgrad, | |||
| _megdnn_workspace sworkspace) { | |||
| check_exec(smat.layout, sdiff.layout, sgrad.layout, sworkspace.size); | |||
| check_exec(smat.layout, mat_idx.layout, sdiff.layout, sgrad.layout, | |||
| sworkspace.size); | |||
| TensorND mat = smat; | |||
| TensorND diff = sdiff; | |||
| TensorND grad = sgrad; | |||
| auto bundle = get_workspace_bundle(sworkspace.raw_ptr, smat.layout, | |||
| sdiff.layout, sgrad.layout); | |||
| auto bundle = | |||
| get_workspace_bundle(sworkspace.raw_ptr, smat.layout, | |||
| mat_idx.layout, sdiff.layout, sgrad.layout); | |||
| auto ctypecvt = CompTypeCvter<dtype::BFloat16, dtype::Float32>( | |||
| concrete_handle(this->handle()), &bundle); | |||
| if (sgrad.layout.dtype.enumv() == DTypeTrait<dtype::BFloat16>::enumv) { | |||
| @@ -60,6 +65,15 @@ void WarpPerspectiveBackwardDataImpl::exec(_megdnn_tensor_in smat, | |||
| auto N = grad.layout.shape[0], C = grad.layout.shape[1], | |||
| IH = grad.layout.shape[2], IW = grad.layout.shape[3], | |||
| OH = diff.layout.shape[2], OW = diff.layout.shape[3]; | |||
| int* midx_ptr = nullptr; | |||
| if (mat_idx.raw_ptr) { | |||
| megdnn_assert(mat_idx.layout.ndim == 1); | |||
| N = mat_idx.layout.shape[0]; | |||
| midx_ptr = mat_idx.ptr<int>(); | |||
| } else { | |||
| megdnn_assert(mat_idx.layout.ndim == 0); | |||
| } | |||
| auto bval = param().border_val; | |||
| auto bmode = warp_perspective::get_bmode(param().bmode); | |||
| @@ -67,10 +81,11 @@ void WarpPerspectiveBackwardDataImpl::exec(_megdnn_tensor_in smat, | |||
| size_t max_batch_x_channel = max_batch_x_channel_size(); | |||
| if (batch_x_channel_size <= max_batch_x_channel) { | |||
| warp_perspective::backward_data_proxy( | |||
| mat.ptr<dt_float32>(), diff.ptr<dt_float32>(), | |||
| mat.ptr<dt_float32>(), midx_ptr, diff.ptr<dt_float32>(), | |||
| grad.ptr<dt_float32>(), | |||
| reinterpret_cast<float*>(workspace.raw_ptr), N, C, IH, IW, | |||
| OH, OW, bval, bmode, stream); | |||
| reinterpret_cast<float*>(workspace.raw_ptr), N, | |||
| grad.layout.shape[0], C, IH, IW, OH, OW, bval, bmode, | |||
| stream); | |||
| } else { | |||
| dt_float32* mat_ptr = mat.ptr<dt_float32>(); | |||
| dt_float32* diff_ptr = diff.ptr<dt_float32>(); | |||
| @@ -80,10 +95,10 @@ void WarpPerspectiveBackwardDataImpl::exec(_megdnn_tensor_in smat, | |||
| size_t curr_batch_size = | |||
| N > max_batch_size ? max_batch_size : N; | |||
| warp_perspective::backward_data_proxy( | |||
| mat_ptr, diff_ptr, grad_ptr, | |||
| mat_ptr, midx_ptr, diff_ptr, grad_ptr, | |||
| reinterpret_cast<float*>(workspace.raw_ptr), | |||
| curr_batch_size, C, IH, IW, OH, OW, bval, bmode, | |||
| stream); | |||
| curr_batch_size, grad.layout.shape[0], C, IH, IW, OH, | |||
| OW, bval, bmode, stream); | |||
| if (N <= max_batch_size) { | |||
| break; | |||
| @@ -91,7 +106,11 @@ void WarpPerspectiveBackwardDataImpl::exec(_megdnn_tensor_in smat, | |||
| N -= max_batch_size; | |||
| mat_ptr += curr_batch_size * mat.layout.stride[0]; | |||
| diff_ptr += curr_batch_size * diff.layout.stride[0]; | |||
| grad_ptr += curr_batch_size * grad.layout.stride[0]; | |||
| if (midx_ptr == nullptr) { | |||
| grad_ptr += curr_batch_size * grad.layout.stride[0]; | |||
| } else { | |||
| midx_ptr += curr_batch_size; | |||
| } | |||
| } | |||
| } | |||
| } | |||
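The chunked loop above differs from the dense case in which pointer it advances: without `mat_idx`, `grad` is walked chunk by chunk; with it, the whole `grad` must stay addressable (any chunk may scatter anywhere), so only the index pointer moves. A simplified sketch of that choice:

```cpp
#include <cstddef>

// Per-chunk pointer bookkeeping, mirroring the branch above: grad is
// advanced only in the dense (midx == nullptr) case.
void advance_chunk(float*& grad_ptr, const int*& midx_ptr,
                   size_t curr_batch, size_t grad_batch_stride) {
    if (midx_ptr == nullptr) {
        grad_ptr += curr_batch * grad_batch_stride;  // dense batches
    } else {
        midx_ptr += curr_batch;  // indices select into the whole grad
    }
}
```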
| @@ -102,8 +121,8 @@ void WarpPerspectiveBackwardDataImpl::exec(_megdnn_tensor_in smat, | |||
| } | |||
| size_t WarpPerspectiveBackwardDataImpl::get_float32_workspace_in_bytes( | |||
| const TensorLayout& /* mat */, const TensorLayout& diff, | |||
| const TensorLayout& grad) const { | |||
| const TensorLayout& /* mat */, const TensorLayout& mat_idx, | |||
| const TensorLayout& diff, const TensorLayout& grad) const { | |||
| auto N = grad.shape[0], C = grad.shape[1], IH = grad.shape[2], | |||
| IW = grad.shape[3]; | |||
| auto OH = diff.shape[2], OW = diff.shape[3]; | |||
| @@ -112,6 +131,9 @@ size_t WarpPerspectiveBackwardDataImpl::get_float32_workspace_in_bytes( | |||
| size_t max_batch_size = N; | |||
| size_t max_batch_x_channel = max_batch_x_channel_size(); | |||
| if (N * C > max_batch_x_channel) { | |||
| /* when the batch size is too large, the workspace holds only part of grad; | |||
| with mat_idx this would index out of the allocated range */ | |||
| megdnn_assert(mat_idx.ndim == 0, "batch size is too large; mat_idx is not supported in backward in this case."); | |||
| max_batch_size = max_batch_x_channel / C; | |||
| } | |||
| @@ -6,7 +6,8 @@ | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, | |||
| * software distributed under the License is distributed on an | |||
| * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||
| * implied. | |||
| */ | |||
| #include "src/cuda/warp_perspective/common.h" | |||
| @@ -20,16 +21,21 @@ namespace warp_perspective { | |||
| const int factor = 4; | |||
| template <typename Getter, int factor> | |||
| __global__ void warp_perspective_bwd_data_kernel(const float *hidden, | |||
| const float *mat, float *dst, | |||
| int N, int C, int IH, int IW, int OH, int OW) | |||
| { | |||
| __global__ void warp_perspective_bwd_data_kernel(const float* hidden, | |||
| const float* mat, | |||
| const int* midx, float* dst, | |||
| int N, int C, int IH, int IW, | |||
| int OH, int OW) { | |||
| Getter getter; | |||
| int n = blockIdx.z; | |||
| int ow = blockIdx.x * blockDim.x + threadIdx.x; | |||
| int oh = blockIdx.y * blockDim.y + threadIdx.y; | |||
| hidden += n * C*OH*OW; | |||
| dst += n * C*factor*IH*IW; | |||
| if (midx) { | |||
| dst += midx[n] * C * factor * IH * IW; | |||
| } else { | |||
| dst += n * C * factor * IH * IW; | |||
| } | |||
| mat += n * 3*3; | |||
| if (ow < OW && oh < OH) { | |||
| float denominator = mat[6]*ow + mat[7]*oh + mat[8]; | |||
| @@ -72,15 +78,19 @@ __global__ void add_up_kernel(const float *src, float *dst, | |||
| } | |||
| template <int factor> | |||
| __global__ void warp_perspective_bwd_data_constant_kernel(const float *hidden, | |||
| const float *mat, float *dst, | |||
| int N, int C, int IH, int IW, int OH, int OW) | |||
| { | |||
| __global__ void warp_perspective_bwd_data_constant_kernel( | |||
| const float* hidden, const float* mat, const int* midx, float* dst, | |||
| int N, int C, int IH, int IW, int OH, int OW) { | |||
| int n = blockIdx.z; | |||
| int ow = blockIdx.x * blockDim.x + threadIdx.x; | |||
| int oh = blockIdx.y * blockDim.y + threadIdx.y; | |||
| hidden += blockIdx.z * C*OH*OW; | |||
| dst += blockIdx.z * C*factor*IH*IW; | |||
| mat += blockIdx.z * 3*3; | |||
| hidden += n * C * OH * OW; | |||
| if (midx) { | |||
| dst += midx[n] * C * factor * IH * IW; | |||
| } else { | |||
| dst += n * C * factor * IH * IW; | |||
| } | |||
| mat += n * 3 * 3; | |||
| if (ow < OW && oh < OH) { | |||
| float denominator = mat[6]*ow + mat[7]*oh + mat[8]; | |||
| float iw = (mat[0]*ow + mat[1]*oh + mat[2]) / denominator; | |||
| @@ -119,30 +129,35 @@ __global__ void warp_perspective_bwd_data_constant_kernel(const float *hidden, | |||
| } | |||
| } | |||
| size_t get_backward_data_workspace_in_bytes( | |||
| int N, int C, int IH, int IW, int /* OH */, int /* OW */, | |||
| BorderMode /* bmode */) | |||
| { | |||
| size_t get_backward_data_workspace_in_bytes(int N, int C, int IH, int IW, | |||
| int /* OH */, int /* OW */, | |||
| BorderMode /* bmode */) { | |||
| return N*C*IH*IW*factor * sizeof(float); | |||
| } | |||
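As the function above shows, the float32 scatter workspace is `factor` copies of the gradient image, i.e. `N*C*IH*IW*factor` floats. A worked example with illustrative, made-up sizes:

```cpp
#include <cstddef>
#include <cstdio>

int main() {
    // factor == 4 in this file; the other sizes are arbitrary examples.
    const size_t factor = 4, N = 8, C = 3, IH = 32, IW = 32;
    size_t bytes = N * C * IH * IW * factor * sizeof(float);
    // 8 * 3 * 32 * 32 * 4 = 98304 floats = 393216 bytes (384 KiB)
    std::printf("%zu bytes\n", bytes);
    return 0;
}
```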
| void backward_data_proxy(const float *mat, const float *diff, | |||
| float *grad, float *workspace, | |||
| int N, int C, int IH, int IW, int OH, int OW, float bval, | |||
| BorderMode mode, cudaStream_t stream) | |||
| { | |||
| void backward_data_proxy(const float* mat, const int* midx, const float* diff, | |||
| float* grad, float* workspace, int N, int N_SRC, int C, | |||
| int IH, int IW, int OH, int OW, float bval, | |||
| BorderMode mode, cudaStream_t stream) { | |||
| (void)bval; | |||
| (void)grad; | |||
| const int BY = 16, BX = 32; | |||
| { | |||
| dim3 threads(BX, BY); | |||
| dim3 blocks((OW+BX-1)/BX, (OH+BY-1)/BY, N); | |||
| cuda_check(cudaMemsetAsync(workspace, 0, sizeof(float) * factor*N*C*IH*IW, | |||
| if (midx) { | |||
| cuda_check(cudaMemsetAsync( | |||
| workspace, 0, sizeof(float) * factor * N_SRC * C * IH * IW, | |||
| stream)); | |||
| #define DISPATCH(Getter) \ | |||
| warp_perspective_bwd_data_kernel<Getter, factor><<<blocks, threads, \ | |||
| 0, stream>>>(diff, mat, workspace, N, C, IH, IW, OH, OW); | |||
| } else { | |||
| cuda_check(cudaMemsetAsync(workspace, 0, | |||
| sizeof(float) * factor * N * C * IH * IW, | |||
| stream)); | |||
| } | |||
| #define DISPATCH(Getter) \ | |||
| warp_perspective_bwd_data_kernel<Getter, factor> \ | |||
| <<<blocks, threads, 0, stream>>>(diff, mat, midx, workspace, N, C, \ | |||
| IH, IW, OH, OW); | |||
| switch (mode) { | |||
| case BORDER_REPLICATE: | |||
| DISPATCH(ReplicateGetter); | |||
| @@ -158,8 +173,9 @@ void backward_data_proxy(const float *mat, const float *diff, | |||
| break; | |||
| case BORDER_CONSTANT: | |||
| warp_perspective_bwd_data_constant_kernel<factor> | |||
| <<<blocks, threads, 0, stream>>> | |||
| (diff, mat, workspace, N, C, IH, IW, OH, OW); | |||
| <<<blocks, threads, 0, stream>>>(diff, mat, midx, | |||
| workspace, N, C, IH, | |||
| IW, OH, OW); | |||
| break; | |||
| default: | |||
| break; | |||
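The `DISPATCH` macro instantiates the kernel once per border-mode getter, so border handling is resolved at compile time instead of branching per pixel. A rough, hypothetical rendition of such a getter (the real `ReplicateGetter` lives in megdnn's CUDA helpers and may differ in signature):

```cpp
// Clamp-to-edge coordinate getter: maps any integer coordinate into
// [0, n). Passed as a template parameter so the compiler can inline it.
struct ReplicateGetterSketch {
    int operator()(int i, int n) const {
        return i < 0 ? 0 : (i >= n ? n - 1 : i);
    }
};
```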
| @@ -169,9 +185,15 @@ void backward_data_proxy(const float *mat, const float *diff, | |||
| { | |||
| int THREADS = 512; | |||
| dim3 threads(THREADS); | |||
| dim3 blocks((IH*IW+THREADS-1)/THREADS, N*C); | |||
| add_up_kernel<factor><<<blocks, threads, 0, stream>>>(workspace, grad, | |||
| IH*IW); | |||
| if (midx) { | |||
| dim3 blocks((IH * IW + THREADS - 1) / THREADS, N_SRC * C); | |||
| add_up_kernel<factor> | |||
| <<<blocks, threads, 0, stream>>>(workspace, grad, IH * IW); | |||
| } else { | |||
| dim3 blocks((IH * IW + THREADS - 1) / THREADS, N * C); | |||
| add_up_kernel<factor> | |||
| <<<blocks, threads, 0, stream>>>(workspace, grad, IH * IW); | |||
| } | |||
| } | |||
| after_kernel_launch(); | |||
| } | |||
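After the scatter phase, `add_up_kernel<factor>` folds the `factor` partial copies of each plane into `grad`; with `mat_idx` the grid covers `N_SRC * C` planes, otherwise `N * C`. A CPU analogue, under the assumption that the `factor` partial planes of one (n, c) slice are laid out contiguously (the device kernel's exact layout and accumulation may differ):

```cpp
#include <cstddef>

// Sum `factor` partial planes of `plane_size` floats into out[].
void add_up_plane(const float* partial, float* out,
                  size_t plane_size, size_t factor) {
    for (size_t i = 0; i < plane_size; ++i) {
        float s = 0.f;
        for (size_t k = 0; k < factor; ++k)
            s += partial[k * plane_size + i];
        out[i] = s;
    }
}
```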
| @@ -181,4 +203,3 @@ void backward_data_proxy(const float *mat, const float *diff, | |||
| } // namespace megdnn | |||
| // vim: syntax=cpp.doxygen | |||
| @@ -6,7 +6,8 @@ | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, | |||
| * software distributed under the License is distributed on an | |||
| * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||
| * implied. | |||
| */ | |||
| #include "src/cuda/warp_perspective/opr_impl.h" | |||
| @@ -40,15 +41,17 @@ WorkspaceBundle WarpPerspectiveBackwardMatImpl::get_workspace_bundle( | |||
| void WarpPerspectiveBackwardMatImpl::exec(_megdnn_tensor_in ssrc, | |||
| _megdnn_tensor_in smat, | |||
| _megdnn_tensor_in smat_idx, | |||
| _megdnn_tensor_in sdiff, | |||
| _megdnn_tensor_out sgrad, | |||
| _megdnn_workspace sworkspace) { | |||
| check_exec(ssrc.layout, smat.layout, sdiff.layout, sgrad.layout, | |||
| sworkspace.size); | |||
| check_exec(ssrc.layout, smat.layout, smat_idx.layout, sdiff.layout, | |||
| sgrad.layout, sworkspace.size); | |||
| TensorND src = ssrc; | |||
| TensorND mat = smat; | |||
| TensorND diff = sdiff; | |||
| TensorND grad = sgrad; | |||
| TensorND mat_idx = smat_idx; | |||
| auto bundle = get_workspace_bundle(sworkspace.raw_ptr, ssrc.layout, | |||
| smat.layout, sdiff.layout, sgrad.layout); | |||
| auto ctypecvt = CompTypeCvter<dtype::BFloat16, dtype::Float32>( | |||
| @@ -64,6 +67,15 @@ void WarpPerspectiveBackwardMatImpl::exec(_megdnn_tensor_in ssrc, | |||
| auto N = src.layout.shape[0], C = src.layout.shape[1], | |||
| IH = src.layout.shape[2], IW = src.layout.shape[3], | |||
| OH = diff.layout.shape[2], OW = diff.layout.shape[3]; | |||
| int* midx_ptr = nullptr; | |||
| if (mat_idx.raw_ptr) { | |||
| megdnn_assert(mat_idx.layout.ndim == 1); | |||
| N = mat_idx.layout.shape[0]; | |||
| midx_ptr = mat_idx.ptr<int>(); | |||
| } else { | |||
| megdnn_assert(mat_idx.layout.ndim == 0); | |||
| } | |||
| auto bval = param().border_val; | |||
| auto bmode = warp_perspective::get_bmode(param().bmode); | |||
| @@ -71,7 +83,7 @@ void WarpPerspectiveBackwardMatImpl::exec(_megdnn_tensor_in ssrc, | |||
| size_t max_batch_x_channel = max_batch_x_channel_size(); | |||
| if (batch_x_channel_size <= max_batch_x_channel) { | |||
| warp_perspective::backward_mat_proxy( | |||
| src.ptr<dt_float32>(), mat.ptr<dt_float32>(), | |||
| src.ptr<dt_float32>(), mat.ptr<dt_float32>(), midx_ptr, | |||
| diff.ptr<dt_float32>(), grad.ptr<dt_float32>(), N, C, IH, | |||
| IW, OH, OW, bval, bmode, stream); | |||
| } else { | |||
| @@ -84,14 +96,19 @@ void WarpPerspectiveBackwardMatImpl::exec(_megdnn_tensor_in ssrc, | |||
| size_t curr_batch_size = | |||
| N > max_batch_size ? max_batch_size : N; | |||
| warp_perspective::backward_mat_proxy( | |||
| src_ptr, mat_ptr, diff_ptr, grad_ptr, curr_batch_size, | |||
| C, IH, IW, OH, OW, bval, bmode, stream); | |||
| src_ptr, mat_ptr, midx_ptr, diff_ptr, grad_ptr, | |||
| curr_batch_size, C, IH, IW, OH, OW, bval, bmode, | |||
| stream); | |||
| if (N <= max_batch_size) { | |||
| break; | |||
| } else { | |||
| N -= max_batch_size; | |||
| src_ptr += curr_batch_size * src.layout.stride[0]; | |||
| if (midx_ptr == nullptr) { | |||
| src_ptr += curr_batch_size * src.layout.stride[0]; | |||
| } else { | |||
| midx_ptr += curr_batch_size; | |||
| } | |||
| mat_ptr += curr_batch_size * mat.layout.stride[0]; | |||
| diff_ptr += curr_batch_size * diff.layout.stride[0]; | |||
| grad_ptr += curr_batch_size * grad.layout.stride[0]; | |||
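Note the symmetry with the backward-data loop: there `grad` (the scatter target) was the indexed tensor, here `src` (the gather source) is, so `src_ptr` stays put under `mat_idx` while `mat_ptr`, `diff_ptr`, and `grad_ptr` advance densely per chunk. A compact sketch of this per-chunk advance:

```cpp
#include <cstddef>

// BackwardMat chunk bookkeeping: only src is subject to mat_idx.
void advance_mat_chunk(const float*& src_ptr, const int*& midx_ptr,
                       size_t curr_batch, size_t src_batch_stride) {
    if (midx_ptr == nullptr)
        src_ptr += curr_batch * src_batch_stride;  // dense source batch
    else
        midx_ptr += curr_batch;  // indices gather from the whole src
}
```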
| @@ -109,4 +126,3 @@ void WarpPerspectiveBackwardMatImpl::exec(_megdnn_tensor_in ssrc, | |||
| } // namespace megdnn | |||
| // vim: syntax=cpp.doxygen | |||
| @@ -6,7 +6,8 @@ | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, | |||
| * software distributed under the License is distributed on an | |||
| * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||
| * implied. | |||
| */ | |||
| #include "src/cuda/warp_perspective/common.h" | |||
| @@ -20,17 +21,21 @@ namespace cuda { | |||
| namespace warp_perspective { | |||
| template <typename Getter> | |||
| __global__ void warp_perspective_bwd_mat_kernel(const float *hidden, | |||
| const float *in, const float *mat, float *grad, | |||
| int N, int C, int IH, int IW, int OH, int OW) | |||
| { | |||
| __global__ void warp_perspective_bwd_mat_kernel( | |||
| const float* hidden, const float* in, const float* mat, const int* midx, | |||
| float* grad, int N, int C, int IH, int IW, int OH, int OW) { | |||
| Getter getter; | |||
| int n = blockIdx.z; | |||
| int ow = blockIdx.x * blockDim.x + threadIdx.x; | |||
| int oh = blockIdx.y * blockDim.y + threadIdx.y; | |||
| hidden += blockIdx.z * C*OH*OW; | |||
| in += blockIdx.z * C*IH*IW; | |||
| mat += blockIdx.z * 3*3; | |||
| grad += blockIdx.z * 3*3; | |||
| if (midx) { | |||
| in += midx[n] * C * IH * IW; | |||
| } else { | |||
| in += n * C * IH * IW; | |||
| } | |||
| mat += n * 3*3; | |||
| grad += n * 3*3; | |||
| float grad_local[3*3]; | |||
| memset(grad_local, 0, sizeof(grad_local)); | |||
| if (ow < OW && oh < OH) { | |||
| @@ -83,9 +88,8 @@ __global__ void warp_perspective_bwd_mat_kernel(const float *hidden, | |||
| dh[8] = 1.0f * ddenominatorh; | |||
| #pragma unroll | |||
| for (int i = 0; i < 9; ++i) { | |||
| grad_local[i] += | |||
| hidden[oh*OW+ow] * dalpha * dh[i] + | |||
| hidden[oh*OW+ow] * dbeta * dw[i]; | |||
| grad_local[i] += hidden[oh * OW + ow] * dalpha * dh[i] + | |||
| hidden[oh * OW + ow] * dbeta * dw[i]; | |||
| } | |||
| hidden += OH*OW; | |||
| in += IH*IW; | |||
| @@ -125,17 +129,21 @@ __global__ void warp_perspective_bwd_mat_kernel(const float *hidden, | |||
| } | |||
| } | |||
| __global__ void warp_perspective_bwd_mat_constant_kernel(const float *hidden, | |||
| const float *in, const float *mat, float *grad, | |||
| int N, int C, int IH, int IW, int OH, int OW, float bval) | |||
| { | |||
| __global__ void warp_perspective_bwd_mat_constant_kernel( | |||
| const float* hidden, const float* in, const float* mat, const int* midx, | |||
| float* grad, int N, int C, int IH, int IW, int OH, int OW, float bval) { | |||
| int n = blockIdx.z; | |||
| int ow = blockIdx.x * blockDim.x + threadIdx.x; | |||
| int oh = blockIdx.y * blockDim.y + threadIdx.y; | |||
| hidden += blockIdx.z * C*OH*OW; | |||
| in += blockIdx.z * C*IH*IW; | |||
| mat += blockIdx.z * 3*3; | |||
| grad += blockIdx.z * 3*3; | |||
| float grad_local[3*3]; | |||
| hidden += blockIdx.z * C * OH * OW; | |||
| if (midx) { | |||
| in += midx[n] * C * IH * IW; | |||
| } else { | |||
| in += n * C * IH * IW; | |||
| } | |||
| mat += n * 3 * 3; | |||
| grad += n * 3 * 3; | |||
| float grad_local[3 * 3]; | |||
| memset(grad_local, 0, sizeof(grad_local)); | |||
| if (ow < OW && oh < OH) { | |||
| float numeratorw = mat[0]*ow + mat[1]*oh + mat[2]; | |||
| @@ -199,10 +207,10 @@ __global__ void warp_perspective_bwd_mat_constant_kernel(const float *hidden, | |||
| dh[8] = 1.0f * ddenominatorh; | |||
| #pragma unroll | |||
| for (int i = 0; i < 9; ++i) { | |||
| float delta = | |||
| hidden[oh*OW+ow] * dalpha * dh[i] + | |||
| hidden[oh*OW+ow] * dbeta * dw[i]; | |||
| if (isfinite(delta)) grad_local[i] += delta; | |||
| float delta = hidden[oh * OW + ow] * dalpha * dh[i] + | |||
| hidden[oh * OW + ow] * dbeta * dw[i]; | |||
| if (isfinite(delta)) | |||
| grad_local[i] += delta; | |||
| } | |||
| hidden += OH*OW; | |||
| in += IH*IW; | |||
| @@ -227,8 +235,9 @@ __global__ void warp_perspective_bwd_mat_constant_kernel(const float *hidden, | |||
| for (int k = 16; k >= 1; k >>= 1) { | |||
| if (tidx < k) { | |||
| #pragma unroll | |||
| for (int i = 0; i < 9; ++i) | |||
| grad_shared[tidy][tidx][i] += grad_shared[tidy][tidx+k][i]; | |||
| for (int i = 0; i < 9; ++i) | |||
| grad_shared[tidy][tidx][i] += | |||
| grad_shared[tidy][tidx + k][i]; | |||
| } | |||
| cub::WARP_SYNC(0xffffffff); | |||
| } | |||
| @@ -240,18 +249,17 @@ __global__ void warp_perspective_bwd_mat_constant_kernel(const float *hidden, | |||
| } | |||
| } | |||
| void backward_mat_proxy(const float *src, const float *mat, | |||
| const float *diff, float *grad, | |||
| int N, int C, int IH, int IW, int OH, int OW, float bval, | |||
| BorderMode mode, cudaStream_t stream) | |||
| { | |||
| void backward_mat_proxy(const float* src, const float* mat, const int* midx, | |||
| const float* diff, float* grad, int N, int C, int IH, | |||
| int IW, int OH, int OW, float bval, BorderMode mode, | |||
| cudaStream_t stream) { | |||
| const int BY = 16, BX = 32; | |||
| dim3 threads(BX, BY); | |||
| dim3 blocks((OW+BX-1)/BX, (OH+BY-1)/BY, N); | |||
| cuda_check(cudaMemsetAsync(grad, 0, sizeof(float) * N*3*3, stream)); | |||
| #define DISPATCH(Getter) \ | |||
| #define DISPATCH(Getter) \ | |||
| warp_perspective_bwd_mat_kernel<Getter><<<blocks, threads, 0, stream>>>( \ | |||
| diff, src, mat, grad, N, C, IH, IW, OH, OW); | |||
| diff, src, mat, midx, grad, N, C, IH, IW, OH, OW); | |||
| switch (mode) { | |||
| case BORDER_REPLICATE: | |||
| DISPATCH(ReplicateGetter); | |||
| @@ -266,8 +274,9 @@ void backward_mat_proxy(const float *src, const float *mat, | |||
| DISPATCH(WrapGetter); | |||
| break; | |||
| case BORDER_CONSTANT: | |||
| warp_perspective_bwd_mat_constant_kernel<<<blocks, threads, 0, stream>>>( | |||
| diff, src, mat, grad, N, C, IH, IW, OH, OW, bval); | |||
| warp_perspective_bwd_mat_constant_kernel<<<blocks, threads, 0, | |||
| stream>>>( | |||
| diff, src, mat, midx, grad, N, C, IH, IW, OH, OW, bval); | |||
| break; | |||
| default: | |||
| break; | |||
| @@ -281,4 +290,3 @@ void backward_mat_proxy(const float *src, const float *mat, | |||
| } // namespace megdnn | |||
| // vim: syntax=cpp.doxygen | |||
| @@ -6,7 +6,8 @@ | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, | |||
| * software distributed under the License is distributed on an | |||
| * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||
| * implied. | |||
| */ | |||
| #pragma once | |||
| #include <cuda_runtime_api.h> | |||
| @@ -19,40 +20,34 @@ namespace warp_perspective { | |||
| // all these kernels use bilinear interpolation | |||
| template<typename ctype> | |||
| void forward_proxy( | |||
| bool is_nhwc, | |||
| const ctype *src, const float *mat, const int *mat_idx, | |||
| ctype *dst, int N_SRC, int N_MAT, | |||
| int C, int IH, int IW, int OH, int OW, ctype bval, | |||
| BorderMode bmode, | |||
| megcore::AsyncErrorInfo* error_info, void* error_tracker, | |||
| cudaStream_t stream); | |||
| template <typename ctype> | |||
| void forward_proxy(bool is_nhwc, const ctype* src, const float* mat, | |||
| const int* mat_idx, ctype* dst, int N_SRC, int N_MAT, int C, | |||
| int IH, int IW, int OH, int OW, ctype bval, BorderMode bmode, | |||
| megcore::AsyncErrorInfo* error_info, void* error_tracker, | |||
| cudaStream_t stream); | |||
| template <typename ctype> | |||
| void forward_proxy_nchw4( | |||
| const ctype *src, const float *mat, const int *mat_idx, | |||
| ctype *dst, int N_SRC, int N_MAT, | |||
| int C, int IH, int IW, int OH, int OW, ctype bval, | |||
| BorderMode bmode, | |||
| megcore::AsyncErrorInfo* error_info, void* error_tracker, | |||
| cudaStream_t stream); | |||
| void backward_data_proxy(const float *mat, const float *diff, float *grad, | |||
| float *workspace, | |||
| int N, int C, int IH, int IW, int OH, int OW, float bval, | |||
| BorderMode bmode, cudaStream_t stream); | |||
| size_t get_backward_data_workspace_in_bytes( | |||
| int N, int C, int IH, int IW, int OH, int OW, | |||
| BorderMode bmode); | |||
| void backward_mat_proxy( | |||
| const float *src, const float *mat, const float *diff, float *grad, | |||
| int N, int C, int IH, int IW, int OH, int OW, float bval, | |||
| BorderMode bmode, cudaStream_t stream); | |||
| } // namespace warp_perspective | |||
| } // namespace cuda | |||
| } // namespace megdnn | |||
| void forward_proxy_nchw4(const ctype* src, const float* mat, const int* mat_idx, | |||
| ctype* dst, int N_SRC, int N_MAT, int C, int IH, | |||
| int IW, int OH, int OW, ctype bval, BorderMode bmode, | |||
| megcore::AsyncErrorInfo* error_info, | |||
| void* error_tracker, cudaStream_t stream); | |||
| void backward_data_proxy(const float* mat, const int* midx, const float* diff, | |||
| float* grad, float* workspace, int N, int N_SRC, int C, | |||
| int IH, int IW, int OH, int OW, float bval, | |||
| BorderMode bmode, cudaStream_t stream); | |||
| size_t get_backward_data_workspace_in_bytes(int N, int C, int IH, int IW, | |||
| int OH, int OW, BorderMode bmode); | |||
| void backward_mat_proxy(const float* src, const float* mat, const int* midx, | |||
| const float* diff, float* grad, int N, int C, int IH, | |||
| int IW, int OH, int OW, float bval, BorderMode bmode, | |||
| cudaStream_t stream); | |||
| } // namespace warp_perspective | |||
| } // namespace cuda | |||
| } // namespace megdnn | |||
| // vim: syntax=cpp.doxygen | |||
| @@ -6,7 +6,8 @@ | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, | |||
| * software distributed under the License is distributed on an | |||
| * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||
| * implied. | |||
| */ | |||
| #pragma once | |||
| #include "megdnn/oprs.h" | |||
| @@ -48,20 +49,24 @@ class WarpPerspectiveBackwardDataImpl final | |||
| : public WarpPerspectiveBackwardData { | |||
| public: | |||
| using WarpPerspectiveBackwardData::WarpPerspectiveBackwardData; | |||
| void exec(_megdnn_tensor_in mat, _megdnn_tensor_in diff, | |||
| _megdnn_tensor_out grad, _megdnn_workspace workspace) override; | |||
| void exec(_megdnn_tensor_in mat, _megdnn_tensor_in mat_idx, | |||
| _megdnn_tensor_in diff, _megdnn_tensor_out grad, | |||
| _megdnn_workspace workspace) override; | |||
| size_t get_workspace_in_bytes(const TensorLayout& mat, | |||
| const TensorLayout& mat_idx, | |||
| const TensorLayout& diff, | |||
| const TensorLayout& grad) override { | |||
| return get_workspace_bundle(nullptr, mat, diff, grad) | |||
| return get_workspace_bundle(nullptr, mat, mat_idx, diff, grad) | |||
| .total_size_in_bytes(); | |||
| } | |||
| private: | |||
| WorkspaceBundle get_workspace_bundle(void* ptr, const TensorLayout& mat, | |||
| const TensorLayout& mat_idx, | |||
| const TensorLayout& diff, | |||
| const TensorLayout& grad) const; | |||
| size_t get_float32_workspace_in_bytes(const TensorLayout& mat, | |||
| const TensorLayout& mat_idx, | |||
| const TensorLayout& diff, | |||
| const TensorLayout& grad) const; | |||
| }; | |||
| @@ -70,10 +75,11 @@ class WarpPerspectiveBackwardMatImpl final : public WarpPerspectiveBackwardMat { | |||
| public: | |||
| using WarpPerspectiveBackwardMat::WarpPerspectiveBackwardMat; | |||
| void exec(_megdnn_tensor_in src, _megdnn_tensor_in mat, | |||
| _megdnn_tensor_in diff, _megdnn_tensor_out grad, | |||
| _megdnn_workspace workspace) override; | |||
| _megdnn_tensor_in mat_idx, _megdnn_tensor_in diff, | |||
| _megdnn_tensor_out grad, _megdnn_workspace workspace) override; | |||
| size_t get_workspace_in_bytes(const TensorLayout& src, | |||
| const TensorLayout& mat, | |||
| const TensorLayout& /* mat_idx */, | |||
| const TensorLayout& diff, | |||
| const TensorLayout& grad) override { | |||
| return get_workspace_bundle(nullptr, src, mat, diff, grad) | |||
| @@ -6,7 +6,8 @@ | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, | |||
| * software distributed under the License is distributed on an | |||
| * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||
| * implied. | |||
| */ | |||
| #include "src/naive/warp_perspective/opr_impl.h" | |||
| #include "src/naive/warp_perspective/warp_perspective_cv.h" | |||
| @@ -358,18 +359,29 @@ void WarpPerspectiveForwardImpl::exec(_megdnn_tensor_in src, | |||
| } | |||
| template <typename ctype, typename mtype> | |||
| void WarpPerspectiveBackwardDataImpl::kern_naive(const KernParam<ctype, mtype>& kern_param) { | |||
| const int N = kern_param.n, C = kern_param.c, | |||
| IH = kern_param.ih, IW = kern_param.iw; | |||
| void WarpPerspectiveBackwardDataImpl::kern_naive( | |||
| const KernParam<ctype, mtype>& kern_param) { | |||
| const int N = kern_param.n_mat, C = kern_param.c, IH = kern_param.ih, | |||
| IW = kern_param.iw; | |||
| const int OH = kern_param.oh, OW = kern_param.ow; | |||
| const ctype* hptr_ = kern_param.hptr; | |||
| const mtype* mptr_ = kern_param.mptr; | |||
| ctype* sptr_ = kern_param.sptr; | |||
| int* midx_ptr = kern_param.midx_ptr; | |||
| auto hptr = hptr_; | |||
| auto mptr = mptr_; | |||
| auto sptr = sptr_; | |||
| std::memset(sptr, 0, sizeof(ctype) * N * C * IH * IW); | |||
| if (midx_ptr) { | |||
| std::memset(sptr, 0, sizeof(ctype) * kern_param.n_src * C * IH * IW); | |||
| } else { | |||
| std::memset(sptr, 0, sizeof(ctype) * N * C * IH * IW); | |||
| } | |||
| rep(n, N) { | |||
| if (midx_ptr) { | |||
| sptr = sptr_ + midx_ptr[n] * C * IH * IW; | |||
| } else { | |||
| sptr = sptr_ + n * C * IH * IW; | |||
| } | |||
| rep(oh, OH) rep(ow, OW) { | |||
| float numeratorw = mptr[0] * ow + mptr[1] * oh + mptr[2]; | |||
| float numeratorh = mptr[3] * ow + mptr[4] * oh + mptr[5]; | |||
| @@ -404,27 +416,30 @@ void WarpPerspectiveBackwardDataImpl::kern_naive(const KernParam<ctype, mtype>& | |||
| } | |||
| } | |||
| } | |||
| sptr += C * IH * IW; | |||
| hptr += C * OH * OW; | |||
| mptr += 3 * 3; | |||
| } | |||
| } | |||
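The naive kernel above now zeroes the full `n_src` batch and re-derives the scatter base per matrix instead of advancing `sptr` linearly. A self-contained skeleton of that batch handling (bilinear gradient math elided; names follow the KernParam fields):

```cpp
#include <cstddef>
#include <cstring>

void backward_data_batches(float* sptr_, const float* hptr,
                           const int* midx_ptr, size_t n_mat, size_t n_src,
                           size_t c, size_t ih, size_t iw,
                           size_t oh, size_t ow) {
    const size_t src_sz = c * ih * iw, diff_sz = c * oh * ow;
    // zero every source image that could receive gradient
    std::memset(sptr_, 0,
                sizeof(float) * (midx_ptr ? n_src : n_mat) * src_sz);
    for (size_t n = 0; n < n_mat; ++n) {
        float* sptr = midx_ptr ? sptr_ + midx_ptr[n] * src_sz
                               : sptr_ + n * src_sz;
        (void)sptr;  // ... scatter bilinear gradients of hptr into sptr ...
        hptr += diff_sz;  // diff is dense in the matrix batch
    }
}
```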
| void WarpPerspectiveBackwardDataImpl::exec(_megdnn_tensor_in mat, | |||
| _megdnn_tensor_in mat_idx, | |||
| _megdnn_tensor_in diff, | |||
| _megdnn_tensor_out grad, | |||
| _megdnn_workspace workspace) { | |||
| check_exec(mat.layout, diff.layout, grad.layout, workspace.size); | |||
| check_exec(mat.layout, mat_idx.layout, diff.layout, grad.layout, | |||
| workspace.size); | |||
| megdnn_assert(param().format == param::WarpPerspective::Format::NCHW, | |||
| "invalid warp_perspective format"); | |||
| #define DISPATCH_ST_MT(dt, ct) \ | |||
| if (diff.layout.dtype.enumv() == DTypeTrait<dt>::enumv) { \ | |||
| if (mat.layout.dtype.enumv() == DTypeTrait<dtype::Float32>::enumv) { \ | |||
| auto kparam = KernParam<ct, float>::from_tensors(mat, diff, grad); \ | |||
| auto kparam = KernParam<ct, float>::from_tensors(mat, mat_idx, \ | |||
| diff, grad); \ | |||
| MEGDNN_DISPATCH_CPU_KERN_OPR(kern_naive(kparam)); \ | |||
| return; \ | |||
| } else { \ | |||
| auto kparam = KernParam<ct, ct>::from_tensors(mat, diff, grad); \ | |||
| auto kparam = \ | |||
| KernParam<ct, ct>::from_tensors(mat, mat_idx, diff, grad); \ | |||
| MEGDNN_DISPATCH_CPU_KERN_OPR(kern_naive(kparam)); \ | |||
| return; \ | |||
| } \ | |||
| @@ -441,7 +456,7 @@ void WarpPerspectiveBackwardDataImpl::exec(_megdnn_tensor_in mat, | |||
| template <typename ctype, typename mtype> | |||
| void WarpPerspectiveBackwardMatImpl::kern_naive( | |||
| const KernParam<ctype, mtype>& kern_param) { | |||
| const int N = kern_param.n, C = kern_param.c, IH = kern_param.ih, | |||
| const int N = kern_param.n_mat, C = kern_param.c, IH = kern_param.ih, | |||
| IW = kern_param.iw; | |||
| const int OH = kern_param.oh, OW = kern_param.ow; | |||
| @@ -449,9 +464,15 @@ void WarpPerspectiveBackwardMatImpl::kern_naive( | |||
| auto sptr = kern_param.sptr; | |||
| auto mptr = kern_param.mptr; | |||
| auto res = kern_param.res; | |||
| auto midx_ptr = kern_param.midx_ptr; | |||
| auto border_val = kern_param.border_val; | |||
| std::memset(res, 0, sizeof(float) * N * 3 * 3); | |||
| rep(n, N) { | |||
| if (midx_ptr) { | |||
| sptr = kern_param.sptr + midx_ptr[n] * C * IH * IW; | |||
| } else { | |||
| sptr = kern_param.sptr + n * C * IH * IW; | |||
| } | |||
| rep(oh, OH) rep(ow, OW) { | |||
| float numeratorw = mptr[0] * ow + mptr[1] * oh + mptr[2]; | |||
| float numeratorh = mptr[3] * ow + mptr[4] * oh + mptr[5]; | |||
| @@ -537,7 +558,6 @@ void WarpPerspectiveBackwardMatImpl::kern_naive( | |||
| } | |||
| } | |||
| hptr += C * OH * OW; | |||
| sptr += C * IH * IW; | |||
| mptr += 3 * 3; | |||
| res += 3 * 3; | |||
| } | |||
| @@ -545,21 +565,22 @@ void WarpPerspectiveBackwardMatImpl::kern_naive( | |||
| void WarpPerspectiveBackwardMatImpl::exec(_megdnn_tensor_in src, | |||
| _megdnn_tensor_in mat, | |||
| _megdnn_tensor_in mat_idx, | |||
| _megdnn_tensor_in diff, | |||
| _megdnn_tensor_out grad, | |||
| _megdnn_workspace workspace) { | |||
| check_exec(src.layout, mat.layout, diff.layout, grad.layout, | |||
| check_exec(src.layout, mat.layout, mat_idx.layout, diff.layout, grad.layout, | |||
| workspace.size); | |||
| #define DISPATCH_ST_MT(dt, ct) \ | |||
| if (src.layout.dtype.enumv() == DTypeTrait<dt>::enumv) { \ | |||
| if (mat.layout.dtype.enumv() == DTypeTrait<dtype::Float32>::enumv) { \ | |||
| auto kparam = KernParam<ct, float>::from_tensors( \ | |||
| param().border_val, src, mat, diff, grad); \ | |||
| param().border_val, src, mat, mat_idx, diff, grad); \ | |||
| MEGDNN_DISPATCH_CPU_KERN_OPR(kern_naive(kparam)); \ | |||
| return; \ | |||
| } else { \ | |||
| auto kparam = KernParam<ct, ct>::from_tensors( \ | |||
| param().border_val, src, mat, diff, grad); \ | |||
| param().border_val, src, mat, mat_idx, diff, grad); \ | |||
| MEGDNN_DISPATCH_CPU_KERN_OPR(kern_naive(kparam)); \ | |||
| return; \ | |||
| } \ | |||
| @@ -6,7 +6,8 @@ | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, | |||
| * software distributed under the License is distributed on an | |||
| * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||
| * implied. | |||
| */ | |||
| #pragma once | |||
| #include "megdnn/oprs.h" | |||
| @@ -15,144 +16,158 @@ | |||
| namespace megdnn { | |||
| namespace naive { | |||
| class WarpPerspectiveForwardImpl: public WarpPerspectiveForward { | |||
| protected: | |||
| using Format = Param::Format; | |||
| template <typename ctype, typename mtype> | |||
| struct KernParam { | |||
| Format format; | |||
| BorderMode bmode; | |||
| float border_val; | |||
| size_t n_src, n_mat, c, ih, iw, oh, ow; | |||
| ctype *sptr, *dptr; | |||
| mtype *mptr; | |||
| int *midx_ptr; //!< can be null | |||
| Workspace workspace; | |||
| static KernParam from_tensors( | |||
| Format format, BorderMode bmode, float border_val, | |||
| _megdnn_tensor_in src, _megdnn_tensor_in mat, | |||
| _megdnn_tensor_in mat_idx, _megdnn_tensor_out dst, | |||
| _megdnn_workspace workspace) { | |||
| KernParam ret; | |||
| ret.format = format; | |||
| ret.bmode = bmode; | |||
| ret.border_val = border_val; | |||
| ret.n_src = src.layout.shape[0]; | |||
| if (mat_idx.raw_ptr) { | |||
| megdnn_assert(mat_idx.layout.ndim == 1); | |||
| ret.n_mat = mat_idx.layout.shape[0]; | |||
| ret.midx_ptr = mat_idx.ptr<int>(); | |||
| } else { | |||
| megdnn_assert(mat_idx.layout.ndim == 0); | |||
| ret.n_mat = ret.n_src; | |||
| ret.midx_ptr = nullptr; | |||
| } | |||
| if (format == Format::NCHW) { | |||
| ret.c = src.layout.shape[1]; | |||
| ret.ih = src.layout.shape[2]; | |||
| ret.iw = src.layout.shape[3]; | |||
| ret.oh = dst.layout.shape[2]; | |||
| ret.ow = dst.layout.shape[3]; | |||
| } else if (format == Format::NHWC) { | |||
| ret.c = src.layout.shape[3]; | |||
| ret.ih = src.layout.shape[1]; | |||
| ret.iw = src.layout.shape[2]; | |||
| ret.oh = dst.layout.shape[1]; | |||
| ret.ow = dst.layout.shape[2]; | |||
| } else if (format == Format::NCHW4) { | |||
| ret.c = src.layout.shape[1] * 4; | |||
| ret.ih = src.layout.shape[2]; | |||
| ret.iw = src.layout.shape[3]; | |||
| ret.oh = dst.layout.shape[2]; | |||
| ret.ow = dst.layout.shape[3]; | |||
| } else { | |||
| megdnn_assert(format == Format::NHWCD4); | |||
| ret.c = src.layout.shape[2] * 4; | |||
| ret.ih = src.layout.shape[1]; | |||
| ret.iw = src.layout.shape[3]; | |||
| ret.oh = dst.layout.shape[1]; | |||
| ret.ow = dst.layout.shape[3]; | |||
| } | |||
| if (src.layout.dtype.enumv() == DTypeEnum::Float32 || | |||
| MEGDNN_FLOAT16_SELECT( | |||
| (src.layout.dtype.enumv() == DTypeEnum::Float16 || | |||
| src.layout.dtype.enumv() == DTypeEnum::BFloat16), | |||
| false) || | |||
| src.layout.dtype.enumv() == DTypeEnum::Int8 || | |||
| src.layout.dtype.enumv() == DTypeEnum::Uint8 || | |||
| src.layout.dtype.enumv() == DTypeEnum::QuantizedS8 || | |||
| src.layout.dtype.enumv() == DTypeEnum::Quantized8Asymm) { | |||
| ret.sptr = src.compatible_ptr<ctype>(); | |||
| ret.mptr = mat.ptr<mtype>(); | |||
| ret.dptr = dst.compatible_ptr<ctype>(); | |||
| } else if (src.layout.dtype.enumv() == DTypeEnum::QuantizedS8) { | |||
| ret.sptr = src.compatible_ptr<ctype>(); | |||
| ret.mptr = mat.ptr<mtype>(); | |||
| ret.dptr = dst.compatible_ptr<ctype>(); | |||
| } else { | |||
| ret.sptr = nullptr; | |||
| ret.mptr = nullptr; | |||
| ret.dptr = nullptr; | |||
| } | |||
| ret.workspace = workspace; | |||
| return ret; | |||
| class WarpPerspectiveForwardImpl : public WarpPerspectiveForward { | |||
| protected: | |||
| using Format = Param::Format; | |||
| template <typename ctype, typename mtype> | |||
| struct KernParam { | |||
| Format format; | |||
| BorderMode bmode; | |||
| float border_val; | |||
| size_t n_src, n_mat, c, ih, iw, oh, ow; | |||
| ctype *sptr, *dptr; | |||
| mtype* mptr; | |||
| int* midx_ptr; //!< can be null | |||
| Workspace workspace; | |||
| static KernParam from_tensors(Format format, BorderMode bmode, | |||
| float border_val, _megdnn_tensor_in src, | |||
| _megdnn_tensor_in mat, | |||
| _megdnn_tensor_in mat_idx, | |||
| _megdnn_tensor_out dst, | |||
| _megdnn_workspace workspace) { | |||
| KernParam ret; | |||
| ret.format = format; | |||
| ret.bmode = bmode; | |||
| ret.border_val = border_val; | |||
| ret.n_src = src.layout.shape[0]; | |||
| if (mat_idx.raw_ptr) { | |||
| megdnn_assert(mat_idx.layout.ndim == 1); | |||
| ret.n_mat = mat_idx.layout.shape[0]; | |||
| ret.midx_ptr = mat_idx.ptr<int>(); | |||
| } else { | |||
| megdnn_assert(mat_idx.layout.ndim == 0); | |||
| ret.n_mat = ret.n_src; | |||
| ret.midx_ptr = nullptr; | |||
| } | |||
| if (format == Format::NCHW) { | |||
| ret.c = src.layout.shape[1]; | |||
| ret.ih = src.layout.shape[2]; | |||
| ret.iw = src.layout.shape[3]; | |||
| ret.oh = dst.layout.shape[2]; | |||
| ret.ow = dst.layout.shape[3]; | |||
| } else if (format == Format::NHWC) { | |||
| ret.c = src.layout.shape[3]; | |||
| ret.ih = src.layout.shape[1]; | |||
| ret.iw = src.layout.shape[2]; | |||
| ret.oh = dst.layout.shape[1]; | |||
| ret.ow = dst.layout.shape[2]; | |||
| } else if (format == Format::NCHW4) { | |||
| ret.c = src.layout.shape[1] * 4; | |||
| ret.ih = src.layout.shape[2]; | |||
| ret.iw = src.layout.shape[3]; | |||
| ret.oh = dst.layout.shape[2]; | |||
| ret.ow = dst.layout.shape[3]; | |||
| } else { | |||
| megdnn_assert(format == Format::NHWCD4); | |||
| ret.c = src.layout.shape[2] * 4; | |||
| ret.ih = src.layout.shape[1]; | |||
| ret.iw = src.layout.shape[3]; | |||
| ret.oh = dst.layout.shape[1]; | |||
| ret.ow = dst.layout.shape[3]; | |||
| } | |||
| if (src.layout.dtype.enumv() == DTypeEnum::Float32 || | |||
| MEGDNN_FLOAT16_SELECT( | |||
| (src.layout.dtype.enumv() == DTypeEnum::Float16 || | |||
| src.layout.dtype.enumv() == DTypeEnum::BFloat16), | |||
| false) || | |||
| src.layout.dtype.enumv() == DTypeEnum::Int8 || | |||
| src.layout.dtype.enumv() == DTypeEnum::Uint8 || | |||
| src.layout.dtype.enumv() == DTypeEnum::QuantizedS8 || | |||
| src.layout.dtype.enumv() == DTypeEnum::Quantized8Asymm) { | |||
| ret.sptr = src.compatible_ptr<ctype>(); | |||
| ret.mptr = mat.ptr<mtype>(); | |||
| ret.dptr = dst.compatible_ptr<ctype>(); | |||
| } else if (src.layout.dtype.enumv() == DTypeEnum::QuantizedS8) { | |||
| ret.sptr = src.compatible_ptr<ctype>(); | |||
| ret.mptr = mat.ptr<mtype>(); | |||
| ret.dptr = dst.compatible_ptr<ctype>(); | |||
| } else { | |||
| ret.sptr = nullptr; | |||
| ret.mptr = nullptr; | |||
| ret.dptr = nullptr; | |||
| } | |||
| ret.workspace = workspace; | |||
| return ret; | |||
| } | |||
| }; | |||
| // ctype: C type of input data type. | |||
| // mtype: C type of transformation matrix data type. | |||
| template <typename ctype, typename mtype> | |||
| void kern_naive(const KernParam<ctype, mtype>& kern_param, | |||
| size_t task_id); | |||
| public: | |||
| using WarpPerspectiveForward::WarpPerspectiveForward; | |||
| void exec(_megdnn_tensor_in src, _megdnn_tensor_in mat, | |||
| _megdnn_tensor_in mat_idx, _megdnn_tensor_out dst, | |||
| _megdnn_workspace workspace) override; | |||
| size_t get_workspace_in_bytes(const TensorLayout&, const TensorLayout&, | |||
| const TensorLayout&, | |||
| const TensorLayout&) override { | |||
| return 0; | |||
| } | |||
| private: | |||
| template <typename ctype, typename mtype> | |||
| void kern_naive_nhwcd4(const KernParam<ctype, mtype>& kern_param, | |||
| size_t task_id); | |||
| }; | |||
| class WarpPerspectiveBackwardDataImpl : public WarpPerspectiveBackwardData { | |||
| protected: | |||
| template <typename ctype, typename mtype> | |||
| struct KernParam { | |||
| size_t n, c, ih, iw, oh, ow; | |||
| size_t n_src, n_mat, c, ih, iw, oh, ow; | |||
| ctype *sptr, *hptr; | |||
| mtype* mptr; | |||
| int* midx_ptr; //!< can be null | |||
| static KernParam from_tensors(_megdnn_tensor_in mat, | |||
| _megdnn_tensor_in mat_idx, | |||
| _megdnn_tensor_in diff, | |||
| _megdnn_tensor_out grad) { | |||
| KernParam ret; | |||
| ret.n = grad.layout.shape[0], ret.c = grad.layout.shape[1], | |||
| ret.n_src = grad.layout.shape[0], ret.c = grad.layout.shape[1]; | |||
| ret.ih = grad.layout.shape[2], ret.iw = grad.layout.shape[3]; | |||
| ret.oh = diff.layout.shape[2], ret.ow = diff.layout.shape[3]; | |||
| ret.hptr = diff.ptr<ctype>(); | |||
| ret.mptr = mat.ptr<mtype>(); | |||
| ret.sptr = grad.ptr<ctype>(); | |||
| if (mat_idx.raw_ptr) { | |||
| megdnn_assert(mat_idx.layout.ndim == 1); | |||
| ret.n_mat = mat_idx.layout.shape[0]; | |||
| ret.midx_ptr = mat_idx.ptr<int>(); | |||
| } else { | |||
| megdnn_assert(mat_idx.layout.ndim == 0); | |||
| ret.n_mat = ret.n_src; | |||
| ret.midx_ptr = nullptr; | |||
| } | |||
| return ret; | |||
| } | |||
| }; | |||
| public: | |||
| using WarpPerspectiveBackwardData::WarpPerspectiveBackwardData; | |||
| void exec(_megdnn_tensor_in mat, _megdnn_tensor_in diff, | |||
| _megdnn_tensor_out grad, _megdnn_workspace workspace) override; | |||
| void exec(_megdnn_tensor_in mat, _megdnn_tensor_in mat_idx, | |||
| _megdnn_tensor_in diff, _megdnn_tensor_out grad, | |||
| _megdnn_workspace workspace) override; | |||
| size_t get_workspace_in_bytes(const TensorLayout&, const TensorLayout&, | |||
| const TensorLayout&, | |||
| const TensorLayout&) override { | |||
| return 0; | |||
| } | |||
| private: | |||
| template <typename ctype, typename mtype> | |||
| void kern_naive(const KernParam<ctype, mtype>& kern_param); | |||
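The same `mat_idx` decoding recurs in every `from_tensors` above: `n_src` comes from the dense tensor's batch, `n_mat` from `mat_idx` when present. A hedged sketch of that decode, reduced to the essentials (parameter names stand in for the `mat_idx.layout` fields):

```cpp
#include <cassert>
#include <cstddef>

struct MatIdxView {
    size_t n_mat;    // number of warp matrices
    const int* ptr;  // nullptr when no index tensor is given
};

MatIdxView decode_mat_idx(size_t layout_ndim, size_t shape0,
                          const int* raw_ptr, size_t n_src) {
    if (raw_ptr) {
        assert(layout_ndim == 1);
        return {shape0, raw_ptr};
    }
    assert(layout_ndim == 0);
    return {n_src, nullptr};
}
```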
| @@ -162,23 +177,35 @@ class WarpPerspectiveBackwardMatImpl : public WarpPerspectiveBackwardMat { | |||
| protected: | |||
| template <typename ctype, typename mtype> | |||
| struct KernParam { | |||
| size_t n, c, ih, iw, oh, ow; | |||
| size_t n_src, n_mat, c, ih, iw, oh, ow; | |||
| ctype *sptr, *hptr; | |||
| mtype* mptr, *res; | |||
| mtype *mptr, *res; | |||
| int* midx_ptr; //!< can be null | |||
| float border_val; | |||
| static KernParam from_tensors(float border_val_, _megdnn_tensor_in src, | |||
| _megdnn_tensor_in mat, | |||
| _megdnn_tensor_in mat_idx, | |||
| _megdnn_tensor_in diff, | |||
| _megdnn_tensor_out grad) { | |||
| KernParam ret; | |||
| ret.border_val = border_val_; | |||
| ret.n = src.layout.shape[0], ret.c = src.layout.shape[1], | |||
| ret.n_src = src.layout.shape[0], ret.c = src.layout.shape[1]; | |||
| ret.ih = src.layout.shape[2], ret.iw = src.layout.shape[3]; | |||
| ret.oh = diff.layout.shape[2], ret.ow = diff.layout.shape[3]; | |||
| ret.hptr = diff.ptr<ctype>(); | |||
| ret.mptr = mat.ptr<mtype>(); | |||
| ret.sptr = src.ptr<ctype>(); | |||
| ret.res = grad.ptr<mtype>(); | |||
| if (mat_idx.raw_ptr) { | |||
| megdnn_assert(mat_idx.layout.ndim == 1); | |||
| ret.n_mat = mat_idx.layout.shape[0]; | |||
| ret.midx_ptr = mat_idx.ptr<int>(); | |||
| } else { | |||
| megdnn_assert(mat_idx.layout.ndim == 0); | |||
| ret.n_mat = ret.n_src; | |||
| ret.midx_ptr = nullptr; | |||
| } | |||
| return ret; | |||
| } | |||
| }; | |||
| @@ -186,10 +213,10 @@ protected: | |||
| public: | |||
| using WarpPerspectiveBackwardMat::WarpPerspectiveBackwardMat; | |||
| void exec(_megdnn_tensor_in src, _megdnn_tensor_in mat, | |||
| _megdnn_tensor_in diff, _megdnn_tensor_out grad, | |||
| _megdnn_workspace workspace) override; | |||
| _megdnn_tensor_in mat_idx, _megdnn_tensor_in diff, | |||
| _megdnn_tensor_out grad, _megdnn_workspace workspace) override; | |||
| size_t get_workspace_in_bytes(const TensorLayout&, const TensorLayout&, | |||
| const TensorLayout&, | |||
| const TensorLayout&, const TensorLayout&, | |||
| const TensorLayout&) override { | |||
| return 0; | |||
| } | |||
| @@ -6,7 +6,8 @@ | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, | |||
| * software distributed under the License is distributed on an | |||
| * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||
| * implied. | |||
| */ | |||
| #include "test/common/warp_perspective.h" | |||
| @@ -19,6 +20,10 @@ using namespace warp_perspective; | |||
| void WarpPerspectiveMatIdxProxy::deduce_layout(WarpPerspective*, | |||
| TensorLayoutArray&) {} | |||
| void WarpPerspectiveMatIdxProxy::deduce_layout(WarpPerspectiveBackwardData*, | |||
| TensorLayoutArray&) {} | |||
| void WarpPerspectiveMatIdxProxy::deduce_layout(WarpPerspectiveBackwardMat*, | |||
| TensorLayoutArray&) {} | |||
| void WarpPerspectiveMatIdxProxy::exec(WarpPerspective* opr, | |||
| const TensorNDArray& tensors) { | |||
| @@ -31,6 +36,30 @@ void WarpPerspectiveMatIdxProxy::exec(WarpPerspective* opr, | |||
| opr->exec(tensors[0], tensors[1], tensors[2], tensors[3], W.workspace()); | |||
| } | |||
| void WarpPerspectiveMatIdxProxy::exec(WarpPerspectiveBackwardData* opr, | |||
| const TensorNDArray& tensors) { | |||
| if (!W.valid()) { | |||
| W = WorkspaceWrapper(opr->handle(), 0); | |||
| } | |||
| megdnn_assert(tensors.size() == 4); | |||
| W.update(opr->get_workspace_in_bytes(tensors[0].layout, tensors[1].layout, | |||
| tensors[2].layout, tensors[3].layout)); | |||
| opr->exec(tensors[0], tensors[1], tensors[2], tensors[3], W.workspace()); | |||
| } | |||
| void WarpPerspectiveMatIdxProxy::exec(WarpPerspectiveBackwardMat* opr, | |||
| const TensorNDArray& tensors) { | |||
| if (!W.valid()) { | |||
| W = WorkspaceWrapper(opr->handle(), 0); | |||
| } | |||
| megdnn_assert(tensors.size() == 5); | |||
| W.update(opr->get_workspace_in_bytes(tensors[0].layout, tensors[1].layout, | |||
| tensors[2].layout, tensors[3].layout, | |||
| tensors[4].layout)); | |||
| opr->exec(tensors[0], tensors[1], tensors[2], tensors[3], tensors[4], | |||
| W.workspace()); | |||
| } | |||
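The proxies fix the tensor ordering the checker passes to the backward oprs. For reference, a hypothetical shape set consistent with the forward mat_idx test below, with two warps drawn from a larger source batch:

```cpp
// tensors for WarpPerspectiveBackwardData with mat_idx (illustrative):
//   tensors[0] = mat     : {2, 3, 3}          one 3x3 matrix per warp
//   tensors[1] = mat_idx : {2}, dtype Int32   values in [0, N_SRC)
//   tensors[2] = diff    : {2, C, OH, OW}     gradient wrt dst
//   tensors[3] = grad    : {N_SRC, C, IH, IW} gradient wrt src
```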
| std::vector<TestArg> warp_perspective::get_cv_args() { | |||
| std::vector<TestArg> args; | |||
| @@ -101,10 +130,10 @@ void warp_perspective::run_mat_idx_test(Handle* handle) { | |||
| // test NHWC | |||
| param.format = WarpPerspective::Param::Format::NHWC; | |||
| checker.set_param(param) | |||
| .set_rng(2, &mat_idx_rng) | |||
| .set_epsilon(1e-1) | |||
| .set_dtype(2, dtype::Int32()); | |||
| checker.execs({{N_SRC, 10, 11, 3}, {2, 3, 3}, {2}, {2, 11, 12, 3}}); | |||
| } | |||
| @@ -22,7 +22,11 @@ namespace test { | |||
| struct WarpPerspectiveMatIdxProxy { | |||
| WorkspaceWrapper W; | |||
| static void deduce_layout(WarpPerspective*, TensorLayoutArray&); | |||
| static void deduce_layout(WarpPerspectiveBackwardData*, TensorLayoutArray&); | |||
| static void deduce_layout(WarpPerspectiveBackwardMat*, TensorLayoutArray&); | |||
| void exec(WarpPerspective* opr, const TensorNDArray& tensors); | |||
| void exec(WarpPerspectiveBackwardData* opr, const TensorNDArray& tensors); | |||
| void exec(WarpPerspectiveBackwardMat* opr, const TensorNDArray& tensors); | |||
| }; | |||
| class WarpPerspectiveMatRNG final : public IIDRNG { | |||
| @@ -6,7 +6,8 @@ | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, | |||
| * software distributed under the License is distributed on an | |||
| * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||
| * implied. | |||
| */ | |||
| #include "test/cuda/fixture.h" | |||
| @@ -21,10 +22,10 @@ namespace { | |||
| using namespace megdnn; | |||
| using namespace test; | |||
| class NanMatRNG: public RNG { | |||
| void gen(const TensorND &tensor_) override | |||
| class NanMatRNG : public RNG { | |||
| void gen(const TensorND& tensor_) override | |||
| { | |||
| auto &gen = RandomState::generator(); | |||
| auto& gen = RandomState::generator(); | |||
| std::uniform_real_distribution<dt_float32> pdist3(1.9f, 2.1f); | |||
| std::uniform_real_distribution<dt_float32> pdist(0.9f, 1.1f); | |||
| std::uniform_real_distribution<dt_float32> pdisth(0.4f, 0.6f); | |||
| @@ -32,7 +33,7 @@ class NanMatRNG: public RNG { | |||
| std::uniform_real_distribution<dt_float32> ndist3(-2.1f, -1.9f); | |||
| std::uniform_real_distribution<dt_float32> ndisth(-0.6f, -0.4f); | |||
| std::uniform_int_distribution<int> dice(0, 5); | |||
| float *ptr = tensor_.ptr<dt_float32>(); | |||
| float* ptr = tensor_.ptr<dt_float32>(); | |||
| auto N = tensor_.layout.shape[0]; | |||
| for (size_t n = 0; n < N; ++n) { | |||
| for (size_t i = 0; i < 9; ++i) { | |||
| @@ -65,7 +66,7 @@ class NanMatRNG: public RNG { | |||
| } | |||
| }; | |||
| } // anonymous namespace | |||
| namespace megdnn { | |||
| namespace test { | |||
| @@ -171,17 +172,15 @@ TEST_F(CUDA, WARP_PERSPECTIVE_CV) { | |||
| } | |||
| #endif | |||
| TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD) | |||
| { | |||
| TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD) { | |||
| using Param = WarpPerspective::Param; | |||
| Checker<WarpPerspectiveForward> checker(handle_cuda()); | |||
| WarpPerspectiveMatRNG rng; | |||
| checker.set_rng(1, &rng); | |||
| for (auto bmode: {WarpPerspective::BorderMode::WRAP, | |||
| WarpPerspective::BorderMode::REFLECT, | |||
| WarpPerspective::BorderMode::REPLICATE, | |||
| WarpPerspective::BorderMode::CONSTANT}) | |||
| { | |||
| for (auto bmode : {WarpPerspective::BorderMode::WRAP, | |||
| WarpPerspective::BorderMode::REFLECT, | |||
| WarpPerspective::BorderMode::REPLICATE, | |||
| WarpPerspective::BorderMode::CONSTANT}) { | |||
| WarpPerspective::Param param; | |||
| param.border_val = 0.3f; | |||
| param.bmode = bmode; | |||
| @@ -204,8 +203,7 @@ TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD) | |||
| // nan case | |||
| NanMatRNG rng_nan; | |||
| UniformFloatRNG rng_zero(0, 0); | |||
| for (auto rng: std::vector<RNG *>{&rng_nan, &rng_zero}) | |||
| { | |||
| for (auto rng : std::vector<RNG*>{&rng_nan, &rng_zero}) { | |||
| param::WarpPerspective param; | |||
| param.bmode = param::WarpPerspective::BorderMode::CONSTANT; | |||
| param.imode = param::WarpPerspective::InterpolationMode::LINEAR; | |||
| @@ -213,20 +211,18 @@ TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD) | |||
| param.border_val = 1.737; | |||
| checker.set_param(param); | |||
| // ensuring no invalid mem access is enough; no need to check values | |||
| checker.set_expect_exec_fail([](){}); | |||
| checker.set_expect_exec_fail([]() {}); | |||
| checker.exec({{1000, 2, 10, 11}, {1000, 3, 3}, {1000, 2, 12, 13}}); | |||
| } | |||
| } | |||
| TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD_INTMAX) | |||
| { | |||
| TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD_INTMAX) { | |||
| require_compute_capability(6, 0); | |||
| using Param = WarpPerspective::Param; | |||
| Checker<WarpPerspectiveForward> checker(handle_cuda()); | |||
| WarpPerspectiveMatRNG rng; | |||
| checker.set_rng(1, &rng); | |||
| for (auto bmode: {WarpPerspective::BorderMode::REPLICATE}) | |||
| { | |||
| for (auto bmode : {WarpPerspective::BorderMode::REPLICATE}) { | |||
| WarpPerspective::Param param; | |||
| param.border_val = 0.3f; | |||
| param.bmode = bmode; | |||
| @@ -235,27 +231,24 @@ TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD_INTMAX) | |||
| param.format = Param::Format::NHWC; | |||
| checker.set_param(param); | |||
| checker.set_epsilon(0.15).set_max_avg_error(4e-2); | |||
| size_t n = (INT_MAX) / (512 * 512 * 3); | |||
| checker.execs( | |||
| {{n + 1, 512, 512, 3}, {n + 1, 3, 3}, {n + 1, 25, 25, 3}}); | |||
| size_t n = (INT_MAX) / (512 * 512 * 3); | |||
| checker.execs( | |||
| {{n + 1, 512, 512, 3}, {n + 1, 3, 3}, {n + 1, 25, 25, 3}}); | |||
| } | |||
| } | |||
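The batch size in this INTMAX test is chosen so that the total element count lands just past INT_MAX, forcing the kernel's index arithmetic beyond 32-bit range. A compile-time check of that arithmetic, with the values copied from the test body:

    #include <climits>
    #include <cstdint>

    static_assert(INT_MAX / (512 * 512 * 3) == 2730,
                  "n as computed in the test body");
    static_assert(std::int64_t{2731} * (512 * 512 * 3) > INT_MAX,
                  "n + 1 images of 512x512x3 hold just over INT_MAX elements, "
                  "so 32-bit indexing in the kernel would overflow");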
| TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD_FP16) | |||
| { | |||
| TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD_FP16) { | |||
| using Param = WarpPerspective::Param; | |||
| Checker<WarpPerspectiveForward> checker(handle_cuda()); | |||
| WarpPerspectiveMatRNG rng; | |||
| checker.set_rng(1, &rng); | |||
| checker.set_dtype(0, dtype::Float16()) | |||
| .set_dtype(1, dtype::Float32()) | |||
| .set_dtype(2, dtype::Float16()); | |||
| for (auto bmode: {WarpPerspective::BorderMode::WRAP, | |||
| WarpPerspective::BorderMode::REFLECT, | |||
| WarpPerspective::BorderMode::REPLICATE, | |||
| WarpPerspective::BorderMode::CONSTANT}) | |||
| { | |||
| .set_dtype(1, dtype::Float32()) | |||
| .set_dtype(2, dtype::Float16()); | |||
| for (auto bmode : {WarpPerspective::BorderMode::WRAP, | |||
| WarpPerspective::BorderMode::REFLECT, | |||
| WarpPerspective::BorderMode::REPLICATE, | |||
| WarpPerspective::BorderMode::CONSTANT}) { | |||
| WarpPerspective::Param param; | |||
| param.border_val = 0.3f; | |||
| param.bmode = bmode; | |||
| @@ -278,8 +271,7 @@ TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD_FP16) | |||
| // nan case | |||
| NanMatRNG rng_nan; | |||
| UniformFloatRNG rng_zero(0, 0); | |||
| for (auto rng: std::vector<RNG *>{&rng_nan, &rng_zero}) | |||
| { | |||
| for (auto rng : std::vector<RNG*>{&rng_nan, &rng_zero}) { | |||
| param::WarpPerspective param; | |||
| param.bmode = param::WarpPerspective::BorderMode::CONSTANT; | |||
| param.imode = param::WarpPerspective::InterpolationMode::LINEAR; | |||
| @@ -287,13 +279,12 @@ TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD_FP16) | |||
| param.border_val = 1.737; | |||
| checker.set_param(param); | |||
| // ensuring no invalid mem access is enough; no need to check values | |||
| checker.set_expect_exec_fail([](){}); | |||
| checker.set_expect_exec_fail([]() {}); | |||
| checker.exec({{1000, 2, 10, 11}, {1000, 3, 3}, {1000, 2, 12, 13}}); | |||
| } | |||
| } | |||
| TEST_F(CUDA, WARP_PERSPECTIVE_NCHW4) | |||
| { | |||
| TEST_F(CUDA, WARP_PERSPECTIVE_NCHW4) { | |||
| using Param = WarpPerspective::Param; | |||
| WarpPerspective::Param param; | |||
| Checker<WarpPerspectiveForward> checker(handle_cuda()); | |||
| @@ -348,31 +339,29 @@ TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD_NCHW_INT8) { | |||
| warp_perspective::run_int8_test(handle_cuda()); | |||
| } | |||
| TEST_F(CUDA, WARP_PERSPECTIVE_BACKWARD_DATA) | |||
| { | |||
| TEST_F(CUDA, WARP_PERSPECTIVE_BACKWARD_DATA) { | |||
| Checker<WarpPerspectiveBackwardData> checker(handle_cuda()); | |||
| WarpPerspectiveMatRNG rng; | |||
| checker.set_rng(0, &rng); | |||
| for (int i = 0; i < 1; ++i) { | |||
| for (auto bmode: {WarpPerspective::BorderMode::WRAP, | |||
| WarpPerspective::BorderMode::REFLECT, | |||
| WarpPerspective::BorderMode::REPLICATE, | |||
| WarpPerspective::BorderMode::CONSTANT}) | |||
| { | |||
| for (auto bmode : {WarpPerspective::BorderMode::WRAP, | |||
| WarpPerspective::BorderMode::REFLECT, | |||
| WarpPerspective::BorderMode::REPLICATE, | |||
| WarpPerspective::BorderMode::CONSTANT}) { | |||
| WarpPerspective::Param param; | |||
| param.border_val = 0.3f; | |||
| param.bmode = bmode; | |||
| param.imode = param::WarpPerspective::InterpolationMode::LINEAR; | |||
| checker.set_param(param); | |||
| checker.execs({{2, 3, 3}, {2, 3, 11, 12}, {2, 3, 10, 11}}); | |||
| checker.execs({{22000, 3, 3}, {22000, 3, 11, 12}, {22000, 3, 10, 11}}); | |||
| checker.execs( | |||
| {{22000, 3, 3}, {22000, 3, 11, 12}, {22000, 3, 10, 11}}); | |||
| } | |||
| } | |||
| // nan case | |||
| NanMatRNG rng_nan; | |||
| UniformFloatRNG rng_zero(0, 0); | |||
| for (auto rng: std::vector<RNG *>{&rng_nan, &rng_zero}) | |||
| { | |||
| for (auto rng : std::vector<RNG*>{&rng_nan, &rng_zero}) { | |||
| param::WarpPerspective param; | |||
| param.bmode = param::WarpPerspective::BorderMode::CONSTANT; | |||
| param.imode = param::WarpPerspective::InterpolationMode::LINEAR; | |||
| @@ -380,39 +369,54 @@ TEST_F(CUDA, WARP_PERSPECTIVE_BACKWARD_DATA) | |||
| param.border_val = 1.737; | |||
| checker.set_param(param); | |||
| // ensuring no invalid mem access is enough; no need to check values | |||
| checker.set_expect_exec_fail([](){}); | |||
| checker.set_expect_exec_fail([]() {}); | |||
| checker.exec({{1000, 3, 3}, {1000, 2, 10, 11}, {1000, 2, 12, 13}}); | |||
| } | |||
| { | |||
| Checker<WarpPerspectiveBackwardData, WarpPerspectiveMatIdxProxy> | |||
| checker(handle_cuda()); | |||
| constexpr int N_SRC = 5; | |||
| UniformIntRNG mat_idx_rng{0, N_SRC - 1}; | |||
| checker.set_rng(0, &rng); | |||
| checker.set_dtype(1, dtype::Int32()); | |||
| checker.set_rng(1, &mat_idx_rng); | |||
| param::WarpPerspective param; | |||
| param.bmode = param::WarpPerspective::BorderMode::REFLECT; | |||
| param.imode = param::WarpPerspective::InterpolationMode::LINEAR; | |||
| checker.set_param(param); | |||
| checker.set_epsilon(1 + 1e-3); | |||
| checker.execs({{2, 3, 3}, {2}, {2, 12, 11, 12}, {N_SRC, 12, 10, 11}}); | |||
| checker.execs( | |||
| {{123, 3, 3}, {123}, {123, 56, 16, 15}, {N_SRC, 56, 17, 13}}); | |||
| } | |||
| } | |||
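The block just added exercises the new mat_idx overload of the data gradient. Read off the shapes above (a hedged summary, not normative documentation): mat and diff carry one entry per matrix, grad keeps the original N_SRC source batch, and mat_idx maps each matrix back to a source image; matrices sharing a source have their contributions summed into grad, which is why the epsilon is relaxed. A minimal consistency check of that contract (names hypothetical, not the megdnn API):

    #include <cstddef>
    #include <vector>

    // mat {N_MAT,3,3}, mat_idx {N_MAT} (Int32), diff {N_MAT,C,OH,OW},
    // grad {N_SRC,C,IH,IW}: every index must name a real source image.
    bool mat_idx_consistent(std::size_t n_mat, std::size_t n_src,
                            const std::vector<int>& mat_idx) {
        if (mat_idx.size() != n_mat)
            return false;
        for (int i : mat_idx)
            if (i < 0 || static_cast<std::size_t>(i) >= n_src)
                return false;
        return true;
    }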
| TEST_F(CUDA, WARP_PERSPECTIVE_BACKWARD_MAT) | |||
| { | |||
| TEST_F(CUDA, WARP_PERSPECTIVE_BACKWARD_MAT) { | |||
| Checker<WarpPerspectiveBackwardMat> checker(handle_cuda()); | |||
| WarpPerspectiveMatRNG rng; | |||
| checker.set_rng(1, &rng); | |||
| for (int i = 0; i < 1; ++i) { | |||
| for (auto bmode: {WarpPerspective::BorderMode::WRAP, | |||
| WarpPerspective::BorderMode::REFLECT, | |||
| WarpPerspective::BorderMode::REPLICATE, | |||
| WarpPerspective::BorderMode::CONSTANT}) | |||
| { | |||
| for (auto bmode : {WarpPerspective::BorderMode::WRAP, | |||
| WarpPerspective::BorderMode::REFLECT, | |||
| WarpPerspective::BorderMode::REPLICATE, | |||
| WarpPerspective::BorderMode::CONSTANT}) { | |||
| WarpPerspective::Param param; | |||
| param.border_val = 0.3f; | |||
| param.imode = param::WarpPerspective::InterpolationMode::LINEAR; | |||
| param.bmode = bmode; | |||
| checker.set_param(param); | |||
| checker.set_epsilon(1e-2); | |||
| checker.execs({ | |||
| {1000, 3, 11, 12}, {1000, 3, 3}, | |||
| {1000, 3, 10, 11}, {1000, 3, 3} | |||
| }); | |||
| checker.execs({{1000, 3, 11, 12}, | |||
| {1000, 3, 3}, | |||
| {1000, 3, 10, 11}, | |||
| {1000, 3, 3}}); | |||
| } | |||
| } | |||
| // nan case | |||
| NanMatRNG rng_nan; | |||
| UniformFloatRNG rng_zero(0, 0); | |||
| for (auto rng: std::vector<RNG *>{&rng_nan, &rng_zero}) | |||
| { | |||
| for (auto rng : std::vector<RNG*>{&rng_nan, &rng_zero}) { | |||
| param::WarpPerspective param; | |||
| param.bmode = param::WarpPerspective::BorderMode::CONSTANT; | |||
| param.imode = param::WarpPerspective::InterpolationMode::LINEAR; | |||
| @@ -420,26 +424,50 @@ TEST_F(CUDA, WARP_PERSPECTIVE_BACKWARD_MAT) | |||
| param.border_val = 1.737; | |||
| checker.set_param(param); | |||
| // ensuring no invalid mem access is enough; no need to check values | |||
| checker.set_expect_exec_fail([](){}); | |||
| checker.exec({{1000, 2, 10, 11}, {1000, 3, 3}, | |||
| {1000, 2, 12, 13}, {1000, 3, 3}}); | |||
| checker.set_expect_exec_fail([]() {}); | |||
| checker.exec({{1000, 2, 10, 11}, | |||
| {1000, 3, 3}, | |||
| {1000, 2, 12, 13}, | |||
| {1000, 3, 3}}); | |||
| } | |||
| { | |||
| Checker<WarpPerspectiveBackwardMat, WarpPerspectiveMatIdxProxy> checker( | |||
| handle_cuda()); | |||
| constexpr int N_SRC = 5; | |||
| UniformIntRNG mat_idx_rng{0, N_SRC - 1}; | |||
| checker.set_rng(1, &rng); | |||
| checker.set_dtype(2, dtype::Int32()); | |||
| checker.set_rng(2, &mat_idx_rng); | |||
| param::WarpPerspective param; | |||
| param.bmode = param::WarpPerspective::BorderMode::REFLECT; | |||
| param.imode = param::WarpPerspective::InterpolationMode::LINEAR; | |||
| checker.set_param(param); | |||
| checker.set_epsilon(1 + 1e-3); | |||
| checker.execs({{N_SRC, 12, 10, 11}, | |||
| {2, 3, 3}, | |||
| {2}, | |||
| {2, 12, 11, 12}, | |||
| {2, 3, 3}}); | |||
| checker.execs({{N_SRC, 56, 17, 13}, | |||
| {123, 3, 3}, | |||
| {123}, | |||
| {123, 56, 16, 15}, | |||
| {123, 3, 3}}); | |||
| } | |||
| } | |||
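The mat_idx variant of the mat gradient mirrors the data-gradient case: several matrices may reference the same source image, so gradient contributions are accumulated, and the near-unity epsilon (1 + 1e-3) effectively checks only for gross errors and invalid accesses rather than exact values. The imgproc test at the end of this patch makes the same point: the CUDA implementation sums gradients, which makes a strict grad check impractical.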
| TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD_BFLOAT16) | |||
| { | |||
| TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD_BFLOAT16) { | |||
| using Param = WarpPerspective::Param; | |||
| Checker<WarpPerspectiveForward> checker(handle_cuda()); | |||
| WarpPerspectiveMatRNG rng; | |||
| checker.set_rng(1, &rng); | |||
| checker.set_dtype(0, dtype::BFloat16()) | |||
| .set_dtype(1, dtype::Float32()) | |||
| .set_dtype(2, dtype::BFloat16()); | |||
| for (auto bmode: {WarpPerspective::BorderMode::WRAP, | |||
| WarpPerspective::BorderMode::REFLECT, | |||
| WarpPerspective::BorderMode::REPLICATE, | |||
| WarpPerspective::BorderMode::CONSTANT}) | |||
| { | |||
| .set_dtype(1, dtype::Float32()) | |||
| .set_dtype(2, dtype::BFloat16()); | |||
| for (auto bmode : {WarpPerspective::BorderMode::WRAP, | |||
| WarpPerspective::BorderMode::REFLECT, | |||
| WarpPerspective::BorderMode::REPLICATE, | |||
| WarpPerspective::BorderMode::CONSTANT}) { | |||
| WarpPerspective::Param param; | |||
| param.border_val = 0.3f; | |||
| param.bmode = bmode; | |||
| @@ -457,21 +485,19 @@ TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD_BFLOAT16) | |||
| } | |||
| } | |||
| TEST_F(CUDA, WARP_PERSPECTIVE_BACKWARD_DATA_BFLOAT16) | |||
| { | |||
| TEST_F(CUDA, WARP_PERSPECTIVE_BACKWARD_DATA_BFLOAT16) { | |||
| Checker<WarpPerspectiveBackwardData> checker(handle_cuda()); | |||
| WarpPerspectiveMatRNG rng; | |||
| checker.set_rng(0, &rng) | |||
| .set_epsilon(1e-1) | |||
| .set_dtype(0, dtype::Float32()) | |||
| .set_dtype(1, dtype::BFloat16()) | |||
| .set_dtype(2, dtype::BFloat16()); | |||
| .set_epsilon(1e-1) | |||
| .set_dtype(0, dtype::Float32()) | |||
| .set_dtype(1, dtype::BFloat16()) | |||
| .set_dtype(2, dtype::BFloat16()); | |||
| for (int i = 0; i < 1; ++i) { | |||
| for (auto bmode: {WarpPerspective::BorderMode::WRAP, | |||
| WarpPerspective::BorderMode::REFLECT, | |||
| WarpPerspective::BorderMode::REPLICATE, | |||
| WarpPerspective::BorderMode::CONSTANT}) | |||
| { | |||
| for (auto bmode : {WarpPerspective::BorderMode::WRAP, | |||
| WarpPerspective::BorderMode::REFLECT, | |||
| WarpPerspective::BorderMode::REPLICATE, | |||
| WarpPerspective::BorderMode::CONSTANT}) { | |||
| WarpPerspective::Param param; | |||
| param.border_val = 0.3f; | |||
| param.bmode = bmode; | |||
| @@ -482,31 +508,29 @@ TEST_F(CUDA, WARP_PERSPECTIVE_BACKWARD_DATA_BFLOAT16) | |||
| } | |||
| } | |||
| TEST_F(CUDA, WARP_PERSPECTIVE_BACKWARD_MAT_BFLOAT16) | |||
| { | |||
| TEST_F(CUDA, WARP_PERSPECTIVE_BACKWARD_MAT_BFLOAT16) { | |||
| Checker<WarpPerspectiveBackwardMat> checker(handle_cuda()); | |||
| WarpPerspectiveMatRNG rng; | |||
| checker.set_rng(1, &rng) | |||
| .set_epsilon(1e-2) | |||
| .set_dtype(0, dtype::BFloat16()) | |||
| .set_dtype(1, dtype::Float32()) | |||
| .set_dtype(2, dtype::BFloat16()) | |||
| .set_dtype(3, dtype::Float32()); | |||
| .set_epsilon(1e-2) | |||
| .set_dtype(0, dtype::BFloat16()) | |||
| .set_dtype(1, dtype::Float32()) | |||
| .set_dtype(2, dtype::BFloat16()) | |||
| .set_dtype(3, dtype::Float32()); | |||
| for (int i = 0; i < 1; ++i) { | |||
| for (auto bmode: {WarpPerspective::BorderMode::WRAP, | |||
| WarpPerspective::BorderMode::REFLECT, | |||
| WarpPerspective::BorderMode::REPLICATE, | |||
| WarpPerspective::BorderMode::CONSTANT}) | |||
| { | |||
| for (auto bmode : {WarpPerspective::BorderMode::WRAP, | |||
| WarpPerspective::BorderMode::REFLECT, | |||
| WarpPerspective::BorderMode::REPLICATE, | |||
| WarpPerspective::BorderMode::CONSTANT}) { | |||
| WarpPerspective::Param param; | |||
| param.border_val = 0.3f; | |||
| param.imode = param::WarpPerspective::InterpolationMode::LINEAR; | |||
| param.bmode = bmode; | |||
| checker.set_param(param); | |||
| checker.execs({ | |||
| {1000, 3, 11, 12}, {1000, 3, 3}, | |||
| {1000, 3, 10, 11}, {1000, 3, 3} | |||
| }); | |||
| checker.execs({{1000, 3, 11, 12}, | |||
| {1000, 3, 3}, | |||
| {1000, 3, 10, 11}, | |||
| {1000, 3, 3}}); | |||
| } | |||
| } | |||
| } | |||
| @@ -549,14 +573,14 @@ TEST_F(CUDA, BENCHMARK_WARP_PERSPECTIVE_NCHW4) { | |||
| benchmarker.set_dtype(0, dtype::QuantizedS8(1.0f)); | |||
| benchmarker.set_dtype(2, dtype::QuantizedS8(1.0f)); | |||
| run({TensorShape{1, 25, 256, 256, 4}, {1, 3, 3}, {1, 25, 256, 5120, 4}}); | |||
| run({TensorShape{1, 25, 256, 5120, 4}, {1, 3, 3}, {1,25, 256, 256, 4}}); | |||
| run({TensorShape{1, 25, 256, 5120, 4}, {1, 3, 3}, {1, 25, 256, 256, 4}}); | |||
| run({TensorShape{1, 25, 256, 256, 4}, {1, 3, 3}, {1, 25, 512, 512, 4}}); | |||
| run({TensorShape{1, 25, 512, 512, 4}, {1, 3, 3}, {1, 25, 256, 256, 4}}); | |||
| } | |||
| #endif | |||
| } // namespace test | |||
| } // namespace megdnn | |||
| } // namespace test | |||
| } // namespace megdnn | |||
| // vim: syntax=cpp.doxygen | |||
| @@ -6,18 +6,18 @@ | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, | |||
| * software distributed under the License is distributed on an | |||
| * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||
| * implied. | |||
| */ | |||
| #include "./internal/megdnn_opr_wrapper.inl" | |||
| #include "megbrain/opr/imgproc.h" | |||
| #include "megbrain/opr/utility.h" | |||
| #include "./internal/megdnn_opr_wrapper.inl" | |||
| #include "megbrain/graph/grad_impl.h" | |||
| #include "megbrain/opr/utility.h" | |||
| using namespace mgb; | |||
| using namespace opr; | |||
| /* ======================= WarpPerspectiveForward ======================= */ | |||
| MGB_DYN_TYPE_OBJ_FINAL_IMPL(WarpPerspectiveForward); | |||
| @@ -54,8 +54,7 @@ void WarpPerspectiveForward::add_input_layout_constraint() { | |||
| } | |||
| void WarpPerspectiveForward::outshape_by_symvar_do_get_output_shape( | |||
| TensorShape &dest, const ShapeInferInfo &shpinfo) { | |||
| TensorShape& dest, const ShapeInferInfo& shpinfo) { | |||
| TensorShape oshp2d; | |||
| cg::copy_tensor_value_to_shape(oshp2d, *shpinfo.shpval_inp_val.at(0)); | |||
| auto imgshp = shpinfo.shape_inp_shp.at(0), | |||
| @@ -112,8 +111,8 @@ void WarpPerspectiveForward::scn_do_execute() { | |||
| } | |||
| size_t WarpPerspectiveForward::get_workspace_size_bytes( | |||
| const TensorShapeArray &input_shapes, | |||
| const TensorShapeArray &output_shapes) const { | |||
| const TensorShapeArray& input_shapes, | |||
| const TensorShapeArray& output_shapes) const { | |||
| if (input().size() == 3) { | |||
| return intl::_MegDNNOprMethInvoker<2, 1>::get_workspace_in_bytes( | |||
| megdnn_opr(), this, input_shapes, output_shapes); | |||
| @@ -129,19 +128,34 @@ void WarpPerspectiveForward::record_execute_deps(ExecDependencyArray& deps) { | |||
| #ifdef MGB_ENABLE_GRAD | |||
| MGB_IMPL_OPR_GRAD(WarpPerspectiveForward) { | |||
| mgb_assert(opr.input().size() == 3, | |||
| "backward with mat_idx is currently unsupported"); | |||
| if (opr.input().size() == 4) { | |||
| if (wrt_idx == 0) { | |||
| // wrt data | |||
| SymbolVar grad = WarpPerspectiveBackwardData::make( | |||
| opr.input(1), opr.input(2), out_grad[0], opr.input(0), | |||
| opr.param()); | |||
| return grad.node(); | |||
| } else if (wrt_idx == 1) { | |||
| // wrt mat | |||
| SymbolVar grad = WarpPerspectiveBackwardMat::make( | |||
| opr.input(0), opr.input(1), opr.input(2), out_grad[0], | |||
| opr.param()); | |||
| return grad.node(); | |||
| } else { | |||
| return InvalidGrad::make(opr, wrt_idx); | |||
| } | |||
| } | |||
| mgb_assert(opr.input().size() == 3); | |||
| if (wrt_idx == 0) { | |||
| // wrt data | |||
| SymbolVar grad = WarpPerspectiveBackwardData::make( | |||
| opr.input(1), out_grad[0], opr.input(0), | |||
| opr.param()); | |||
| opr.input(1), out_grad[0], opr.input(0), opr.param()); | |||
| return grad.node(); | |||
| } else if (wrt_idx == 1){ | |||
| } else if (wrt_idx == 1) { | |||
| // wrt mat | |||
| SymbolVar grad = WarpPerspectiveBackwardMat::make( | |||
| opr.input(0), opr.input(1), out_grad[0], | |||
| opr.param()); | |||
| opr.input(0), opr.input(1), out_grad[0], opr.param()); | |||
| return grad.node(); | |||
| } else | |||
| return InvalidGrad::make(opr, wrt_idx); | |||
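With four inputs (the mat_idx form), gradients remain defined wrt data and mat only; asking for the gradient wrt the integer mat_idx input falls through to InvalidGrad. A hedged sketch of how the two defined branches surface at graph level (the scalar `loss` is an assumption, not part of this patch; cg::grad comes from megbrain/graph/grad_impl.h, included above):

    auto out = opr::WarpPerspective::make(src, mat, mat_idx, out_shape, param);
    // assume `loss` is some scalar derived from `out`
    auto grad_src = cg::grad(loss, src);  // WarpPerspectiveBackwardData branch
    auto grad_mat = cg::grad(loss, mat);  // WarpPerspectiveBackwardMat branch
    // cg::grad(loss, mat_idx) would resolve to the InvalidGrad branch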
| @@ -151,14 +165,116 @@ MGB_IMPL_OPR_GRAD(WarpPerspectiveForward) { | |||
| /* ====================== WarpPerspectiveBackwardData ====================== */ | |||
| MGB_DYN_TYPE_OBJ_FINAL_IMPL(WarpPerspectiveBackwardData); | |||
| MEGDNN_OPR_INIT3(WarpPerspectiveBackwardData, "warp_perspective_bwd_data", | |||
| 2, false); | |||
| WarpPerspectiveBackwardData::WarpPerspectiveBackwardData( | |||
| VarNode* mat, VarNode* out_diff, VarNode* in_for_shape, | |||
| const Param& param, const OperatorNodeConfig& config) | |||
| : Super(OperatorNodeBaseCtorParam{mat->owner_graph(), | |||
| config, | |||
| "warp_perspective_bwd_data", | |||
| {mat}}, | |||
| 2, false) { | |||
| init_megdnn_opr(*this, param); | |||
| add_input({mat, out_diff, in_for_shape}); | |||
| intl::MegDNNOprInitPostCtor<WarpPerspectiveBackwardData>::apply(*this); | |||
| } | |||
| WarpPerspectiveBackwardData::WarpPerspectiveBackwardData( | |||
| VarNode* mat, VarNode* mat_idx, VarNode* out_diff, | |||
| VarNode* in_for_shape, const Param& param, | |||
| const OperatorNodeConfig& config) | |||
| : Super(OperatorNodeBaseCtorParam{mat->owner_graph(), | |||
| config, | |||
| "warp_perspective_bwd_data", | |||
| {mat, mat_idx}}, | |||
| 3, false) { | |||
| init_megdnn_opr(*this, param); | |||
| add_input({mat, mat_idx, out_diff, in_for_shape}); | |||
| intl::MegDNNOprInitPostCtor<WarpPerspectiveBackwardData>::apply(*this); | |||
| } | |||
| SymbolVar WarpPerspectiveBackwardData::make(SymbolVar i0, SymbolVar i1, | |||
| SymbolVar i2, const Param& param, | |||
| const OperatorNodeConfig& config) { | |||
| intl::MegDNNOprInitInputsModifier<WarpPerspectiveBackwardData>::apply( | |||
| param, {&i0, &i1, &i2}); | |||
| return i0.insert_single_output_opr<WarpPerspectiveBackwardData>( | |||
| i0.node(), i1.node(), i2.node(), param, config); | |||
| } | |||
| SymbolVar WarpPerspectiveBackwardData::make(SymbolVar i0, SymbolVar i1, | |||
| SymbolVar i2, SymbolVar i3, | |||
| const Param& param, | |||
| const OperatorNodeConfig& config) { | |||
| intl::MegDNNOprInitInputsModifier<WarpPerspectiveBackwardData>::apply( | |||
| param, {&i0, &i1, &i2, &i3}); | |||
| return i0.insert_single_output_opr<WarpPerspectiveBackwardData>( | |||
| i0.node(), i1.node(), i2.node(), i3.node(), param, config); | |||
| } | |||
| void WarpPerspectiveBackwardData::scn_do_execute() { | |||
| if (input().size() == 3) { | |||
| megdnn_opr()->exec(input(0)->dev_tensor().as_megdnn(), | |||
| input(1)->dev_tensor().as_megdnn(), | |||
| output(0)->dev_tensor().as_megdnn(), | |||
| intl::get_megdnn_workspace_from_var(output(1))); | |||
| } else { | |||
| mgb_assert(input().size() == 4); | |||
| megdnn_opr()->exec(input(0)->dev_tensor().as_megdnn(), | |||
| input(1)->dev_tensor().as_megdnn(), | |||
| input(2)->dev_tensor().as_megdnn(), | |||
| output(0)->dev_tensor().as_megdnn(), | |||
| intl::get_megdnn_workspace_from_var(output(1))); | |||
| } | |||
| } | |||
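Both new factory overloads follow the constructor argument order; a hedged usage sketch with placeholder variables:

    // without mat_idx: (mat, out_diff, in_for_shape)
    auto g3 = WarpPerspectiveBackwardData::make(mat, diff, src, param);
    // with mat_idx: (mat, mat_idx, out_diff, in_for_shape)
    auto g4 = WarpPerspectiveBackwardData::make(mat, mat_idx, diff, src, param);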
| /* ====================== WarpPerspectiveBackwardMat ====================== */ | |||
| MGB_DYN_TYPE_OBJ_FINAL_IMPL(WarpPerspectiveBackwardMat); | |||
| MEGDNN_OPR_INIT3(WarpPerspectiveBackwardMat, "warp_perspective_bwd_mat", | |||
| 1, true); | |||
| WarpPerspectiveBackwardMat::WarpPerspectiveBackwardMat( | |||
| VarNode* src, VarNode* mat, VarNode* mat_idx, VarNode* out_diff, | |||
| const Param& param, const OperatorNodeConfig& config) | |||
| : Super(OperatorNodeBaseCtorParam{src->owner_graph(), | |||
| config, | |||
| "warp_perspective_bwd_mat", | |||
| {src, mat, mat_idx}}, | |||
| 1, true) { | |||
| init_megdnn_opr(*this, param); | |||
| if (mat_idx) { | |||
| add_input({src, mat, mat_idx, out_diff}); | |||
| } else { | |||
| add_input({src, mat, out_diff}); | |||
| } | |||
| intl::MegDNNOprInitPostCtor<WarpPerspectiveBackwardMat>::apply(*this); | |||
| } | |||
| void WarpPerspectiveBackwardMat::scn_do_execute() { | |||
| if (input().size() == 3) { | |||
| megdnn_opr()->exec(input(0)->dev_tensor().as_megdnn(), | |||
| input(1)->dev_tensor().as_megdnn(), | |||
| input(2)->dev_tensor().as_megdnn(), | |||
| output(0)->dev_tensor().as_megdnn(), | |||
| intl::get_megdnn_workspace_from_var(output(1))); | |||
| } else { | |||
| mgb_assert(input().size() == 4); | |||
| megdnn_opr()->exec(input(0)->dev_tensor().as_megdnn(), | |||
| input(1)->dev_tensor().as_megdnn(), | |||
| input(2)->dev_tensor().as_megdnn(), | |||
| input(3)->dev_tensor().as_megdnn(), | |||
| output(0)->dev_tensor().as_megdnn(), | |||
| intl::get_megdnn_workspace_from_var(output(1))); | |||
| } | |||
| } | |||
| SymbolVar WarpPerspectiveBackwardMat::make( | |||
| SymbolVar i0, SymbolVar i1, SymbolVar i2, SymbolVar i3, | |||
| const Param& param, const OperatorNodeConfig& config) { | |||
| intl::MegDNNOprInitInputsModifier<WarpPerspectiveBackwardMat>::apply( | |||
| param, {&i0, &i1, &i2, &i3}); | |||
| return i0.insert_single_output_opr<WarpPerspectiveBackwardMat>( | |||
| i0.node(), i1.node(), i2.node(), i3.node(), param, config); | |||
| } | |||
| /* ====================== Cv operator ====================== */ | |||
| @@ -188,8 +304,7 @@ void ResizeForward::add_input_layout_constraint() { | |||
| } | |||
| void ResizeForward::outshape_by_symvar_do_get_output_shape( | |||
| TensorShape &dest, const ShapeInferInfo &shpinfo) { | |||
| TensorShape& dest, const ShapeInferInfo& shpinfo) { | |||
| TensorShape oshp2d; | |||
| cg::copy_tensor_value_to_shape(oshp2d, *shpinfo.shpval_inp_val.at(0)); | |||
| auto imgshp = shpinfo.shape_inp_shp.at(0); | |||
| @@ -232,7 +347,7 @@ size_t ResizeForward::get_workspace_size_bytes( | |||
| megdnn_opr(), this, input_shapes, output_shapes); | |||
| } | |||
| void ResizeForward::record_execute_deps(ExecDependencyArray &deps) { | |||
| void ResizeForward::record_execute_deps(ExecDependencyArray& deps) { | |||
| record_megdnn_opr(deps); | |||
| } | |||
| @@ -268,19 +383,17 @@ void WarpAffineForward::add_input_layout_constraint() { | |||
| } | |||
| void WarpAffineForward::outshape_by_symvar_do_get_output_shape( | |||
| TensorShape &dest, const ShapeInferInfo &shpinfo) { | |||
| TensorShape& dest, const ShapeInferInfo& shpinfo) { | |||
| TensorShape oshp2d; | |||
| cg::copy_tensor_value_to_shape(oshp2d, *shpinfo.shpval_inp_val.at(0)); | |||
| auto imgshp = shpinfo.shape_inp_shp.at(0), | |||
| matshp = shpinfo.shape_inp_shp.at(1); | |||
| mgb_assert( | |||
| (imgshp.ndim == 4 || imgshp.ndim == 5) && matshp.ndim == 3 && oshp2d.ndim == 2 && | |||
| matshp.shape[0] == imgshp.shape[0] && | |||
| matshp.shape[1] == 2 && matshp.shape[2] == 3, | |||
| "shape mismatch for WarpAffineForward: img=%s mat=%s out2d=%s", | |||
| imgshp.to_string().c_str(), matshp.to_string().c_str(), | |||
| oshp2d.to_string().c_str()); | |||
| mgb_assert((imgshp.ndim == 4 || imgshp.ndim == 5) && matshp.ndim == 3 && | |||
| oshp2d.ndim == 2 && matshp.shape[0] == imgshp.shape[0] && | |||
| matshp.shape[1] == 2 && matshp.shape[2] == 3, | |||
| "shape mismatch for WarpAffineForward: img=%s mat=%s out2d=%s", | |||
| imgshp.to_string().c_str(), matshp.to_string().c_str(), | |||
| oshp2d.to_string().c_str()); | |||
| size_t height_idx = 0; | |||
| if (param().format == Param::Format::NCHW) { | |||
| @@ -305,18 +418,19 @@ void WarpAffineForward::init_output_static_infer_desc() { | |||
| } | |||
| void WarpAffineForward::scn_do_execute() { | |||
| intl::MegDNNOprMethInvoker<megdnn::WarpAffine>:: | |||
| exec(megdnn_opr(), this); | |||
| intl::MegDNNOprMethInvoker<megdnn::WarpAffine>::exec(megdnn_opr(), this); | |||
| } | |||
| size_t WarpAffineForward::get_workspace_size_bytes( | |||
| const TensorShapeArray &input_shapes, | |||
| const TensorShapeArray &output_shapes) const { | |||
| return intl::MegDNNOprMethInvoker<megdnn::WarpAffine>:: | |||
| get_workspace_in_bytes(megdnn_opr(), this, input_shapes, output_shapes); | |||
| const TensorShapeArray& input_shapes, | |||
| const TensorShapeArray& output_shapes) const { | |||
| return intl::MegDNNOprMethInvoker< | |||
| megdnn::WarpAffine>::get_workspace_in_bytes(megdnn_opr(), this, | |||
| input_shapes, | |||
| output_shapes); | |||
| } | |||
| void WarpAffineForward::record_execute_deps(ExecDependencyArray &deps) { | |||
| void WarpAffineForward::record_execute_deps(ExecDependencyArray& deps) { | |||
| record_megdnn_opr(deps); | |||
| } | |||
| @@ -325,7 +439,7 @@ void WarpAffineForward::record_execute_deps(ExecDependencyArray &deps) { | |||
| MGB_DYN_TYPE_OBJ_FINAL_IMPL(RemapForward); | |||
| MEGDNN_OPR_INIT2(RemapForward, "remap") | |||
| void RemapForward::init_output_dtype(){ | |||
| void RemapForward::init_output_dtype() { | |||
| output(0)->dtype(input(0)->dtype()); | |||
| } | |||
| @@ -37,13 +37,59 @@ namespace serialization { | |||
| } | |||
| } | |||
| }; | |||
| template<> | |||
| struct OprMaker<opr::WarpPerspectiveBackwardData, 0> { | |||
| using Opr = opr::WarpPerspectiveBackwardData; | |||
| using Param = Opr::Param; | |||
| static cg::OperatorNodeBase* make(const Param& param, | |||
| const cg::VarNodeArray& inputs, | |||
| ComputingGraph& graph, | |||
| const OperatorNodeConfig& config) { | |||
| MGB_MARK_USED_VAR(graph); | |||
| if (inputs.size() == 3) { | |||
| return Opr::make(inputs[0], inputs[1], inputs[2], param, config) | |||
| .node() | |||
| ->owner_opr(); | |||
| } else { | |||
| mgb_assert(inputs.size() == 4); | |||
| return Opr::make(inputs[0], inputs[1], inputs[2], inputs[3], | |||
| param, config) | |||
| .node() | |||
| ->owner_opr(); | |||
| } | |||
| } | |||
| }; | |||
| template<> | |||
| struct OprMaker<opr::WarpPerspectiveBackwardMat, 0> { | |||
| using Opr = opr::WarpPerspectiveBackwardMat; | |||
| using Param = Opr::Param; | |||
| static cg::OperatorNodeBase* make(const Param& param, | |||
| const cg::VarNodeArray& inputs, | |||
| ComputingGraph& graph, | |||
| const OperatorNodeConfig& config) { | |||
| MGB_MARK_USED_VAR(graph); | |||
| if (inputs.size() == 3) { | |||
| return Opr::make(inputs[0], inputs[1], inputs[2], param, config) | |||
| .node() | |||
| ->owner_opr(); | |||
| } else { | |||
| mgb_assert(inputs.size() == 4); | |||
| return Opr::make(inputs[0], inputs[1], inputs[2], inputs[3], | |||
| param, config) | |||
| .node() | |||
| ->owner_opr(); | |||
| } | |||
| } | |||
| }; | |||
| } // namespace serialization | |||
| namespace opr { | |||
| MGB_SEREG_OPR(WarpPerspective, 0); | |||
| MGB_SEREG_OPR(WarpPerspectiveBackwardData, 3); | |||
| MGB_SEREG_OPR(WarpPerspectiveBackwardMat, 3); | |||
| MGB_SEREG_OPR(WarpPerspectiveBackwardData, 0); | |||
| MGB_SEREG_OPR(WarpPerspectiveBackwardMat, 0); | |||
| MGB_SEREG_OPR(Rotate, 1); | |||
| MGB_SEREG_OPR(CvtColor, 1); | |||
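A hedged reading of the registration change above: the second argument of MGB_SEREG_OPR is the expected input arity, and 0 defers the check to the OprMaker<Opr, 0> specializations defined earlier in this file, which branch on inputs.size(). Graphs serialized before this patch (three inputs, no mat_idx) and graphs produced after it (four inputs) therefore both deserialize through the same registration.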
| @@ -6,7 +6,8 @@ | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, | |||
| * software distributed under the License is distributed on an | |||
| * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||
| * implied. | |||
| */ | |||
| #pragma once | |||
| @@ -33,77 +34,93 @@ namespace opr { | |||
| * Impl note: this operator might have 3 or 4 inputs depending on whether | |||
| * \p mat_idx is given | |||
| */ | |||
| MGB_DEFINE_OPR_CLASS(WarpPerspectiveForward, | |||
| MGB_DEFINE_OPR_CLASS( | |||
| WarpPerspectiveForward, | |||
| intl::WorkspaceSizeInfer< | |||
| intl::OutshapeBySymvarSCNOpr<mixin::MegDNNOprHolderImpl< | |||
| megdnn::WarpPerspectiveForward>>>) // { | |||
| public: | |||
| WarpPerspectiveForward( | |||
| VarNode *in_tensor, VarNode *mat, VarNode *mat_idx, | |||
| VarNode *out_shape, | |||
| const Param ¶m, | |||
| const OperatorNodeConfig &config); | |||
| static SymbolVar make(SymbolVar in_tensor, | |||
| SymbolVar mat, SymbolVar mat_idx, SymbolVar out_shape, | |||
| const Param ¶m = {}, | |||
| const OperatorNodeConfig &config = {}); | |||
| static SymbolVar make(SymbolVar in_tensor, SymbolVar mat, | |||
| SymbolVar out_shape, | |||
| const Param ¶m = {}, | |||
| const OperatorNodeConfig &config = {}) { | |||
| return make(in_tensor, mat, SymbolVar{}, out_shape, param, config); | |||
| } | |||
| static SymbolVar make(SymbolVar in_tensor, SymbolVar mat, | |||
| const TensorShape &out_shape, | |||
| const Param ¶m = {}, | |||
| const OperatorNodeConfig &config = {}) | |||
| { | |||
| return make(in_tensor, mat, | |||
| cg::var_from_tensor_shape( | |||
| in_tensor, out_shape), param, config); | |||
| } | |||
| private: | |||
| void init_output_dtype() override; | |||
| void add_input_layout_constraint() override; | |||
| void init_output_static_infer_desc() override; | |||
| void outshape_by_symvar_do_get_output_shape( | |||
| TensorShape &dest, const ShapeInferInfo &shpinfo) override; | |||
| void scn_do_execute() override; | |||
| size_t get_workspace_size_bytes( | |||
| const TensorShapeArray &input_shapes, | |||
| const TensorShapeArray &output_shapes) const override; | |||
| void record_execute_deps(ExecDependencyArray& deps) override; | |||
| }; | |||
| intl::OutshapeBySymvarSCNOpr<mixin::MegDNNOprHolderImpl< | |||
| megdnn::WarpPerspectiveForward>>>) // { | |||
| public: | |||
| WarpPerspectiveForward(VarNode* in_tensor, VarNode* mat, VarNode* mat_idx, | |||
| VarNode* out_shape, const Param& param, | |||
| const OperatorNodeConfig& config); | |||
| static SymbolVar make(SymbolVar in_tensor, SymbolVar mat, SymbolVar mat_idx, | |||
| SymbolVar out_shape, const Param& param = {}, | |||
| const OperatorNodeConfig& config = {}); | |||
| static SymbolVar make(SymbolVar in_tensor, SymbolVar mat, SymbolVar out_shape, | |||
| const Param& param = {}, | |||
| const OperatorNodeConfig& config = {}) { | |||
| return make(in_tensor, mat, SymbolVar{}, out_shape, param, config); | |||
| } | |||
| static SymbolVar make(SymbolVar in_tensor, SymbolVar mat, | |||
| const TensorShape& out_shape, const Param& param = {}, | |||
| const OperatorNodeConfig& config = {}) { | |||
| return make(in_tensor, mat, cg::var_from_tensor_shape(in_tensor, out_shape), | |||
| param, config); | |||
| } | |||
| private: | |||
| void init_output_dtype() override; | |||
| void add_input_layout_constraint() override; | |||
| void init_output_static_infer_desc() override; | |||
| void outshape_by_symvar_do_get_output_shape( | |||
| TensorShape& dest, const ShapeInferInfo& shpinfo) override; | |||
| void scn_do_execute() override; | |||
| size_t get_workspace_size_bytes( | |||
| const TensorShapeArray& input_shapes, | |||
| const TensorShapeArray& output_shapes) const override; | |||
| void record_execute_deps(ExecDependencyArray& deps) override; | |||
| }; | |||
| using WarpPerspective = WarpPerspectiveForward; | |||
| MGB_DEFINE_OPR_CLASS(WarpPerspectiveBackwardData, | |||
| intl::MegDNNOprWrapperBwd<megdnn::WarpPerspectiveBackwardData>) // { | |||
| public: | |||
| WarpPerspectiveBackwardData(VarNode *mat, VarNode *out_diff, | |||
| VarNode *in_for_shape, const Param ¶m, | |||
| const OperatorNodeConfig &config); | |||
| static SymbolVar make(SymbolVar mat, SymbolVar out_diff, | |||
| SymbolVar in_for_shape, const Param ¶m = {}, | |||
| const OperatorNodeConfig &config = {}); | |||
| }; | |||
| MGB_DEFINE_OPR_CLASS(WarpPerspectiveBackwardMat, | |||
| intl::MegDNNOprWrapperBwd<megdnn::WarpPerspectiveBackwardMat>) // { | |||
| public: | |||
| WarpPerspectiveBackwardMat( | |||
| VarNode *src, VarNode *mat, VarNode *out_diff, | |||
| const Param ¶m, const OperatorNodeConfig &config); | |||
| static SymbolVar make( | |||
| SymbolVar src, SymbolVar mat, SymbolVar out_diff, | |||
| const Param ¶m = {}, const OperatorNodeConfig &config = {}); | |||
| }; | |||
| MGB_DEFINE_OPR_CLASS( | |||
| WarpPerspectiveBackwardData, | |||
| intl::MegDNNOprWrapperBwd<megdnn::WarpPerspectiveBackwardData>) // { | |||
| public: | |||
| WarpPerspectiveBackwardData(VarNode* mat, VarNode* out_diff, | |||
| VarNode* in_for_shape, const Param& param, | |||
| const OperatorNodeConfig& config); | |||
| WarpPerspectiveBackwardData(VarNode* mat, VarNode* mat_idx, VarNode* out_diff, | |||
| VarNode* in_for_shape, const Param& param, | |||
| const OperatorNodeConfig& config); | |||
| static SymbolVar make(SymbolVar mat, SymbolVar out_diff, SymbolVar in_for_shape, | |||
| const Param& param = {}, | |||
| const OperatorNodeConfig& config = {}); | |||
| static SymbolVar make(SymbolVar mat, SymbolVar mat_idx, SymbolVar out_diff, | |||
| SymbolVar in_for_shape, const Param& param = {}, | |||
| const OperatorNodeConfig& config = {}); | |||
| void scn_do_execute() override; | |||
| }; | |||
| MGB_DEFINE_OPR_CLASS( | |||
| WarpPerspectiveBackwardMat, | |||
| intl::MegDNNOprWrapperBwd<megdnn::WarpPerspectiveBackwardMat>) // { | |||
| public: | |||
| WarpPerspectiveBackwardMat(VarNode* src, VarNode* mat, VarNode* mat_idx, | |||
| VarNode* out_diff, const Param& param, | |||
| const OperatorNodeConfig& config); | |||
| static SymbolVar make(SymbolVar src, SymbolVar mat, SymbolVar out_diff, | |||
| const Param& param = {}, | |||
| const OperatorNodeConfig& config = {}) { | |||
| return make(src, mat, {}, out_diff, param, config); | |||
| } | |||
| static SymbolVar make(SymbolVar src, SymbolVar mat, SymbolVar mat_idx, | |||
| SymbolVar out_diff, const Param& param = {}, | |||
| const OperatorNodeConfig& config = {}); | |||
| void scn_do_execute() override; | |||
| }; | |||
| /* ============================= shape infer ============================== */ | |||
| //! param: src, dst | |||
| @@ -116,68 +133,67 @@ using CvtColor = CvtColorForward; | |||
| using GaussianBlur = GaussianBlurForward; | |||
| /* ============================= user set shape =========================== */ | |||
| MGB_DEFINE_OPR_CLASS(ResizeForward, | |||
| intl::WorkspaceSizeInfer< | |||
| intl::OutshapeBySymvarSCNOpr<mixin::MegDNNOprHolderImpl< | |||
| megdnn::ResizeForward>>>) // { | |||
| public: | |||
| ResizeForward( | |||
| VarNode *in_tensor, VarNode *out_shape, const Param ¶m, | |||
| const OperatorNodeConfig &config); | |||
| static SymbolVar make(SymbolVar in_tensor, SymbolVar out_shape, | |||
| const Param ¶m = {}, | |||
| const OperatorNodeConfig &config = {}); | |||
| static SymbolVar make(SymbolVar in_tensor, const TensorShape &out_shape, | |||
| const Param ¶m = {}, | |||
| const OperatorNodeConfig &config = {}) | |||
| { | |||
| return make(in_tensor, | |||
| cg::var_from_tensor_shape( | |||
| in_tensor, out_shape), param, config); | |||
| } | |||
| private: | |||
| void init_output_dtype() override; | |||
| void add_input_layout_constraint() override; | |||
| void init_output_static_infer_desc() override; | |||
| void outshape_by_symvar_do_get_output_shape( | |||
| TensorShape &dest, const ShapeInferInfo &shpinfo) override; | |||
| void scn_do_execute() override; | |||
| size_t get_workspace_size_bytes( | |||
| const TensorShapeArray &input_shapes, | |||
| const TensorShapeArray &output_shapes) const override; | |||
| void record_execute_deps(ExecDependencyArray &deps) override; | |||
| }; | |||
| MGB_DEFINE_OPR_CLASS( | |||
| ResizeForward, | |||
| intl::WorkspaceSizeInfer<intl::OutshapeBySymvarSCNOpr< | |||
| mixin::MegDNNOprHolderImpl<megdnn::ResizeForward>>>) // { | |||
| public: | |||
| ResizeForward(VarNode* in_tensor, VarNode* out_shape, const Param& param, | |||
| const OperatorNodeConfig& config); | |||
| static SymbolVar make(SymbolVar in_tensor, SymbolVar out_shape, | |||
| const Param& param = {}, | |||
| const OperatorNodeConfig& config = {}); | |||
| static SymbolVar make(SymbolVar in_tensor, const TensorShape& out_shape, | |||
| const Param& param = {}, | |||
| const OperatorNodeConfig& config = {}) { | |||
| return make(in_tensor, cg::var_from_tensor_shape(in_tensor, out_shape), | |||
| param, config); | |||
| } | |||
| private: | |||
| void init_output_dtype() override; | |||
| void add_input_layout_constraint() override; | |||
| void init_output_static_infer_desc() override; | |||
| void outshape_by_symvar_do_get_output_shape( | |||
| TensorShape& dest, const ShapeInferInfo& shpinfo) override; | |||
| void scn_do_execute() override; | |||
| size_t get_workspace_size_bytes( | |||
| const TensorShapeArray& input_shapes, | |||
| const TensorShapeArray& output_shapes) const override; | |||
| void record_execute_deps(ExecDependencyArray& deps) override; | |||
| }; | |||
| using Resize = ResizeForward; | |||
| MGB_DEFINE_OPR_CLASS(ResizeBackward, | |||
| intl::MegDNNOprWrapperBwd<megdnn::ResizeBackward>) // { | |||
| public: | |||
| ResizeBackward(VarNode *out_diff, | |||
| VarNode *in_for_shape, const Param ¶m, | |||
| const OperatorNodeConfig &config); | |||
| intl::MegDNNOprWrapperBwd<megdnn::ResizeBackward>) // { | |||
| public: | |||
| ResizeBackward(VarNode* out_diff, VarNode* in_for_shape, const Param& param, | |||
| const OperatorNodeConfig& config); | |||
| static SymbolVar make(SymbolVar out_diff, | |||
| SymbolVar in_for_shape, const Param ¶m = {}, | |||
| const OperatorNodeConfig &config = {}); | |||
| }; | |||
| static SymbolVar make(SymbolVar out_diff, SymbolVar in_for_shape, | |||
| const Param& param = {}, | |||
| const OperatorNodeConfig& config = {}); | |||
| }; | |||
| MGB_DEFINE_OPR_CLASS(RemapForward, | |||
| intl::MegDNNOprWrapperFwd<megdnn::RemapForward>) // { | |||
| public: | |||
| RemapForward( | |||
| VarNode *in_tensor, VarNode* map, | |||
| const Param ¶m, const OperatorNodeConfig &config); | |||
| static SymbolVar make(SymbolVar in_tensor, SymbolVar map, const Param ¶m = {}, | |||
| const OperatorNodeConfig &config = {}); | |||
| private: | |||
| void init_output_dtype() override; | |||
| }; | |||
| intl::MegDNNOprWrapperFwd<megdnn::RemapForward>) // { | |||
| public: | |||
| RemapForward(VarNode* in_tensor, VarNode* map, const Param& param, | |||
| const OperatorNodeConfig& config); | |||
| static SymbolVar make(SymbolVar in_tensor, SymbolVar map, | |||
| const Param& param = {}, | |||
| const OperatorNodeConfig& config = {}); | |||
| private: | |||
| void init_output_dtype() override; | |||
| }; | |||
| using Remap = RemapForward; | |||
| /*! | |||
| @@ -191,47 +207,42 @@ using Remap = RemapForward; | |||
| * Input mat shape: batch, 2, 2; note that the mat is used to translate output | |||
| * coordinate onto input coordinate, so it is not inversed. | |||
| */ | |||
| MGB_DEFINE_OPR_CLASS(WarpAffineForward, | |||
| intl::WorkspaceSizeInfer< | |||
| intl::OutshapeBySymvarSCNOpr<mixin::MegDNNOprHolderImpl< | |||
| megdnn::WarpAffineForward>>>) // { | |||
| public: | |||
| WarpAffineForward( | |||
| VarNode *in_tensor, VarNode *mat, VarNode *out_shape, | |||
| const Param ¶m, | |||
| const OperatorNodeConfig &config); | |||
| static SymbolVar make(SymbolVar in_tensor, SymbolVar mat, | |||
| SymbolVar out_shape, | |||
| const Param ¶m = {}, | |||
| const OperatorNodeConfig &config = {}); | |||
| static SymbolVar make(SymbolVar in_tensor, SymbolVar mat, | |||
| const TensorShape &out_shape, | |||
| const Param ¶m = {}, | |||
| const OperatorNodeConfig &config = {}) | |||
| { | |||
| return make(in_tensor, mat, | |||
| cg::var_from_tensor_shape( | |||
| in_tensor, out_shape), param, config); | |||
| } | |||
| private: | |||
| void init_output_dtype() override; | |||
| void add_input_layout_constraint() override; | |||
| void init_output_static_infer_desc() override; | |||
| void outshape_by_symvar_do_get_output_shape( | |||
| TensorShape &dest, const ShapeInferInfo &shpinfo) override; | |||
| void scn_do_execute() override; | |||
| size_t get_workspace_size_bytes( | |||
| const TensorShapeArray &input_shapes, | |||
| const TensorShapeArray &output_shapes) const override; | |||
| void record_execute_deps(ExecDependencyArray &deps) override; | |||
| }; | |||
| MGB_DEFINE_OPR_CLASS( | |||
| WarpAffineForward, | |||
| intl::WorkspaceSizeInfer<intl::OutshapeBySymvarSCNOpr< | |||
| mixin::MegDNNOprHolderImpl<megdnn::WarpAffineForward>>>) // { | |||
| public: | |||
| WarpAffineForward(VarNode* in_tensor, VarNode* mat, VarNode* out_shape, | |||
| const Param& param, const OperatorNodeConfig& config); | |||
| static SymbolVar make(SymbolVar in_tensor, SymbolVar mat, SymbolVar out_shape, | |||
| const Param& param = {}, | |||
| const OperatorNodeConfig& config = {}); | |||
| static SymbolVar make(SymbolVar in_tensor, SymbolVar mat, | |||
| const TensorShape& out_shape, const Param& param = {}, | |||
| const OperatorNodeConfig& config = {}) { | |||
| return make(in_tensor, mat, cg::var_from_tensor_shape(in_tensor, out_shape), | |||
| param, config); | |||
| } | |||
| private: | |||
| void init_output_dtype() override; | |||
| void add_input_layout_constraint() override; | |||
| void init_output_static_infer_desc() override; | |||
| void outshape_by_symvar_do_get_output_shape( | |||
| TensorShape& dest, const ShapeInferInfo& shpinfo) override; | |||
| void scn_do_execute() override; | |||
| size_t get_workspace_size_bytes( | |||
| const TensorShapeArray& input_shapes, | |||
| const TensorShapeArray& output_shapes) const override; | |||
| void record_execute_deps(ExecDependencyArray& deps) override; | |||
| }; | |||
| using WarpAffine = WarpAffineForward; | |||
| } // opr | |||
| } // mgb | |||
| } // opr | |||
| } // mgb | |||
| // vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} | |||
| @@ -216,7 +216,10 @@ TEST(TestOprImgproc, WarpPerspectiveWithMatIdx) { | |||
| .set_input_generator(1, gen_mat) | |||
| .set_input_generator(2, gen_mat_idx) | |||
| .set_input_dtype(2, dtype::Int32{}) | |||
| /*! it is hard to make the grad check succeed, since | |||
| the CUDA implementation accumulates (sums) the gradient */ | |||
| .disable_grad_check() | |||
| .set_input_allow_grad(2, false) | |||
| .run({TensorShape{N_SRC, C, 4, 5}, {N_MAT, 3, 3}, {N_MAT}}) | |||
| .run({TensorShape{N_SRC, C, 6, 5}, {N_MAT, 3, 3}, {N_MAT}}) | |||
| .run({TensorShape{N_SRC, C, 22, 19}, {N_MAT, 3, 3}, {N_MAT}}); | |||
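Putting the pieces together, a hedged end-to-end sketch of the forward mat_idx workflow this patch enables (graph and host-tensor setup are assumed boilerplate; shapes follow the tests above):

    // N_SRC source images, N_MAT output views; mat_idx selects, per view,
    // which source image its 3x3 matrix warps.
    auto src     = opr::Host2DeviceCopy::make(*graph, host_src);      // {N_SRC, C, H, W}
    auto mat     = opr::Host2DeviceCopy::make(*graph, host_mat);      // {N_MAT, 3, 3}
    auto mat_idx = opr::Host2DeviceCopy::make(*graph, host_mat_idx);  // {N_MAT}, Int32
    auto dst     = opr::WarpPerspective::make(
            src, mat, mat_idx,
            cg::var_from_tensor_shape(src, {OH, OW}),  // desired output H, W
            param);                                    // dst: {N_MAT, C, OH, OW}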