wangwei
/
MegEngine

 
			
			   
				 
					
						
						
							
							/**
 * \file dnn/src/cuda/rotate/opr_impl.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */

#include <cstring>

#include "./opr_impl.h"
#include "./rotate.cuh"

#include "src/cuda/handle.h"
#include "src/common/utils.h"
#include "src/cuda/utils.h"

namespace megdnn {
namespace cuda {

namespace rotate_intl {

/*!
 * \brief Forward one rotate request to rotate::rotate, turning the runtime
 *      \p clockwise flag into the second template argument.
 *
 * \param src,dst source / destination element pointers, passed through as-is
 * \param N,IH,IW,IC source extents, passed through as-is
 * \param istride0,istride1,istride2 source strides, passed through as-is
 * \param OH,OW destination extents, passed through as-is
 * \param OC only checked against \p IC by the assertion; it is not forwarded
 * \param ostride0,ostride1,ostride2 destination strides, passed through as-is
 * \param clockwise true selects rotate<ctype, true>, false selects
 *      rotate<ctype, false>
 * \param stream CUDA stream handed to rotate::rotate
 */
template <typename ctype>
void rotate_exec(const ctype* src, ctype* dst, size_t N, size_t IH, size_t IW,
                 size_t IC, size_t istride0, size_t istride1, size_t istride2,
                 size_t OH, size_t OW, size_t OC, size_t ostride0,
                 size_t ostride1, size_t ostride2, bool clockwise,
                 cudaStream_t stream) {
    // The callee receives a single channel count, so both sides must agree.
    megdnn_assert(IC == OC);

    if (!clockwise) {
        rotate::rotate<ctype, false>(src, dst, N, IH, IW, IC, istride0,
                                     istride1, istride2, OH, OW, ostride0,
                                     ostride1, ostride2, stream);
        return;
    }

    rotate::rotate<ctype, true>(src, dst, N, IH, IW, IC, istride0, istride1,
                                istride2, OH, OW, ostride0, ostride1, ostride2,
                                stream);
}

}  // namespace rotate_intl

/*!
 * Run the rotate operator from \p src into \p dst on the stream returned by
 * cuda_stream(handle()).
 *
 * The leading dimension of \p src (N) is walked in slices of at most
 * max_batch_x_channel_size() entries. For every slice the dtype of \p src
 * selects the element type, both base pointers are advanced by the slice start
 * times the respective stride[0], and rotate_intl::rotate_exec is invoked with
 * the slice length as its batch count. The dtype dispatch is evaluated at
 * least once, including when N is 0.
 */
void RotateImpl::exec(_megdnn_tensor_in src, _megdnn_tensor_in dst,
                      _megdnn_workspace workspace) {
    check_exec(src.layout, dst.layout, workspace.size);
    auto stream = cuda_stream(handle());

    //! the slice count is driven by src only; dst is indexed with the same
    //! slice start
    const size_t N = src.layout.shape[0];
    const size_t max_batch_x_channel = max_batch_x_channel_size();
    size_t curr_batch = 0;

    //! one guarded launch per candidate dtype; only the branch whose enum
    //! equals the dtype of src does any work
#define cb(DType)                                                              \
    if (src.layout.dtype.enumv() == DTypeTrait<DType>::enumv) {                \
        using ctype = typename DTypeTrait<DType>::ctype;                       \
        const auto& sl = src.layout;                                           \
        const auto& dl = dst.layout;                                           \
        const size_t batch_size =                                              \
                std::min<size_t>(N - curr_batch, max_batch_x_channel);         \
        ctype* src_ptr = src.ptr<ctype>() + curr_batch * sl.stride[0];         \
        ctype* dst_ptr = dst.ptr<ctype>() + curr_batch * dl.stride[0];         \
        rotate_intl::rotate_exec<ctype>(                                       \
                src_ptr, dst_ptr, batch_size,                                  \
                sl.shape[1], sl.shape[2], sl.shape[3],                         \
                sl.stride[0], sl.stride[1], sl.stride[2],                      \
                dl.shape[1], dl.shape[2], dl.shape[3],                         \
                dl.stride[0], dl.stride[1], dl.stride[2],                      \
                param().clockwise, stream);                                    \
    }

    //! the first pass always runs; further passes happen only while slices
    //! remain, i.e. only when N exceeds max_batch_x_channel
    do {
        MEGDNN_FOREACH_COMPUTING_DTYPE(cb)

        curr_batch += max_batch_x_channel;
    } while (curr_batch < N);
#undef cb
}

}  // namespace cuda
}  // namespace megdnn

// vim: syntax=cpp.doxygen