|
- /**
- * \file dnn/src/cuda/convpooling/conv_pooling_utils.cuh
- * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
- *
- * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- */
- #pragma once
- #include "src/cuda/utils.cuh"
- #include <algorithm>
- #include <math.h>
- #include <cuda_runtime_api.h>
-
- //#include "./helper.cuh"
-
-
- namespace megdnn {
- namespace cuda {
- namespace conv_pool {
-
/*!
 * \brief Evaluate a CUDA runtime call exactly once and assert that it
 *      returned cudaSuccess.
 *
 * The numeric error code, its cudaGetErrorString() text, the stringified
 * call expression and the source location are forwarded to megdnn_assert.
 *
 * \param call expression yielding a cudaError_t
 *
 * The temporary uses a deliberately unusual name: a plain name such as
 * `code` would shadow a caller variable of the same name that appears inside
 * \p call, making the call read the uninitialized temporary instead.
 */
#define CUDA_CHKERR(call) \
    do { \
        cudaError_t cuda_chkerr_code_ = (call); \
        megdnn_assert(cuda_chkerr_code_ == cudaSuccess, \
                "cuda err %d: %s (call %s at %s:%s:%d)", \
                int(cuda_chkerr_code_), \
                cudaGetErrorString(cuda_chkerr_code_), #call, \
                __FILE__, __func__, __LINE__); \
    } while (0)
-
/*!
 * \brief Block until the device has finished and check the resulting status
 *      through CUDA_CHKERR.
 *
 * The expansion already ends with a semicolon, so the macro can be written
 * as a statement on its own.
 */
#define CUDA_CHK_KERN_ERR \
    CUDA_CHKERR(cudaDeviceSynchronize());
-
/*!
 * \brief Round \p n down to a multiple of 32.
 *
 * When rounding down would give 0 (i.e. |n| < 32) the input is returned
 * unchanged instead.
 *
 * \param n value to align
 * \return the largest multiple of 32 not exceeding \p n in magnitude, or
 *      \p n itself when that multiple is 0
 */
static inline int __host__ align_to_warp(int n) {
    const int rounded_down = (n / 32) * 32;
    return rounded_down != 0 ? rounded_down : n;
}
-
- // --- Nonlinearity ---
/*!
 * \brief Rectified linear unit: passes strictly positive inputs through and
 *      maps everything else to 0.
 */
struct Relu {
    /*!
     * \param x input value
     * \return \p x when x > 0, otherwise 0
     */
    static __device__ float apply(float x) {
        if (x > 0) {
            return x;
        }
        // Reached for zero, negative values and anything that does not
        // compare greater than zero.
        return 0;
    }
};
-
/*!
 * \brief Logistic sigmoid, 1 / (1 + e^-x), evaluated in single precision.
 */
struct Sigmoid {
    /*!
     * \param x input value
     * \return 1 / (1 + exp(-x))
     */
    static __device__ float apply(float x) {
        // The input and the result are both float, so use expf instead of
        // promoting to double and narrowing the exponential back to float:
        // the extra precision was discarded anyway, and for very negative x
        // the double result did not fit into a float.
        const float exp_value = expf(-x);
        return 1.0f / (1.0f + exp_value);
    }
};
-
/*!
 * \brief No-op nonlinearity: returns its input unchanged.
 */
struct Identity {
    /*!
     * \param x input value
     * \return \p x
     */
    static __device__ float apply(float x) {
        const float passthrough = x;
        return passthrough;
    }
};
-
- // --- Static Reduce ---
/*!
 * \brief Compile-time recursive reduction of \p size consecutive floats with
 *      the binary operator Op::apply(float, float).
 *
 * The range is split into a front part of size / 2 elements and a back part
 * holding the remainder; each part is reduced recursively and the two partial
 * results are combined with Op. Recursion ends in the specializations for
 * sizes 1 and 2 below, so \p size must be at least 1 (size 0 would recurse
 * on itself and never reach a specialization).
 *
 * \tparam size number of elements read from the input pointer
 * \tparam Op type providing a static apply(float, float)
 */
template <int size, class Op>
struct StaticReduce {
    static __device__ float apply(const float* val) {
        const int front_count = size / 2;
        const int back_count = size - front_count;
        const float front = StaticReduce<front_count, Op>::apply(val);
        const float back =
                StaticReduce<back_count, Op>::apply(val + front_count);
        return Op::apply(front, back);
    }
};

//! Base case: a single element is its own reduction; Op is not invoked.
template <class Op>
struct StaticReduce<1, Op> {
    static __device__ float apply(const float* val) {
        return *val;
    }
};

//! Base case: two elements are combined by one call to Op.
template <class Op>
struct StaticReduce<2, Op> {
    static __device__ float apply(const float* val) {
        const float first = val[0];
        const float second = val[1];
        return Op::apply(first, second);
    }
};
-
/*!
 * \brief Binary addition operator, usable as the Op of StaticReduce.
 */
struct OpAdd {
    //! \return a + b
    static __device__ float apply(float a, float b) {
        const float sum = a + b;
        return sum;
    }
};
-
/*!
 * \brief Binary maximum operator, usable as the Op of StaticReduce.
 */
struct OpMax {
    //! \return max(a, b)
    static __device__ float apply(float a, float b) {
        const float larger = max(a, b);
        return larger;
    }
};
-
/*!
 * \brief Index getter that mirrors position \p i within a kernel of extent
 *      \p kern and then adds the offset \p p.
 */
struct IdxGetterConvolution {
    /*!
     * \param kern kernel extent along the axis
     * \param i position along the axis
     * \param p offset added to the mirrored position
     * \return kern - i - 1 + p
     */
    static inline __device__ int apply(int kern, int i, int p) {
        const int mirrored = kern - i - 1;
        return mirrored + p;
    }
};
-
/*!
 * \brief Index getter that subtracts the offset \p p from position \p i
 *      without mirroring.
 */
struct IdxGetterCorrRel {
    /*!
     * \param kern accepted for signature parity with IdxGetterConvolution;
     *      not used in the computation
     * \param i position along the axis
     * \param p offset subtracted from \p i
     * \return i - p
     */
    static inline __device__ int apply(int kern, int i, int p) {
        const int shifted = i - p;
        return shifted;
    }
};
-
-
- // --- Pooling ---
/*!
 * \brief Average pooling over a window whose shape is fixed at compile time.
 */
struct MeanPooler {
    /*!
     * \tparam pool_shape_h window height
     * \tparam pool_shape_w window width
     * \param val pointer to pool_shape_h * pool_shape_w consecutive floats
     * \return the sum of the window divided by its element count
     */
    template <int pool_shape_h, int pool_shape_w>
    static __device__ float apply(const float* val) {
        const int window_elems = pool_shape_h * pool_shape_w;
        const float total = StaticReduce<window_elems, OpAdd>::apply(val);
        return total / window_elems;
    }
};
-
/*!
 * \brief Max pooling over a window whose shape is fixed at compile time.
 */
struct MaxPooler {
    /*!
     * \tparam pool_shape_h window height
     * \tparam pool_shape_w window width
     * \param val pointer to pool_shape_h * pool_shape_w consecutive floats
     * \return the largest value in the window
     */
    template <int pool_shape_h, int pool_shape_w>
    static __device__ float apply(const float* val) {
        const int window_elems = pool_shape_h * pool_shape_w;
        return StaticReduce<window_elems, OpMax>::apply(val);
    }
};
-
-
- // --- Reader ---
/*!
 * \brief Cursor over a float tensor that is read through a 1D texture
 *      object with tex1Dfetch.
 *
 * The reader keeps a base offset (set by set_pos(), advanced by
 * next_channel()) and a row offset (copied from the base by reset_row(),
 * advanced by next_row()). set_pos() does not touch the row offset, so
 * reset_row() has to be called before next_row() or the two-argument get().
 */
class Tex1DReader {
    cudaTextureObject_t m_tex;
    int m_base_offset;
    int m_chl_stride;
    int m_row_stride;
    int m_row_offset;
    //size_t batch_, chal_, height_, weight_;

public:
    // Disabled earlier interface, kept for reference.
    /*__device__ void init(cudaTextureObject_t& tex,
            size_t batch, size_t chal, size_t height, size_t weight) {
        batch_ = batch;
        chal_ = chal;
        height_ = height;
        weight_ = weight;
        m_chl_stride = height * weight;
        m_row_stride = weight;
    }

    __device__ void set_pos(cudaTextureObject_t& tex,
            // Current position
            size_t n, size_t c, size_t h, size_t w) {
        m_tex = tex;
        m_base_offset = ((n * chal_ + c) * height_ + h) * weight_ + w;
    }
    */

    /*!
     * \brief Bind the texture and place the base offset at element
     *      (n, c, h, w).
     *
     * The element is addressed as ((n * chal + c) * height + h) * weight + w;
     * the row stride becomes \p weight and the channel stride
     * height * weight.
     *
     * \param tex texture object to read from
     * \param chal extent of the channel axis
     * \param height extent of the row axis
     * \param weight extent of the innermost axis (presumably the width --
     *      TODO confirm the naming)
     * \param n,c,h,w current position
     */
    __device__ void set_pos(cudaTextureObject_t& tex,
            // Current position
            int chal, int height, int weight, int n, int c, int h, int w) {
        m_tex = tex;
        m_row_stride = weight;
        m_chl_stride = height * weight;
        const int plane_idx = n * chal + c;
        const int row_idx = plane_idx * height + h;
        m_base_offset = row_idx * weight + w;
    }

    //! Move the row cursor back to the current base offset.
    __device__ void reset_row() {
        m_row_offset = m_base_offset;
    }

    //! Advance the row cursor by one row stride.
    __device__ void next_row() {
        m_row_offset = m_row_offset + m_row_stride;
    }

    //! Advance the base offset by one channel stride; the row cursor is
    //! left as it is until reset_row() is called.
    __device__ void next_channel() {
        m_base_offset = m_base_offset + m_chl_stride;
    }

    /*!
     * \brief Fetch the element \p dc positions past the row cursor.
     *
     * The first argument is ignored; rows are selected via next_row().
     */
    __device__ float get(int /*dr*/, int dc) {
        const int pos = dc + m_row_offset;
        return tex1Dfetch<float>(m_tex, pos);
    }

    //! Fetch the element \p idx positions past the base offset.
    __device__ float get(int idx) {
        const int pos = idx + m_base_offset;
        return tex1Dfetch<float>(m_tex, pos);
    }
};
-
/*!
 * \brief Host-side helper declared here and defined in another translation
 *      unit.
 *
 * NOTE(review): presumably creates, in \p tex, a texture object covering the
 * N * IC * IH * IW floats starting at \p input -- confirm against the
 * definition.
 *
 * \param input float buffer
 * \param tex texture object passed by reference
 * \param N,IC,IH,IW tensor extents
 */
extern __host__ void create_cuda_tex(
        float *input,
        cudaTextureObject_t& tex,
        size_t N,
        size_t IC,
        size_t IH,
        size_t IW);
-
-
-
- } // namespace conv_pool
- } // namespace cuda
- } // namespace megdnn
- // vim: syntax=cpp.doxygen
|