- /**
- * \file dnn/src/cuda/cudnn_wrapper.cpp
- * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
- *
- * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- */
- #include "src/cuda/cudnn_wrapper.h"
-
- #include "src/common/utils.h"
- #include "src/cuda/utils.h"
-
- namespace {
-
- using namespace megdnn;
-
- cudnnDataType_t to_cudnn_dtype(
- DType type, const param::Convolution::Format format = {}) {
- switch (type.enumv()) {
- case DTypeEnum::Float32:
- return CUDNN_DATA_FLOAT;
- case DTypeEnum::Float16:
- return CUDNN_DATA_HALF;
- #if CUDNN_MAJOR >= 7
- case DTypeEnum::Int32:
- case DTypeEnum::QuantizedS32:
- return CUDNN_DATA_INT32;
- #endif
- #if CUDNN_MAJOR >= 6
- case DTypeEnum::QuantizedS8: {
- if (format == param::Convolution::Format::NCHW4)
- return CUDNN_DATA_INT8x4;
- #if CUDNN_VERSION >= 7500
- else if (format == param::Convolution::Format::NCHW32)
- return CUDNN_DATA_INT8x32;
- #endif
- else
- return CUDNN_DATA_INT8;
- }
-
- case DTypeEnum::Int8: {
- if (format == param::Convolution::Format::NCHW4)
- return CUDNN_DATA_INT8x4;
- #if CUDNN_VERSION >= 7500
- else if (format == param::Convolution::Format::NCHW32)
- return CUDNN_DATA_INT8x32;
- #endif
- else
- return CUDNN_DATA_INT8;
- }
- #endif
- default:
- #if CUDNN_MAJOR >= 6
- megdnn_throw("dtype must be float16/float32/int8/int32");
- #else
- megdnn_throw("dtype must be float16/float32");
- #endif
- }
- }
-
- cudnnTensorFormat_t to_cudnn_format(const param::Convolution::Format format) {
- switch (format) {
- case param::Convolution::Format::NCHW:
- return CUDNN_TENSOR_NCHW;
- #if CUDNN_MAJOR >= 7
- case param::Convolution::Format::NCHW4:
- case param::Convolution::Format::NCHW32:
- return CUDNN_TENSOR_NCHW_VECT_C;
- #endif
- case param::Convolution::Format::NHWC:
- return CUDNN_TENSOR_NHWC;
- default:
- megdnn_assert_internal(0);
- }
- }
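-
- // E.g. a QuantizedS8 tensor in NCHW4 format maps to the pair
- // (CUDNN_TENSOR_NCHW_VECT_C, CUDNN_DATA_INT8x4) under cuDNN >= 7.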
-
- } // namespace
-
- namespace megdnn {
- namespace cuda {
-
- cudnnDataType_t get_compute_type_fp16(param::Convolution::ComputeMode comp_mode) {
- using Param = param::Convolution;
- cudnnDataType_t compute_type;
- if (comp_mode == Param::ComputeMode::DEFAULT) {
- // TRUE_HALF_CONFIG
- if (is_compute_capability_required(5, 3)) {
- compute_type = CUDNN_DATA_HALF;
- } else {
- auto&& device_prop = current_device_prop();
- int major = device_prop.major, minor = device_prop.minor;
- MEGDNN_MARK_USED_VAR(major);
- MEGDNN_MARK_USED_VAR(minor);
-             megdnn_log_warn(
-                     "TRUE_HALF_CONFIG is only supported on architectures with "
-                     "true fp16 support, i.e., compute capability 5.3 or later "
-                     "(got %d.%d); falling back to PSEUDO_HALF_CONFIG",
-                     major, minor);
- compute_type = CUDNN_DATA_FLOAT;
- }
- } else {
- megdnn_assert(comp_mode == Param::ComputeMode::FLOAT32);
- // PSEUDO_HALF_CONFIG
- compute_type = CUDNN_DATA_FLOAT;
- }
- return compute_type;
- }
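-
- // For example (illustrative): a Float16 convolution with ComputeMode::DEFAULT
- // gets CUDNN_DATA_HALF accumulation on a compute-capability 7.0 device, but
- // falls back to CUDNN_DATA_FLOAT on a 5.2 device, which lacks true fp16 support.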
-
- TensorDesc::TensorDesc() {
- cudnn_check(cudnnCreateTensorDescriptor(&desc));
- }
-
- TensorDesc::~TensorDesc() {
- cudnn_check(cudnnDestroyTensorDescriptor(desc));
- }
-
- void TensorDesc::set(
- const TensorLayout& layout, const param::Convolution::Format format) {
-     // The layout may be non-contiguous; grouped convolution relies on this,
-     // so the contiguity assert below stays disabled.
-     // megdnn_assert_contiguous(layout);
- if (format == param::Convolution::Format::NCHW4 ||
- format == param::Convolution::Format::NCHW32)
- megdnn_assert_eq_size_t(layout.ndim, 5_z);
- else
- megdnn_assert_eq_size_t(layout.ndim, 4_z);
-
- size_t c_pos, spatial_pos;
- if (format == param::Convolution::Format::NCHW ||
- format == param::Convolution::Format::NCHW4 ||
- format == param::Convolution::Format::NCHW32) {
- c_pos = 1;
- spatial_pos = 2;
- } else {
- megdnn_assert(format == param::Convolution::Format::NHWC);
- c_pos = 3;
- spatial_pos = 1;
- }
- if (format == param::Convolution::Format::NCHW4) {
- megdnn_assert(layout.is_physical_contiguous());
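-         // E.g. (hypothetical shape) a physically contiguous NCHW4 layout
-         // {8, 16, 28, 28, 4} is described to cuDNN as an NCHW_VECT_C tensor
-         // with n=8, c=64 (16 * 4), h=28, w=28.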
- cudnn_check(cudnnSetTensor4dDescriptor(
- desc, to_cudnn_format(format), to_cudnn_dtype(layout.dtype, format),
- layout.shape[0], layout.shape[c_pos] * 4, layout.shape[spatial_pos + 0],
- layout.shape[spatial_pos + 1]));
- } else if (format == param::Convolution::Format::NCHW32) {
- megdnn_assert(layout.is_physical_contiguous());
- cudnn_check(cudnnSetTensor4dDescriptor(
- desc, to_cudnn_format(format), to_cudnn_dtype(layout.dtype, format),
- layout.shape[0], layout.shape[c_pos] * 32,
- layout.shape[spatial_pos + 0], layout.shape[spatial_pos + 1]));
-
- } else {
- cudnn_check(cudnnSetTensor4dDescriptorEx(
- desc, to_cudnn_dtype(layout.dtype), layout.shape[0],
- layout.shape[c_pos], layout.shape[spatial_pos + 0],
- layout.shape[spatial_pos + 1], layout.stride[0], layout.stride[c_pos],
- layout.stride[spatial_pos + 0], layout.stride[spatial_pos + 1]));
- }
- }
-
- std::string TensorDesc::to_string() {
-     cudnnDataType_t data_type;
-     int n, c, h, w;
-     int n_stride, c_stride, h_stride, w_stride;
- cudnn_check(cudnnGetTensor4dDescriptor(
- desc, &data_type, &n, &c, &h, &w, &n_stride, &c_stride, &h_stride,
- &w_stride));
- return ssprintf(
- "<dtype_%d, %d,%d,%d,%d(%d,%d,%d,%d)>", data_type, n, c, h, w, n_stride,
- c_stride, h_stride, w_stride);
- }
-
- template <typename Param>
- FilterDesc<Param>::FilterDesc() {
- cudnn_check(cudnnCreateFilterDescriptor(&desc));
- }
-
- template <typename Param>
- FilterDesc<Param>::~FilterDesc() {
- cudnn_check(cudnnDestroyFilterDescriptor(desc));
- }
-
- template <typename Param>
- std::string FilterDesc<Param>::to_string() {
-     cudnnDataType_t data_type;
-     cudnnTensorFormat_t format;
-     int k, c, h, w;
- cudnn_check(cudnnGetFilter4dDescriptor(desc, &data_type, &format, &k, &c, &h, &w));
- return ssprintf(
- "<dtype_%d, format_%d, %d,%d,%d,%d>", data_type, format, k, c, h, w);
- }
-
- template <typename Param>
- void FilterDesc<Param>::set(
- const typename ConvolutionBase<Param>::CanonizedFilterMeta& filter_meta) {
- megdnn_assert(filter_meta.spatial_ndim == 2);
- #if CUDNN_VERSION < 7500
- megdnn_assert(filter_meta.dilation[0] == 1 && filter_meta.dilation[1] == 1);
- #endif
- #if CUDNN_MAJOR <= 6
- megdnn_assert(filter_meta.group == 1);
- #endif
-
- auto filter_format = filter_meta.format;
- if (filter_format == param::ConvBias::Format::NCHW4_NCHW) {
- filter_format = param::ConvBias::Format::NCHW4;
- }
-     // For cuDNN 6 or below, filter_meta.group is always 1, so the
-     // ocpg * group expression below is correct for all cuDNN versions.
- cudnn_check(cudnnSetFilter4dDescriptor(
- desc, to_cudnn_dtype(filter_meta.dtype, filter_format),
- to_cudnn_format(filter_format),
-             filter_meta.ocpg * filter_meta.group, // group is always 1 on cuDNN <= 6
- filter_meta.icpg, filter_meta.spatial[0], filter_meta.spatial[1]));
- }
-
- template class FilterDesc<param::Convolution>;
- template class FilterDesc<param::ConvBias>;
-
- ConvDesc::ConvDesc() {
- cudnn_check(cudnnCreateConvolutionDescriptor(&desc));
- #if CUDNN_VERSION >= 7000
-     // cuDNN enables tensor cores only when tensors have dataType =
-     // CUDNN_DATA_HALF, so it should be safe to enable CUDNN_TENSOR_OP_MATH globally.
- cudnn_check(cudnnSetConvolutionMathType(desc, CUDNN_TENSOR_OP_MATH));
- #endif
- }
-
- ConvDesc::~ConvDesc() {
- cudnn_check(cudnnDestroyConvolutionDescriptor(desc));
- }
-
- void ConvDesc::set(
- DType data_type, const param::Convolution& param, const size_t nr_group) {
- using Param = param::Convolution;
- cudnnConvolutionMode_t mode;
- switch (param.mode) {
- case Param::Mode::CROSS_CORRELATION:
- mode = CUDNN_CROSS_CORRELATION;
- break;
- case Param::Mode::CONVOLUTION:
- mode = CUDNN_CONVOLUTION;
- break;
- default:
- megdnn_throw("conv mode must be conv or xcorr.");
- }
- cudnnDataType_t compute_type;
- MEGDNN_MARK_USED_VAR(compute_type);
- if (data_type.enumv() == DTypeEnum::Float32) {
- // FLOAT_CONFIG
- compute_type = CUDNN_DATA_FLOAT;
- } else if (data_type.enumv() == DTypeEnum::Float16) {
- auto comp_mode = param.compute_mode;
- compute_type = get_compute_type_fp16(comp_mode);
- #if CUDNN_MAJOR >= 7
- } else if (
- data_type.category() == DTypeCategory::INT ||
- data_type.category() == DTypeCategory::QUANTIZED) {
- compute_type = CUDNN_DATA_INT32;
- #endif
- } else {
- megdnn_throw("unspport data type for conv bias");
- }
- #if CUDNN_MAJOR >= 7
- cudnn_check(cudnnSetConvolutionGroupCount(desc, nr_group));
- #else
- megdnn_assert(nr_group == 1);
- #endif
-
- #if CUDNN_MAJOR >= 6
- cudnn_check(cudnnSetConvolution2dDescriptor(
- desc, param.pad_h, param.pad_w, param.stride_h, param.stride_w,
- param.dilate_h, param.dilate_w, mode, compute_type));
- #else
- cudnn_check(cudnnSetConvolution2dDescriptor(
- desc, param.pad_h, param.pad_w, param.stride_h, param.stride_w,
- param.dilate_h, param.dilate_w, mode));
- #endif
- }
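-
- // Usage sketch (hypothetical handle/pointer/algo names, for illustration):
- // the descriptors above are typically combined as follows when launching a
- // forward convolution:
- //
- //     TensorDesc src, dst;
- //     FilterDesc<param::Convolution> flt;
- //     ConvDesc conv;
- //     src.set(src_layout, param.format);
- //     dst.set(dst_layout, param.format);
- //     flt.set(filter_meta);
- //     conv.set(src_layout.dtype, param, filter_meta.group);
- //     cudnn_check(cudnnConvolutionForward(
- //             handle, &alpha, src.desc, src_ptr, flt.desc, flt_ptr, conv.desc,
- //             algo, workspace, workspace_size, &beta, dst.desc, dst_ptr));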
-
- LRNDesc::LRNDesc() {
- cudnn_check(cudnnCreateLRNDescriptor(&desc));
- }
-
- LRNDesc::~LRNDesc() {
- cudnn_check(cudnnDestroyLRNDescriptor(desc));
- }
-
- void LRNDesc::set(const param::LRN& param) {
- megdnn_assert(param.n & 1, "n is %u", param.n);
- megdnn_assert(
- param.n >= CUDNN_LRN_MIN_N, "n is %u, CUDNN_LRN_MIN_N is %d", param.n,
- CUDNN_LRN_MIN_N);
- megdnn_assert(
- param.n <= CUDNN_LRN_MAX_N, "n is %u, CUDNN_LRN_MAX_N is %d", param.n,
- CUDNN_LRN_MAX_N);
- megdnn_assert(
- param.k >= CUDNN_LRN_MIN_K, "k is %f, CUDNN_LRN_MIN_K is %lf", param.k,
- CUDNN_LRN_MIN_K);
- megdnn_assert(
- param.beta >= CUDNN_LRN_MIN_BETA, "beta is %f, CUDNN_LRN_MIN_BETA is %lf",
- param.beta, CUDNN_LRN_MIN_BETA);
- // Note that alpha is divided by n in the cudnn implementation,
- // so we have to multiply alpha by n ahead of time.
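-     // E.g. (hypothetical values) with n = 5 and alpha = 1e-4 we pass
-     // lrnAlpha = 5e-4, so cuDNN's internal lrnAlpha / n recovers the intended
-     // per-element coefficient of 1e-4.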
- cudnn_check(cudnnSetLRNDescriptor(
- desc, param.n, param.alpha * param.n, param.beta, param.k));
- }
-
- BNParamDesc::BNParamDesc() {
- cudnn_check(cudnnCreateTensorDescriptor(&desc));
- }
-
- void BNParamDesc::set(const cudnnTensorDescriptor_t xDesc, cudnnBatchNormMode_t mode) {
- cudnn_check(cudnnDeriveBNTensorDescriptor(desc, xDesc, mode));
- }
-
- BNParamDesc::~BNParamDesc() {
- cudnn_check(cudnnDestroyTensorDescriptor(desc));
- }
-
- Tensor3DDesc::Tensor3DDesc() {
- cudnn_check(cudnnCreateTensorDescriptor(&desc));
- }
-
- Tensor3DDesc::~Tensor3DDesc() {
- cudnn_check(cudnnDestroyTensorDescriptor(desc));
- }
-
- // Narrowing helper: cuDNN's Nd descriptor APIs take int, while TensorLayout
- // stores size_t shapes and strides.
- int sc(const size_t x) {
-     return static_cast<int>(x);
- }
-
- void Tensor3DDesc::set(const TensorLayout& layout, bool is_ndhwc) {
- megdnn_assert_eq_size_t(layout.ndim, 5_z);
- size_t c_pos, spatial_pos;
- if (is_ndhwc) {
- c_pos = 4;
- spatial_pos = 1;
- } else { // ncdhw
- c_pos = 1;
- spatial_pos = 2;
- }
- const int dimA[] = {
- sc(layout.shape[0]), sc(layout.shape[c_pos]),
- sc(layout.shape[spatial_pos + 0]), sc(layout.shape[spatial_pos + 1]),
- sc(layout.shape[spatial_pos + 2])};
-
- const int strideA[] = {
- sc(layout.stride[0]), sc(layout.stride[c_pos]),
- sc(layout.stride[spatial_pos + 0]), sc(layout.stride[spatial_pos + 1]),
- sc(layout.stride[spatial_pos + 2])};
-
- cudnn_check(cudnnSetTensorNdDescriptor(
- desc, to_cudnn_dtype(layout.dtype), 5, dimA, strideA));
- }
-
- Filter3DDesc::Filter3DDesc() {
- cudnn_check(cudnnCreateFilterDescriptor(&desc));
- }
-
- Filter3DDesc::~Filter3DDesc() {
- cudnn_check(cudnnDestroyFilterDescriptor(desc));
- }
-
- void Filter3DDesc::set(const Convolution3DBase::CanonizedFilterMeta& filter_meta) {
- megdnn_assert(filter_meta.spatial_ndim == 3);
- #if CUDNN_MAJOR <= 6
- megdnn_assert(filter_meta.group == 1);
- #endif
-
-     // For cuDNN 6 or below, filter_meta.group is always 1, so the
-     // ocpg * group expression below is correct for all cuDNN versions.
- const int filterDimA[] = {
-             sc(filter_meta.ocpg * filter_meta.group), // group is always 1 on cuDNN <= 6
- sc(filter_meta.icpg), sc(filter_meta.spatial[0]),
- sc(filter_meta.spatial[1]), sc(filter_meta.spatial[2])};
-
- cudnn_check(cudnnSetFilterNdDescriptor(
- desc, to_cudnn_dtype(DType::from_enum(filter_meta.dtype_enum)),
- CUDNN_TENSOR_NCHW, 5, filterDimA));
- }
-
- Conv3DDesc::Conv3DDesc() {
- cudnn_check(cudnnCreateConvolutionDescriptor(&desc));
-
- #if CUDNN_MAJOR >= 7
-     // cuDNN enables tensor cores only when tensors have dataType =
-     // CUDNN_DATA_HALF, so it should be safe to enable CUDNN_TENSOR_OP_MATH globally.
- cudnn_check(cudnnSetConvolutionMathType(desc, CUDNN_TENSOR_OP_MATH));
- #endif
- }
-
- Conv3DDesc::~Conv3DDesc() {
- cudnn_check(cudnnDestroyConvolutionDescriptor(desc));
- }
-
- void Conv3DDesc::set(const param::Convolution3D& param, const size_t nr_group) {
- cudnnConvolutionMode_t mode;
- switch (param.mode) {
- case param::Convolution3D::Mode::CROSS_CORRELATION:
- mode = CUDNN_CROSS_CORRELATION;
- break;
- case param::Convolution3D::Mode::CONVOLUTION:
- mode = CUDNN_CONVOLUTION;
- break;
- default:
- megdnn_throw("conv mode must be conv or xcorr.");
- }
- #if CUDNN_MAJOR >= 7
- cudnn_check(cudnnSetConvolutionGroupCount(desc, nr_group));
- #else
- megdnn_assert(nr_group == 1);
- #endif
-
- const int padA[] = {sc(param.pad_d), sc(param.pad_h), sc(param.pad_w)},
- filterStrideA[] =
- {sc(param.stride_d), sc(param.stride_h), sc(param.stride_w)},
- dilationA[] = {
- sc(param.dilate_d), sc(param.dilate_h), sc(param.dilate_w)};
-     // TRUE_HALF is not used here: computation is always done in CUDNN_DATA_FLOAT.
-     // For CUDNN_MAJOR < 6, all elements of dilationA should be 1.
- cudnn_check(cudnnSetConvolutionNdDescriptor(
- desc, 3, padA, filterStrideA, dilationA, mode, CUDNN_DATA_FLOAT));
- }
-
- ////////////////////////// CudnnAlgoPack //////////////////////////
-
- #define V1(v) #v
- #define V(v) V1(v)
- #define DEF_NAME(NAME) \
- #NAME "v" V(CUDNN_MAJOR) "." V(CUDNN_MINOR) "." V(CUDNN_PATCHLEVEL)
- #define DEF_ALGO(NAME, PROD1, PROD2) \
- { \
- NAME, { DEF_NAME(NAME), PROD1, PROD2 } \
- }
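-
- // For illustration, assuming cuDNN 7.6.5, DEF_ALGO(CUDNN_CONVOLUTION_FWD_ALGO_GEMM,
- // true, false) expands to:
- //     { CUDNN_CONVOLUTION_FWD_ALGO_GEMM,
- //       { "CUDNN_CONVOLUTION_FWD_ALGO_GEMMv7.6.5", true, false } }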
-
- #if !(CUDNN_MAJOR >= 6 || CUDNN_MINOR >= 1)
- #pragma message "not latest cudnn"
- #endif
-
- const std::unordered_map<cudnnConvolutionBwdDataAlgo_t, CudnnAlgoPack::Attr>
- CudnnAlgoPack::conv_bwd_data_algos() {
- static const std::unordered_map<cudnnConvolutionBwdDataAlgo_t, CudnnAlgoPack::Attr>
- algos = {
- DEF_ALGO(CUDNN_CONVOLUTION_BWD_DATA_ALGO_0, false, false),
- #if CUDNN_VERSION == 8004
- DEF_ALGO(CUDNN_CONVOLUTION_BWD_DATA_ALGO_1, true, true),
- #else
- DEF_ALGO(CUDNN_CONVOLUTION_BWD_DATA_ALGO_1, true, false),
- #endif
- DEF_ALGO(CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT, true, true),
- DEF_ALGO(CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING, true, true),
- #if CUDNN_MAJOR >= 5
- DEF_ALGO(CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD, true, true),
- #if CUDNN_MAJOR >= 6 || CUDNN_MINOR >= 1
- DEF_ALGO(
- CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD_NONFUSED, true, false),
- #endif
- #endif
- };
-
- return algos;
- }
-
- const std::unordered_map<cudnnConvolutionBwdFilterAlgo_t, CudnnAlgoPack::Attr>
- CudnnAlgoPack::conv_bwd_flt_algos() {
- static const std::unordered_map<
- cudnnConvolutionBwdFilterAlgo_t, CudnnAlgoPack::Attr>
- algos =
- { DEF_ALGO(CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0, false, false),
- DEF_ALGO(CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1, true, false),
- DEF_ALGO(CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT, true, true),
- DEF_ALGO(CUDNN_CONVOLUTION_BWD_FILTER_ALGO_3, false, false),
- #if CUDNN_MAJOR >= 6 || (CUDNN_MAJOR >= 5 && CUDNN_MINOR >= 1)
- DEF_ALGO(CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD_NONFUSED, true, false),
- #if CUDNN_MAJOR >= 6
- DEF_ALGO(CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT_TILING, true, true),
- #endif
- #endif
-
- };
-
- return algos;
- }
-
- const std::unordered_map<cudnnConvolutionFwdAlgo_t, CudnnAlgoPack::Attr> CudnnAlgoPack::
- conv_fwd_algos() {
- static const std::unordered_map<cudnnConvolutionFwdAlgo_t, CudnnAlgoPack::Attr>
- algos = {
- DEF_ALGO(CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM, true, false),
- #if (CUDNN_VERSION >= 8000 && CUDNN_VERSION <= 8201)
- DEF_ALGO(CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM, true, true),
- #else
- DEF_ALGO(CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM, true, false),
- #endif
- DEF_ALGO(CUDNN_CONVOLUTION_FWD_ALGO_GEMM, true, false),
- DEF_ALGO(CUDNN_CONVOLUTION_FWD_ALGO_DIRECT, true, false),
- DEF_ALGO(CUDNN_CONVOLUTION_FWD_ALGO_FFT, true, true),
- DEF_ALGO(CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING, true, true),
-
- #if CUDNN_MAJOR >= 5
- DEF_ALGO(CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD, true, false),
- #if CUDNN_MAJOR >= 6 || CUDNN_MINOR >= 1
- DEF_ALGO(CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED, true, false),
- #endif
- #endif
-
- };
-
- return algos;
- }
-
- const std::unordered_map<cudnnConvolutionBwdDataAlgo_t, CudnnAlgoPack::Attr>
- CudnnAlgoPack::conv3d_bwd_data_algos() {
- static const std::unordered_map<cudnnConvolutionBwdDataAlgo_t, CudnnAlgoPack::Attr>
- algos = {
- DEF_ALGO(CUDNN_CONVOLUTION_BWD_DATA_ALGO_0, false, false),
- DEF_ALGO(CUDNN_CONVOLUTION_BWD_DATA_ALGO_1, true, false),
- DEF_ALGO(CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING, true, true),
- };
-
- return algos;
- }
-
- const std::unordered_map<cudnnConvolutionBwdFilterAlgo_t, CudnnAlgoPack::Attr>
- CudnnAlgoPack::conv3d_bwd_flt_algos() {
- #pragma message \
-         "for fp16 dilated conv with an odd-sized filter, only algo_1 works; this still needs proper documentation"
- static const std::unordered_map<
- cudnnConvolutionBwdFilterAlgo_t, CudnnAlgoPack::Attr>
- algos = {
- DEF_ALGO(CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0, false, false),
- DEF_ALGO(CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1, true, false),
- DEF_ALGO(CUDNN_CONVOLUTION_BWD_FILTER_ALGO_3, false, false),
- };
-
- return algos;
- }
-
- const std::unordered_map<cudnnConvolutionFwdAlgo_t, CudnnAlgoPack::Attr> CudnnAlgoPack::
- conv3d_fwd_algos() {
- static const std::unordered_map<cudnnConvolutionFwdAlgo_t, CudnnAlgoPack::Attr>
- algos = {
- DEF_ALGO(CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM, true, false),
- #if CUDNN_VERSION == 8004
- DEF_ALGO(CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM, true, true),
- #else
- DEF_ALGO(CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM, true, false),
- #endif
- DEF_ALGO(CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING, true, true),
- };
-
- return algos;
- }
-
- #undef DEF_ALGO
- #undef DEF_NAME
- #undef V
- #undef V1
-
- } // namespace cuda
- } // namespace megdnn
-
- // vim: syntax=cpp.doxygen