/**
 * \file dnn/src/cuda/convolution/helper.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied.
 */

#include "./helper.h"

using namespace megdnn;
using namespace cuda;
using namespace convolution;

bool convolution::is_cudnn_supported(const ForwardSizeArgs& args) {
    // cuDNN has no BFloat16 convolution; fall back to other algorithms.
    if (args.src_layout->dtype == args.filter_layout->dtype &&
        args.src_layout->dtype == dtype::BFloat16()) {
        return false;
    }

    // CUDNN_STATUS_EXECUTION_FAILED on Tegra K1, so disable cuDNN
    // on Tegra K1.
    if (args.handle->is_tegra_k1())
        return false;

    // TODO: We only support the NCHW format now. It seems cuDNN provides
    // support for NHWC as well.
    if (args.filter_meta.format == param::Convolution::Format::NCHW4) {
        // NCHW4 is only usable with int8 data
        if (args.dst_layout->dtype.enumv() != DTypeEnum::Int8 &&
            args.dst_layout->dtype.enumv() != DTypeEnum::QuantizedS8) {
            return false;
        }
    } else if (args.filter_meta.format != param::Convolution::Format::NCHW) {
        return false;
    }

    auto& fm = args.filter_meta;
    bool supported = true;
    supported &= (fm.spatial_ndim == 2);
#if CUDNN_VERSION < 7000
    // grouped convolution is only enabled with cuDNN >= 7.0
    supported &= (fm.group == 1);
#endif
#if CUDNN_VERSION < 7500
    // dilated convolution is only enabled with cuDNN >= 7.5
    supported &= (fm.dilation[0] == 1 && fm.dilation[1] == 1);
#endif
    return supported;
}

//! workspace sizes for the matmul-based algorithm: a dst buffer, an im2col
//! buffer, and (if the filter must be flipped) a flipped-filter copy
SmallVector<size_t> convolution::matmul_get_workspace_bundle(
        const ForwardSizeArgs& args) {
    auto dtype = args.src_layout->dtype;
    auto&& fm = args.filter_meta;
    megdnn_assert(fm.group == 1);
    auto N = args.src_layout->shape[0];
    auto OC = fm.ocpg, IC = fm.icpg, FH = fm.spatial[0], FW = fm.spatial[1];
    auto OH = args.dst_layout->shape[2], OW = args.dst_layout->shape[3];
    SmallVector<size_t> sizes{
            dtype.size() * args.dst_layout->total_nr_elems(),
            dtype.size() * IC * FH * FW * OH * OW * N};
    if (args.filter_meta.should_flip) {
        sizes.push_back(dtype.size() * OC * IC * FH * FW);
    }
    return sizes;
}

//! write a spatially flipped (i.e. rotated by 180 degrees) copy of the filter
//! into the workspace and redirect raw_ptr to that copy
void convolution::flip_filter(
        const ForwardSizeArgs& args, const Workspace& workspace, void*& raw_ptr) {
    auto&& fm = args.filter_meta;
    megdnn_assert(fm.group == 1 && fm.spatial_ndim == 2);
    auto OC = fm.ocpg, IC = fm.icpg, FH = fm.spatial[0], FW = fm.spatial[1];
    auto dtype = fm.dtype;
    megdnn_assert(workspace.size >= dtype.size() * OC * IC * FH * FW);

    // point the dst base at the last spatial element and negate the spatial
    // strides, so the relayout writes each filter plane back to front
    TensorND src{raw_ptr, {{OC, IC, FH, FW}, dtype}},
            dst{workspace.raw_ptr + (FH * FW - 1) * dtype.size(), src.layout};
    dst.layout.stride[2] = -dst.layout.stride[2];
    dst.layout.stride[3] = -dst.layout.stride[3];
    args.handle->relayout_opr()->exec(src, dst);
    raw_ptr = workspace.raw_ptr;
}

// vim: syntax=cpp.doxygen
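
// ---------------------------------------------------------------------------
// Minimal standalone sketch (hypothetical helper, not referenced by the code
// above) of what the negative-stride relayout in flip_filter() computes on a
// single FH x FW plane: with the destination base at element FH * FW - 1 and
// both spatial strides negated, element (h, w) lands at index
// (FH * FW - 1) - h * FW - w, i.e. the plane rotated by 180 degrees.
// ---------------------------------------------------------------------------
#include <cstddef>
#include <vector>

std::vector<float> flip_spatial_sketch(
        const std::vector<float>& src, size_t FH, size_t FW) {
    std::vector<float> dst(FH * FW);
    for (size_t h = 0; h < FH; ++h) {
        for (size_t w = 0; w < FW; ++w) {
            // dst base offset FH * FW - 1, with strides -FW and -1
            dst[(FH * FW - 1) - h * FW - w] = src[h * FW + w];
        }
    }
    return dst;
}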