@@ -54,7 +54,10 @@ if(MGE_WITH_CUDA)
   add_library(cutlass INTERFACE)
   target_include_directories(
     cutlass
-    INTERFACE $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/third_party/cutlass/include>)
+    INTERFACE
+      $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/third_party/cutlass/include>
+      $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/third_party/cutlass/tools/util/include>)
   add_library(cudnn-frontend INTERFACE)
   target_include_directories(
     cudnn-frontend
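Note: the second `BUILD_INTERFACE` entry exposes CUTLASS's utility headers (the `tools/util/include` tree) to every target that links the `cutlass` interface library, in addition to the core `include/` tree. A minimal illustration of what this enables; the specific headers named below are examples from upstream CUTLASS, not necessarily ones this patch uses:

```cpp
// Both include roots now resolve for any target linked against `cutlass`:
#include "cutlass/gemm/device/gemm.h"   // from third_party/cutlass/include
#include "cutlass/util/host_tensor.h"   // from third_party/cutlass/tools/util/include
```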
@@ -31,7 +31,7 @@ public:
     }
 };
 
-class Key {
+struct Key {
     Handle* m_handle;
     uint32_t m_opr_type;
     const TensorLayout* m_inp_layouts_ptr;
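Note: switching `Key` from `class` to `struct` changes nothing but the default access of its members, which were implicitly private before; presumably the cache code needs to read these fields directly. A two-line reminder of the rule:

```cpp
class KeyAsClass { int m_value; };   // m_value is private by default
struct KeyAsStruct { int m_value; }; // m_value is public by default; otherwise identical
```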
@@ -15,7 +15,7 @@ ConvBiasForwardImpl::AlgoPack::AlgoPack() {
     non_cudnn_algos.push_back(&batched_matmul);
     non_cudnn_algos.push_back(&int1_simple);
 
-#if CUDNN_VERSION > 8004
+#if CUDNN_VERSION >= 8020
     all_algos.push_back(&cudnn_conv_v8);
     all_algos.push_back(&cudnn_conv_bias_activation_v8);
 #endif
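Note: every guard in this patch moves from `> 8004` to `>= 8020`. For cuDNN 8.x, `cudnn_version.h` encodes the version as `CUDNN_MAJOR * 1000 + CUDNN_MINOR * 100 + CUDNN_PATCHLEVEL`, so the v8 (graph API) algorithms now require at least cuDNN 8.2.0 rather than merely "newer than 8.0.4". A sketch of the arithmetic:

```cpp
// cuDNN 8.2.0 -> 8 * 1000 + 2 * 100 + 0 == 8020
static_assert(8 * 1000 + 2 * 100 + 0 == 8020, "cuDNN 8.2.0 encodes to 8020");
```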
@@ -173,10 +173,10 @@ public:
     bool is_cudnn() const override { return true; }
 
-    size_t get_preprocess_workspace_in_bytes(const SizeArgs& args) const override;
-    SmallVector<TensorLayout> deduce_preprocessed_filter_layout(
-            const SizeArgs& args) const override;
-    void exec_preprocess(const ExecArgs& args) const override;
+    // size_t get_preprocess_workspace_in_bytes(const SizeArgs& args) const override;
+    // SmallVector<TensorLayout> deduce_preprocessed_filter_layout(
+    //         const SizeArgs& args) const override;
+    // void exec_preprocess(const ExecArgs& args) const override;
 
 protected:
     virtual size_t cudnn_get_workspace_in_bytes(const SizeArgs& args) const = 0;
@@ -237,7 +237,7 @@ private:
     CudnnAlgoPack::Attr m_attr;
 };
 
-#if CUDNN_VERSION > 8004
+#if CUDNN_VERSION >= 8020
 class ConvBiasForwardImpl::AlgoCUDNNConvBiasActivationV8 final
         : public AlgoCUDNNConvBiasActivationBase {
 public:
@@ -414,7 +414,7 @@ private:
     CudnnAlgoPack::Attr m_attr;
 };
 
-#if CUDNN_VERSION > 8004
+#if CUDNN_VERSION >= 8020
 class ConvBiasForwardImpl::AlgoCUDNNConvV8 final : public AlgoCUDNNConvBase {
 public:
     AlgoCUDNNConvV8() : AlgoCUDNNConvBase() {
@@ -1247,7 +1247,7 @@ public:
     AlgoGroupConvGeneral group;
     AlgoBFloat16 bfloat16;
     AlgoSimpleInt1 int1_simple;
-#if CUDNN_VERSION > 8004
+#if CUDNN_VERSION >= 8020
     AlgoCUDNNConvV8 cudnn_conv_v8;
     AlgoCUDNNConvBiasActivationV8 cudnn_conv_bias_activation_v8;
 #endif
@@ -1,14 +1,3 @@
-/**
- * \file dnn/src/cuda/conv_bias/cudnn_conv_base.cpp
- * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
- *
- * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- */
-
 #include "src/common/conv_bias.h"
 #include "src/cuda/conv_bias/algo.h"
 #include "src/cuda/utils.h"
@@ -1,14 +1,3 @@
-/**
- * \file dnn/src/cuda/conv_bias/cudnn_conv_bias_activation_base.cpp
- * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
- *
- * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- */
-
 #include "megdnn/oprs/general.h"
 
 #include "./algo.h"
@@ -26,19 +15,21 @@ size_t ConvBiasForwardImpl::AlgoCUDNNConvBiasActivationBase::get_workspace_in_bytes(
         const SizeArgs& args) const {
     auto workspace_size = cudnn_get_workspace_in_bytes(args);
-    auto&& param = args.opr->param();
-    if (args.preprocessed_filter == nullptr) {
-        if (args.bias_layout && args.bias_layout->dtype != dtype::Float32() &&
-            args.src_layout->dtype.category() != DTypeCategory::FLOAT) {
-            // cudnn require bias to be float when executing CONFIG_INT
-            // convert bias to float if bias is not float at first
-            workspace_size += sizeof(float) * args.bias_layout->span().dist_elem();
-        }
-        if (param.format == param::ConvBias::Format::NCHW32) {
-            workspace_size += args.filter_layout->span().dist_byte() +
-                              args.bias_layout->span().dist_byte();
-        }
+    // if (args.preprocessed_filter == nullptr) {
+    if (args.bias_layout && args.bias_layout->dtype != dtype::Float32() &&
+        args.src_layout->dtype.category() != DTypeCategory::FLOAT) {
+        // cudnn require bias to be float when executing CONFIG_INT
+        // convert bias to float if bias is not float at first
+        workspace_size += sizeof(float) * args.bias_layout->span().dist_elem();
     }
+    // #if CUDNN_VERSION >= 7500
+    //     auto&& param = args.opr->param();
+    //     if (param.format == param::ConvBias::Format::NCHW32) {
+    //         workspace_size += args.filter_layout->span().dist_byte() +
+    //                           args.bias_layout->span().dist_byte();
+    //     }
+    // #endif
+    // }
     return workspace_size;
 }
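Note: with the preprocess path disabled, the workspace query keeps only the bias-conversion term. cuDNN's integer configs consume a float bias, so a quantized bias costs one float of scratch per element. A standalone, hypothetical restatement of the surviving rule (all names invented for illustration):

```cpp
#include <cstddef>

// Hypothetical restatement: base cuDNN workspace plus room for a float copy
// of the bias when neither the source nor the bias is a float type.
size_t conv_bias_workspace_bytes(
        size_t cudnn_workspace_bytes, bool bias_is_float, bool src_is_float,
        size_t bias_elements) {
    size_t ws = cudnn_workspace_bytes;
    if (!bias_is_float && !src_is_float)
        ws += sizeof(float) * bias_elements;  // scratch for the converted bias
    return ws;
}
```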
@@ -56,55 +47,62 @@ void ConvBiasForwardImpl::AlgoCUDNNConvBiasActivationBase::exec(
     TensorND filter_tensor;
     TensorND bias_tensor;
-    auto&& param = args.opr->param();
-    if (args.preprocessed_filter != nullptr) {
-        bias_tensor = TensorND{
-                args.bias_tensor->layout,
-                args.preprocessed_filter->tensors[0].raw_ptr()};
-        if (param.format == Param::Format::NCHW32) {
-            megdnn_assert(args.preprocessed_filter->tensors.size() == 2);
-            filter_tensor = TensorND{
-                    args.filter_tensor->layout,
-                    args.preprocessed_filter->tensors[1].raw_ptr()};
-        } else {
-            filter_tensor = *args.filter_tensor;
-        }
-    } else {
-        if (args.bias_layout && args.bias_layout->dtype != dtype::Float32() &&
-            args.src_layout->dtype.category() != DTypeCategory::FLOAT) {
-            auto cvt = args.handle->create_operator<TypeCvt>();
-            auto float_bias_layout = *args.bias_layout;
-            auto converted_bias_layout = *args.bias_layout;
-            converted_bias_layout.dtype = dtype::QuantizedS32(alpha);
-            float_bias_layout.dtype = dtype::Float32();
-            auto bias_size_in_bytes = float_bias_layout.span().dist_byte();
-            megdnn_assert(args.workspace.size >= bias_size_in_bytes);
-            cvt->exec(
-                    {args.bias_tensor->raw_ptr(), converted_bias_layout},
-                    TensorND{workspace_ptr, float_bias_layout});
-            bias_ptr = workspace_ptr;
-            workspace_ptr += bias_size_in_bytes;
-            workspace_size -= bias_size_in_bytes;
-        }
-        if (param.format == Param::Format::NCHW32) {
-            size_t reorder_workspace_size =
-                    args.filter_tensor->layout.span().dist_byte() +
-                    args.bias_tensor->layout.span().dist_byte();
-            auto reorder_filter_ptr = workspace_ptr;
-            auto reorder_bias_ptr =
-                    workspace_ptr + args.filter_tensor->layout.span().dist_byte();
-            cudnn_reorder_filer_and_bias_nchw32(
-                    cudnn_handle(args.opr->handle()), args.filter_tensor->raw_ptr(),
-                    args.filter_meta, bias_ptr, reorder_filter_ptr, reorder_bias_ptr);
-            filter_tensor = TensorND(args.filter_tensor->layout, reorder_filter_ptr);
-            bias_ptr = reorder_bias_ptr;
-            workspace_ptr += reorder_workspace_size;
-            workspace_size -= reorder_workspace_size;
-        } else {
-            filter_tensor = *args.filter_tensor;
-        }
+    // if (args.preprocessed_filter != nullptr) {
+    //     bias_tensor = TensorND{
+    //             args.bias_tensor->layout,
+    //             args.preprocessed_filter->tensors[0].raw_ptr()};
+    //     // #if CUDNN_VERSION >= 7500
+    //     //     auto&& param = args.opr->param();
+    //     //     if (param.format == Param::Format::NCHW32) {
+    //     //         megdnn_assert(args.preprocessed_filter->tensors.size() == 2);
+    //     //         filter_tensor = TensorND{
+    //     //                 args.filter_tensor->layout,
+    //     //                 args.preprocessed_filter->tensors[1].raw_ptr()};
+    //     //     }
+    //     // #else
+    //     filter_tensor = *args.filter_tensor;
+    //     // #endif
+    // } else {
+    if (args.bias_layout && args.bias_layout->dtype != dtype::Float32() &&
+        args.src_layout->dtype.category() != DTypeCategory::FLOAT) {
+        auto cvt = args.handle->create_operator<TypeCvt>();
+        auto float_bias_layout = *args.bias_layout;
+        auto converted_bias_layout = *args.bias_layout;
+        converted_bias_layout.dtype = dtype::QuantizedS32(alpha);
+        float_bias_layout.dtype = dtype::Float32();
+        auto bias_size_in_bytes = float_bias_layout.span().dist_byte();
+        megdnn_assert(args.workspace.size >= bias_size_in_bytes);
+        cvt->exec(
+                {args.bias_tensor->raw_ptr(), converted_bias_layout},
+                TensorND{workspace_ptr, float_bias_layout});
+        bias_ptr = workspace_ptr;
+        workspace_ptr += bias_size_in_bytes;
+        workspace_size -= bias_size_in_bytes;
     }
+    // #if CUDNN_VERSION >= 7500
+    //     auto&& param = args.opr->param();
+    //     if (param.format == Param::Format::NCHW32) {
+    //         size_t reorder_workspace_size =
+    //                 args.filter_tensor->layout.span().dist_byte() +
+    //                 args.bias_tensor->layout.span().dist_byte();
+    //         auto reorder_filter_ptr = workspace_ptr;
+    //         auto reorder_bias_ptr =
+    //                 workspace_ptr +
+    //                 args.filter_tensor->layout.span().dist_byte();
+    //         cudnn_reorder_filter_and_bias_nchw32(
+    //                 cudnn_handle(args.opr->handle()),
+    //                 args.filter_tensor->raw_ptr(), args.filter_meta,
+    //                 bias_ptr, reorder_filter_ptr, reorder_bias_ptr);
+    //         filter_tensor = TensorND(args.filter_tensor->layout, reorder_filter_ptr);
+    //         bias_ptr = reorder_bias_ptr;
+    //         workspace_ptr += reorder_workspace_size;
+    //         workspace_size -= reorder_workspace_size;
+    //     }
+    // #else
+    filter_tensor = *args.filter_tensor;
+    // #endif
+    // }
     bias_tensor = TensorND{args.bias_tensor->layout, bias_ptr};
 
     ExecArgs exec_args{
@@ -153,58 +151,64 @@ void ConvBiasForwardImpl::AlgoCUDNNConvBiasActivationBase::exec(
     }
 }
 
-size_t ConvBiasForwardImpl::AlgoCUDNNConvBiasActivationBase::
-        get_preprocess_workspace_in_bytes(const SizeArgs& args) const {
-    auto&& param = args.opr->param();
-    if (param.format == Param::Format::NCHW32) {
-        return args.bias_layout->span().dist_byte();
-    }
-    return 0_z;
-}
-
-SmallVector<TensorLayout> ConvBiasForwardImpl::AlgoCUDNNConvBiasActivationBase::
-        deduce_preprocessed_filter_layout(const SizeArgs& args) const {
-    auto&& param = args.opr->param();
-    if (param.format == Param::Format::NCHW32) {
-        return {args.bias_layout->collapse_contiguous(),
-                args.filter_layout->collapse_contiguous()};
-    } else {
-        return {args.bias_layout->collapse_contiguous()};
-    }
-}
-
-void ConvBiasForwardImpl::AlgoCUDNNConvBiasActivationBase::exec_preprocess(
-        const ExecArgs& args) const {
-    float alpha, beta;
-    std::tie(alpha, beta) = cudnn_get_conv_bias_act_scale_param(
-            args.src_tensor->layout, args.dst_tensor->layout,
-            args.filter_tensor->layout, args.bias_tensor->layout,
-            args.z_tensor->layout);
-    MEGDNN_MARK_USED_VAR(beta);
-    auto workspace_ptr = args.workspace.raw_ptr;
-    auto workspace_size = args.workspace.size;
-    auto bias_ptr = workspace_size > 0 ? workspace_ptr
-                                       : args.preprocessed_filter->tensors[0].raw_ptr();
-    if (args.bias_layout && args.bias_layout->dtype != dtype::Float32() &&
-        args.src_layout->dtype.category() != DTypeCategory::FLOAT) {
-        auto cvt = args.handle->create_operator<TypeCvt>();
-        auto float_bias_layout = *args.bias_layout;
-        auto converted_bias_layout = *args.bias_layout;
-        converted_bias_layout.dtype = dtype::QuantizedS32(alpha);
-        float_bias_layout.dtype = dtype::Float32();
-        cvt->exec(
-                {args.bias_tensor->raw_ptr(), converted_bias_layout},
-                TensorND{bias_ptr, float_bias_layout});
-    }
-    if (args.opr->param().format == Param::Format::NCHW32) {
-        auto reorder_filter_ptr = args.preprocessed_filter->tensors[1].raw_ptr();
-        auto reorder_bias_ptr = args.preprocessed_filter->tensors[0].raw_ptr();
-        cudnn_reorder_filer_and_bias_nchw32(
-                cudnn_handle(args.opr->handle()), args.filter_tensor->raw_ptr(),
-                args.filter_meta, bias_ptr, reorder_filter_ptr, reorder_bias_ptr);
-    }
-}
+// size_t ConvBiasForwardImpl::AlgoCUDNNConvBiasActivationBase::
+//         get_preprocess_workspace_in_bytes(const SizeArgs&) const {
+// #if CUDNN_VERSION >= 7500
+//     auto&& param = args.opr->param();
+//     if (param.format == Param::Format::NCHW32) {
+//         return args.bias_layout->span().dist_byte();
+//     }
+// #endif
+//     return 0_z;
+// }
+
+// SmallVector<TensorLayout> ConvBiasForwardImpl::AlgoCUDNNConvBiasActivationBase::
+//         deduce_preprocessed_filter_layout(const SizeArgs& args) const {
+// #if CUDNN_VERSION >= 7500
+//     auto&& param = args.opr->param();
+//     if (param.format == Param::Format::NCHW32) {
+//         return {args.bias_layout->collapse_contiguous(),
+//                 args.filter_layout->collapse_contiguous()};
+//     }
+// #endif
+//     return {args.bias_layout->collapse_contiguous()};
+// }
+
+// void ConvBiasForwardImpl::AlgoCUDNNConvBiasActivationBase::exec_preprocess(
+//         const ExecArgs& args) const {
+//     float alpha, beta;
+//     std::tie(alpha, beta) = cudnn_get_conv_bias_act_scale_param(
+//             args.src_tensor->layout, args.dst_tensor->layout,
+//             args.filter_tensor->layout, args.bias_tensor->layout,
+//             args.z_tensor->layout);
+//     MEGDNN_MARK_USED_VAR(beta);
+//     auto workspace_ptr = args.workspace.raw_ptr;
+//     auto workspace_size = args.workspace.size;
+//     auto bias_ptr = workspace_size > 0
+//                           ? workspace_ptr
+//                           : args.preprocessed_filter->tensors[0].raw_ptr();
+//     if (args.bias_layout && args.bias_layout->dtype != dtype::Float32() &&
+//         args.src_layout->dtype.category() != DTypeCategory::FLOAT) {
+//         auto cvt = args.handle->create_operator<TypeCvt>();
+//         auto float_bias_layout = *args.bias_layout;
+//         auto converted_bias_layout = *args.bias_layout;
+//         converted_bias_layout.dtype = dtype::QuantizedS32(alpha);
+//         float_bias_layout.dtype = dtype::Float32();
+//         cvt->exec(
+//                 {args.bias_tensor->raw_ptr(), converted_bias_layout},
+//                 TensorND{bias_ptr, float_bias_layout});
+//     }
+// #if CUDNN_VERSION >= 7500
+//     if (args.opr->param().format == Param::Format::NCHW32) {
+//         auto reorder_filter_ptr = args.preprocessed_filter->tensors[1].raw_ptr();
+//         auto reorder_bias_ptr = args.preprocessed_filter->tensors[0].raw_ptr();
+//         cudnn_reorder_filter_and_bias_nchw32(
+//                 cudnn_handle(args.opr->handle()), args.filter_tensor->raw_ptr(),
+//                 args.filter_meta, bias_ptr, reorder_filter_ptr, reorder_bias_ptr);
+//     }
+// #endif
+// }
 
 // vim: syntax=cpp.doxygen
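Note: the three functions commented out above implemented MegDNN's weights-preprocess hooks: `deduce_preprocessed_filter_layout` declares which tensors get precomputed, `exec_preprocess` fills them once, and `exec` then reuses them instead of converting/reordering on every run. With the hooks disabled, `exec` always performs the bias conversion in the per-call workspace. A hypothetical, self-contained sketch of that contract (all names invented):

```cpp
#include <vector>

// Hypothetical sketch of the preprocess contract the disabled overrides followed.
struct PreprocessedBias {
    std::vector<float> tensor;  // stands in for PreprocessedFilter::tensors[0]
};

// Run once, ahead of time: convert the quantized bias to float.
PreprocessedBias exec_preprocess(const std::vector<int>& quantized_bias, float scale) {
    PreprocessedBias out;
    out.tensor.reserve(quantized_bias.size());
    for (int q : quantized_bias)
        out.tensor.push_back(q * scale);
    return out;
}

// Run per execution: consume the precomputed float bias with no conversion.
const float* bias_for_exec(const PreprocessedBias& pre) {
    return pre.tensor.data();
}
```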
@@ -1,14 +1,3 @@
-/**
- * \file dnn/src/cuda/conv_bias/cudnn_conv_bias_activation_v8.cpp
- * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
- *
- * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- */
-
 #include "megdnn/oprs/general.h"
 
 #include "./algo.h"
@@ -17,7 +6,7 @@
 #include "src/cuda/cudnn_wrapper_v8.h"
 #include "src/cuda/utils.h"
 
-#if CUDNN_VERSION >= 8004
+#if CUDNN_VERSION >= 8020
 using namespace megdnn;
 using namespace cuda;
 using namespace conv_bias;
@@ -1,20 +1,9 @@
-/**
- * \file dnn/src/cuda/conv_bias/cudnn_conv_v8.cpp
- * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
- *
- * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- */
-
 #include "src/common/conv_bias.h"
 #include "src/cuda/conv_bias/algo.h"
 #include "src/cuda/cudnn_wrapper_v8.h"
 #include "src/cuda/utils.h"
 
-#if CUDNN_VERSION >= 8004
+#if CUDNN_VERSION >= 8020
 using namespace megdnn;
 using namespace cuda;
 using namespace conv_bias;
@@ -239,7 +239,8 @@ std::pair<float, float> cudnn_get_conv_bias_act_scale_param(
     return {alpha, beta};
 }
 
-void cudnn_reorder_filer_and_bias_nchw32(
+#if CUDNN_VERSION >= 7500
+void cudnn_reorder_filter_and_bias_nchw32(
         const cudnnHandle_t& handle, const void* filter_ptr,
         const CanonizedFilterMeta& fm, const void* bias_ptr, void* reordered_filter_ptr,
         void* reordered_bias_ptr) {
@@ -250,6 +251,8 @@ void cudnn_reorder_filer_and_bias_nchw32(
             handle, filter_desc.desc, CUDNN_DEFAULT_REORDER, filter_ptr,
             reordered_filter_ptr, reorder_bias, bias_ptr, reordered_bias_ptr));
 }
+#endif
+
 } // namespace conv_bias
 } // namespace cuda
 } // namespace megdnn
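Note: besides fixing the `filer` → `filter` typo, this hunk fences the helper behind `CUDNN_VERSION >= 7500`, matching the guard the commented-out NCHW32 call sites reference; the body (visible in context above) forwards to `cudnnReorderFilterAndBias` with `CUDNN_DEFAULT_REORDER`. A hedged usage sketch of the renamed wrapper, with namespaces elided as in the header and assuming the declaring MegDNN headers are on the include path; all pointers are device pointers:

```cpp
// Sketch only: reorder an NCHW32 filter and its bias into the layout
// cuDNN's IMMA kernels expect, via the renamed wrapper.
void reorder_for_imma(
        const cudnnHandle_t& handle, const CanonizedFilterMeta& fm,
        const void* dev_filter, const void* dev_bias,  // inputs, device memory
        void* dev_filter_out, void* dev_bias_out) {    // outputs, device memory
#if CUDNN_VERSION >= 7500
    cudnn_reorder_filter_and_bias_nchw32(
            handle, dev_filter, fm, dev_bias, dev_filter_out, dev_bias_out);
#endif
}
```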
@@ -117,11 +117,12 @@ std::pair<float, float> cudnn_get_conv_bias_act_scale_param(
         const TensorLayout& x, const TensorLayout& y, const TensorLayout& w,
         const TensorLayout& b, const TensorLayout& z);
 
-void cudnn_reorder_filer_and_bias_nchw32(
+#if CUDNN_VERSION >= 7500
+void cudnn_reorder_filter_and_bias_nchw32(
         const cudnnHandle_t& handle, const void* filter_ptr,
         const CanonizedFilterMeta& fm, const void* bias_ptr, void* reordered_filter_ptr,
         void* reordered_bias_ptr);
+#endif
 
 } // namespace conv_bias
 } // namespace cuda
 } // namespace megdnn
@@ -47,7 +47,7 @@ ConvBiasForward::Algorithm* ConvBiasForwardImpl::get_algorithm_heuristic(
         const AlgoAttribute& positive_attr, const AlgoAttribute& negative_attr) {
     using namespace conv_bias;
     AlgoBase::SizeArgs args{this, src, filter, bias, z, dst};
-#if CUDNN_VERSION >= 8004
+#if CUDNN_VERSION >= 8020
     if (sm_algo_pack.cudnn_conv_v8.is_available_attribute(
                 args, positive_attr, negative_attr, workspace_limit_in_bytes)) {
         return &sm_algo_pack.cudnn_conv_v8;
@@ -32,12 +32,10 @@ public:
     const char* get_algorithm_set_name() const override;
 
     class AlgoBase;
-    class AlgoCUDNNConvBiasActivation;
     class AlgoChanwise;
     class AlgoChanwiseSmall;
     class AlgoDepthwiseLargeFilter;
     class AlgoChanwise8x8x32;
-    class AlgoCUDNNConv;
     class AlgoFallbackNCHWQS8;
     class AlgoInplaceMatmul;
     class AlgoMatmul;
@@ -67,8 +65,10 @@ public:
     class AlgoFloat32NCHWFMAImplicitBatchedGemm;
     class AlgoFloat16NCHWHMMAImplicitBatchedGemm;
     class AlgoCUDNNConvBase;
+    class AlgoCUDNNConv;
     class AlgoCUDNNConvBiasActivationBase;
-#if CUDNN_VERSION > 8004
+    class AlgoCUDNNConvBiasActivation;
+#if CUDNN_VERSION >= 8020
     class AlgoCUDNNConvV8;
     class AlgoCUDNNConvBiasActivationV8;
 #endif
@@ -1,13 +1,4 @@
-/**
- * \file dnn/src/cuda/cudnn_wrapper_v8.cpp
- * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
- *
- * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- */
+#if CUDNN_VERSION >= 8020
 #include "src/cuda/cudnn_wrapper_v8.h"
 
 #include "src/cuda/cudnn_wrapper.h"
@@ -19,7 +10,7 @@
 #include "cudnn_frontend_EngineConfigGenerator.h"
 
-#include "megdnn/heuristic_cache.h"
+#include "megdnn/algorithm_cache.h"
 
 using namespace megdnn;
 using namespace cuda;
@@ -240,9 +231,9 @@ auto make_activation_descriptor(
 
 // high-level api for convolution execution
 struct StaticData {
-    using Key = megdnn::HeuristicCache::Key;
-    using KeyStorage = megdnn::HeuristicCache::KeyStorage;
-    using KeyHash = megdnn::HeuristicCache::Hash;
+    using Key = megdnn::AlgorithmCache::Key;
+    using KeyStorage = megdnn::AlgorithmCache::KeyStorage;
+    using KeyHash = megdnn::AlgorithmCache::Hash;
     using Result = cudnn_frontend::ExecutionPlan;
     using CudnnFrontendExecutionPlanCache =
             std::unordered_map<KeyStorage, Result, KeyHash>;
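Note: `StaticData` now borrows its key, storage, and hash types from the framework-wide `AlgorithmCache` instead of the old `HeuristicCache`, but the cache shape is unchanged: an `unordered_map` from a serialized problem description to a built `cudnn_frontend::ExecutionPlan`. A generic, hypothetical restatement of that shape (stand-in types invented):

```cpp
#include <cstddef>
#include <functional>
#include <string>
#include <unordered_map>

// Hypothetical stand-ins: any equality-comparable, hashable key can memoize
// the (expensive) cudnn-frontend plan construction.
struct KeyStorage {
    std::string blob;  // serialized layouts + conv params
    bool operator==(const KeyStorage& rhs) const { return blob == rhs.blob; }
};
struct KeyHash {
    std::size_t operator()(const KeyStorage& k) const {
        return std::hash<std::string>{}(k.blob);
    }
};
struct ExecutionPlanStub {};  // stands in for cudnn_frontend::ExecutionPlan
using PlanCache = std::unordered_map<KeyStorage, ExecutionPlanStub, KeyHash>;
```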
@@ -682,4 +673,5 @@ void megdnn::cuda::run_conv_bias_act_with_plan(
             handle, plan.get_raw_desc(), variant_pack.get_raw_desc()));
 }
+#endif
 
 // vim: syntax=cpp.doxygen
@@ -1,15 +1,6 @@
-/**
- * \file dnn/src/cuda/cudnn_wrapper_v8.h
- * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
- *
- * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- */
 #pragma once
 
+#if CUDNN_VERSION >= 8020
 #include "megdnn/basic_types.h"
 #include "megdnn/oprs/nn.h"
 #include "src/common/utils.h"
@@ -67,4 +58,5 @@ void run_conv_bias_act_with_plan(
 } // namespace cuda
 } // namespace megdnn
+#endif
 
 // vim: syntax=cpp.doxygen
@@ -58,11 +58,6 @@ HandleImpl::HandleImpl(megcoreComputingHandle_t comp_handle)
 For example `export CUDA_CACHE_MAXSIZE=2147483647` and `export CUDA_CACHE_PATH=/data/.cuda_cache`)");
     }
 #endif
-    size_t free, tot;
-    cudaMemGetInfo(&free, &tot);
-    printf("before cudnn create, free: %.2f MB, tot: %.2f MB, allocated: %.2f MB\n",
-           free / 1024.0 / 1024.0, tot / 1024.0 / 1024.0,
-           (tot - free) / 1024.0 / 1024.0);
     cudnn_check(cudnnCreate(&m_cudnn_handle));
     cublas_check(cublasCreate(&m_cublas_handle));
 #if CUDA_VERSION >= 10010
@@ -74,11 +69,6 @@ HandleImpl::HandleImpl(megcoreComputingHandle_t comp_handle)
     cudnn_check(cudnnSetStream(m_cudnn_handle, stream()));
     cublas_check(cublasSetStream(m_cublas_handle, stream()));
 
-#if CUDNN_VERSION >= 8004
-    // cudnn_check(cudnnOpsInferVersionCheck());
-    // cudnn_check(cudnnCnnInferVersionCheck());
-#endif
-
     // Note that all cublas scalars (alpha, beta) and scalar results such as dot
     // output resides at device side.
     cublas_check(cublasSetPointerMode(m_cublas_handle, CUBLAS_POINTER_MODE_DEVICE));
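Note: the retained comment above is easy to trip over. Once `CUBLAS_POINTER_MODE_DEVICE` is set, every scalar argument passed to a cuBLAS call on this handle must point into device memory. A hedged sketch of the calling convention this imposes (error checking elided; the helper name is invented):

```cpp
#include <cublas_v2.h>
#include <cuda_runtime.h>

// With CUBLAS_POINTER_MODE_DEVICE, `alpha` must be a *device* pointer; passing
// the address of a host-side float here would be invalid.
void scaled_axpy_device_alpha(cublasHandle_t handle, int n, const float* x, float* y) {
    const float host_alpha = 2.0f;
    float* dev_alpha = nullptr;
    cudaMalloc(&dev_alpha, sizeof(float));
    cudaMemcpy(dev_alpha, &host_alpha, sizeof(float), cudaMemcpyHostToDevice);
    cublasSaxpy(handle, n, dev_alpha, x, 1, y, 1);  // y = alpha * x + y
    cudaFree(dev_alpha);
}
```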
@@ -92,11 +82,6 @@ HandleImpl::HandleImpl(megcoreComputingHandle_t comp_handle)
             cudaMemcpyHostToDevice, stream()));
     cuda_check(cudaStreamSynchronize(stream()));
 
-    cudaMemGetInfo(&free, &tot);
-    printf("after cudnn create, free: %.2f MB, tot: %.2f MB, allocated: %.2f MB\n",
-           free / 1024.0 / 1024.0, tot / 1024.0 / 1024.0,
-           (tot - free) / 1024.0 / 1024.0);
-
     // check tk1
     m_is_tegra_k1 = (strcmp(m_device_prop->name, "GK20A") == 0);
     m_cusolver_handle = nullptr;
@@ -1,13 +1,3 @@
-/**
- * \file dnn/test/cuda/conv_bias.cpp
- * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
- *
- * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- */
 #include "megdnn/dtype.h"
 
 #include "test/cuda/fixture.h"
@@ -26,7 +16,7 @@ using namespace megdnn;
 using namespace test;
 using namespace conv_bias;
 
-#if CUDNN_VERSION >= 8004
+#if CUDNN_VERSION >= 8020
 TEST_F(CUDA, CONV_V8_FLOAT) {
     Checker<ConvBiasForward> checker(handle_cuda());
     checker.set_before_exec_callback(