| @@ -24,21 +24,7 @@ Convolution3DBackwardDataImpl::AlgoPack::AlgoPack() { | |||
| for (auto &&i: cudnn) { | |||
| all_algos.push_back(&i); | |||
| } | |||
| all_algos.reserve(all_algos.size() * 2); | |||
| // add gconv algos by AlgoGroupConvGeneral | |||
| auto all_algos_data = all_algos.data(); | |||
| for (size_t i = 1; i < all_algos.size(); ++ i) { | |||
| gconv.push_back({all_algos[i]}); | |||
| } | |||
| for (size_t i = 1; i < all_algos.size(); ++ i) { | |||
| algo2gconv[all_algos[i]] = &gconv[i - 1]; | |||
| } | |||
| for (auto &&i: gconv) { | |||
| all_algos.push_back(&i); | |||
| } | |||
| megdnn_assert(all_algos_data == all_algos.data()); | |||
| all_algos.push_back(&group); | |||
| for (auto&& algo : all_algos) { | |||
| m_all_algos_map.emplace(algo->info().desc, algo); | |||
| @@ -61,27 +47,26 @@ Convolution3DBackwardDataImpl::AlgoPack::cudnn_from_enum( | |||
| Convolution3DBackwardDataImpl::AlgoPack Convolution3DBackwardDataImpl::sm_algo_pack; | |||
| Convolution3DBackwardDataImpl::AlgoBase::SizeArgs::SizeArgs( | |||
| Convolution3DBackwardDataImpl *o, | |||
| const TensorLayout &filter, const TensorLayout &diff, | |||
| const TensorLayout &grad): | |||
| SizeArgs(o, o->make_canonized_filter_meta(grad.ndim, filter), diff, grad) | |||
| { | |||
| } | |||
| const Convolution3DBackwardDataImpl* o, const TensorLayout& filter, | |||
| const TensorLayout& diff, const TensorLayout& grad) | |||
| : SizeArgs(o, filter, o->make_canonized_filter_meta(grad.ndim, filter), | |||
| diff, grad) {} | |||
| Convolution3DBackwardDataImpl::AlgoBase::SizeArgs::SizeArgs( | |||
| Convolution3DBackwardDataImpl *o, | |||
| const CanonizedFilterMeta &filter, const TensorLayout &diff, | |||
| const Convolution3DBackwardDataImpl *o, const TensorLayout& filter, | |||
| const CanonizedFilterMeta &filter_meta, const TensorLayout &diff, | |||
| const TensorLayout &grad): | |||
| handle{concrete_handle(o->handle())}, | |||
| filter_meta{filter}, | |||
| filter_meta{filter_meta}, | |||
| diff_layout{&diff}, | |||
| grad_layout{&grad}, | |||
| filter_layout{&filter}, | |||
| opr{o} | |||
| { | |||
| } | |||
| Convolution3DBackwardDataImpl::AlgoBase::ExecArgs::ExecArgs( | |||
| Convolution3DBackwardDataImpl *opr, | |||
| const Convolution3DBackwardDataImpl *opr, | |||
| _megdnn_tensor_in filter, | |||
| _megdnn_tensor_in diff, | |||
| _megdnn_tensor_out grad, | |||
| @@ -42,31 +42,33 @@ public: | |||
| struct SizeArgs { | |||
| HandleImpl* handle; | |||
| CanonizedFilterMeta filter_meta; | |||
| const TensorLayout *diff_layout, *grad_layout; | |||
| Convolution3DBackwardDataImpl* opr; | |||
| const TensorLayout *diff_layout, *grad_layout, *filter_layout; | |||
| const Convolution3DBackwardDataImpl* opr; | |||
| std::string to_string() const; | |||
| void init_desc(convolution3d::CUDNNBwdDataDescs& desc) const { | |||
| desc.set(filter_meta, *diff_layout, *grad_layout, opr->param()); | |||
| } | |||
| SizeArgs(Convolution3DBackwardDataImpl* opr, const TensorLayout& filter, | |||
| const TensorLayout& diff, const TensorLayout& grad); | |||
| SizeArgs(Convolution3DBackwardDataImpl* opr, | |||
| const CanonizedFilterMeta& filter, const TensorLayout& diff, | |||
| SizeArgs(const Convolution3DBackwardDataImpl* opr, | |||
| const TensorLayout& filter, const TensorLayout& diff, | |||
| const TensorLayout& grad); | |||
| SizeArgs(const Convolution3DBackwardDataImpl* opr, | |||
| const TensorLayout& filter, | |||
| const CanonizedFilterMeta& filter_meta, | |||
| const TensorLayout& diff, const TensorLayout& grad); | |||
| convolution3d::ForwardSizeArgs as_fwd_args() const { | |||
| return {handle, grad_layout, filter_meta, diff_layout, | |||
| opr->param().data_type}; | |||
| return {handle, grad_layout, filter_layout, | |||
| filter_meta, diff_layout, opr->param().data_type}; | |||
| } | |||
| }; | |||
| struct ExecArgs : public SizeArgs { | |||
| const TensorND *filter_tensor, *diff_tensor, *grad_tensor; | |||
| Workspace workspace; | |||
| ExecArgs(Convolution3DBackwardDataImpl* opr, _megdnn_tensor_in filter, | |||
| _megdnn_tensor_in diff, _megdnn_tensor_out grad, | |||
| _megdnn_workspace workspace); | |||
| ExecArgs(const Convolution3DBackwardDataImpl* opr, | |||
| _megdnn_tensor_in filter, _megdnn_tensor_in diff, | |||
| _megdnn_tensor_out grad, _megdnn_workspace workspace); | |||
| }; | |||
| virtual bool is_available(const SizeArgs& args) const = 0; | |||
| virtual size_t get_workspace_in_bytes(const SizeArgs& args) const = 0; | |||
| @@ -154,29 +156,25 @@ public: | |||
| //! implement group conv by another algo | |||
| class Convolution3DBackwardDataImpl::AlgoGroupConvGeneral final | |||
| : public AlgoBase { | |||
| AlgoBase* m_impl; | |||
| std::string m_name; | |||
| public: | |||
| AlgoGroupConvGeneral(AlgoBase* impl); | |||
| bool is_available(const SizeArgs& args) const override; | |||
| size_t get_workspace_in_bytes(const SizeArgs& args) const override; | |||
| void exec(const ExecArgs& args) const override; | |||
| std::vector<SearchItem> get_subopr_list( | |||
| const TensorLayoutArray& layouts, | |||
| const OperatorBase* opr) const override; | |||
| const char* name() const override { return m_name.c_str(); } | |||
| const char* name() const override { | |||
| return "CUDA:GROUP_CONV3D_BACKWARD_DATA"; | |||
| } | |||
| static void modify_size_args(SizeArgs& args, TensorLayout& diff_pg, | |||
| TensorLayout& grad_pg); | |||
| AlgoAttribute attribute() const override { | |||
| auto ret = static_cast<AlgoAttribute>(0); | |||
| if (m_impl->contain_attribute_all(AlgoAttribute::REPRODUCIBLE)) { | |||
| ret |= AlgoAttribute::REPRODUCIBLE; | |||
| } | |||
| return ret; | |||
| return AlgoAttribute::REPRODUCIBLE; | |||
| } | |||
| MEGDNN_DECL_ALGO_TYPE(CUDA_GROUP_CONV_GENERAL) | |||
| private: | |||
| WorkspaceBundle get_workspace_bundle(void* ptr, const SizeArgs& args) const; | |||
| }; | |||
| class Convolution3DBackwardDataImpl::AlgoPack : NonCopyableObj { | |||
| @@ -190,8 +188,7 @@ public: | |||
| std::vector<AlgoCUDNN> cudnn; | |||
| AlgoChanwise chanwise; | |||
| std::vector<AlgoGroupConvGeneral> gconv; | |||
| std::unordered_map<AlgoBase*, AlgoGroupConvGeneral*> algo2gconv; | |||
| AlgoGroupConvGeneral group; | |||
| std::vector<AlgoBase*> | |||
| //! all algorithms | |||
| @@ -15,68 +15,121 @@ using namespace megdnn; | |||
| using namespace cuda; | |||
| using namespace convolution3d; | |||
| void Convolution3DBackwardDataImpl::AlgoGroupConvGeneral::modify_size_args( | |||
| Convolution3DBackwardDataImpl::AlgoBase::SizeArgs &args, | |||
| TensorLayout &diff_pg, TensorLayout &grad_pg) { | |||
| diff_pg = *args.diff_layout; | |||
| grad_pg = *args.grad_layout; | |||
| namespace { | |||
| std::pair<TensorLayoutArray, Convolution3DBackwardDataImpl::Param> | |||
| sub_opr_config(const Convolution3DBackwardDataImpl::AlgoBase::SizeArgs& args) { | |||
| SmallVector<size_t> flt_shape(0); | |||
| std::vector<ptrdiff_t> flt_stride(0); | |||
| size_t idx = 0; | |||
| // check if the first dim is group | |||
| if (args.filter_layout->ndim > args.grad_layout->ndim) | |||
| ++idx; | |||
| for (; idx < args.filter_layout->ndim; ++idx) { | |||
| flt_shape.push_back(args.filter_layout->shape[idx]); | |||
| flt_stride.push_back(args.filter_layout->stride[idx]); | |||
| } | |||
| TensorLayout filter_pg(flt_shape, flt_stride, | |||
| args.filter_layout->dtype, | |||
| args.filter_layout->format); | |||
| TensorLayout diff_pg = *args.diff_layout; | |||
| TensorLayout grad_pg = *args.grad_layout; | |||
| auto nr_grp = args.filter_meta.group; | |||
| args.filter_meta.group = 1; | |||
| diff_pg.shape[1] /= nr_grp; | |||
| grad_pg.shape[1] /= nr_grp; | |||
| args.diff_layout = &diff_pg; | |||
| args.grad_layout = &grad_pg; | |||
| size_t c_pos = 1; | |||
| diff_pg.shape[c_pos] /= nr_grp; | |||
| grad_pg.shape[c_pos] /= nr_grp; | |||
| megdnn::param::Convolution3D param = args.opr->param(); | |||
| param.sparse = megdnn::param::Convolution3D::Sparse::DENSE; | |||
| std::pair<TensorLayoutArray, Convolution3DBackwardDataImpl::Param> ret; | |||
| ret.first = {filter_pg, diff_pg, grad_pg}; | |||
| ret.second = param; | |||
| return ret; | |||
| } | |||
| Convolution3DBackwardDataImpl::AlgoGroupConvGeneral::AlgoGroupConvGeneral( | |||
| AlgoBase *impl): | |||
| m_impl{impl} | |||
| { | |||
| m_name = "group_conv3d:"; | |||
| m_name += impl->name(); | |||
| std::pair<TensorLayoutArray, std::unique_ptr<Convolution3DBackwardData>> | |||
| prepare_sub_opr(const Convolution3DBackwardDataImpl::AlgoBase::SizeArgs& args) { | |||
| auto conv3d_backdata_opr = | |||
| args.handle->create_operator<Convolution3DBackwardData>(); | |||
| set_execution_policy<Convolution3DBackwardData, Convolution3DBackwardData*>( | |||
| args.opr, conv3d_backdata_opr.get()); | |||
| auto&& config = sub_opr_config(args); | |||
| conv3d_backdata_opr->param() = config.second; | |||
| return {config.first, std::move(conv3d_backdata_opr)}; | |||
| } | |||
| } // namespace | |||
| std::vector<Algorithm::SearchItem> | |||
| Convolution3DBackwardDataImpl::AlgoGroupConvGeneral::get_subopr_list( | |||
| const TensorLayoutArray& layouts, const OperatorBase* opr) const { | |||
| AlgoBase::SizeArgs args{ | |||
| static_cast<const Convolution3DBackwardDataImpl*>(opr), layouts[0], | |||
| layouts[1], layouts[2]}; | |||
| auto&& config = sub_opr_config(args); | |||
| std::string param_str; | |||
| Algorithm::serialize_write_pod(config.second, param_str); | |||
| return {{Algorithm::OprType::CONVOLUTION3D_BACKWARD_DATA, param_str, | |||
| config.first}}; | |||
| } | |||
| bool Convolution3DBackwardDataImpl::AlgoGroupConvGeneral::is_available( | |||
| const SizeArgs &args) const { | |||
| if (args.filter_meta.group <= 1) | |||
| return false; | |||
| auto sub_args = args; | |||
| TensorLayout diff_pg, grad_pg; | |||
| modify_size_args(sub_args, diff_pg, grad_pg); | |||
| return m_impl->is_available(sub_args); | |||
| if (args.filter_meta.format != Param::Format::NCDHW) { | |||
| return false; | |||
| } | |||
| auto config = prepare_sub_opr(args); | |||
| return get_algorithm( | |||
| static_cast<Convolution3DBackwardDataImpl*>(config.second.get()), | |||
| config.first[0], config.first[1], config.first[2]); | |||
| } | |||
| WorkspaceBundle | |||
| Convolution3DBackwardDataImpl::AlgoGroupConvGeneral::get_workspace_bundle( | |||
| void* ptr, const SizeArgs& args) const { | |||
| auto config = prepare_sub_opr(args); | |||
| size_t sizes = config.second->get_workspace_in_bytes( | |||
| config.first[0], config.first[1], config.first[2]); | |||
| return {ptr, {sizes}}; | |||
| } | |||
| size_t Convolution3DBackwardDataImpl::AlgoGroupConvGeneral:: | |||
| get_workspace_in_bytes(const SizeArgs &args) const { | |||
| auto sub_args = args; | |||
| TensorLayout diff_pg, grad_pg; | |||
| modify_size_args(sub_args, diff_pg, grad_pg); | |||
| return m_impl->get_workspace_in_bytes(sub_args); | |||
| size_t | |||
| Convolution3DBackwardDataImpl::AlgoGroupConvGeneral::get_workspace_in_bytes( | |||
| const SizeArgs& args) const { | |||
| return get_workspace_bundle(nullptr, args).total_size_in_bytes(); | |||
| } | |||
| void Convolution3DBackwardDataImpl::AlgoGroupConvGeneral::exec( | |||
| const ExecArgs &args) const { | |||
| auto sub_args = args; | |||
| TensorND tflt{*args.filter_tensor}, tdiff{*args.diff_tensor}, | |||
| tgrad{*args.grad_tensor}; | |||
| modify_size_args(sub_args, tdiff.layout, tgrad.layout); | |||
| sub_args.filter_tensor = &tflt; | |||
| sub_args.diff_tensor = &tdiff; | |||
| sub_args.grad_tensor = &tgrad; | |||
| auto grp = args.filter_meta.group; | |||
| auto &&fm = args.filter_meta; | |||
| auto strd_flt = (fm.icpg * fm.ocpg * | |||
| fm.spatial[0] * fm.spatial[1] * fm.spatial[2] * tflt.layout.dtype.size()), | |||
| strd_diff = ( | |||
| tdiff.layout.stride[1] * fm.ocpg * tdiff.layout.dtype.size()), | |||
| strd_grad = ( | |||
| tgrad.layout.stride[1] * fm.icpg * tgrad.layout.dtype.size()); | |||
| for (uint32_t g = 0; g < grp; ++ g) { | |||
| m_impl->exec(sub_args); | |||
| incr_voidp(tflt.raw_ptr, strd_flt); | |||
| incr_voidp(tdiff.raw_ptr, strd_diff); | |||
| incr_voidp(tgrad.raw_ptr, strd_grad); | |||
| const ExecArgs& args) const { | |||
| auto bundle = get_workspace_bundle(args.workspace.raw_ptr, args); | |||
| { | |||
| auto config = prepare_sub_opr(args); | |||
| TensorND tfilter{args.filter_tensor->raw_ptr, config.first[0]}; | |||
| TensorND tdiff{args.diff_tensor->raw_ptr, config.first[1]}; | |||
| TensorND tgrad{args.grad_tensor->raw_ptr, config.first[2]}; | |||
| size_t c_pos = 1; | |||
| auto grp = args.filter_meta.group; | |||
| auto&& fm = args.filter_meta; | |||
| auto strd_flt = (fm.icpg * fm.ocpg * fm.spatial[0] * fm.spatial[1] * | |||
| fm.spatial[2] * tfilter.layout.dtype.size()), | |||
| strd_diff = (tdiff.layout.stride[c_pos] * fm.ocpg * | |||
| tdiff.layout.dtype.size()), | |||
| strd_grad = (tgrad.layout.stride[c_pos] * fm.icpg * | |||
| tgrad.layout.dtype.size()); | |||
| for (uint32_t g = 0; g < grp; ++g) { | |||
| config.second->exec(tfilter, tdiff, tgrad, bundle.get_workspace(0)); | |||
| incr_voidp(tfilter.raw_ptr, strd_flt); | |||
| incr_voidp(tdiff.raw_ptr, strd_diff); | |||
| incr_voidp(tgrad.raw_ptr, strd_grad); | |||
| } | |||
| } | |||
| } | |||
| @@ -26,21 +26,7 @@ Convolution3DBackwardFilterImpl::AlgoPack::AlgoPack() { | |||
| } | |||
| all_algos.push_back(&inplace_matmul); | |||
| all_algos.reserve(all_algos.size() * 2); | |||
| // add gconv algos by AlgoGroupConvGeneral | |||
| auto all_algos_data = all_algos.data(); | |||
| for (size_t i = 1; i < all_algos.size(); ++ i) { | |||
| gconv.push_back({all_algos[i]}); | |||
| } | |||
| for (size_t i = 1; i < all_algos.size(); ++ i) { | |||
| algo2gconv[all_algos[i]] = &gconv[i - 1]; | |||
| } | |||
| for (auto &&i: gconv) { | |||
| all_algos.push_back(&i); | |||
| } | |||
| megdnn_assert(all_algos_data == all_algos.data()); | |||
| non_cudnn_algos.push_back(all_algos.rbegin()[0]); //group inplace_matmul | |||
| all_algos.push_back(&group); | |||
| for (auto&& algo : all_algos) { | |||
| m_all_algos_map.emplace(algo->info().desc, algo); | |||
| @@ -64,27 +50,26 @@ Convolution3DBackwardFilterImpl::AlgoPack | |||
| Convolution3DBackwardFilterImpl::sm_algo_pack; | |||
| Convolution3DBackwardFilterImpl::AlgoBase::SizeArgs::SizeArgs( | |||
| Convolution3DBackwardFilterImpl *o, | |||
| const Convolution3DBackwardFilterImpl *o, | |||
| const TensorLayout &src, const TensorLayout &diff, | |||
| const TensorLayout &grad): | |||
| SizeArgs(o, src, diff, o->make_canonized_filter_meta(src.ndim, grad)) | |||
| SizeArgs(o, src, diff, grad, o->make_canonized_filter_meta(src.ndim, grad)) | |||
| { | |||
| } | |||
| Convolution3DBackwardFilterImpl::AlgoBase::SizeArgs::SizeArgs( | |||
| Convolution3DBackwardFilterImpl *o, | |||
| const TensorLayout &src, const TensorLayout &diff, | |||
| const CanonizedFilterMeta &grad): | |||
| handle{concrete_handle(o->handle())}, | |||
| src_layout{&src}, | |||
| diff_layout{&diff}, | |||
| grad_filter_meta{grad}, | |||
| opr{o} | |||
| { | |||
| } | |||
| const Convolution3DBackwardFilterImpl* o, const TensorLayout& src, | |||
| const TensorLayout& diff, const TensorLayout& grad, | |||
| const CanonizedFilterMeta& grad_meta) | |||
| : handle{concrete_handle(o->handle())}, | |||
| src_layout{&src}, | |||
| diff_layout{&diff}, | |||
| grad_layout{&grad}, | |||
| grad_filter_meta{grad_meta}, | |||
| opr{o} {} | |||
| Convolution3DBackwardFilterImpl::AlgoBase::ExecArgs::ExecArgs( | |||
| Convolution3DBackwardFilterImpl *opr, | |||
| const Convolution3DBackwardFilterImpl *opr, | |||
| _megdnn_tensor_in src, | |||
| _megdnn_tensor_in diff, | |||
| _megdnn_tensor_out grad, | |||
| @@ -36,31 +36,34 @@ public: | |||
| struct SizeArgs { | |||
| HandleImpl* handle; | |||
| const TensorLayout *src_layout, *diff_layout; | |||
| const TensorLayout *src_layout, *diff_layout, *grad_layout; | |||
| CanonizedFilterMeta grad_filter_meta; | |||
| Convolution3DBackwardFilterImpl* opr; | |||
| const Convolution3DBackwardFilterImpl* opr; | |||
| std::string to_string() const; | |||
| void init_desc(convolution3d::CUDNNBwdFilterDescs& desc) const { | |||
| desc.set(*src_layout, *diff_layout, grad_filter_meta, opr->param()); | |||
| } | |||
| SizeArgs(Convolution3DBackwardFilterImpl* opr, const TensorLayout& src, | |||
| const TensorLayout& diff, const TensorLayout& grad); | |||
| SizeArgs(Convolution3DBackwardFilterImpl* opr, const TensorLayout& src, | |||
| const TensorLayout& diff, const CanonizedFilterMeta& grad); | |||
| SizeArgs(const Convolution3DBackwardFilterImpl* opr, | |||
| const TensorLayout& src, const TensorLayout& diff, | |||
| const TensorLayout& grad); | |||
| SizeArgs(const Convolution3DBackwardFilterImpl* opr, | |||
| const TensorLayout& src, const TensorLayout& diff, | |||
| const TensorLayout& grad, | |||
| const CanonizedFilterMeta& grad_meta); | |||
| convolution3d::ForwardSizeArgs as_fwd_args() const { | |||
| return {handle, src_layout, grad_filter_meta, diff_layout, | |||
| opr->param().data_type}; | |||
| return {handle, src_layout, grad_layout, | |||
| grad_filter_meta, diff_layout, opr->param().data_type}; | |||
| } | |||
| }; | |||
| struct ExecArgs : public SizeArgs { | |||
| const TensorND *src_tensor, *diff_tensor, *grad_tensor; | |||
| Workspace workspace; | |||
| ExecArgs(Convolution3DBackwardFilterImpl* opr, _megdnn_tensor_in src, | |||
| _megdnn_tensor_in diff, _megdnn_tensor_out grad, | |||
| _megdnn_workspace workspace); | |||
| ExecArgs(const Convolution3DBackwardFilterImpl* opr, | |||
| _megdnn_tensor_in src, _megdnn_tensor_in diff, | |||
| _megdnn_tensor_out grad, _megdnn_workspace workspace); | |||
| }; | |||
| virtual bool is_available(const SizeArgs& args) const = 0; | |||
| virtual size_t get_workspace_in_bytes(const SizeArgs& args) const = 0; | |||
| @@ -162,30 +165,25 @@ public: | |||
| //! implement group conv by another algo | |||
| class Convolution3DBackwardFilterImpl::AlgoGroupConvGeneral final | |||
| : public AlgoBase { | |||
| AlgoBase* m_impl; | |||
| std::string m_name; | |||
| public: | |||
| AlgoGroupConvGeneral(AlgoBase* impl); | |||
| bool is_available(const SizeArgs& args) const override; | |||
| size_t get_workspace_in_bytes(const SizeArgs& args) const override; | |||
| void exec(const ExecArgs& args) const override; | |||
| std::vector<SearchItem> get_subopr_list( | |||
| const TensorLayoutArray& layouts, | |||
| const OperatorBase* opr) const override; | |||
| const char* name() const override { return m_name.c_str(); } | |||
| const char* name() const override { | |||
| return "CUDA:GROUP_CONV3D_BACKWARD_FILTER"; | |||
| } | |||
| AlgoAttribute attribute() const override { | |||
| auto ret = static_cast<AlgoAttribute>(0); | |||
| if (m_impl->contain_attribute_all(AlgoAttribute::REPRODUCIBLE)) { | |||
| ret |= AlgoAttribute::REPRODUCIBLE; | |||
| } | |||
| return ret; | |||
| return AlgoAttribute::REPRODUCIBLE; | |||
| } | |||
| static void modify_size_args(SizeArgs& args, TensorLayout& src_pg, | |||
| TensorLayout& diff_pg); | |||
| MEGDNN_DECL_ALGO_TYPE(CUDA_GROUP_CONV_GENERAL) | |||
| private: | |||
| WorkspaceBundle get_workspace_bundle(void* ptr, const SizeArgs& args) const; | |||
| }; | |||
| class Convolution3DBackwardFilterImpl::AlgoPack : NonCopyableObj { | |||
| @@ -200,8 +198,7 @@ public: | |||
| std::vector<AlgoCUDNN> cudnn; | |||
| AlgoInplaceMatmul inplace_matmul; | |||
| AlgoChanwise chanwise; | |||
| std::vector<AlgoGroupConvGeneral> gconv; | |||
| std::unordered_map<AlgoBase*, AlgoGroupConvGeneral*> algo2gconv; | |||
| AlgoGroupConvGeneral group; | |||
| std::vector<AlgoBase*> | |||
| //! all algorithms | |||
| @@ -15,69 +15,123 @@ using namespace megdnn; | |||
| using namespace cuda; | |||
| using namespace convolution3d; | |||
| void Convolution3DBackwardFilterImpl::AlgoGroupConvGeneral::modify_size_args( | |||
| Convolution3DBackwardFilterImpl::AlgoBase::SizeArgs &args, | |||
| TensorLayout &src_pg, TensorLayout &diff_pg) { | |||
| src_pg = *args.src_layout; | |||
| diff_pg = *args.diff_layout; | |||
| namespace { | |||
| std::pair<TensorLayoutArray, Convolution3DBackwardFilterImpl::Param> | |||
| sub_opr_config( | |||
| const Convolution3DBackwardFilterImpl::AlgoBase::SizeArgs& args) { | |||
| SmallVector<size_t> flt_shape(0); | |||
| std::vector<ptrdiff_t> flt_stride(0); | |||
| size_t idx = 0; | |||
| // check if the first dim is group | |||
| if (args.grad_layout->ndim > args.src_layout->ndim) | |||
| ++idx; | |||
| for (; idx < args.grad_layout->ndim; ++idx) { | |||
| flt_shape.push_back(args.grad_layout->shape[idx]); | |||
| flt_stride.push_back(args.grad_layout->stride[idx]); | |||
| } | |||
| TensorLayout grad_pg(flt_shape, flt_stride, args.grad_layout->dtype, | |||
| args.grad_layout->format); | |||
| TensorLayout src_pg = *args.src_layout; | |||
| TensorLayout diff_pg = *args.diff_layout; | |||
| auto nr_grp = args.grad_filter_meta.group; | |||
| args.grad_filter_meta.group = 1; | |||
| src_pg.shape[1] /= nr_grp; | |||
| diff_pg.shape[1] /= nr_grp; | |||
| args.src_layout = &src_pg; | |||
| args.diff_layout = &diff_pg; | |||
| size_t c_pos = 1; | |||
| src_pg.shape[c_pos] /= nr_grp; | |||
| diff_pg.shape[c_pos] /= nr_grp; | |||
| megdnn::param::Convolution3D param = args.opr->param(); | |||
| param.sparse = megdnn::param::Convolution3D::Sparse::DENSE; | |||
| std::pair<TensorLayoutArray, Convolution3DBackwardFilterImpl::Param> ret; | |||
| ret.first = {src_pg, diff_pg, grad_pg}; | |||
| ret.second = param; | |||
| return ret; | |||
| } | |||
| Convolution3DBackwardFilterImpl::AlgoGroupConvGeneral::AlgoGroupConvGeneral( | |||
| AlgoBase *impl): | |||
| m_impl{impl} | |||
| { | |||
| m_name = "group_conv3d:"; | |||
| m_name += impl->name(); | |||
| std::pair<TensorLayoutArray, std::unique_ptr<Convolution3DBackwardFilter>> | |||
| prepare_sub_opr( | |||
| const Convolution3DBackwardFilterImpl::AlgoBase::SizeArgs& args) { | |||
| auto conv3d_backfilter_opr = | |||
| args.handle->create_operator<Convolution3DBackwardFilter>(); | |||
| set_execution_policy<Convolution3DBackwardFilter, | |||
| Convolution3DBackwardFilter*>( | |||
| args.opr, conv3d_backfilter_opr.get()); | |||
| auto&& config = sub_opr_config(args); | |||
| conv3d_backfilter_opr->param() = config.second; | |||
| return {config.first, std::move(conv3d_backfilter_opr)}; | |||
| } | |||
| } // namespace | |||
| std::vector<Algorithm::SearchItem> | |||
| Convolution3DBackwardFilterImpl::AlgoGroupConvGeneral::get_subopr_list( | |||
| const TensorLayoutArray& layouts, const OperatorBase* opr) const { | |||
| AlgoBase::SizeArgs args{ | |||
| static_cast<const Convolution3DBackwardFilterImpl*>(opr), | |||
| layouts[0], layouts[1], layouts[2]}; | |||
| auto&& config = sub_opr_config(args); | |||
| std::string param_str; | |||
| Algorithm::serialize_write_pod(config.second, param_str); | |||
| return {{Algorithm::OprType::CONVOLUTION3D_BACKWARD_FILTER, param_str, | |||
| config.first}}; | |||
| } | |||
| bool Convolution3DBackwardFilterImpl::AlgoGroupConvGeneral::is_available( | |||
| const SizeArgs &args) const { | |||
| const SizeArgs& args) const { | |||
| if (args.grad_filter_meta.group <= 1) | |||
| return false; | |||
| auto sub_args = args; | |||
| TensorLayout src_pg, diff_pg; | |||
| modify_size_args(sub_args, src_pg, diff_pg); | |||
| return m_impl->is_available(sub_args); | |||
| if (args.grad_filter_meta.format != Param::Format::NCDHW) { | |||
| return false; | |||
| } | |||
| auto config = prepare_sub_opr(args); | |||
| return get_algorithm( | |||
| static_cast<Convolution3DBackwardFilterImpl*>(config.second.get()), | |||
| config.first[0], config.first[1], config.first[2]); | |||
| } | |||
| WorkspaceBundle | |||
| Convolution3DBackwardFilterImpl::AlgoGroupConvGeneral::get_workspace_bundle( | |||
| void* ptr, const SizeArgs& args) const { | |||
| auto config = prepare_sub_opr(args); | |||
| size_t sizes = config.second->get_workspace_in_bytes( | |||
| config.first[0], config.first[1], config.first[2]); | |||
| return {ptr, {sizes}}; | |||
| } | |||
| size_t Convolution3DBackwardFilterImpl::AlgoGroupConvGeneral:: | |||
| get_workspace_in_bytes(const SizeArgs &args) const { | |||
| auto sub_args = args; | |||
| TensorLayout src_pg, diff_pg; | |||
| modify_size_args(sub_args, src_pg, diff_pg); | |||
| return m_impl->get_workspace_in_bytes(sub_args); | |||
| size_t | |||
| Convolution3DBackwardFilterImpl::AlgoGroupConvGeneral::get_workspace_in_bytes( | |||
| const SizeArgs& args) const { | |||
| return get_workspace_bundle(nullptr, args).total_size_in_bytes(); | |||
| } | |||
| void Convolution3DBackwardFilterImpl::AlgoGroupConvGeneral::exec( | |||
| const ExecArgs &args) const { | |||
| auto sub_args = args; | |||
| TensorND tsrc{*args.src_tensor}, tdiff{*args.diff_tensor}, | |||
| tgrad{*args.grad_tensor}; | |||
| modify_size_args(sub_args, tsrc.layout, tdiff.layout); | |||
| sub_args.src_tensor = &tsrc; | |||
| sub_args.diff_tensor = &tdiff; | |||
| sub_args.grad_tensor = &tgrad; | |||
| auto &&fm = args.grad_filter_meta; | |||
| auto grp = fm.group; | |||
| auto strd_src = ( | |||
| tsrc.layout.stride[1] * fm.icpg * tsrc.layout.dtype.size()), | |||
| strd_diff = ( | |||
| tdiff.layout.stride[1] * fm.ocpg * tdiff.layout.dtype.size()), | |||
| strd_grad = (fm.icpg * fm.ocpg * | |||
| fm.spatial[0] * fm.spatial[1] * fm.spatial[2] * tgrad.layout.dtype.size()); | |||
| for (uint32_t g = 0; g < grp; ++ g) { | |||
| m_impl->exec(sub_args); | |||
| incr_voidp(tsrc.raw_ptr, strd_src); | |||
| incr_voidp(tdiff.raw_ptr, strd_diff); | |||
| incr_voidp(tgrad.raw_ptr, strd_grad); | |||
| const ExecArgs& args) const { | |||
| auto bundle = get_workspace_bundle(args.workspace.raw_ptr, args); | |||
| { | |||
| auto config = prepare_sub_opr(args); | |||
| TensorND tsrc{args.src_tensor->raw_ptr, config.first[0]}; | |||
| TensorND tdiff{args.diff_tensor->raw_ptr, config.first[1]}; | |||
| TensorND tgrad{args.grad_tensor->raw_ptr, config.first[2]}; | |||
| size_t c_pos = 1; | |||
| auto grp = args.grad_filter_meta.group; | |||
| auto&& fm = args.grad_filter_meta; | |||
| auto strd_src = (tsrc.layout.stride[c_pos] * fm.icpg * | |||
| tsrc.layout.dtype.size()), | |||
| strd_diff = (tdiff.layout.stride[c_pos] * fm.ocpg * | |||
| tdiff.layout.dtype.size()), | |||
| strd_grad = (fm.icpg * fm.ocpg * fm.spatial[0] * fm.spatial[1] * | |||
| fm.spatial[2] * tgrad.layout.dtype.size()); | |||
| for (uint32_t g = 0; g < grp; ++g) { | |||
| config.second->exec(tsrc, tdiff, tgrad, bundle.get_workspace(0)); | |||
| incr_voidp(tsrc.raw_ptr, strd_src); | |||
| incr_voidp(tdiff.raw_ptr, strd_diff); | |||
| incr_voidp(tgrad.raw_ptr, strd_grad); | |||
| } | |||
| } | |||
| } | |||
| @@ -28,22 +28,7 @@ Convolution3DForwardImpl::AlgoPack::AlgoPack() { | |||
| } | |||
| all_algos.push_back(&inplace_matmul); | |||
| all_algos.push_back(&a1x1x1); | |||
| all_algos.reserve(all_algos.size() * 2); | |||
| // add gconv algos by AlgoGroupConvGeneral | |||
| auto all_algos_data = all_algos.data(); | |||
| for (size_t i = 1; i < all_algos.size(); ++ i) { | |||
| gconv.push_back({all_algos[i]}); | |||
| } | |||
| for (size_t i = 1; i < all_algos.size(); ++ i) { | |||
| algo2gconv[all_algos[i]] = &gconv[i - 1]; | |||
| } | |||
| for (auto &&i: gconv) { | |||
| all_algos.push_back(&i); | |||
| } | |||
| megdnn_assert(all_algos_data == all_algos.data()); | |||
| non_cudnn_algos.push_back(all_algos.rbegin()[1]); // group inplace_matmul | |||
| non_cudnn_algos.push_back(all_algos.rbegin()[0]); // group 1x1x1 | |||
| all_algos.push_back(&group); | |||
| for (auto&& algo : all_algos) { | |||
| m_all_algos_map.emplace(algo->info().desc, algo); | |||
| @@ -66,28 +51,25 @@ Convolution3DForwardImpl::AlgoPack::cudnn_from_enum( | |||
| Convolution3DForwardImpl::AlgoPack Convolution3DForwardImpl::sm_algo_pack; | |||
| Convolution3DForwardImpl::AlgoBase::SizeArgs::SizeArgs( | |||
| Convolution3DForwardImpl *o, | |||
| const TensorLayout &src, const TensorLayout &filter, | |||
| const TensorLayout &dst): | |||
| SizeArgs(o, src, o->make_canonized_filter_meta(src.ndim, filter), dst) | |||
| { | |||
| } | |||
| const Convolution3DForwardImpl* o, const TensorLayout& src, | |||
| const TensorLayout& filter, const TensorLayout& dst) | |||
| : SizeArgs(o, src, filter, | |||
| o->make_canonized_filter_meta(src.ndim, filter), dst) {} | |||
| Convolution3DForwardImpl::AlgoBase::SizeArgs::SizeArgs( | |||
| Convolution3DForwardImpl *o, | |||
| const TensorLayout &src, const CanonizedFilterMeta &filter, | |||
| const TensorLayout &dst): | |||
| ForwardSizeArgs{ | |||
| concrete_handle(o->handle()), | |||
| &src, filter, &dst, | |||
| o->param().data_type | |||
| }, | |||
| opr{o} | |||
| { | |||
| } | |||
| const Convolution3DForwardImpl* o, const TensorLayout& src, | |||
| const TensorLayout& filter, const CanonizedFilterMeta& filter_meta, | |||
| const TensorLayout& dst) | |||
| : ForwardSizeArgs{concrete_handle(o->handle()), | |||
| &src, | |||
| &filter, | |||
| filter_meta, | |||
| &dst, | |||
| o->param().data_type}, | |||
| opr{o} {} | |||
| Convolution3DForwardImpl::AlgoBase::ExecArgs::ExecArgs( | |||
| Convolution3DForwardImpl *opr, | |||
| const Convolution3DForwardImpl *opr, | |||
| _megdnn_tensor_in src, | |||
| _megdnn_tensor_in filter, | |||
| _megdnn_tensor_out dst, | |||
| @@ -48,22 +48,24 @@ public: | |||
| AlgoBase() : Algorithm() { m_handle_type = Handle::HandleType::CUDA; } | |||
| struct SizeArgs : public convolution3d::ForwardSizeArgs { | |||
| Convolution3DForwardImpl* opr; | |||
| const Convolution3DForwardImpl* opr; | |||
| std::string to_string() const; | |||
| void init_desc(convolution3d::CUDNNForwardDescs& desc) const { | |||
| desc.set(*src_layout, filter_meta, *dst_layout, opr->param()); | |||
| } | |||
| SizeArgs(Convolution3DForwardImpl* opr, const TensorLayout& src, | |||
| SizeArgs(const Convolution3DForwardImpl* opr, const TensorLayout& src, | |||
| const TensorLayout& filter, const TensorLayout& dst); | |||
| SizeArgs(Convolution3DForwardImpl* opr, const TensorLayout& src, | |||
| const CanonizedFilterMeta& filter, const TensorLayout& dst); | |||
| SizeArgs(const Convolution3DForwardImpl* opr, const TensorLayout& src, | |||
| const TensorLayout& filter, | |||
| const CanonizedFilterMeta& filter_meta, | |||
| const TensorLayout& dst); | |||
| }; | |||
| struct ExecArgs : public SizeArgs { | |||
| const TensorND *src_tensor, *filter_tensor, *dst_tensor; | |||
| Workspace workspace; | |||
| ExecArgs(Convolution3DForwardImpl* opr, _megdnn_tensor_in src, | |||
| ExecArgs(const Convolution3DForwardImpl* opr, _megdnn_tensor_in src, | |||
| _megdnn_tensor_in filter, _megdnn_tensor_out dst, | |||
| _megdnn_workspace workspace); | |||
| }; | |||
| @@ -114,35 +116,22 @@ public: | |||
| //! implement group conv by another algo | |||
| class Convolution3DForwardImpl::AlgoGroupConvGeneral final : public AlgoBase { | |||
| AlgoBase* m_impl; | |||
| std::string m_name; | |||
| public: | |||
| AlgoGroupConvGeneral(AlgoBase* impl); | |||
| bool is_available(const SizeArgs& args) const override; | |||
| size_t get_workspace_in_bytes(const SizeArgs& args) const override; | |||
| void exec(const ExecArgs& args) const override; | |||
| std::vector<SearchItem> get_subopr_list( | |||
| const TensorLayoutArray& layouts, | |||
| const OperatorBase* opr) const override; | |||
| const char* name() const override { return m_name.c_str(); } | |||
| const char* name() const override { return "CUDA:GROUP_CONV3D_FORWARD"; } | |||
| AlgoAttribute attribute() const override { | |||
| auto ret = AlgoAttribute::DEFAULT; | |||
| if (m_impl->contain_attribute_all(AlgoAttribute::REPRODUCIBLE)) { | |||
| ret |= AlgoAttribute::REPRODUCIBLE; | |||
| } | |||
| #define cb(attr) \ | |||
| if (m_impl->contain_attribute_all(attr)) { \ | |||
| ret |= attr; \ | |||
| } | |||
| MEGDNN_FOREACH_ALGO_ATTRIBUTE_INHERITABLE(cb) | |||
| #undef cb | |||
| return ret; | |||
| return AlgoAttribute::REPRODUCIBLE; | |||
| } | |||
| static void modify_size_args(SizeArgs& args, TensorLayout& src_pg, | |||
| TensorLayout& dst_pg); | |||
| MEGDNN_DECL_ALGO_TYPE(CUDA_GROUP_CONV_GENERAL) | |||
| private: | |||
| WorkspaceBundle get_workspace_bundle(void* ptr, const SizeArgs& args) const; | |||
| }; | |||
| class Convolution3DForwardImpl::AlgoCUDNN final : public AlgoBase { | |||
| @@ -226,8 +215,7 @@ public: | |||
| Algo1x1x1 a1x1x1; | |||
| AlgoInplaceMatmul inplace_matmul; | |||
| AlgoChanwise chanwise; | |||
| std::vector<AlgoGroupConvGeneral> gconv; | |||
| std::unordered_map<AlgoBase*, AlgoGroupConvGeneral*> algo2gconv; | |||
| AlgoGroupConvGeneral group; | |||
| std::vector<AlgoBase*> | |||
| //! all algorithms | |||
| @@ -15,84 +15,136 @@ using namespace megdnn; | |||
| using namespace cuda; | |||
| using namespace convolution3d; | |||
| void Convolution3DForwardImpl::AlgoGroupConvGeneral::modify_size_args( | |||
| Convolution3DForwardImpl::AlgoBase::SizeArgs &args, | |||
| TensorLayout &src_pg, TensorLayout &dst_pg) { | |||
| src_pg = *args.src_layout; | |||
| dst_pg = *args.dst_layout; | |||
| namespace { | |||
| std::pair<TensorLayoutArray, Convolution3DForwardImpl::Param> sub_opr_config( | |||
| const Convolution3DForwardImpl::AlgoBase::SizeArgs& args) { | |||
| TensorLayout src_pg = *args.src_layout; | |||
| SmallVector<size_t> flt_shape(0); | |||
| std::vector<ptrdiff_t> flt_stride(0); | |||
| size_t idx = 0; | |||
| // check if the first dim is group | |||
| if (args.filter_layout->ndim > args.src_layout->ndim) | |||
| ++idx; | |||
| for (; idx < args.filter_layout->ndim; ++idx) { | |||
| flt_shape.push_back(args.filter_layout->shape[idx]); | |||
| flt_stride.push_back(args.filter_layout->stride[idx]); | |||
| } | |||
| TensorLayout filter_pg(flt_shape, flt_stride, | |||
| args.filter_layout->dtype, | |||
| args.filter_layout->format); | |||
| TensorLayout dst_pg = *args.dst_layout; | |||
| auto nr_grp = args.filter_meta.group; | |||
| args.filter_meta.group = 1; | |||
| size_t c_pos; | |||
| if (args.filter_meta.format == Param::Format::NCDHW) { | |||
| if (args.filter_meta.format == param::Convolution3D::Format::NCDHW) { | |||
| c_pos = 1; | |||
| } else { | |||
| megdnn_assert(args.filter_meta.format == Param::Format::NDHWC, | |||
| megdnn_assert( | |||
| args.filter_meta.format == param::Convolution3D::Format::NDHWC, | |||
| "invalid conv format"); | |||
| c_pos = 4; | |||
| } | |||
| src_pg.shape[c_pos] /= nr_grp; | |||
| dst_pg.shape[c_pos] /= nr_grp; | |||
| args.src_layout = &src_pg; | |||
| args.dst_layout = &dst_pg; | |||
| megdnn::param::Convolution3D param = args.opr->param(); | |||
| param.sparse = megdnn::param::Convolution3D::Sparse::DENSE; | |||
| std::pair<TensorLayoutArray, Convolution3DForwardImpl::Param> ret; | |||
| ret.first = {src_pg, filter_pg, dst_pg}; | |||
| ret.second = param; | |||
| return ret; | |||
| } | |||
| std::pair<TensorLayoutArray, std::unique_ptr<Convolution3DForward>> | |||
| prepare_sub_opr(const Convolution3DForwardImpl::AlgoBase::SizeArgs& args) { | |||
| auto conv3d_opr = args.handle->create_operator<Convolution3D>(); | |||
| set_execution_policy<Convolution3DForward, Convolution3DForward*>( | |||
| args.opr, conv3d_opr.get()); | |||
| auto&& config = sub_opr_config(args); | |||
| conv3d_opr->param() = config.second; | |||
| return {config.first, std::move(conv3d_opr)}; | |||
| } | |||
| } // namespace | |||
| std::vector<Algorithm::SearchItem> | |||
| Convolution3DForwardImpl::AlgoGroupConvGeneral::get_subopr_list( | |||
| const TensorLayoutArray& layouts, const OperatorBase* opr) const { | |||
| AlgoBase::SizeArgs args{static_cast<const Convolution3DForwardImpl*>(opr), | |||
| layouts[0], layouts[1], layouts[2]}; | |||
| auto&& config = sub_opr_config(args); | |||
| Convolution3DForwardImpl::AlgoGroupConvGeneral::AlgoGroupConvGeneral( | |||
| AlgoBase *impl): | |||
| m_impl{impl} { | |||
| m_name = "group_conv3d:"; | |||
| m_name += impl->name(); | |||
| std::string param_str; | |||
| Algorithm::serialize_write_pod(config.second, param_str); | |||
| return {{Algorithm::OprType::CONVOLUTION3D_FORWARD, param_str, | |||
| config.first}}; | |||
| } | |||
| bool Convolution3DForwardImpl::AlgoGroupConvGeneral::is_available( | |||
| const SizeArgs &args) const { | |||
| if (args.filter_meta.group <= 1) | |||
| return false; | |||
| auto sub_args = args; | |||
| TensorLayout src_pg, dst_pg; | |||
| modify_size_args(sub_args, src_pg, dst_pg); | |||
| return m_impl->is_available(sub_args); | |||
| if (args.filter_meta.format != Param::Format::NCDHW && | |||
| args.filter_meta.format != Param::Format::NDHWC) { | |||
| return false; | |||
| } | |||
| auto config = prepare_sub_opr(args); | |||
| return get_algorithm( | |||
| static_cast<Convolution3DForwardImpl*>(config.second.get()), | |||
| config.first[0], config.first[1], config.first[2]); | |||
| } | |||
| WorkspaceBundle | |||
| Convolution3DForwardImpl::AlgoGroupConvGeneral::get_workspace_bundle( | |||
| void* ptr, const SizeArgs& args) const { | |||
| auto config = prepare_sub_opr(args); | |||
| size_t sizes = config.second->get_workspace_in_bytes( | |||
| config.first[0], config.first[1], config.first[2]); | |||
| return {ptr, {sizes}}; | |||
| } | |||
| size_t Convolution3DForwardImpl::AlgoGroupConvGeneral::get_workspace_in_bytes( | |||
| const SizeArgs &args) const { | |||
| auto sub_args = args; | |||
| TensorLayout src_pg, dst_pg; | |||
| modify_size_args(sub_args, src_pg, dst_pg); | |||
| return m_impl->get_workspace_in_bytes(sub_args); | |||
| const SizeArgs& args) const { | |||
| return get_workspace_bundle(nullptr, args).total_size_in_bytes(); | |||
| } | |||
| void Convolution3DForwardImpl::AlgoGroupConvGeneral::exec( | |||
| const ExecArgs &args) const { | |||
| auto sub_args = args; | |||
| TensorND tsrc{*args.src_tensor}, tdst{*args.dst_tensor}, | |||
| tflt{*args.filter_tensor}; | |||
| modify_size_args(sub_args, tsrc.layout, tdst.layout); | |||
| sub_args.src_tensor = &tsrc; | |||
| sub_args.dst_tensor = &tdst; | |||
| sub_args.filter_tensor = &tflt; | |||
| const ExecArgs& args) const { | |||
| auto bundle = get_workspace_bundle(args.workspace.raw_ptr, args); | |||
| { | |||
| auto config = prepare_sub_opr(args); | |||
| TensorND tsrc{args.src_tensor->raw_ptr, config.first[0]}; | |||
| TensorND tfilter{args.filter_tensor->raw_ptr, config.first[1]}; | |||
| TensorND tdst{args.dst_tensor->raw_ptr, config.first[2]}; | |||
| size_t c_pos; | |||
| if (args.filter_meta.format == Param::Format::NCDHW) { | |||
| c_pos = 1; | |||
| } else { | |||
| megdnn_assert(args.filter_meta.format == Param::Format::NDHWC, | |||
| "invalid conv format"); | |||
| c_pos = 4; | |||
| } | |||
| size_t c_pos; | |||
| if (args.filter_meta.format == Param::Format::NCDHW) { | |||
| c_pos = 1; | |||
| } else { | |||
| megdnn_assert(args.filter_meta.format == Param::Format::NDHWC, | |||
| "invalid conv format"); | |||
| c_pos = 4; | |||
| } | |||
| auto grp = args.filter_meta.group; | |||
| auto&& fm = args.filter_meta; | |||
| auto strd_src = tsrc.layout.stride[c_pos] * fm.icpg * | |||
| tsrc.layout.dtype.size(), | |||
| strd_dst = tdst.layout.stride[c_pos] * fm.ocpg * | |||
| tdst.layout.dtype.size(), | |||
| strd_flt = fm.icpg * fm.ocpg * fm.spatial[0] * fm.spatial[1] * | |||
| fm.spatial[2] * tfilter.layout.dtype.size(); | |||
| auto grp = args.filter_meta.group; | |||
| auto &&fm = args.filter_meta; | |||
| auto strd_src = tsrc.layout.stride[c_pos] * fm.icpg * tsrc.layout.dtype.size(), | |||
| strd_dst = tdst.layout.stride[c_pos] * fm.ocpg * tdst.layout.dtype.size(), | |||
| strd_flt = fm.icpg * fm.ocpg * | |||
| fm.spatial[0] * fm.spatial[1] * fm.spatial[2] * | |||
| tflt.layout.dtype.size(); | |||
| for (uint32_t g = 0; g < grp; ++ g) { | |||
| m_impl->exec(sub_args); | |||
| incr_voidp(tsrc.raw_ptr, strd_src); | |||
| incr_voidp(tdst.raw_ptr, strd_dst); | |||
| incr_voidp(tflt.raw_ptr, strd_flt); | |||
| for (uint32_t g = 0; g < grp; ++g) { | |||
| config.second->exec(tsrc, tfilter, tdst, bundle.get_workspace(0)); | |||
| incr_voidp(tsrc.raw_ptr, strd_src); | |||
| incr_voidp(tdst.raw_ptr, strd_dst); | |||
| incr_voidp(tfilter.raw_ptr, strd_flt); | |||
| } | |||
| } | |||
| } | |||
| @@ -26,6 +26,7 @@ namespace convolution3d { | |||
| struct ForwardSizeArgs { | |||
| HandleImpl *handle; | |||
| const TensorLayout *src_layout; | |||
| const TensorLayout *filter_layout; | |||
| CanonizedFilterMeta filter_meta; | |||
| const TensorLayout *dst_layout; | |||
| param::Convolution3D::DataType data_type; | |||
| @@ -35,16 +35,6 @@ Convolution3DForwardImpl::get_algorithm_heuristic( | |||
| const TensorLayout& dst, size_t workspace_limit_in_bytes, | |||
| const AlgoAttribute& positive_attr, | |||
| const AlgoAttribute& negative_attr) { | |||
| auto fm = check_layout_fwd(src, filter, dst); | |||
| return get_algorithm_heuristic(src, fm, dst, workspace_limit_in_bytes, | |||
| positive_attr, negative_attr); | |||
| } | |||
| Convolution3DForwardImpl::Algorithm* | |||
| Convolution3DForwardImpl::get_algorithm_heuristic( | |||
| const TensorLayout& src, const CanonizedFilterMeta& filter, | |||
| const TensorLayout& dst, size_t workspace_limit_in_bytes, | |||
| const AlgoAttribute& positive_attr, | |||
| const AlgoAttribute& negative_attr) { | |||
| AlgoBase::SizeArgs args(this, src, filter, dst); | |||
| #if CUDNN_MAJOR < 7 || (CUDNN_MAJOR == 7 && CUDNN_MINOR < 5) | |||
| @@ -98,18 +88,14 @@ Convolution3DForwardImpl::get_algorithm_heuristic( | |||
| if (auto algo = get_cudnn_algo()) | |||
| return algo; | |||
| } | |||
| if (args.filter_meta.group > 1) { | |||
| auto orig_args = args; | |||
| TensorLayout a, b; | |||
| AlgoGroupConvGeneral::modify_size_args(args, a, b); | |||
| if (prefer_1x1x1()) { | |||
| return sm_algo_pack.algo2gconv.at(&sm_algo_pack.a1x1x1); | |||
| } | |||
| if (is_cudnn_supported(args)) { | |||
| if (auto algo = get_cudnn_algo()) | |||
| return sm_algo_pack.algo2gconv.at(algo); | |||
| if (auto algo = | |||
| megdnn::get_algo_match_attribute<Convolution3DForwardImpl>( | |||
| &sm_algo_pack.group, positive_attr, | |||
| negative_attr)) { | |||
| return algo; | |||
| } | |||
| args = orig_args; | |||
| } | |||
| return megdnn::get_algo_match_attribute<Convolution3DForwardImpl>( | |||
| @@ -129,7 +115,7 @@ size_t Convolution3DForwardImpl::get_workspace_in_bytes( | |||
| const TensorLayout& src, const TensorLayout& filter, | |||
| const TensorLayout& dst) { | |||
| AlgoBase::SizeArgs args(this, src, filter, dst); | |||
| return get_algorithm(this, src, args.filter_meta, dst) | |||
| return get_algorithm(this, src, filter, dst) | |||
| ->get_workspace_in_bytes(args); | |||
| } | |||
| @@ -138,7 +124,7 @@ void Convolution3DForwardImpl::exec(_megdnn_tensor_in src, | |||
| _megdnn_tensor_out dst, | |||
| _megdnn_workspace workspace) { | |||
| AlgoBase::ExecArgs args(this, src, filter, dst, workspace); | |||
| auto algo = get_algorithm(this, src.layout, args.filter_meta, dst.layout); | |||
| auto algo = get_algorithm(this, src.layout, filter.layout, dst.layout); | |||
| algo->check_workspace(args, workspace).exec(args); | |||
| } | |||
| @@ -151,7 +137,7 @@ void Convolution3DBackwardDataImpl::exec(_megdnn_tensor_in filter, | |||
| _megdnn_tensor_out grad, | |||
| _megdnn_workspace workspace) { | |||
| AlgoBase::ExecArgs args(this, filter, diff, grad, workspace); | |||
| auto algo = get_algorithm(this, args.filter_meta, diff.layout, grad.layout); | |||
| auto algo = get_algorithm(this, filter.layout, diff.layout, grad.layout); | |||
| algo->check_workspace(args, workspace).exec(args); | |||
| } | |||
| @@ -169,17 +155,6 @@ Convolution3DBackwardDataImpl::get_algorithm_heuristic( | |||
| const TensorLayout& grad, size_t workspace_limit_in_bytes, | |||
| const AlgoAttribute& positive_attr, | |||
| const AlgoAttribute& negative_attr) { | |||
| auto fm = check_layout_fwd(grad, filter, diff); | |||
| return get_algorithm_heuristic(fm, diff, grad, workspace_limit_in_bytes, | |||
| positive_attr, negative_attr); | |||
| } | |||
| Convolution3DBackwardDataImpl::Algorithm* | |||
| Convolution3DBackwardDataImpl::get_algorithm_heuristic( | |||
| const CanonizedFilterMeta& filter, const TensorLayout& diff, | |||
| const TensorLayout& grad, size_t workspace_limit_in_bytes, | |||
| const AlgoAttribute& positive_attr, | |||
| const AlgoAttribute& negative_attr) { | |||
| AlgoBase::SizeArgs args(this, filter, diff, grad); | |||
| if (args.filter_meta.group > 1 && | |||
| @@ -215,14 +190,11 @@ Convolution3DBackwardDataImpl::get_algorithm_heuristic( | |||
| } | |||
| if (args.filter_meta.group > 1) { | |||
| auto orig_args = args; | |||
| TensorLayout a, b; | |||
| AlgoGroupConvGeneral::modify_size_args(args, a, b); | |||
| if (is_cudnn_supported(args.as_fwd_args())) { | |||
| if (auto algo = get_cudnn_algo()) | |||
| return sm_algo_pack.algo2gconv.at(algo); | |||
| if (auto algo = megdnn::get_algo_match_attribute< | |||
| Convolution3DBackwardDataImpl>( | |||
| &sm_algo_pack.group, positive_attr, negative_attr)) { | |||
| return algo; | |||
| } | |||
| args = orig_args; | |||
| } | |||
| return megdnn::get_algo_match_attribute<Convolution3DBackwardDataImpl>( | |||
| @@ -234,7 +206,7 @@ size_t Convolution3DBackwardDataImpl::get_workspace_in_bytes( | |||
| const TensorLayout& filter, const TensorLayout& diff, | |||
| const TensorLayout& grad) { | |||
| AlgoBase::SizeArgs args(this, filter, diff, grad); | |||
| return get_algorithm(this, args.filter_meta, diff, grad) | |||
| return get_algorithm(this, filter, diff, grad) | |||
| ->get_workspace_in_bytes(args); | |||
| } | |||
| @@ -248,7 +220,7 @@ void Convolution3DBackwardFilterImpl::exec(_megdnn_tensor_in src, | |||
| _megdnn_workspace workspace) { | |||
| AlgoBase::ExecArgs args(this, src, diff, grad, workspace); | |||
| auto algo = | |||
| get_algorithm(this, src.layout, diff.layout, args.grad_filter_meta); | |||
| get_algorithm(this, src.layout, diff.layout, grad.layout); | |||
| algo->check_workspace(args, workspace).exec(args); | |||
| } | |||
| @@ -266,17 +238,6 @@ Convolution3DBackwardFilterImpl::get_algorithm_heuristic( | |||
| const TensorLayout& grad, size_t workspace_limit_in_bytes, | |||
| const AlgoAttribute& positive_attr, | |||
| const AlgoAttribute& negative_attr) { | |||
| auto fm = check_layout_fwd(src, grad, diff); | |||
| return get_algorithm_heuristic(src, diff, fm, workspace_limit_in_bytes, | |||
| positive_attr, negative_attr); | |||
| } | |||
| Convolution3DBackwardFilterImpl::Algorithm* | |||
| Convolution3DBackwardFilterImpl::get_algorithm_heuristic( | |||
| const TensorLayout& src, const TensorLayout& diff, | |||
| const CanonizedFilterMeta& grad, size_t workspace_limit_in_bytes, | |||
| const AlgoAttribute& positive_attr, | |||
| const AlgoAttribute& negative_attr) { | |||
| AlgoBase::SizeArgs args(this, src, diff, grad); | |||
| if (args.grad_filter_meta.group > 1 && | |||
| @@ -310,15 +271,13 @@ Convolution3DBackwardFilterImpl::get_algorithm_heuristic( | |||
| if (auto algo = get_cudnn_algo()) | |||
| return algo; | |||
| } | |||
| if (args.grad_filter_meta.group > 1) { | |||
| auto orig_args = args; | |||
| TensorLayout a, b; | |||
| AlgoGroupConvGeneral::modify_size_args(args, a, b); | |||
| if (is_cudnn_supported(args.as_fwd_args())) { | |||
| if (auto algo = get_cudnn_algo()) | |||
| return sm_algo_pack.algo2gconv.at(algo); | |||
| if (auto algo = megdnn::get_algo_match_attribute< | |||
| Convolution3DBackwardFilterImpl>( | |||
| &sm_algo_pack.group, positive_attr, negative_attr)) { | |||
| return algo; | |||
| } | |||
| args = orig_args; | |||
| } | |||
| return megdnn::get_algo_match_attribute<Convolution3DBackwardFilterImpl>( | |||
| @@ -330,7 +289,7 @@ size_t Convolution3DBackwardFilterImpl::get_workspace_in_bytes( | |||
| const TensorLayout& src, const TensorLayout& diff, | |||
| const TensorLayout& grad) { | |||
| AlgoBase::SizeArgs args(this, src, diff, grad); | |||
| return get_algorithm(this, src, diff, args.grad_filter_meta) | |||
| return get_algorithm(this, src, diff, grad) | |||
| ->get_workspace_in_bytes(args); | |||
| } | |||
| @@ -21,17 +21,6 @@ public: | |||
| using Convolution3DForward::Convolution3DForward; | |||
| void exec(_megdnn_tensor_in src, _megdnn_tensor_in filter, | |||
| _megdnn_tensor_out dst, _megdnn_workspace workspace) override; | |||
| AlgorithmInfo get_algorithm_info_heuristic(const TensorLayout& src, | |||
| const CanonizedFilterMeta& filter, | |||
| const TensorLayout& dst, | |||
| size_t workspace_limit_in_bytes, | |||
| const AlgoAttribute& positive_attr, | |||
| const AlgoAttribute& negative_attr) { | |||
| return get_algorithm_heuristic(src, filter, dst, | |||
| workspace_limit_in_bytes, positive_attr, | |||
| negative_attr) | |||
| ->info(); | |||
| } | |||
| size_t get_workspace_in_bytes(const TensorLayout& src, | |||
| const TensorLayout& filter, | |||
| const TensorLayout& dst) override; | |||
| @@ -57,13 +46,6 @@ protected: | |||
| const AlgoAttribute& negative_attr) override; | |||
| private: | |||
| Algorithm* get_algorithm_heuristic(const TensorLayout& src, | |||
| const CanonizedFilterMeta& filter, | |||
| const TensorLayout& dst, | |||
| size_t workspace_limit_in_bytes, | |||
| const AlgoAttribute& positive_attr, | |||
| const AlgoAttribute& negative_attr); | |||
| static AlgoPack sm_algo_pack; | |||
| }; | |||
| @@ -72,16 +54,6 @@ public: | |||
| using Convolution3DBackwardData::Convolution3DBackwardData; | |||
| void exec(_megdnn_tensor_in filter, _megdnn_tensor_in diff, | |||
| _megdnn_tensor_out grad, _megdnn_workspace workspace) override; | |||
| AlgorithmInfo get_algorithm_info_heuristic( | |||
| const CanonizedFilterMeta& filter, const TensorLayout& diff, | |||
| const TensorLayout& grad, size_t workspace_limit_in_bytes, | |||
| const AlgoAttribute& positive_attr, | |||
| const AlgoAttribute& negative_attr) { | |||
| return get_algorithm_heuristic(filter, diff, grad, | |||
| workspace_limit_in_bytes, positive_attr, | |||
| negative_attr) | |||
| ->info(); | |||
| } | |||
| size_t get_workspace_in_bytes(const TensorLayout& filter, | |||
| const TensorLayout& diff, | |||
| const TensorLayout& grad) override; | |||
| @@ -109,13 +81,6 @@ protected: | |||
| const AlgoAttribute& negative_attr) override; | |||
| private: | |||
| Algorithm* get_algorithm_heuristic(const CanonizedFilterMeta& filter, | |||
| const TensorLayout& diff, | |||
| const TensorLayout& grad, | |||
| size_t workspace_limit_in_bytes, | |||
| const AlgoAttribute& positive_attr, | |||
| const AlgoAttribute& negative_attr); | |||
| static AlgoPack sm_algo_pack; | |||
| }; | |||
| @@ -127,17 +92,6 @@ public: | |||
| size_t get_workspace_in_bytes(const TensorLayout& src, | |||
| const TensorLayout& diff, | |||
| const TensorLayout& grad) override; | |||
| AlgorithmInfo get_algorithm_info_heuristic( | |||
| const TensorLayout& src, const TensorLayout& diff, | |||
| const CanonizedFilterMeta& grad, size_t workspace_limit_in_bytes, | |||
| const AlgoAttribute& positive_attr, | |||
| const AlgoAttribute& negative_attr) { | |||
| return get_algorithm_heuristic(src, diff, grad, | |||
| workspace_limit_in_bytes, positive_attr, | |||
| negative_attr) | |||
| ->info(); | |||
| } | |||
| const char* get_algorithm_set_name() const override; | |||
| class AlgoBase; | |||
| @@ -162,13 +116,6 @@ protected: | |||
| const AlgoAttribute& negative_attr) override; | |||
| private: | |||
| Algorithm* get_algorithm_heuristic(const TensorLayout& src, | |||
| const TensorLayout& diff, | |||
| const CanonizedFilterMeta& grad, | |||
| size_t workspace_limit_in_bytes, | |||
| const AlgoAttribute& positive_attr, | |||
| const AlgoAttribute& negative_attr); | |||
| static AlgoPack sm_algo_pack; | |||
| }; | |||
| } // namespace cuda | |||
| @@ -66,12 +66,10 @@ TEST_F(CUDA, GROUP_CONVOLUTION3D_FORWARD_1x1x1) { | |||
| auto run = [&](size_t N, size_t IC, size_t ID, size_t IH, size_t IW, | |||
| size_t FD, size_t FH, size_t FW, size_t OC, size_t group) { | |||
| Checker<Convolution3D> checker(handle_cuda()); | |||
| #if CUDNN_MAJOR <= 6 | |||
| bool require_algo = true; | |||
| checker.set_before_exec_callback( | |||
| AlgoChecker<Convolution3DForward>{ | |||
| "group_conv3d:1x1x1", &require_algo}); | |||
| #endif | |||
| checker.set_before_exec_callback(AlgoChecker<Convolution3DForward>( | |||
| ExecutionPolicyAlgoName{"CUDA:GROUP_CONV3D_FORWARD", | |||
| {{"1x1x1", | |||
| {}}}})); | |||
| Convolution3D::Param param; | |||
| param.sparse = Convolution3D::Param::Sparse::GROUP; | |||
| auto ICg = IC / group; | |||
| @@ -125,6 +123,45 @@ TEST_F(CUDA, GROUP_CONVOLUTION3D_BACKWARD_DATA) { | |||
| run(2, 32, 64, 64, 64, 3, 3, 3, 32, 62, 62, 62, 0, 0, 0, 1, 1, 1, 4); | |||
| } | |||
| TEST_F(CUDA, GROUP_CONVOLUTION3D_BACKWARD_DATA_CUDNN) { | |||
| auto run = [&](size_t N, size_t IC, size_t ID, size_t IH, size_t IW, | |||
| size_t FD, size_t FH, size_t FW, size_t OC, size_t OD, | |||
| size_t OH, size_t OW, size_t PD, size_t PH, size_t PW, | |||
| size_t SD, size_t SH, size_t SW, size_t group) { | |||
| Checker<Convolution3DBackwardData> checker(handle_cuda()); | |||
| checker.set_before_exec_callback( | |||
| AlgoChecker<Convolution3DBackwardData>(ExecutionPolicyAlgoName{ | |||
| "CUDA:GROUP_CONV3D_BACKWARD_DATA", {{"CUDNN", {}}}})); | |||
| Convolution3DBackwardData::Param param; | |||
| param.sparse = Convolution3D::Param::Sparse::GROUP; | |||
| param.pad_d = PD; | |||
| param.pad_h = PH; | |||
| param.pad_w = PW; | |||
| param.stride_d = SD; | |||
| param.stride_h = SH; | |||
| param.stride_w = SW; | |||
| auto ICg = IC / group; | |||
| auto OCg = OC / group; | |||
| checker.set_param(param).exec({{group, OCg, ICg, FD, FH, FW}, | |||
| {N, OC, OD, OH, OW}, | |||
| {N, IC, ID, IH, IW}}); | |||
| }; | |||
| // bug case in prev ver | |||
| run(1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 1, 3, 0, 0, 1, 1, 1, 1, 2); | |||
| run(1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 0, 0, 1, 1, 1, 2, 2); | |||
| run(1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 2, 1, 0, 1, 0, 1, 2, 1, 2); | |||
| run(1, 2, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 0, 0, 2, 1, 1, 2); | |||
| // normal case | |||
| run(2, 64, 7, 7, 7, 3, 3, 3, 32, 5, 5, 5, 0, 0, 0, 1, 1, 1, 2); | |||
| // padded case | |||
| run(2, 32, 7, 7, 7, 3, 3, 3, 64, 7, 7, 7, 1, 1, 1, 1, 1, 1, 4); | |||
| // strided case | |||
| run(2, 32, 7, 7, 7, 3, 3, 3, 64, 3, 3, 3, 0, 0, 0, 2, 2, 2, 8); | |||
| // bigger case | |||
| run(2, 32, 64, 64, 64, 3, 3, 3, 32, 62, 62, 62, 0, 0, 0, 1, 1, 1, 4); | |||
| } | |||
| TEST_F(CUDA, GROUP_CONVOLUTION3D_BACKWARD_FILTER) { | |||
| auto run = [&](size_t N, size_t IC, size_t ID, size_t IH, size_t IW, | |||
| size_t FD, size_t FH, size_t FW, size_t OC, size_t OD, | |||
| @@ -153,6 +190,39 @@ TEST_F(CUDA, GROUP_CONVOLUTION3D_BACKWARD_FILTER) { | |||
| run(2, 32, 7, 7, 7, 3, 3, 3, 64, 3, 3, 3, 0, 0, 0, 2, 2, 2, 8); | |||
| } | |||
| TEST_F(CUDA, GROUP_CONVOLUTION3D_BACKWARD_FILTER_CUDNN) { | |||
| auto run = [&](size_t N, size_t IC, size_t ID, size_t IH, size_t IW, | |||
| size_t FD, size_t FH, size_t FW, size_t OC, size_t OD, | |||
| size_t OH, size_t OW, size_t PD, size_t PH, size_t PW, | |||
| size_t SD, size_t SH, size_t SW, size_t group) { | |||
| Checker<Convolution3DBackwardFilter> checker(handle_cuda()); | |||
| checker.set_before_exec_callback( | |||
| AlgoChecker<Convolution3DBackwardFilter>( | |||
| ExecutionPolicyAlgoName{ | |||
| "CUDA:GROUP_CONV3D_BACKWARD_FILTER", | |||
| {{"CUDNN", {}}}})); | |||
| Convolution3DBackwardFilter::Param param; | |||
| param.sparse = Convolution3D::Param::Sparse::GROUP; | |||
| param.pad_d = PD; | |||
| param.pad_h = PH; | |||
| param.pad_w = PW; | |||
| param.stride_d = SD; | |||
| param.stride_h = SH; | |||
| param.stride_w = SW; | |||
| auto ICg = IC / group; | |||
| auto OCg = OC / group; | |||
| checker.set_param(param).exec({{N, IC, ID, IH, IW}, | |||
| {N, OC, OD, OH, OW}, | |||
| {group, OCg, ICg, FD, FH, FW}}); | |||
| }; | |||
| // normal case | |||
| run(2, 64, 7, 7, 7, 3, 3, 3, 32, 5, 5, 5, 0, 0, 0, 1, 1, 1, 2); | |||
| // padded case | |||
| run(2, 32, 7, 7, 7, 3, 3, 3, 64, 7, 7, 7, 1, 1, 1, 1, 1, 1, 4); | |||
| // strided case | |||
| run(2, 32, 7, 7, 7, 3, 3, 3, 64, 3, 3, 3, 0, 0, 0, 2, 2, 2, 8); | |||
| } | |||
| } // namespace test | |||
| } // namespace megdnn | |||