GitOrigin-RevId: d2e1e14d41
tags/v1.1.0
| @@ -378,15 +378,15 @@ public: | |||||
| _megdnn_workspace workspace) = 0; | _megdnn_workspace workspace) = 0; | ||||
| /** | /** | ||||
| * \brief execute weight preprocessing, read weights form filter and write | |||||
| * to preprocessed_filter after preprocessed. | |||||
| * \brief execute weight preprocessing: read weights from filter and bias, | |||||
| * and write the preprocessed result to preprocessed_filter. | |||||
| * | * | ||||
| * \param[in] workspace the needed tmp workspace when exec_preprocess | * \param[in] workspace the needed tmp workspace when exec_preprocess | ||||
| * running, the size is got by get_preprocess_workspace_in_bytes | * running, the size is got by get_preprocess_workspace_in_bytes | ||||
| */ | */ | ||||
| virtual void exec_preprocess(const TensorLayout& src_layout, | virtual void exec_preprocess(const TensorLayout& src_layout, | ||||
| _megdnn_tensor_in filter, | _megdnn_tensor_in filter, | ||||
| const TensorLayout& bias_layout, | |||||
| _megdnn_tensor_in bias, | |||||
| const TensorLayout& z_layout, | const TensorLayout& z_layout, | ||||
| const TensorLayout& dst_layout, | const TensorLayout& dst_layout, | ||||
| PreprocessedFilter* preprocessed_filter, | PreprocessedFilter* preprocessed_filter, | ||||
| @@ -238,11 +238,11 @@ ConvBiasForwardImpl::deduce_preprocessed_filter_layout( | |||||
| void ConvBiasForwardImpl::exec_preprocess( | void ConvBiasForwardImpl::exec_preprocess( | ||||
| const TensorLayout& src_layout, _megdnn_tensor_in filter, | const TensorLayout& src_layout, _megdnn_tensor_in filter, | ||||
| const TensorLayout& bias_layout, const TensorLayout& z_layout, | |||||
| _megdnn_tensor_in bias, const TensorLayout& z_layout, | |||||
| const TensorLayout& dst_layout, PreprocessedFilter* preprocessed_filter, | const TensorLayout& dst_layout, PreprocessedFilter* preprocessed_filter, | ||||
| _megdnn_workspace workspace) { | _megdnn_workspace workspace) { | ||||
| TensorND src{nullptr, src_layout}, dst{nullptr, dst_layout}, | TensorND src{nullptr, src_layout}, dst{nullptr, dst_layout}, | ||||
| z{nullptr, z_layout}, bias{nullptr, bias_layout}; | |||||
| z{nullptr, z_layout}; | |||||
| AlgoBase::ExecArgs args(this, src, filter, bias, z, dst, workspace, | AlgoBase::ExecArgs args(this, src, filter, bias, z, dst, workspace, | ||||
| preprocessed_filter); | preprocessed_filter); | ||||
| auto algo = get_algorithm(this, src.layout, filter.layout, bias.layout, | auto algo = get_algorithm(this, src.layout, filter.layout, bias.layout, | ||||
| @@ -49,7 +49,7 @@ public: | |||||
| const TensorLayout&, const TensorLayout&, const TensorLayout&, | const TensorLayout&, const TensorLayout&, const TensorLayout&, | ||||
| const TensorLayout&, const TensorLayout&) override; | const TensorLayout&, const TensorLayout&) override; | ||||
| void exec_preprocess(const TensorLayout&, _megdnn_tensor_in, | void exec_preprocess(const TensorLayout&, _megdnn_tensor_in, | ||||
| const TensorLayout&, const TensorLayout&, | |||||
| _megdnn_tensor_in, const TensorLayout&, | |||||
| const TensorLayout&, PreprocessedFilter*, | const TensorLayout&, PreprocessedFilter*, | ||||
| _megdnn_workspace) override; | _megdnn_workspace) override; | ||||
| const char* get_algorithm_set_name() const override; | const char* get_algorithm_set_name() const override; | ||||
| @@ -178,15 +178,14 @@ void ConvBiasImpl::exec(_megdnn_tensor_in src, _megdnn_tensor_in filter, | |||||
| void ConvBiasImpl::exec_preprocess(const TensorLayout& src_layout, | void ConvBiasImpl::exec_preprocess(const TensorLayout& src_layout, | ||||
| _megdnn_tensor_in filter, | _megdnn_tensor_in filter, | ||||
| const TensorLayout& bias_layout, | |||||
| _megdnn_tensor_in bias, | |||||
| const TensorLayout& z_layout, | const TensorLayout& z_layout, | ||||
| const TensorLayout& dst_layout, | const TensorLayout& dst_layout, | ||||
| PreprocessedFilter* preprocessed_filter, | PreprocessedFilter* preprocessed_filter, | ||||
| _megdnn_workspace workspace) { | _megdnn_workspace workspace) { | ||||
| //! exec_preprocess currently only support preprocess weights before exec, | |||||
| //! src/dst/bias/z will be ignored, just set to nullptr | |||||
| TensorND src{nullptr, src_layout}, dst{nullptr, dst_layout}, | |||||
| bias{nullptr, bias_layout}; | |||||
| //! exec_preprocess currently only support preprocess weights and bias | |||||
| //! before exec, src/dst/z will be ignored, just set to nullptr | |||||
| TensorND src{nullptr, src_layout}, dst{nullptr, dst_layout}; | |||||
| auto fparam = make_ncb_kern_param(src, filter, bias, dst, workspace, | auto fparam = make_ncb_kern_param(src, filter, bias, dst, workspace, | ||||
| preprocessed_filter); | preprocessed_filter); | ||||
| //! should not pass workspace_size limit otherwise can not find match algo | //! should not pass workspace_size limit otherwise can not find match algo | ||||
| @@ -196,7 +195,7 @@ void ConvBiasImpl::exec_preprocess(const TensorLayout& src_layout, | |||||
| exec_preprocess_with_ncb_kern(fparam, algo); | exec_preprocess_with_ncb_kern(fparam, algo); | ||||
| } else { | } else { | ||||
| naive::ConvBiasForwardImpl::exec_preprocess( | naive::ConvBiasForwardImpl::exec_preprocess( | ||||
| src_layout, filter, bias_layout, z_layout, dst_layout, | |||||
| src_layout, filter, bias, z_layout, dst_layout, | |||||
| preprocessed_filter, workspace); | preprocessed_filter, workspace); | ||||
| } | } | ||||
| } | } | ||||
| @@ -57,7 +57,7 @@ public: | |||||
| void exec_preprocess(const TensorLayout& src_layout, | void exec_preprocess(const TensorLayout& src_layout, | ||||
| _megdnn_tensor_in filter, | _megdnn_tensor_in filter, | ||||
| const TensorLayout& bias_layout, | |||||
| _megdnn_tensor_in bias, | |||||
| const TensorLayout& z_layout, | const TensorLayout& z_layout, | ||||
| const TensorLayout& dst_layout, | const TensorLayout& dst_layout, | ||||
| PreprocessedFilter* preprocessed_filter, | PreprocessedFilter* preprocessed_filter, | ||||
| @@ -59,7 +59,7 @@ public: | |||||
| } | } | ||||
| void exec_preprocess(const TensorLayout&, _megdnn_tensor_in, | void exec_preprocess(const TensorLayout&, _megdnn_tensor_in, | ||||
| const TensorLayout&, const TensorLayout&, | |||||
| _megdnn_tensor_in, const TensorLayout&, | |||||
| const TensorLayout&, PreprocessedFilter*, | const TensorLayout&, PreprocessedFilter*, | ||||
| _megdnn_workspace) override {} | _megdnn_workspace) override {} | ||||
| @@ -601,7 +601,7 @@ struct OprWeightPreprocessProxy<ConvBiasForward> | |||||
| tensors[3].layout, tensors[4].layout); | tensors[3].layout, tensors[4].layout); | ||||
| WorkspaceWrapper preprocess_workspace(opr->handle(), | WorkspaceWrapper preprocess_workspace(opr->handle(), | ||||
| preprocess_workspace_size); | preprocess_workspace_size); | ||||
| opr->exec_preprocess(tensors[0].layout, tensors[1], tensors[2].layout, | |||||
| opr->exec_preprocess(tensors[0].layout, tensors[1], tensors[2], | |||||
| tensors[3].layout, tensors[4].layout, | tensors[3].layout, tensors[4].layout, | ||||
| &preprocessed_filter, | &preprocessed_filter, | ||||
| preprocess_workspace.workspace()); | preprocess_workspace.workspace()); | ||||
| @@ -1955,6 +1955,39 @@ typename DnnOp::Algorithm* try_find_any_weight_preprocess_algo( | |||||
| return nullptr; | return nullptr; | ||||
| } | } | ||||
| template <typename DnnOp, typename... Args> | |||||
| typename DnnOp::Algorithm* try_find_any_bias_preprocess_algo( | |||||
| DnnOp* dnn_op, const char* mgb_info, Maybe<bool>& found, | |||||
| Args&& ...args) { | |||||
| if (found.valid()) { | |||||
| if (found.val()) { | |||||
| return dnn_op->execution_policy().algorithm; | |||||
| } else { | |||||
| return nullptr; | |||||
| } | |||||
| } | |||||
| for (auto&& algo : dnn_op->get_all_algorithms( | |||||
| std::forward<Args>(args)...)) { | |||||
| dnn_op->execution_policy().algorithm = algo; | |||||
| auto layouts = dnn_op->deduce_preprocessed_filter_layout( | |||||
| std::forward<Args>(args)...); | |||||
| if (layouts.size() <= 1) | |||||
| continue; | |||||
| bool valid = false; | |||||
| if (!layouts[1].is_empty()) { | |||||
| valid = true; | |||||
| break; | |||||
| } | |||||
| if (valid) { | |||||
| found.emplace(true); | |||||
| return algo; | |||||
| } | |||||
| } | |||||
| found.emplace(false); | |||||
| mgb_log_warn("Can't find bias preprocess algo for op %s", mgb_info); | |||||
| return nullptr; | |||||
| } | |||||
| void test_free_memory_in_weight_preprocess(int record_level, CompNode cn) { | void test_free_memory_in_weight_preprocess(int record_level, CompNode cn) { | ||||
| HostTensorGenerator<> gen; | HostTensorGenerator<> gen; | ||||
| auto graph = ComputingGraph::make(); | auto graph = ComputingGraph::make(); | ||||
| @@ -2152,4 +2185,54 @@ TEST(TestGraph, FreeMemoryInWeightPreprocessWithMultiReader) { | |||||
| .empty()); | .empty()); | ||||
| } | } | ||||
| TEST(TestGraph, FreeBias) { | |||||
| HostTensorGenerator<> gen; | |||||
| auto graph = ComputingGraph::make(); | |||||
| auto cn = CompNode::load("xpu0"); | |||||
| graph->options().graph_opt.weight_preprocess = true; | |||||
| auto mkvar = [&](const char* name, const TensorShape& shp) { | |||||
| return opr::Host2DeviceCopy::make(*graph, gen(shp, cn)).rename(name); | |||||
| }; | |||||
| auto mkcvar = [&](const char* name, const TensorShape& shp) { | |||||
| return opr::SharedDeviceTensor::make_const(*graph, *gen(shp, cn)) | |||||
| .rename(name); | |||||
| }; | |||||
| auto x = mkvar("x", {1, 32, 16, 16}); | |||||
| // ConvBias test dense | |||||
| opr::ConvBias::Param param_conv_bias; | |||||
| param_conv_bias.pad_h = param_conv_bias.pad_w = 0; | |||||
| param_conv_bias.sparse = opr::ConvBias::Param::Sparse::DENSE; | |||||
| auto w1 = mkcvar("w1", {32, 32, 1, 1}), b1 = mkcvar("b1", {1, 32, 1, 1}); | |||||
| auto conv1 = opr::ConvBias::make(x, w1, b1, param_conv_bias); | |||||
| Maybe<bool> wp1; | |||||
| conv1.node()->owner_opr()->cast_final_safe<opr::ConvBias>() | |||||
| .setup_algo_chooser([&](const cg::OperatorNodeBase* opr) { | |||||
| return try_find_any_bias_preprocess_algo( | |||||
| opr->cast_final_safe<opr::ConvBias>().megdnn_opr(), | |||||
| opr->cname(), wp1, | |||||
| opr->input(0)->layout(), opr->input(1)->layout(), | |||||
| opr->input(2)->layout(), TensorLayout{}, | |||||
| opr->output(0)->layout()); | |||||
| }); | |||||
| HostTensorND host_y; | |||||
| auto func =graph->compile({make_callback_copy(conv1, host_y)}); | |||||
| //!flag the no need memory of var | |||||
| func->execute(); | |||||
| //!free the no need memory of var | |||||
| func->execute(); | |||||
| auto check = [&](SymbolVar v) { | |||||
| ASSERT_TRUE(v.node()->contain_flag(VarNode::Flag::MEMORY_NO_NEED)); | |||||
| ASSERT_TRUE(v.node()->dev_tensor().empty()); | |||||
| ASSERT_TRUE(v.node()->owner_opr() | |||||
| ->cast_final_safe<opr::SharedDeviceTensor>() | |||||
| .get_dev_tensor() | |||||
| .empty()); | |||||
| }; | |||||
| ASSERT_TRUE(wp1.valid()); | |||||
| if (wp1.val()) { | |||||
| check(b1); | |||||
| } | |||||
| } | |||||
| // vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} | // vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} | ||||
| @@ -961,17 +961,37 @@ void ConvBiasForward::scn_do_execute_preprocess() { | |||||
| z_layout = input(3)->layout(); | z_layout = input(3)->layout(); | ||||
| } | } | ||||
| megdnn_opr()->exec_preprocess( | megdnn_opr()->exec_preprocess( | ||||
| input(0)->layout(), input(1)->dev_tensor().as_megdnn(), bias_layout, | |||||
| z_layout, output(0)->layout(), preprocessed_filter(), | |||||
| input(0)->layout(), input(1)->dev_tensor().as_megdnn(), | |||||
| input(2)->dev_tensor().as_megdnn(), z_layout, output(0)->layout(), | |||||
| preprocessed_filter(), | |||||
| intl::get_megdnn_workspace_from_var(output().back())); | intl::get_megdnn_workspace_from_var(output().back())); | ||||
| //! Flag the input(1) no use later, which can be freed when no other | |||||
| //! Flag the weight and bias no use later, which can be freed when no other | |||||
| //! var depend on its dev_value, host_value and shape. | //! var depend on its dev_value, host_value and shape. | ||||
| auto receiver_info = | |||||
| auto receiver_info_weight = | |||||
| input(1)->owner_graph()->var_receiver_in_current_comp_seq(input(1)); | input(1)->owner_graph()->var_receiver_in_current_comp_seq(input(1)); | ||||
| if (receiver_info.dev_value == 1 && receiver_info.host_value == 0 && | |||||
| receiver_info.shape == 0) { | |||||
| if (receiver_info_weight.dev_value == 1 && | |||||
| receiver_info_weight.host_value == 0 && | |||||
| receiver_info_weight.shape == 0) { | |||||
| input(1)->add_flag(VarNode::Flag::MEMORY_NO_NEED); | input(1)->add_flag(VarNode::Flag::MEMORY_NO_NEED); | ||||
| } | } | ||||
| //! if bias is preprocessed | |||||
| if (input().size() > 3) { | |||||
| auto preprocessed_layouts = | |||||
| megdnn_opr()->deduce_preprocessed_filter_layout( | |||||
| input(0)->layout(), input(1)->layout(), bias_layout, | |||||
| z_layout, output(0)->layout()); | |||||
| if (preprocessed_layouts.size() > 1 && | |||||
| !preprocessed_layouts[1].is_empty()) { | |||||
| auto receiver_info_bias = | |||||
| input(2)->owner_graph()->var_receiver_in_current_comp_seq( | |||||
| input(2)); | |||||
| if (receiver_info_bias.dev_value == 1 && | |||||
| receiver_info_bias.host_value == 0 && | |||||
| receiver_info_bias.shape == 0) { | |||||
| input(2)->add_flag(VarNode::Flag::MEMORY_NO_NEED); | |||||
| } | |||||
| } | |||||
| } | |||||
| } | } | ||||
| /* ===================== LocalShareForward ==================== */ | /* ===================== LocalShareForward ==================== */ | ||||
| @@ -178,9 +178,26 @@ typename TimedProfiler<Opr>::TResult TimedProfiler<Opr>::prof_impl( | |||||
| for (size_t i = 0; i < flt_val.size(); i++) { | for (size_t i = 0; i < flt_val.size(); i++) { | ||||
| pf.tensors[i] = flt_val[i].as_megdnn(); | pf.tensors[i] = flt_val[i].as_megdnn(); | ||||
| } | } | ||||
| APPLY(_(megdnn_opr)->exec_preprocess(args..., &pf, mdn_workspace), | |||||
| std::forward_as_tuple(layouts[0], inp_val[1].as_megdnn()), | |||||
| array_skip<2>(layouts)); | |||||
| if_constexpr<opr_contain_bias<Opr>()>( | |||||
| //! convbias | |||||
| [&](auto __) { | |||||
| APPLY(__(megdnn_opr) | |||||
| ->exec_preprocess(args..., &pf, | |||||
| mdn_workspace), | |||||
| std::forward_as_tuple(layouts[0], | |||||
| inp_val[1].as_megdnn(), | |||||
| inp_val[2].as_megdnn()), | |||||
| array_skip<arity_in - 1>(layouts)); | |||||
| }, | |||||
| //! Convolution | |||||
| [&](auto __) { | |||||
| APPLY(__(megdnn_opr) | |||||
| ->exec_preprocess(args..., &pf, | |||||
| mdn_workspace), | |||||
| std::forward_as_tuple(layouts[0], | |||||
| inp_val[1].as_megdnn()), | |||||
| array_skip<2>(layouts)); | |||||
| }); | |||||
| } | } | ||||
| }); | }); | ||||
| @@ -75,6 +75,11 @@ constexpr bool opr_supports_preprocess() { | |||||
| std::is_same<Opr, megdnn::ConvBias>::value; | std::is_same<Opr, megdnn::ConvBias>::value; | ||||
| } | } | ||||
| template <typename Opr> | |||||
| constexpr bool opr_contain_bias() { | |||||
| return std::is_same<Opr, megdnn::ConvBias>::value; | |||||
| } | |||||
| template <typename Opr, bool has_prep> | template <typename Opr, bool has_prep> | ||||
| struct PreprocessFilterImpl { | struct PreprocessFilterImpl { | ||||
| using T = union {}; | using T = union {}; | ||||