GitOrigin-RevId: 2eba697d85
tags/v1.11.1
@@ -435,31 +435,6 @@ void NetworkImplDft::cross_compnode_model_detect() {
    m_nr_device_type = nr_used_device_type.size();
}

void NetworkImplDft::adapt_option_valid() {
    auto&& options = m_load_config.comp_graph->options();
    if (m_user_config->options.force_output_use_user_specified_memory) {
        for (auto&& out : m_load_result.output_var_list) {
            auto opr = out.node()->owner_opr();
            //! all the dest operator inherit from ReadonlyFwdHelper can't
            //! support force_output_use_user_specified_memory options
            if (opr->try_cast_final<mgb::opr::Reshape>() ||
                opr->try_cast_final<mgb::opr::Broadcast>() ||
                opr->try_cast_final<mgb::opr::Subtensor>() ||
                opr->try_cast_final<mgb::opr::AxisAddRemove>() ||
                opr->try_cast_final<mgb::opr::Dimshuffle>()) {
                m_user_config->options.force_output_use_user_specified_memory = false;
                options.force_output_use_user_specified_memory = false;
                LITE_WARN(
                        "detect the unsupported dest operator %s when config "
                        "force_output_use_user_specified_memory, set "
                        "force_output_use_user_specified_memory to false\n",
                        opr->cname());
                break;
            }
        }
    }
}

void NetworkImplDft::layout_transform_optimization() {
    if (m_set_layout_transform) {
        mgb::ThinHashMap<mgb::SymbolVar, mgb::SymbolVar> out_var_map;
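The check removed above encodes the underlying constraint: destination operators that inherit from ReadonlyFwdHelper (Reshape, Broadcast, Subtensor, AxisAddRemove, Dimshuffle) may forward their input's storage instead of writing into a freshly allocated output, which is incompatible with force_output_use_user_specified_memory. A standalone toy sketch of that conflict (hypothetical ToyTensor type, not MegEngine code):

// Toy illustration of why a memory-forwarding output operator conflicts with
// user-specified output memory (hypothetical types, not MegEngine code).
#include <cassert>
#include <memory>
#include <vector>

struct ToyTensor {
    std::shared_ptr<std::vector<float>> storage;  // shared, possibly aliased buffer
};

// "Reshape"-like opr: forwards the input storage instead of allocating an output.
ToyTensor memforward_opr(const ToyTensor& in) {
    return ToyTensor{in.storage};  // output aliases the input buffer
}

int main() {
    ToyTensor input{std::make_shared<std::vector<float>>(16, 1.f)};
    ToyTensor output = memforward_opr(input);

    // The user asks the output to live in memory they own:
    auto user_buffer = std::make_shared<std::vector<float>>(16, 0.f);
    output.storage = user_buffer;

    // The producing opr still writes through the forwarded (input) buffer,
    // so the result never lands in the user's buffer:
    (*input.storage)[0] = 42.f;
    assert((*user_buffer)[0] != 42.f);
    return 0;
}

Rather than silently disabling the option as the removed code did, the rest of this patch appends an explicit Copy operator behind such outputs so the option can stay enabled.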
@@ -611,10 +586,6 @@ void NetworkImplDft::configure_after_loaded() {
    layout_transform_optimization();

    //! some optimization option maybe invalid in some case, so here just
    //! auto determine whether some options will apply.
    adapt_option_valid();

    //! find how many compnode the model has, this should call before update_io
    cross_compnode_model_detect();
@@ -239,9 +239,6 @@ private:
    //! optimized output tensor copy
    void output_tensor_copy_optimize(Var var, std::shared_ptr<Tensor> tensor);

    //! adapt option valid, it should call after update_io
    void adapt_option_valid();

    //! configure and optimize network after loaded
    void configure_after_loaded();
@@ -1,4 +1,5 @@
#include "./network.h"
#include "megbrain/opr/tensor_manip.h"

using namespace mgb;
@@ -137,6 +138,35 @@ SymbolVar Network::add_concat(SymbolVar f, SymbolVar g, int axis) {
    return opr::Concat::make({f, g}, axis);
}

SymbolVar Network::add_dimshuffle(SymbolVar f, std::vector<int> pattern) {
    return opr::Dimshuffle::make(f, pattern);
}

SymbolVar Network::add_axisaddremove(SymbolVar f) {
    return opr::AxisAddRemove::make(
            f, {{opr::AxisAddRemove::AxisDesc::Method::REMOVE, {0}}});
}

SymbolVar Network::add_subtensor(SymbolVar f) {
    using AIdx = opr::indexing::AxisIndexer;
    return opr::Subtensor::make(
            f, {AIdx::make_interval(0, f.make_scalar(0), None, None)});
}

SymbolVar Network::add_reshape(SymbolVar f) {
    auto shp = opr::GetVarShape::make(f);
    return opr::Reshape::make(f, shp);
}

SymbolVar Network::add_broadcast(SymbolVar f) {
    auto shp = opr::GetVarShape::make(f);
    return opr::Broadcast::make(f, shp);
}

SymbolVar Network::add_copy(SymbolVar f) {
    return opr::Copy::make(f);
}

SymbolVar mgb::create_block(
        Network& network, SymbolVar f_in, size_t stride, size_t num_outputs1,
        bool has_proj, DType out_dtype) {
@@ -53,6 +53,12 @@ public:
            opr::Pooling::Param::Mode mode = opr::Pooling::Param::Mode::MAX);
    SymbolVar add_type_cvt(SymbolVar f, DType out_dtype = dtype::Float32());
    SymbolVar add_concat(SymbolVar f, SymbolVar g, int axis = 0);
    SymbolVar add_dimshuffle(SymbolVar f, std::vector<int> pattern);
    SymbolVar add_axisaddremove(SymbolVar f);
    SymbolVar add_subtensor(SymbolVar f);
    SymbolVar add_reshape(SymbolVar f);
    SymbolVar add_broadcast(SymbolVar f);
    SymbolVar add_copy(SymbolVar f);
};

SymbolVar create_block(
@@ -45,6 +45,35 @@ struct TestGraph {
        m_out_var = m_network->add_concat(f, -f);
    }

    void create_relayout_out_graph(int mem_forward_opr_type) {
        input_tensor = m_gen({1, 3, 32, 32}, m_cn);
        auto input = opr::Host2DeviceCopy::make(*m_network->graph, input_tensor, m_cn)
                             .rename("input");
        auto f = m_network->add_conv(
                input, 4, {3, 3}, dtype::Float32(), true, {2, 2}, {0, 0});
        f = m_network->add_elemwise(
                {f}, dtype::Float32(), opr::Elemwise::Param::Mode::EXP);
        f = m_network->add_conv(f, 8, {3, 3}, dtype::Float32(), true, {1, 1}, {1, 1});
        f = m_network->add_pooling(f, {2, 2}, {2, 2});
        //! dimshuffle
        if (mem_forward_opr_type == 0) {
            f = m_network->add_dimshuffle(f, {0, 2, 3, 1});
            //! BroadCast
        } else if (mem_forward_opr_type == 1) {
            f = m_network->add_broadcast(f);
            //! Subtensor
        } else if (mem_forward_opr_type == 2) {
            f = m_network->add_subtensor(f);
            //! AxisAddRemove
        } else if (mem_forward_opr_type == 3) {
            f = m_network->add_axisaddremove(f);
            //! Reshape
        } else if (mem_forward_opr_type == 4) {
            f = m_network->add_reshape(f);
        }
        m_out_var = m_network->add_copy(f);
    }

    void create_graph_with_subtensor_forward() {
        input_tensor = m_gen({2, 3, 32, 32}, m_cn);
        auto input = opr::Host2DeviceCopy::make(*m_network->graph, input_tensor, m_cn)
@@ -211,6 +240,67 @@ TEST(TestNoCopy, IONoCopyPtrEQ) {
    }
}

namespace {
auto test_memory_forward_io_no_copy(int opr_type, TensorShape shape) {
    auto test_graph = TestGraph();
    auto compute_graph = test_graph.m_network->graph;
    compute_graph->options().force_output_use_user_specified_memory = true;
    test_graph.create_relayout_out_graph(opr_type);
    HostTensorND truth;
    auto func = test_graph.compile_without_copy();
    //! the output tensor has not yet been assigned user-specified memory, so
    //! executing the function is expected to fail
    ASSERT_THROW(func->execute(), MegBrainError);
    auto&& outvar = func->get_output_vars()[0];
    ASSERT_EQ(outvar, test_graph.m_out_var.node());
    size_t times = 10;
    for (size_t i = 0; i < times; i++) {
        auto input_tensor = test_graph.input_tensor;
        auto layout = input_tensor->layout();
        size_t length = layout.total_nr_elems();
        auto storage = TensorStorage<HostTensorStorageTrait>(test_graph.m_cn);
        storage.ensure_size(length * sizeof(float));
        float* ptr = storage.ptr()->as<float>();
        for (size_t d = 0; d < length; d++) {
            ptr[d] = i / 5 + 3;
        }
        input_tensor->reset(storage, layout);

        DeviceTensorND dv(test_graph.m_cn, shape);
        outvar->init_mem_plan(&dv);
        outvar->reset_dev_tensor_from_tensor(dv);

        func->execute();
        func->wait();
        if (i % 5 == 0) {
            truth.copy_from(func->get_output_vars()[0]->dev_tensor()).sync();
            continue;
        }
        HostTensorND to_check;
        to_check.copy_from(func->get_output_vars()[0]->dev_tensor()).sync();
        MGB_ASSERT_TENSOR_EQ(to_check, truth);
    }
}
} // namespace
TEST(TestNoCopy, IONoCopyEndWithDimshuffle) {
    test_memory_forward_io_no_copy(0, {1, 7, 7, 8});
}

TEST(TestNoCopy, IONoCopyEndWithReshape) {
    test_memory_forward_io_no_copy(4, {1, 8, 7, 7});
}

TEST(TestNoCopy, IONoCopyEndWithAxisAddRemove) {
    test_memory_forward_io_no_copy(3, {8, 7, 7});
}

TEST(TestNoCopy, IONoCopyEndWithBroadCast) {
    test_memory_forward_io_no_copy(1, {1, 8, 7, 7});
}

TEST(TestNoCopy, IONoCopyEndWithSubtensor) {
    test_memory_forward_io_no_copy(2, {1, 8, 7, 7});
}
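For reference, the expected shapes passed to test_memory_forward_io_no_copy above follow from the graph built in create_relayout_out_graph; a small standalone sketch of that arithmetic (assuming the usual floor-based output-extent formula for conv and pooling; not MegEngine code):

// Standalone sketch of the shape arithmetic behind the expected output shapes
// used in the TestNoCopy cases above.
#include <cstdio>

static int out_extent(int in, int kernel, int stride, int pad) {
    return (in + 2 * pad - kernel) / stride + 1;
}

int main() {
    int h = 32;                  // input is {1, 3, 32, 32}
    h = out_extent(h, 3, 2, 0);  // conv 3x3, stride 2, pad 0 -> 15
    h = out_extent(h, 3, 1, 1);  // conv 3x3, stride 1, pad 1 -> 15
    h = out_extent(h, 2, 2, 0);  // max pooling 2x2, stride 2 -> 7
    std::printf("spatial extent: %d\n", h);  // 7, with 8 channels: {1, 8, 7, 7}
    // Dimshuffle {0, 2, 3, 1} -> {1, 7, 7, 8}; AxisAddRemove(REMOVE axis 0)
    // -> {8, 7, 7}; Broadcast/Subtensor/Reshape keep {1, 8, 7, 7}.
    return 0;
}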
TEST(TestNoCopy, IONoCopyCorrect) {
    auto test_graph = TestGraph();
    auto compute_graph = test_graph.m_network->graph;
@@ -1,7 +1,25 @@
#include "megbrain/serialization/serializer.h"

#include "megbrain/gopt/inference.h"
#include "megbrain/opr/io.h"
#include "megbrain/opr/tensor_manip.h"
#include "megbrain/opr/utility.h"

namespace {
bool is_opr_memforward_var(mgb::VarNode* var) {
    if (var) {
        auto opr = var->owner_opr();
        if (opr->try_cast_final<mgb::opr::Reshape>() ||
            opr->try_cast_final<mgb::opr::Broadcast>() ||
            opr->try_cast_final<mgb::opr::Subtensor>() ||
            opr->try_cast_final<mgb::opr::AxisAddRemove>() ||
            opr->try_cast_final<mgb::opr::Dimshuffle>()) {
            return true;
        }
    }
    return false;
}
} // namespace

namespace mgb {
namespace serialization {
@@ -42,6 +60,14 @@ void GraphLoader::LoadResult::graph_compile_ahead() {
    //! just do basic optimize_for_inference ahead, and replace the var in
    //! LoadResult
    if (graph->options().force_output_use_user_specified_memory) {
        //! if the output var is produced by a memory-forwarding operator such as
        //! Dimshuffle or Reshape, it may forward its input's memory to the output,
        //! so append a Copy operator at the end
        for (auto& var : output_var_list) {
            if (is_opr_memforward_var(var.node())) {
                std::string name = var.node()->name();
                var = opr::Copy::make(var, name);
            }
        }
        auto options = gopt::OptimizeForInferenceOptions{};
        auto new_vars = gopt::optimize_for_inference(output_var_list, options);
        output_var_list = new_vars;
@@ -62,6 +88,7 @@ void GraphLoader::LoadResult::graph_compile_ahead() {
                    found, "can't find var name %s when optimize_for_inference. ",
                    var.node()->cname());
        }
        output_var_map_id = var_map_id;
    }
}
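Following the comment in graph_compile_ahead, the appended Copy gives each memory-forwarding output a destination buffer of its own, so user-specified output memory actually receives the computed values. A standalone toy sketch of that effect (hypothetical ToyTensor type again, not MegEngine code):

// Toy illustration of the fix: an explicit copy stage writes into the output's
// own (user-provided) buffer instead of aliasing the input.
#include <cassert>
#include <memory>
#include <vector>

struct ToyTensor {
    std::shared_ptr<std::vector<float>> storage;
};

// "Copy"-like opr: writes element-wise into the destination buffer.
void copy_opr(const ToyTensor& src, ToyTensor& dst) {
    *dst.storage = *src.storage;
}

int main() {
    // Value produced through a forwarded buffer, as in the earlier sketch.
    ToyTensor forwarded{std::make_shared<std::vector<float>>(16, 1.f)};
    (*forwarded.storage)[0] = 42.f;

    // Output bound to memory the user owns; the copy stage fills it.
    ToyTensor output{std::make_shared<std::vector<float>>(16, 0.f)};
    copy_opr(forwarded, output);

    assert((*output.storage)[0] == 42.f);  // user memory now holds the result
    return 0;
}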