GitOrigin-RevId: f0e917f716
tags/v1.0.0-rc1
| @@ -1 +1,2 @@ | |||
| from .sublinear_memory_config import SublinearMemoryConfig | |||
| from .tracing import exclude_from_trace, trace | |||
| @@ -0,0 +1,56 @@ | |||
| # -*- coding: utf-8 -*- | |||
| # MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
| # | |||
| # Copyright (c) 2014-2020 Megvii Inc. All rights reserved. | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, | |||
| # software distributed under the License is distributed on an | |||
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| from ..device import get_device_count | |||
class SublinearMemoryConfig:
    r"""
    Configuration for sublinear memory optimization.

    :param thresh_nr_try: number of samples both for searching in linear space
        and around current thresh in sublinear memory optimization. Default: 10.
        It can also be set through the environment variable 'MGB_SUBLINEAR_MEMORY_THRESH_NR_TRY'.
    :param genetic_nr_iter: number of iterations to find the best checkpoints in genetic algorithm.
        Default: 0.
        It can also be set through the environment variable 'MGB_SUBLINEAR_MEMORY_GENETIC_NR_ITER'.
    :param genetic_pool_size: number of samples for the crossover random selection
        during genetic optimization. Default: 20.
        It can also be set through the environment variable 'MGB_SUBLINEAR_MEMORY_GENETIC_POOL_SIZE'.
    :param lb_memory: memory lower bound of bottleneck size in MB for sublinear memory optimization.
        It can be used to perform manual tradeoff between memory and speed. Default: 0.
        It can also be set through the environment variable 'MGB_SUBLINEAR_MEMORY_LOWER_BOUND_MB'.
    :param num_worker: number of thread workers to search the optimum checkpoints
        in sublinear memory optimization. Default: half of cpu number in the system
        (but at least one), used when the argument is omitted or ``None``.
        Note: the value must be greater than or equal to one.
        It can also be set through the environment variable 'MGB_SUBLINEAR_MEMORY_WORKERS'.

    Note that the environment variable MGB_COMP_GRAPH_OPT must be set to 'enable_sublinear_memory_opt=1'
    in order for the above environment variables to be effective.

    :raises AssertionError: if ``thresh_nr_try``, ``genetic_nr_iter`` or
        ``genetic_pool_size`` is negative, or if ``num_worker`` is less than one.
    """

    def __init__(
        self,
        thresh_nr_try: int = 10,
        genetic_nr_iter: int = 0,
        genetic_pool_size: int = 20,
        lb_memory: int = 0,
        num_worker: int = None,
    ):
        # Explicit raises instead of ``assert`` statements: asserts are removed
        # under ``python -O``, which would let invalid values through unchecked.
        # AssertionError and the messages are kept so existing callers that
        # catch or match them keep working.
        if not (thresh_nr_try >= 0):
            raise AssertionError("thresh_nr_try must be greater or equal to zero")
        if not (genetic_nr_iter >= 0):
            raise AssertionError("genetic_nr_iter must be greater or equal to zero")
        if not (genetic_pool_size >= 0):
            raise AssertionError("genetic_pool_size must be greater or equal to zero")

        # Resolve the default worker count at call time rather than in the
        # signature, so the device query does not run when the module is imported.
        if num_worker is None:
            num_worker = max(1, get_device_count("cpu") // 2)
        if not (num_worker > 0):
            raise AssertionError("num_worker must be greater or equal to one")

        self.thresh_nr_try = thresh_nr_try
        self.genetic_nr_iter = genetic_nr_iter
        self.genetic_pool_size = genetic_pool_size
        # lb_memory is stored as given; no range check is applied to it.
        self.lb_memory = lb_memory
        self.num_worker = num_worker
| @@ -7,6 +7,7 @@ from ..core.ops.special import Const | |||
| from ..core.tensor import megbrain_graph as G | |||
| from ..core.tensor.core import OpBase, apply | |||
| from ..core.tensor.raw_tensor import OpDef, RawTensor, as_raw_tensor | |||
| from .sublinear_memory_config import SublinearMemoryConfig | |||
| class TraceMismatchError(RuntimeError): | |||
| @@ -72,11 +73,18 @@ class trace: | |||
| self.__init__(*args, **kwargs) | |||
| return self | |||
| def __init__(self, function, symbolic=False, capture_as_const=False): | |||
| def __init__( | |||
| self, | |||
| function, | |||
| symbolic=False, | |||
| capture_as_const=False, | |||
| sublinear_memory_config: SublinearMemoryConfig = None, | |||
| ): | |||
| self.__wrapped__ = function | |||
| self._symbolic = symbolic | |||
| self._capture_as_const = capture_as_const | |||
| self._capture_static_shape = False | |||
| self._sublinear_memory_config = sublinear_memory_config | |||
| self._untraced = True | |||
| self._tinfo = [] # handle -> TensorInfo | |||
| @@ -227,6 +235,7 @@ class trace: | |||
| G.OutputNode(x._LazyEvalTensor__varnode).outputs[0] | |||
| for x in lazy_eval_tensors | |||
| ] | |||
| self._apply_graph_options(self._lazy_eval_graph) | |||
| self._lazy_eval_graph.compile(*readers) | |||
| self._lazy_eval_graph() | |||
| for r, x in zip(readers, lazy_eval_tensors): | |||
| @@ -259,9 +268,26 @@ class trace: | |||
| info.exported = True | |||
| info.data_read = True | |||
def _apply_graph_options(self, graph):
    """Copy this trace's sublinear memory settings onto ``graph.options``.

    Does nothing when no sublinear memory config was given to the trace.
    Otherwise enables ``enable_sublinear_memory_opt`` on the graph options and
    copies each tuning field from the stored config to
    ``graph.options.sublinear_mem_config``.

    :param graph: object exposing ``options`` with the attributes
        ``enable_sublinear_memory_opt`` and ``sublinear_mem_config``.
    """
    user_config = self._sublinear_memory_config
    if user_config is None:
        return

    # sublinear
    graph.options.enable_sublinear_memory_opt = True
    graph_config = graph.options.sublinear_mem_config
    # Same fields, in the same order, as the individual assignments they replace.
    for field in (
        "lb_memory",
        "genetic_nr_iter",
        "genetic_pool_size",
        "thresh_nr_try",
        "num_worker",
    ):
        setattr(graph_config, field, getattr(user_config, field))
| def _compile(self): | |||
| graph = self._graph = G.Graph() | |||
| graph.options.no_force_inplace = True | |||
| self._apply_graph_options(graph) | |||
| # graph.options.graph_opt_level = 0 | |||
| need_reset_nodes = self._need_reset_nodes = [] | |||
| # links enforce ordering of I/O nodes | |||
| @@ -119,6 +119,7 @@ void init_graph_rt(py::module m) { | |||
| DEF_READWRITE(enable_memory_swap) | |||
| DEF_READWRITE(comp_node_seq_record_level) | |||
| DEF_READWRITE(no_force_inplace) | |||
| DEF_READWRITE(sublinear_mem_config) | |||
| // DEF_READWRITE(eager_evaluation) | |||
| // DEF_READWRITE(imperative_proxy_graph) | |||
| // DEF_READWRITE(extra_vardeps) | |||
| @@ -142,6 +143,16 @@ void init_graph_rt(py::module m) { | |||
| #undef CURRENT_CLASS | |||
| #define CURRENT_CLASS cg::ComputingGraph::Options::SublinearMemConfig | |||
| py::class_<cg::ComputingGraph::Options::SublinearMemConfig>(PyComputingGraphOptions, "SublinearMemConfig") | |||
| DEF_READWRITE(thresh_nr_try) | |||
| DEF_READWRITE(genetic_nr_iter) | |||
| DEF_READWRITE(genetic_pool_size) | |||
| DEF_READWRITE(lb_memory) | |||
| DEF_READWRITE(num_worker); | |||
| #undef CURRENT_CLASS | |||
| auto common = rel_import("common", m, 1); | |||
| common.def("invoke_op", [](const OpDef& def, const std::vector<cg::VarNode*> inputs, cg::ComputingGraph* graph) { | |||
| @@ -19,6 +19,7 @@ import megengine.functional as F | |||
| from megengine import jit | |||
| from megengine.core._trace_option import set_tensor_shape | |||
| from megengine.functional.debug_param import set_conv_execution_strategy | |||
| from megengine.jit import SublinearMemoryConfig | |||
| from megengine.module import AvgPool2d, BatchNorm2d, Conv2d, Linear, Module | |||
| from megengine.optimizer import SGD | |||
| from megengine.tensor import Tensor | |||
| @@ -217,14 +218,14 @@ def test_correctness(): | |||
| set_conv_execution_strategy("HEURISTIC_REPRODUCIBLE") | |||
| run_train(model_path, False, False, max_err=1e-5) | |||
| # run_test(model_path, True, False) | |||
| # run_test(model_path, True, True) | |||
| run_train(model_path, True, False, max_err=1e-5) | |||
| run_train(model_path, True, True, max_err=1e-5) | |||
| # sublinear | |||
| # config = SublinearMemoryConfig(genetic_nr_iter=10) | |||
| # run_test( | |||
| # model_path, True, True, sublinear_memory_config=config, max_err=1e-5, | |||
| # ) | |||
| config = SublinearMemoryConfig(genetic_nr_iter=10) | |||
| run_train( | |||
| model_path, True, True, sublinear_memory_config=config, max_err=1e-5, | |||
| ) | |||
| run_eval(model_path, False, max_err=1e-7) | |||
| # run_eval(model_path, True, max_err=1e-7) # XXX: fix me | |||
| run_eval(model_path, True, max_err=1e-7) | |||
| @@ -298,23 +298,23 @@ class trace: | |||
| if self._sublinear_memory_config is not None: | |||
| cg.set_option("enable_sublinear_memory_opt", True) | |||
| cg.set_option( | |||
| "sublinear_mem_cofig.lb_memory", | |||
| "sublinear_mem_config.lb_memory", | |||
| self._sublinear_memory_config.lb_memory, | |||
| ) | |||
| cg.set_option( | |||
| "sublinear_mem_cofig.genetic_nr_iter", | |||
| "sublinear_mem_config.genetic_nr_iter", | |||
| self._sublinear_memory_config.genetic_nr_iter, | |||
| ) | |||
| cg.set_option( | |||
| "sublinear_mem_cofig.genetic_pool_size", | |||
| "sublinear_mem_config.genetic_pool_size", | |||
| self._sublinear_memory_config.genetic_pool_size, | |||
| ) | |||
| cg.set_option( | |||
| "sublinear_mem_cofig.thresh_nr_try", | |||
| "sublinear_mem_config.thresh_nr_try", | |||
| self._sublinear_memory_config.thresh_nr_try, | |||
| ) | |||
| cg.set_option( | |||
| "sublinear_mem_cofig.num_worker", | |||
| "sublinear_mem_config.num_worker", | |||
| self._sublinear_memory_config.num_worker, | |||
| ) | |||
| # pack allreduce | |||
| @@ -116,11 +116,11 @@ bool _config::set_comp_graph_option( | |||
| SET_CG_OPTION(allocate_static_mem_after_graph_compile); | |||
| SET_CG_OPTION(log_level); | |||
| SET_CG_OPTION(enable_sublinear_memory_opt); | |||
| SET_CG_OPTION(sublinear_mem_cofig.lb_memory); | |||
| SET_CG_OPTION(sublinear_mem_cofig.genetic_nr_iter); | |||
| SET_CG_OPTION(sublinear_mem_cofig.genetic_pool_size); | |||
| SET_CG_OPTION(sublinear_mem_cofig.thresh_nr_try); | |||
| SET_CG_OPTION(sublinear_mem_cofig.num_worker); | |||
| SET_CG_OPTION(sublinear_mem_config.lb_memory); | |||
| SET_CG_OPTION(sublinear_mem_config.genetic_nr_iter); | |||
| SET_CG_OPTION(sublinear_mem_config.genetic_pool_size); | |||
| SET_CG_OPTION(sublinear_mem_config.thresh_nr_try); | |||
| SET_CG_OPTION(sublinear_mem_config.num_worker); | |||
| SET_CG_OPTION(enable_var_mem_defragment); | |||
| SET_CG_OPTION(eager_evaluation); | |||
| SET_CG_OPTION(enable_memory_swap); | |||
| @@ -219,7 +219,7 @@ ComputingGraphImpl::Components::Components(ComputingGraphImpl* owner) | |||
| grad_manager{owner}, | |||
| #if MGB_ENABLE_SUBLINEAR | |||
| seq_modifier_for_sublinear_memory{owner, | |||
| &(owner->options().sublinear_mem_cofig)}, | |||
| &(owner->options().sublinear_mem_config)}, | |||
| #endif | |||
| #if MGB_ENABLE_MEMORY_SWAP | |||
| memory_swap_support{owner}, | |||
| @@ -409,7 +409,7 @@ class ComputingGraph : public std::enable_shared_from_this<ComputingGraph>, | |||
| int genetic_pool_size = 20; | |||
| int lb_memory = 0; | |||
| int num_worker = sys::get_cpu_count() / 2; | |||
| } sublinear_mem_cofig; | |||
| } sublinear_mem_config; | |||
| //! do not re-profile to select best impl algo when input shape | |||
| //! changes (use previous algo) | |||
| @@ -522,7 +522,7 @@ TEST(TestSublinearMemory, BadOpr) { | |||
| set_priority(z, 3); | |||
| graph->options().graph_opt_level = 0; | |||
| graph->options().enable_sublinear_memory_opt = 1; | |||
| graph->options().sublinear_mem_cofig.genetic_nr_iter = 50; | |||
| graph->options().sublinear_mem_config.genetic_nr_iter = 50; | |||
| auto func = graph->compile({{y, {}}, {z, {}}}); | |||
| auto&& results = static_cast<cg::ComputingGraphImpl*>(graph.get()) | |||
| ->seq_modifier_for_sublinear_memory().prev_min_bottleneck(); | |||