GitOrigin-RevId: 826788113a
tags/v1.10.0
@@ -557,7 +557,14 @@ void init_ops(py::module m) {
     m.def(
             "delete_rng_handle",
             [](size_t handle) {
+                if (mgb::imperative::python::interpreter_for_py->check_available()) {
+                    mgb::imperative::python::interpreter_for_py->sync();
+                }
                 mgb::CompNode::sync_all();
+                mgb::CompNode::foreach ([](mgb::CompNode cn) {
+                    auto err = cn.check_async_error();
+                    mgb_assert(!err, "%s", err->what());
+                });
                 py_task_q.wait_all_task_finish();
                 rng::delete_handle(handle);
             },
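Note on this hunk: before the RNG handle is destroyed, the lambda now drains the Python interpreter queue (when the interpreter is still available), synchronizes all comp nodes, and surfaces any pending asynchronous kernel error via check_async_error(). An in-flight kernel that still uses the handle therefore completes, and its error is reported here rather than crashing later with the handle already gone.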
@@ -11,13 +11,17 @@ import sys

 import pytest

-import megengine.functional
-import megengine.module
-from megengine import Parameter
-from megengine.core._imperative_rt.core2 import sync
+from megengine.core import _config as config
+from megengine.core import _trace_option as trace_option
+from megengine.core import get_option
+from megengine.core._imperative_rt.core2 import (
+    _get_amp_dtype_autocast,
+    _get_amp_high_prec_dtype,
+    _get_amp_low_prec_dtype,
+    _get_convert_inputs,
+)
+from megengine.core.tensor import amp
 from megengine.device import get_device_count
-from megengine.jit import trace as _trace
-from megengine.module import Linear, Module

 sys.path.append(os.path.join(os.path.dirname(__file__), "helpers"))
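The unused helper imports are dropped, replaced by the option/config/AMP accessors consumed by the run_around_tests fixture added in the next hunk.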
@@ -41,3 +45,58 @@ def skip_distributed(request):
             platform.system()
         )
     )
+
+
+@pytest.fixture(autouse=True)
+def run_around_tests():
+    env_vars1 = {
+        "symbolic_shape": trace_option.use_symbolic_shape(),
+        "async_level": get_option("async_level"),
+        "enable_drop": get_option("enable_drop"),
+        "max_recompute_time": get_option("max_recompute_time"),
+        "catch_worker_execption": get_option("catch_worker_execption"),
+        "enable_host_compute": get_option("enable_host_compute"),
+        # "record_computing_path": get_option("record_computing_path"),
+        "disable_memory_forwarding": get_option("disable_memory_forwarding"),
+        "enable_dtr_auto_drop": get_option("enable_dtr_auto_drop"),
+        "enable_dtr_sqrt_sampling": get_option("enable_dtr_sqrt_sampling"),
+        "dtr_eviction_threshold": get_option("dtr_eviction_threshold"),
+        "dtr_evictee_minimum_size": get_option("dtr_evictee_minimum_size"),
+        "benchmark_kernel": config.benchmark_kernel,
+        "deterministic_kernel": config.deterministic_kernel,
+        "compute_mode": config._compute_mode,
+        "conv_format": config._conv_format,
+        "amp_enabled": amp.enabled,
+        "convert_inputs": _get_convert_inputs(),
+        "amp_dtype_autocast": _get_amp_dtype_autocast(),
+        "amp_high_prec_dtype": _get_amp_high_prec_dtype(),
+        "amp_low_prec_dtype": _get_amp_low_prec_dtype(),
+    }
+    yield
+    env_vars2 = {
+        "symbolic_shape": trace_option.use_symbolic_shape(),
+        "async_level": get_option("async_level"),
+        "enable_drop": get_option("enable_drop"),
+        "max_recompute_time": get_option("max_recompute_time"),
+        "catch_worker_execption": get_option("catch_worker_execption"),
+        "enable_host_compute": get_option("enable_host_compute"),
+        # "record_computing_path": get_option("record_computing_path"),
+        "disable_memory_forwarding": get_option("disable_memory_forwarding"),
+        "enable_dtr_auto_drop": get_option("enable_dtr_auto_drop"),
+        "enable_dtr_sqrt_sampling": get_option("enable_dtr_sqrt_sampling"),
+        "dtr_eviction_threshold": get_option("dtr_eviction_threshold"),
+        "dtr_evictee_minimum_size": get_option("dtr_evictee_minimum_size"),
+        "benchmark_kernel": config.benchmark_kernel,
+        "deterministic_kernel": config.deterministic_kernel,
+        "compute_mode": config._compute_mode,
+        "conv_format": config._conv_format,
+        "amp_enabled": amp.enabled,
+        "convert_inputs": _get_convert_inputs(),
+        "amp_dtype_autocast": _get_amp_dtype_autocast(),
+        "amp_high_prec_dtype": _get_amp_high_prec_dtype(),
+        "amp_low_prec_dtype": _get_amp_low_prec_dtype(),
+    }
+    for key in env_vars1:
+        assert (
+            env_vars1[key] == env_vars2[key]
+        ), "{} have been changed after test".format(key)
@@ -37,7 +37,7 @@ if [[ "$TEST_PLAT" =~ "local" ]]; then
     PY_IGNORE_IMPORTMISMATCH=1 python3 -m pytest -s -v $test_dirs -m 'not isolated_distributed'
     if [[ "$TEST_PLAT" =~ "cuda" ]]; then
         echo "test GPU pytest now"
-        PY_IGNORE_IMPORTMISMATCH=1 python3 -m pytest -s -v $test_dirs -m 'isolated_distributed'
+        PY_IGNORE_IMPORTMISMATCH=1 python3 -m pytest -s -v $test_dirs -m 'isolated_distributed' --ignore=./integration/test_dtr.py
     fi
 else
     cd $(dirname "${BASH_SOURCE[0]}")/..
@@ -39,8 +39,6 @@ from megengine.random import uniform
     get_device_count("xpu") <= 2, reason="xpu counts need > 2",
 )
 def test_gaussian_op():
-    # FIXME: remove this sync
-    mge.core.set_option("async_level", 0)
     set_global_seed(1024)
     shape = (
         8,
@@ -516,4 +514,3 @@ def test_rng_empty_tensor(is_symbolic):
         np.testing.assert_equal(out.numpy().shape, (0,))
         if is_symbolic is None:
             break
-    mge.core.set_option("async_level", 2)
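These two removals retire the FIXME workaround: test_gaussian_op no longer forces async_level to 0, and the end of the file no longer has to restore it to 2, because delete_rng_handle now performs the necessary synchronization itself (first hunk above).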
@@ -227,6 +227,11 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
     TensorLayout dst_layout = TensorLayout({layout_a[0], layout_b[1]}, dst_dtype);
     dst_layout.init_contiguous_stride();

+    if (matmul.transposeA)
+        std::swap(layout_a.shape[0], layout_a.shape[1]);
+    if (matmul.transposeB)
+        std::swap(layout_b.shape[0], layout_b.shape[1]);
+
     DeviceTensorND out =
             BlobManager::inst()->alloc_workspace_with_defrag(cn, dst_layout);
     size_t sz = setup_algo<megdnn::MatrixMul>(
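When transposeA/transposeB is set, an operand's stored shape is the transpose of the shape the kernel multiplies with; swapping the two dimensions here makes the layouts handed to setup_algo — and hence algorithm selection and workspace sizing — match what the MatrixMul kernel will actually see.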
@@ -80,13 +80,12 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
             op_def.policy(), false);

     megdnn::Workspace dnn_wk;
-    if (wk_size != 0) {
-        auto wk = Blob::make(cn, wk_size);
-        dnn_wk.raw_ptr = wk->storage().get();
-        dnn_wk.size = wk_size;
+    if (wk_size) {
+        TensorLayout w_layout({wk_size}, dtype::Byte());
+        dnn_wk = caller.create_workspace(w_layout);
     }
-    dnn_opr->exec(inp_tensornd, out_devtensor.as_megdnn(), {});
+    dnn_opr->exec(inp_tensornd, out_devtensor.as_megdnn(), dnn_wk);

     return {Tensor::make(out_devtensor)};
 }
@@ -174,10 +174,9 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
     megdnn::Workspace dnn_wk;
     auto wk_size = dnn_op.op->get_workspace_in_bytes(src, layout);
-    if (wk_size != 0) {
-        auto wk = Blob::make(comp_node, wk_size);
-        dnn_wk.raw_ptr = wk->storage().get();
-        dnn_wk.size = wk_size;
+    if (wk_size) {
+        TensorLayout w_layout({wk_size}, dtype::Byte());
+        dnn_wk = dnn_op.create_workspace(w_layout);
     }

     DeviceTensorND out =
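Both workspace hunks replace the hand-rolled Blob allocation (raw pointer and size copied into a megdnn::Workspace) with the caller's create_workspace helper, which keeps the buffer alive for the duration of the call. The first of the two also fixes a real bug visible in the diff: the old code sized and allocated a workspace but then passed an empty {} to exec, so the kernel ran without the workspace it had requested.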
@@ -14,7 +14,7 @@
 #include "megbrain_build_config.h"

 #define MGE_MAJOR 1
-#define MGE_MINOR 8
+#define MGE_MINOR 9999
 #define MGE_PATCH 0

 // for rc version, could be like "rc1", "rc2", etc