GitOrigin-RevId: 7f6629ae1f
tags/v1.0.0-rc1
| @@ -697,8 +697,10 @@ endif() | |||
| if(MGE_WITH_PYTHON_MODULE) | |||
| if(MGE_BUILD_IMPERATIVE_RT) | |||
| add_subdirectory(imperative) | |||
| message("-- Enable imperative python wrapper runtime") | |||
| else() | |||
| add_subdirectory(python_module) | |||
| message("-- Enable legacy python wrapper runtime") | |||
| endif() | |||
| endif() | |||
| @@ -342,7 +342,11 @@ template <typename T> | |||
| struct SafeMultiplies; | |||
| template <typename T> | |||
| #if __cplusplus >= 201703L | |||
| struct _SafeMultipliesImplUnsigned { | |||
| #else | |||
| struct _SafeMultipliesImplUnsigned : public std::binary_function<T, T, T> { | |||
| #endif | |||
| static MEGDNN_CONSTEXPR size_t nbits = sizeof(T) * 8; | |||
| static size_t clz(unsigned x) { | |||
| @@ -70,8 +70,10 @@ if (MEG_WITH_ROCM) | |||
| target_link_libraries (megdnn_test ${MGE_ROCM_LIBS}) | |||
| endif () | |||
| if(APPLE OR ANDROID) | |||
| target_link_libraries(megdnn_test dl) | |||
| else() | |||
| target_link_libraries(megdnn_test dl rt) | |||
| if(UNIX) | |||
| if(APPLE OR ANDROID) | |||
| target_link_libraries(megdnn_test dl) | |||
| else() | |||
| target_link_libraries(megdnn_test dl rt) | |||
| endif() | |||
| endif() | |||
| @@ -89,7 +89,7 @@ public: | |||
| auto ptr = tensor.ptr<int>(); | |||
| for (size_t n = 0; n < size; ++n) { | |||
| std::set<int> used; | |||
| std::random_shuffle(seq.begin(), seq.end()); | |||
| COMPAT_RANDOM(seq.begin(), seq.end()); | |||
| for (size_t step = 0; step < stride; ++step) { | |||
| megdnn_assert(used.size() < m_size); | |||
| ptr[n * stride + step] = seq[step]; | |||
| @@ -75,7 +75,7 @@ Float16PeriodicalRNG::Float16PeriodicalRNG() : m_offset(0) { | |||
| i2f.i = static_cast<uint16_t>(x); | |||
| m_sequence.push_back(i2f.f); | |||
| } | |||
| std::random_shuffle(m_sequence.begin(), m_sequence.end()); | |||
| COMPAT_RANDOM(m_sequence.begin(), m_sequence.end()); | |||
| } | |||
| Float16PeriodicalRNG::Float16PeriodicalRNG(size_t range) : m_offset(0) { | |||
| @@ -99,7 +99,7 @@ Float16PeriodicalRNG::Float16PeriodicalRNG(size_t range) : m_offset(0) { | |||
| m_sequence.push_back(i2f.f); | |||
| } | |||
| std::random_shuffle(m_sequence.begin(), m_sequence.end()); | |||
| COMPAT_RANDOM(m_sequence.begin(), m_sequence.end()); | |||
| } | |||
| void Float16PeriodicalRNG::gen(const TensorND& tensor) { | |||
| @@ -19,6 +19,16 @@ | |||
| namespace megdnn { | |||
| namespace test { | |||
/*!
 * \brief shuffle the range [begin, end) in place
 *
 * std::random_shuffle is no longer available when compiling as C++17 or
 * newer, so in that case the macro expands to std::shuffle driven by a
 * std::default_random_engine; older language standards keep calling
 * std::random_shuffle.
 *
 * The engine is declared static on purpose: its state then carries over from
 * one shuffle to the next at the same expansion site. A freshly constructed,
 * default-seeded engine would replay exactly the same permutation on every
 * call, which defeats callers that shuffle repeatedly (e.g. "shuffle until
 * the permutation is non-trivial" loops). Access to the engine is not
 * synchronized.
 *
 * The expansion is a complete statement; callers may still append ';'.
 */
#if __cplusplus >= 201703L
#define COMPAT_RANDOM(begin, end)                     \
    {                                                 \
        static std::default_random_engine rng_engine; \
        std::shuffle(begin, end, rng_engine);         \
    }
#else
#define COMPAT_RANDOM(begin, end) std::random_shuffle(begin, end);
#endif
| class RNG { | |||
| protected: | |||
| class RNGxorshf; | |||
| @@ -24,15 +24,16 @@ class ArgmxxRNG final: public RNG { | |||
| void gen(const TensorND &tensor) override { | |||
| auto offset = tensor.layout.span().low_elem; | |||
| auto nr_elems = tensor.layout.span().dist_elem(); | |||
| #define cb(DType) \ | |||
| if (tensor.layout.dtype == DType()) { \ | |||
| using ctype = typename DTypeTrait<DType>::ctype; \ | |||
| auto ptr = tensor.ptr<ctype>(); \ | |||
| for (size_t i = 0; i < nr_elems; ++i) { \ | |||
| ptr[offset+i] = i; \ | |||
| } \ | |||
| std::random_shuffle(ptr + offset, ptr + offset + nr_elems); \ | |||
| } | |||
| #define cb(DType) \ | |||
| if (tensor.layout.dtype == DType()) { \ | |||
| using ctype = typename DTypeTrait<DType>::ctype; \ | |||
| auto ptr = tensor.ptr<ctype>(); \ | |||
| for (size_t i = 0; i < nr_elems; ++i) { \ | |||
| ptr[offset + i] = i; \ | |||
| } \ | |||
| COMPAT_RANDOM(ptr + offset, ptr + offset + nr_elems); \ | |||
| } | |||
| MEGDNN_FOREACH_COMPUTING_DTYPE(cb); | |||
| #undef cb | |||
| } | |||
| @@ -32,7 +32,7 @@ class ArgsortRNG final : public RNG { | |||
| } else { | |||
| for (int i = 0; i < n; ++i) | |||
| ptr[i] = static_cast<T>(i - n / 2); | |||
| std::random_shuffle(ptr, ptr + n); | |||
| COMPAT_RANDOM(ptr, ptr + n); | |||
| } | |||
| } | |||
| @@ -86,7 +86,7 @@ void run_backward_test(Handle* handle, DType dtype) { | |||
| for (size_t j = 0; j < n; ++j) { | |||
| ptr[j] = j; | |||
| } | |||
| std::random_shuffle(ptr, ptr + n); | |||
| COMPAT_RANDOM(ptr, ptr + n); | |||
| ptr += n; | |||
| } | |||
| } | |||
| @@ -361,9 +361,8 @@ TEST_F(CUDA, BENCHMARK_RELAYOUT_7) { | |||
| for (size_t r = 0; r < _dim.size(); r++) | |||
| permutation[r] = r; | |||
| for (int nsample = 0; nsample < 50; nsample++) { | |||
| std::random_shuffle(_dim.begin(), _dim.end()); | |||
| std::random_shuffle(permutation.begin(), permutation.end()); | |||
| COMPAT_RANDOM(_dim.begin(), _dim.end()); | |||
| COMPAT_RANDOM(permutation.begin(), permutation.end()); | |||
| if (!isTrivial(permutation)) { | |||
| run({{_dim[0], _dim[1], _dim[2], _dim[3], _dim[4], _dim[5], | |||
| _dim[6]}, | |||
| @@ -451,9 +450,10 @@ TEST_F(CUDA, BENCHMARK_RELAYOUT_5) { | |||
| printf("vol %d cur_ratio %lf | %lf\n", vol, cur_ratio, vol_re); | |||
| // printVec(dim); | |||
| std::random_shuffle(dim.begin(), dim.end()); | |||
| COMPAT_RANDOM(dim.begin(), dim.end()); | |||
| while (isTrivial(permutation)) { | |||
| std::random_shuffle(permutation.begin(), permutation.end()); | |||
| COMPAT_RANDOM(permutation.begin(), permutation.end()); | |||
| } | |||
| run({{dim[0], dim[1], dim[2], dim[3], dim[4]}, dtype::Int32()}, | |||
| @@ -603,8 +603,9 @@ TEST_F(CUDA, BENCHMARK_LAST_CONTIG_ALIGN_TEST) { | |||
| for (size_t r = 0; r < _dim.size(); r++) | |||
| permutation[r] = r; | |||
| for (int nsample = 0; nsample < 20; nsample++) { | |||
| std::random_shuffle(_dim.begin(), _dim.end() - 1); | |||
| std::random_shuffle(permutation.begin(), permutation.end() - 1); | |||
| COMPAT_RANDOM(_dim.begin(), _dim.end() - 1); | |||
| COMPAT_RANDOM(permutation.begin(), permutation.end() - 1); | |||
| if (nsample < 5) | |||
| _dim[5] = (u.gen_single_val() / 4 + 1) * 4; | |||
| @@ -24,7 +24,7 @@ using namespace test; | |||
| TEST_F(CUDA, SLEEP) { | |||
| auto opr = this->handle_cuda()->create_operator<Sleep>(); | |||
| auto opr = this->handle_cuda()->create_operator<megdnn::SleepForward>(); | |||
| auto run = [&](float time) -> double { | |||
| opr->param() = {time}; | |||
| @@ -24,16 +24,17 @@ class ArgmxxRNG final: public RNG { | |||
| void gen(const TensorND &tensor) override { | |||
| auto offset = tensor.layout.span().low_elem; | |||
| auto nr_elems = tensor.layout.span().dist_elem(); | |||
| #define cb(DType) \ | |||
| if (tensor.layout.dtype == DType()) { \ | |||
| using ctype = typename DTypeTrait<DType>::ctype; \ | |||
| auto ptr = tensor.ptr<ctype>(); \ | |||
| for (size_t i = 0; i < nr_elems; ++i) { \ | |||
| ptr[offset+i] = i; \ | |||
| } \ | |||
| std::random_shuffle(ptr + offset, ptr + offset + nr_elems); \ | |||
| return; \ | |||
| } | |||
| #define cb(DType) \ | |||
| if (tensor.layout.dtype == DType()) { \ | |||
| using ctype = typename DTypeTrait<DType>::ctype; \ | |||
| auto ptr = tensor.ptr<ctype>(); \ | |||
| for (size_t i = 0; i < nr_elems; ++i) { \ | |||
| ptr[offset + i] = i; \ | |||
| } \ | |||
| COMPAT_RANDOM(ptr + offset, ptr + offset + nr_elems); \ | |||
| return; \ | |||
| } | |||
| MEGDNN_FOREACH_COMPUTING_DTYPE_FLOAT(cb); | |||
| #undef cb | |||
| megdnn_throw(megdnn_mangle(ssprintf("Unsupported DType: %s", | |||
| @@ -76,7 +76,11 @@ add_custom_target(_version_ld SOURCES ${VERSION_SCRIPT}) | |||
| add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/pybind11 ${PROJECT_BINARY_DIR}/third_party/pybind11) | |||
| pybind11_add_module(${MODULE_NAME} NO_EXTRAS ${SRCS}) | |||
| target_link_libraries(${MODULE_NAME} PRIVATE gen_op_def megbrain megdnn -Wl,--version-script=${VERSION_SCRIPT}) | |||
| if (APPLE OR MSVC OR WIN32) | |||
| target_link_libraries(${MODULE_NAME} PRIVATE gen_op_def megbrain megdnn) | |||
| else() | |||
| target_link_libraries(${MODULE_NAME} PRIVATE gen_op_def megbrain megdnn -Wl,--version-script=${VERSION_SCRIPT}) | |||
| endif() | |||
| if (MGE_WITH_DISTRIBUTED) | |||
| message("Imperative configured to link megray") | |||
| target_link_libraries(${MODULE_NAME} PRIVATE megray) | |||
| @@ -91,6 +95,10 @@ set_target_properties(${MODULE_NAME} PROPERTIES | |||
| SUFFIX ${CMAKE_SHARED_LIBRARY_SUFFIX} | |||
| LIBRARY_OUTPUT_DIRECTORY ${MEGENGINE_DIR}/${PACKAGE_NAME}/core | |||
| ) | |||
| if (APPLE OR MSVC OR WIN32) | |||
| message("-- overwriting SUFFIX at macos and windows before config by set_target_properties") | |||
| pybind11_extension(${MODULE_NAME}) | |||
| endif() | |||
| add_dependencies(${MODULE_NAME} gen_opr_py _version_ld) | |||
| if(MGE_WITH_TEST AND MGE_ENABLE_RTTI) | |||
| @@ -8,6 +8,67 @@ | |||
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| import os | |||
| import sys | |||
| import platform | |||
| import ctypes | |||
if sys.platform == "win32":
    # On Windows, make the DLLs bundled under <package>/core/lib loadable
    # before the native extension module is imported below.
    lib_path = os.path.join(os.path.dirname(__file__), "core/lib")
    dll_paths = list(filter(os.path.exists, [lib_path,]))
    assert len(dll_paths) > 0

    kernel32 = ctypes.WinDLL("kernel32.dll", use_last_error=True)
    # AddDllDirectory is missing on some Windows installations; without it
    # the LoadLibraryExW search flags used below cannot be relied upon.
    has_load_library_attr = hasattr(kernel32, "AddDllDirectory")
    # 0x0001 is SEM_FAILCRITICALERRORS (Win32 documentation).
    old_error_mode = kernel32.SetErrorMode(0x0001)

    # The error mode is a process-wide setting, so restore it even when one
    # of the steps below raises.
    try:
        # Return values are handles/pointers; with c_void_p a NULL result is
        # mapped to None, which is what the failure checks below test for.
        kernel32.LoadLibraryW.restype = ctypes.c_void_p
        if has_load_library_attr:
            kernel32.AddDllDirectory.restype = ctypes.c_void_p
            kernel32.LoadLibraryExW.restype = ctypes.c_void_p

        # Step 1: register the bundled DLL directories with the loader.
        for dll_path in dll_paths:
            if sys.version_info >= (3, 8):
                os.add_dll_directory(dll_path)
            elif has_load_library_attr:
                res = kernel32.AddDllDirectory(dll_path)
                if res is None:
                    err = ctypes.WinError(ctypes.get_last_error())
                    err.strerror += ' Error adding "{}" to the DLL search PATH.'.format(
                        dll_path
                    )
                    raise err
            else:
                print("WARN: python or OS env have some issue, may load DLL failed!!!")

        import glob

        # Step 2: eagerly load every bundled DLL.
        dlls = glob.glob(os.path.join(lib_path, "*.dll"))
        path_patched = False
        for dll in dlls:
            is_loaded = False
            if has_load_library_attr:
                # 0x00001100 == LOAD_LIBRARY_SEARCH_DEFAULT_DIRS |
                # LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR (Win32 documentation).
                res = kernel32.LoadLibraryExW(dll, None, 0x00001100)
                last_error = ctypes.get_last_error()
                # Error 126 is ERROR_MOD_NOT_FOUND: not fatal here, the
                # PATH-based fallback below gets a chance to resolve it.
                if res is None and last_error != 126:
                    err = ctypes.WinError(last_error)
                    err.strerror += ' Error loading "{}" or one of its dependencies.'.format(
                        dll
                    )
                    raise err
                elif res is not None:
                    is_loaded = True
            if not is_loaded:
                # Fallback: prepend the DLL directories to PATH (only once)
                # and retry with the plain LoadLibraryW search order.
                if not path_patched:
                    os.environ["PATH"] = ";".join(dll_paths + [os.environ["PATH"]])
                    path_patched = True
                res = kernel32.LoadLibraryW(dll)
                if res is None:
                    err = ctypes.WinError(ctypes.get_last_error())
                    err.strerror += ' Error loading "{}" or one of its dependencies.'.format(
                        dll
                    )
                    raise err
    finally:
        kernel32.SetErrorMode(old_error_mode)
| from .core._imperative_rt.utils import _set_fork_exec_path_for_timed_func | |||
| from .device import * | |||
| @@ -6,10 +6,14 @@ | |||
| # Unless required by applicable law or agreed to in writing, | |||
| # software distributed under the License is distributed on an | |||
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| import resource | |||
| import platform | |||
| import sys | |||
| import threading | |||
| # The resource module is not available on Windows, so only import it elsewhere | |||
| if platform.system() != "Windows": | |||
| import resource | |||
| class AlternativeRecursionLimit: | |||
| r"""A reentrant context manager for setting global recursion limits. | |||
| @@ -28,16 +32,24 @@ class AlternativeRecursionLimit: | |||
| with self.lock: | |||
| if self.count == 0: | |||
| self.orig_py_limit = sys.getrecursionlimit() | |||
| if platform.system() != "Windows": | |||
| ( | |||
| self.orig_rlim_stack_soft, | |||
| self.orig_rlim_stack_hard, | |||
| ) = resource.getrlimit(resource.RLIMIT_STACK) | |||
| resource.setrlimit( | |||
| resource.RLIMIT_STACK, | |||
| (self.orig_rlim_stack_hard, self.orig_rlim_stack_hard), | |||
| ) | |||
| # increase recursion limit | |||
| sys.setrecursionlimit(self.new_py_limit) | |||
| # FIXME: https://bugs.python.org/issue34602, python3 release version | |||
| # on Macos always have this issue, not all user install python3 from src | |||
| try: | |||
| resource.setrlimit( | |||
| resource.RLIMIT_STACK, | |||
| (self.orig_rlim_stack_hard, self.orig_rlim_stack_hard), | |||
| ) | |||
| except ValueError as exc: | |||
| if platform.system() != "Darwin": | |||
| raise exc | |||
| # increase recursion limit | |||
| sys.setrecursionlimit(self.new_py_limit) | |||
| self.count += 1 | |||
| def __exit__(self, type, value, traceback): | |||
| @@ -45,10 +57,16 @@ class AlternativeRecursionLimit: | |||
| self.count -= 1 | |||
| if self.count == 0: | |||
| sys.setrecursionlimit(self.orig_py_limit) | |||
| resource.setrlimit( | |||
| resource.RLIMIT_STACK, | |||
| (self.orig_rlim_stack_soft, self.orig_rlim_stack_hard), | |||
| ) | |||
| if platform.system() != "Windows": | |||
| try: | |||
| resource.setrlimit( | |||
| resource.RLIMIT_STACK, | |||
| (self.orig_rlim_stack_soft, self.orig_rlim_stack_hard), | |||
| ) | |||
| except ValueError as exc: | |||
| if platform.system() != "Darwin": | |||
| raise exc | |||
| _max_recursion_limit_context_manager = AlternativeRecursionLimit(2 ** 31 - 1) | |||
| @@ -9,6 +9,7 @@ | |||
| import os | |||
| import re | |||
| import pathlib | |||
| import platform | |||
| from distutils.file_util import copy_file | |||
| from setuptools import setup, find_packages, Extension | |||
| from setuptools.command.build_ext import build_ext as _build_ext | |||
| @@ -29,7 +30,10 @@ class build_ext(_build_ext): | |||
| extdir.parent.mkdir(parents=True, exist_ok=True) | |||
| modpath = self.get_ext_fullname(ext.name).split('.') | |||
| modpath[-1] += '.so' | |||
| if platform.system() == 'Windows': | |||
| modpath[-1] += '.pyd' | |||
| else: | |||
| modpath[-1] += '.so' | |||
| modpath = str(pathlib.Path(*modpath).resolve()) | |||
| copy_file(modpath, fullpath, verbose=self.verbose, dry_run=self.dry_run) | |||
| @@ -47,6 +51,14 @@ if local_version: | |||
| __version__ = '{}+{}'.format(__version__, local_version) | |||
| packages = find_packages(exclude=['test']) | |||
| package_data = [ | |||
| str(f.relative_to('megengine')) | |||
| for f in pathlib.Path('megengine', 'core', 'include').glob('**/*') | |||
| ] | |||
| package_data += [ | |||
| str(f.relative_to('megengine')) | |||
| for f in pathlib.Path('megengine', 'core', 'lib').glob('**/*') | |||
| ] | |||
| with open('requires.txt') as f: | |||
| requires = f.read().splitlines() | |||
| @@ -63,6 +75,9 @@ setup_kwargs = dict( | |||
| author='Megvii Engine Team', | |||
| author_email=email, | |||
| packages=packages, | |||
| package_data={ | |||
| 'megengine': package_data, | |||
| }, | |||
| ext_modules=[PrecompiledExtesion('megengine.core._imperative_rt')], | |||
| install_requires=requires, | |||
| extras_require={ | |||
| @@ -9,15 +9,6 @@ | |||
| #include "megbrain/utils/mempool.h" | |||
| #include "./numpy_dtypes.h" | |||
| /* | |||
| * demangle typeid, see | |||
| * http://stackoverflow.com/questions/281818/unmangling-the-result-of-stdtype-infoname | |||
| */ | |||
| #ifdef __GNUG__ | |||
| #include <cstdlib> | |||
| #include <memory> | |||
| #include <cxxabi.h> | |||
| namespace py = pybind11; | |||
| PyTaskDipatcher py_task_q = {}; | |||
| @@ -34,10 +25,18 @@ py::module rel_import(py::str name, py::module m, int level) { | |||
| return import(name, m.attr("__dict__"), py::arg("level")=level); | |||
| } | |||
| /* | |||
| * demangle typeid, see | |||
| * http://stackoverflow.com/questions/281818/unmangling-the-result-of-stdtype-infoname | |||
| */ | |||
| #ifdef __GNUG__ | |||
| #include <cxxabi.h> | |||
| #include <cstdlib> | |||
| #include <memory> | |||
| namespace { | |||
| std::string demangle_typeid(const char* name) { | |||
| int status = -4; // some arbitrary value to eliminate the compiler warning | |||
| // enable c++11 by passing the flag -std=c++11 to g++ | |||
| @@ -48,7 +47,7 @@ std::string demangle_typeid(const char* name) { | |||
| return (status==0) ? res.get() : name ; | |||
| } | |||
| } | |||
| } // namespace | |||
| #else | |||
| namespace { | |||
| @@ -1,4 +1,8 @@ | |||
| #include "utils.h" | |||
| #ifdef WIN32 | |||
| #include <stdio.h> | |||
| #include <windows.h> | |||
| #endif | |||
| #include <pybind11/operators.h> | |||
| #include <atomic> | |||
| @@ -8,6 +8,7 @@ | |||
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| import multiprocessing as mp | |||
| import os | |||
| import platform | |||
| import re | |||
| import subprocess | |||
| import sys | |||
| @@ -196,6 +197,9 @@ def run_test( | |||
| @pytest.mark.isolated_distributed | |||
| @pytest.mark.skipif( | |||
| platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM" | |||
| ) | |||
| def test_dp_correctness(): | |||
| model_name = "mnist_model_with_test.mge" | |||
| model_path = os.path.join(os.path.dirname(__file__), model_name) | |||
| @@ -35,7 +35,7 @@ from megengine.functional.distributed import ( | |||
| platform.system() == "Darwin", reason="do not imp GPU mode at macos now" | |||
| ) | |||
| @pytest.mark.skipif( | |||
| platform.system() == "Windows", reason="do not imp GPU mode at Windows now" | |||
| platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM" | |||
| ) | |||
| @pytest.mark.isolated_distributed | |||
| def test_reduce_sum(): | |||
| @@ -77,7 +77,7 @@ def test_reduce_sum(): | |||
| platform.system() == "Darwin", reason="do not imp GPU mode at macos now" | |||
| ) | |||
| @pytest.mark.skipif( | |||
| platform.system() == "Windows", reason="do not imp GPU mode at Windows now" | |||
| platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM" | |||
| ) | |||
| @pytest.mark.isolated_distributed | |||
| def test_broadcast(): | |||
| @@ -115,7 +115,7 @@ def test_broadcast(): | |||
| platform.system() == "Darwin", reason="do not imp GPU mode at macos now" | |||
| ) | |||
| @pytest.mark.skipif( | |||
| platform.system() == "Windows", reason="do not imp GPU mode at Windows now" | |||
| platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM" | |||
| ) | |||
| @pytest.mark.isolated_distributed | |||
| def test_all_gather(): | |||
| @@ -154,7 +154,7 @@ def test_all_gather(): | |||
| platform.system() == "Darwin", reason="do not imp GPU mode at macos now" | |||
| ) | |||
| @pytest.mark.skipif( | |||
| platform.system() == "Windows", reason="do not imp GPU mode at Windows now" | |||
| platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM" | |||
| ) | |||
| @pytest.mark.isolated_distributed | |||
| def test_reduce_scatter_sum(): | |||
| @@ -193,7 +193,7 @@ def test_reduce_scatter_sum(): | |||
| platform.system() == "Darwin", reason="do not imp GPU mode at macos now" | |||
| ) | |||
| @pytest.mark.skipif( | |||
| platform.system() == "Windows", reason="do not imp GPU mode at Windows now" | |||
| platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM" | |||
| ) | |||
| @pytest.mark.isolated_distributed | |||
| def test_all_reduce_sum(): | |||
| @@ -232,7 +232,7 @@ def test_all_reduce_sum(): | |||
| platform.system() == "Darwin", reason="do not imp GPU mode at macos now" | |||
| ) | |||
| @pytest.mark.skipif( | |||
| platform.system() == "Windows", reason="do not imp GPU mode at Windows now" | |||
| platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM" | |||
| ) | |||
| @pytest.mark.isolated_distributed | |||
| def test_all_reduce_max(): | |||
| @@ -271,7 +271,7 @@ def test_all_reduce_max(): | |||
| platform.system() == "Darwin", reason="do not imp GPU mode at macos now" | |||
| ) | |||
| @pytest.mark.skipif( | |||
| platform.system() == "Windows", reason="do not imp GPU mode at Windows now" | |||
| platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM" | |||
| ) | |||
| @pytest.mark.isolated_distributed | |||
| def test_all_reduce_min(): | |||
| @@ -310,7 +310,7 @@ def test_all_reduce_min(): | |||
| platform.system() == "Darwin", reason="do not imp GPU mode at macos now" | |||
| ) | |||
| @pytest.mark.skipif( | |||
| platform.system() == "Windows", reason="do not imp GPU mode at Windows now" | |||
| platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM" | |||
| ) | |||
| @pytest.mark.isolated_distributed | |||
| def test_gather(): | |||
| @@ -352,7 +352,7 @@ def test_gather(): | |||
| platform.system() == "Darwin", reason="do not imp GPU mode at macos now" | |||
| ) | |||
| @pytest.mark.skipif( | |||
| platform.system() == "Windows", reason="do not imp GPU mode at Windows now" | |||
| platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM" | |||
| ) | |||
| @pytest.mark.isolated_distributed | |||
| def test_scatter(): | |||
| @@ -390,7 +390,7 @@ def test_scatter(): | |||
| platform.system() == "Darwin", reason="do not imp GPU mode at macos now" | |||
| ) | |||
| @pytest.mark.skipif( | |||
| platform.system() == "Windows", reason="do not imp GPU mode at Windows now" | |||
| platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM" | |||
| ) | |||
| @pytest.mark.isolated_distributed | |||
| def test_all_to_all(): | |||
| @@ -430,7 +430,7 @@ def test_all_to_all(): | |||
| platform.system() == "Darwin", reason="do not imp GPU mode at macos now" | |||
| ) | |||
| @pytest.mark.skipif( | |||
| platform.system() == "Windows", reason="do not imp GPU mode at Windows now" | |||
| platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM" | |||
| ) | |||
| @pytest.mark.isolated_distributed | |||
| def test_io_remote(): | |||
| @@ -6,6 +6,7 @@ | |||
| # Unless required by applicable law or agreed to in writing, | |||
| # software distributed under the License is distributed on an | |||
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| import platform | |||
| import weakref | |||
| import numpy as np | |||
| @@ -51,6 +52,9 @@ def save_to(self, name="grad"): | |||
| @pytest.mark.isolated_distributed | |||
| @pytest.mark.skipif( | |||
| platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM" | |||
| ) | |||
| def test_dist_grad(): | |||
| world_size = 2 | |||
| x_np = np.random.rand(10).astype("float32") | |||
| @@ -9,7 +9,17 @@ | |||
| #include "megbrain/imperative/profiler.h" | |||
| #if defined(_MSC_VER) || defined(WIN32) | |||
| #include <windows.h> | |||
| #define getpid GetCurrentProcessId | |||
| #else | |||
| #include <sys/unistd.h> | |||
| #endif | |||
| #if defined(__APPLE__) || defined(__MACOSX) | |||
| #include <unistd.h> | |||
| #endif | |||
| #include <variant> | |||
| #include "megbrain/imperative/ops/opr_attr.h" | |||
| @@ -16,6 +16,10 @@ | |||
| #include "megbrain/imperative/ops/opr_attr.h" | |||
| #include "megbrain/imperative/ops/backward_graph.h" | |||
| #if __cplusplus >= 201703L | |||
| #include <optional> | |||
| #endif | |||
| namespace mgb { | |||
| namespace imperative { | |||
| @@ -38,8 +38,11 @@ if(CXX_SUPPORT_WCLASS_MEMACCESS) | |||
| endif() | |||
| if(UNIX) | |||
| target_link_libraries(imperative_test dl rt) | |||
| if(APPLE OR ANDROID) | |||
| target_link_libraries(imperative_test dl) | |||
| else() | |||
| target_link_libraries(imperative_test dl rt) | |||
| endif() | |||
| endif() | |||
| install(TARGETS imperative_test RUNTIME DESTINATION test) | |||
| @@ -81,7 +81,10 @@ else() | |||
| target_link_libraries(mgb megbrain megdnn -Wl,--version-script=${VERSION_SCRIPT}) | |||
| endif() | |||
| target_include_directories(mgb PRIVATE ${PYTHON_INCLUDE_DIRS} src/cpp ${CMAKE_CURRENT_BINARY_DIR} ${NUMPY_INCLUDE_DIR}) | |||
| target_link_libraries(mgb ${PYTHON_LIBRARIES}) | |||
| # only windows need link PYTHON_LIBRARIES | |||
| if(MSVC OR WIN32) | |||
| target_link_libraries(mgb ${PYTHON_LIBRARIES}) | |||
| endif() | |||
| if (MGE_WITH_DISTRIBUTED) | |||
| target_link_libraries(mgb megray) | |||
| @@ -30,11 +30,17 @@ | |||
| 4e: add C:\Program Files\NVIDIA GPU Computing Toolkit\cudnn-10.1-windows10-x64-v7.6.5.32\cuda\bin to system Path env | |||
| 4f: add C:\Program Files\NVIDIA GPU Computing Toolkit\TensorRT-6.0.1.5\lib Path | |||
| if u do not do 4d/4e/4f, CUDA runtime can not find dll | |||
| 5: install python3 (DFT 3.8.3) to /c/Users/${USER}/mge_whl_python_env/3.8.3 and | |||
| put it to PATH env and run python3 -m pip install numpy (if u want to build with training mode or build python whl) | |||
| 6: install swig from install gui (if u want to build with training mode or build python whl) | |||
| a: download swig: https://nchc.dl.sourceforge.net/project/swig/swigwin/swigwin-4.0.2/swigwin-4.0.2.zip | |||
| b: install swig to /c/Users/${USER}/swigwin-4.0.2 | |||
| c: apply scripts/whl/windows/fix-ptr-define-issue.patch to c/Users/${USER}/swigwin-4.0.2 | |||
| ``` | |||
| ### linux host build | |||
| ``` | |||
| 1: cmake, which version > 3.14.4 | |||
| 2: gcc/g++, which version > 6 | |||
| 2: gcc/g++, which version > 6 (gcc/g++ >= 7 is required if you need to build with training mode) | |||
| 3: install build-essential git git-lfs gfortran libgfortran-6-dev autoconf gnupg flex bison gperf curl | |||
| 4: zlib1g-dev gcc-multilib g++-multilib lib32ncurses5-dev libxml2-utils xsltproc unzip libtool: | |||
| 5: librdmacm-dev rdmacm-utils python3-dev swig python3-numpy texinfo | |||
| @@ -47,6 +53,7 @@ | |||
| 3: brew install python python3 swig coreutils | |||
| 4: install at least xcode command line tool: https://developer.apple.com/xcode/ | |||
| 5: about cuda: we do not support CUDA on macos | |||
| 6: python3 -m pip install numpy (if u want to build with training mode or build python whl) | |||
| ``` | |||
| ### cross build for arm-android | |||
| now we support windows/linux/macos cross build to arm-android | |||
| @@ -9,6 +9,7 @@ function usage() { | |||
| echo "-t : Build with training mode, default inference only" | |||
| echo "-m : Build with m32 mode(only for windows build), default m64" | |||
| echo "-r : remove old build dir before make, default off" | |||
| echo "-n : enable new python runtime(valid when training mode with -t, default is legacy runtime)" | |||
| echo "-h : show usage" | |||
| echo "append other cmake config by export EXTRA_CMAKE_ARGS=..." | |||
| echo "example: $0 -d" | |||
| @@ -22,9 +23,10 @@ MGE_WINDOWS_BUILD_ARCH=x64 | |||
| MGE_WINDOWS_BUILD_MARCH=m64 | |||
| MGE_ARCH=x86_64 | |||
| REMOVE_OLD_BUILD=false | |||
| MGE_BUILD_IMPERATIVE_RT=OFF | |||
| echo "EXTRA_CMAKE_ARGS: ${EXTRA_CMAKE_ARGS}" | |||
| while getopts "rhdctm" arg | |||
| while getopts "rhdctmn" arg | |||
| do | |||
| case $arg in | |||
| d) | |||
| @@ -48,11 +50,15 @@ do | |||
| REMOVE_OLD_BUILD=true | |||
| ;; | |||
| m) | |||
| echo "build for m32(only use for windows)" | |||
| echo "build for m32(only valid use for windows)" | |||
| MGE_WINDOWS_BUILD_ARCH=x86 | |||
| MGE_WINDOWS_BUILD_MARCH=m32 | |||
| MGE_ARCH=i386 | |||
| ;; | |||
| n) | |||
| echo "Enable imperative python wrapper runtime" | |||
| MGE_BUILD_IMPERATIVE_RT=ON | |||
| ;; | |||
| ?) | |||
| echo "unkonw argument" | |||
| usage | |||
| @@ -101,6 +107,7 @@ function cmake_build() { | |||
| cmake \ | |||
| -DCMAKE_BUILD_TYPE=$BUILD_TYPE \ | |||
| -DMGE_INFERENCE_ONLY=$MGE_INFERENCE_ONLY \ | |||
| -DMGE_BUILD_IMPERATIVE_RT=${MGE_BUILD_IMPERATIVE_RT} \ | |||
| -DMGE_WITH_CUDA=$MGE_WITH_CUDA \ | |||
| -DCMAKE_INSTALL_PREFIX=$INSTALL_DIR \ | |||
| ${EXTRA_CMAKE_ARGS} \ | |||
| @@ -112,7 +119,7 @@ function cmake_build() { | |||
| function windows_env_err() { | |||
| echo "check windows env failed!!" | |||
| echo "please install LLVM/clang-cl/cmake/python at Visual Studio Extensions" | |||
| echo "please install env refs for: scripts/cmake-build/BUILD_README.md" | |||
| exit -1 | |||
| } | |||
| @@ -178,6 +185,25 @@ function prepare_env_for_windows_build() { | |||
| export CPATH=$CPATH:$NIVIDA_INSTALL_PRE/${TRT_V}/include:$NIVIDA_INSTALL_PRE/CUDA/${CUDA_V}/include:$NIVIDA_INSTALL_PRE/CUDA/${CUDA_V}/include/nvtx3:$PC_CUDNN_INCLUDE_DIRS | |||
| export LIBRARY_PATH=$LIBRARY_PATH:$LD_LIBRARY_PATH | |||
| export INCLUDE=$INCLUDE:$CPATH | |||
# The python version is normally configured by the whl build script or the
# CI script; when host_build.sh is called on its own, fall back to a default
# (DFT) python3 so the build can still succeed.
if [[ -z ${ALREADY_CONFIG_PYTHON_VER} ]]
then
    echo "config a default python3"
    DFT_PYTHON_BIN=/c/Users/${USER}/mge_whl_python_env/3.8.3
    if [ ! -f "${DFT_PYTHON_BIN}/python3.exe" ]; then
        echo "ERR: can not find ${DFT_PYTHON_BIN}/python3.exe , Invalid env"
        windows_env_err
    else
        echo "put python3 to env..."
        export PATH=${DFT_PYTHON_BIN}:$PATH
        which python3
    fi
fi

echo "export swig pwd to PATH"
# Use a single ':' separator: an empty PATH element ("::") is interpreted as
# the current working directory, which must not end up on the search path.
export PATH=/c/Users/${USER}/swigwin-4.0.2:$PATH
| } | |||
| WINDOWS_BUILD_TARGET="Ninja all > build.log" | |||
| @@ -218,6 +244,7 @@ function cmake_build_windows() { | |||
| vcvarsall.bat $MGE_WINDOWS_BUILD_ARCH && cmake -G "Ninja" \ | |||
| -DMGE_ARCH=$MGE_ARCH \ | |||
| -DMGE_INFERENCE_ONLY=$MGE_INFERENCE_ONLY \ | |||
| -DMGE_BUILD_IMPERATIVE_RT=${MGE_BUILD_IMPERATIVE_RT} \ | |||
| -DMGE_WITH_CUDA=$MGE_WITH_CUDA \ | |||
| -DCMAKE_BUILD_TYPE=$BUILD_TYPE \ | |||
| -DCMAKE_INSTALL_PREFIX:PATH=$INSTALL_DIR \ | |||
| @@ -230,8 +257,18 @@ function cmake_build_windows() { | |||
| ${WINDOWS_BUILD_TARGET}" | |||
| } | |||
| if [ ${MGE_BUILD_IMPERATIVE_RT} = "ON" ] && [ ${MGE_INFERENCE_ONLY} = "ON" ]; then | |||
| echo "ERR: MGE_BUILD_IMPERATIVE_RT(-n) only valid when enable training mode(-t)" | |||
| echo "pls remove -n or add -t" | |||
| exit -1 | |||
| fi | |||
| if [[ $OS =~ "NT" ]]; then | |||
| if [ ${MGE_ARCH} = "i386" ] && [ ${MGE_INFERENCE_ONLY} = "OFF" ]; then | |||
| echo "ERR: training mode(-t) only support 64 bit mode" | |||
| echo "pls remove -t or remove -m" | |||
| exit -1 | |||
| fi | |||
| config_windows_build_target | |||
| cmake_build_windows $MGE_WITH_CUDA $MGE_INFERENCE_ONLY $BUILD_TYPE | |||
| else | |||
| @@ -53,10 +53,6 @@ | |||
| d0: /c/Users/${USER}/mge_whl_python_env/3.8.3/python3.exe -m pip install --upgrade pip | |||
| d1: /c/Users/${USER}/mge_whl_python_env/3.8.3/python3.exe -m pip install -r python_module/requires-test.txt | |||
| d2: /c/Users/${USER}/mge_whl_python_env/3.8.3/python3.exe -m pip install numpy wheel requests tqdm tabulate | |||
| 5: install swig from install gui | |||
| a: download swig: https://nchc.dl.sourceforge.net/project/swig/swigwin/swigwin-4.0.2/swigwin-4.0.2.zip | |||
| b: install swig to /c/Users/${USER}/swigwin-4.0.2 | |||
| c: apply scripts/whl/windows/fix-ptr-define-issue.patch to c/Users/${USER}/swigwin-4.0.2 | |||
| ``` | |||
| # how to build | |||
| @@ -90,6 +86,11 @@ | |||
| ``` | |||
| ALL_PYTHON=3.5.9 ./scripts/whl/macos/macos_build_whl.sh | |||
| ``` | |||
| If you want to build with imperative rt, set env BUILD_IMPERATIVE="ON", eg: | |||
| ``` | |||
| ALL_PYTHON=3.5.9 BUILD_IMPERATIVE="ON" ./scripts/whl/macos/macos_build_whl.sh | |||
| ``` | |||
| ## build for windows | |||
| ``` | |||
| ./scripts/whl/windows/windows_build_whl.sh | |||
| @@ -102,5 +103,7 @@ | |||
| If you want to build the Windows whl with CUDA and a specific Python version, e.g.: | |||
| ``` | |||
| WINDOWS_WHL_WITH_CUDA="true" ALL_PYTHON=3.5.4 ./scripts/whl/windows/windows_build_whl.sh | |||
| WINDOWS_WHL_WITH_CUDA="ON" ALL_PYTHON=3.5.4 ./scripts/whl/windows/windows_build_whl.sh | |||
| ``` | |||
| If you want to build with imperative rt, set env BUILD_IMPERATIVE="ON", eg: | |||
| BUILD_IMPERATIVE="ON" WINDOWS_WHL_WITH_CUDA="ON" ALL_PYTHON=3.5.4 ./scripts/whl/windows/windows_build_whl.sh | |||
| @@ -65,16 +65,18 @@ function config_python_env() { | |||
| fi | |||
| echo ${ver} | |||
| #config a dir to trick cmake find a null pythonlib | |||
| PYTHON_LIBRARY=${PYTHON_DIR}lib/ | |||
| if [ "$1" = "3.5.9" ]; then | |||
| PYTHON_INCLUDE_DIR=${PYTHON_DIR}include/python3.5m | |||
| PYTHON_LIBRARY=${PYTHON_DIR}/lib/libpython3.5m.dylib | |||
| elif [ "$1" = "3.6.10" ]; then | |||
| PYTHON_INCLUDE_DIR=${PYTHON_DIR}include/python3.6m | |||
| PYTHON_LIBRARY=${PYTHON_DIR}/lib/libpython3.6m.dylib | |||
| elif [ "$1" = "3.7.7" ]; then | |||
| PYTHON_INCLUDE_DIR=${PYTHON_DIR}include/python3.7m | |||
| PYTHON_LIBRARY=${PYTHON_DIR}/lib/libpython3.7m.dylib | |||
| elif [ "$1" = "3.8.3" ]; then | |||
| PYTHON_INCLUDE_DIR=${PYTHON_DIR}include/python3.8 | |||
| PYTHON_LIBRARY=${PYTHON_DIR}/lib/libpython3.8.dylib | |||
| else | |||
| echo "ERR: DO NOT SUPPORT PYTHON VERSION" | |||
| echo "now support list: ${FULL_PYTHON_VER}" | |||
| @@ -82,6 +84,11 @@ function config_python_env() { | |||
| fi | |||
| } | |||
| if [[ -z ${BUILD_IMPERATIVE} ]] | |||
| then | |||
| BUILD_IMPERATIVE="OFF" | |||
| fi | |||
| function do_build() { | |||
| for ver in ${ALL_PYTHON} | |||
| do | |||
| @@ -89,7 +96,7 @@ function do_build() { | |||
| config_python_env ${ver} | |||
| #check env | |||
| if [ ! -d "$PYTHON_LIBRARY" ]; then | |||
| if [ ! -f "$PYTHON_LIBRARY" ]; then | |||
| echo "ERR: can not find $PYTHON_LIBRARY , Invalid python package" | |||
| err_env | |||
| fi | |||
| @@ -102,14 +109,20 @@ function do_build() { | |||
| #append cmake args for config python | |||
| export EXTRA_CMAKE_ARGS="-DCMAKE_PREFIX_PATH=${PYTHON_DIR} -DPYTHON_LIBRARY=${PYTHON_LIBRARY} -DPYTHON_INCLUDE_DIR=${PYTHON_INCLUDE_DIR} " | |||
| #config build type to RelWithDebInfo to enable MGB_ENABLE_DEBUG_UTIL etc | |||
| export EXTRA_CMAKE_ARGS=${EXTRA_CMAKE_ARGS}" -DCMAKE_BUILD_TYPE=RelWithDebInfo " | |||
| export EXTRA_CMAKE_ARGS="${EXTRA_CMAKE_ARGS} -DCMAKE_BUILD_TYPE=RelWithDebInfo " | |||
| #call build and install | |||
| #FIXME: cmake does not trigger an update of the python config after | |||
| #PYTHON_LIBRARY and PYTHON_INCLUDE_DIR change, so add -r to remove | |||
| #the build cache when building a new version; this makes the build | |||
| #slower than it would be without -r | |||
| ${SRC_DIR}/scripts/cmake-build/host_build.sh -t -r | |||
| if [ ${BUILD_IMPERATIVE} = "ON" ]; then | |||
| echo "build whl with IMPERATIVE python rt" | |||
| ${SRC_DIR}/scripts/cmake-build/host_build.sh -t -n -r | |||
| else | |||
| echo "build whl with legacy python rt" | |||
| ${SRC_DIR}/scripts/cmake-build/host_build.sh -t -r | |||
| fi | |||
| #call setup.py | |||
| BUILD_DIR=${SRC_DIR}/build_dir/host/MGE_WITH_CUDA_OFF/MGE_INFERENCE_ONLY_OFF/Release/build/ | |||
| @@ -121,12 +134,47 @@ function do_build() { | |||
| fi | |||
| mkdir -p staging | |||
| if [ ${BUILD_IMPERATIVE} = "ON" ]; then | |||
| echo "build whl with IMPERATIVE python rt" | |||
| cp -a imperative/python/{megengine,setup.py,requires.txt,requires-style.txt,requires-test.txt} staging/ | |||
| cd ${BUILD_DIR}/staging/megengine/core | |||
| rt_file=`ls _imperative_rt.*.so` | |||
| echo "rt file is: ${rt_file}" | |||
| if [[ -z ${rt_file} ]] | |||
| then | |||
| echo "ERR: can not find valid rt file" | |||
| exit -1 | |||
| fi | |||
| llvm-strip -s ${rt_file} | |||
| mv ${rt_file} _imperative_rt.so | |||
| echo "check so valid or not..." | |||
| otool_out=`otool -L _imperative_rt.so` | |||
| if [[ "${otool_out}" =~ "ython" ]]; then | |||
| echo "ERR: invalid _imperative_rt.so which depend on python lib, detail: log" | |||
| echo ${otool_out} | |||
| exit -1 | |||
| else | |||
| echo "valid..." | |||
| fi | |||
| else | |||
| echo "build whl with legacy python rt" | |||
| cp -a python_module/{megengine,setup.py,requires.txt,requires-style.txt,requires-test.txt} staging/ | |||
| cd ${BUILD_DIR}/staging/megengine/_internal | |||
| #FIXME: set lib suffix to dylib may be better, BUT we find after distutils.file_util.copy_file | |||
| #will change to .so at macos even we set suffix to dylib, at the same time, macos also support .so | |||
| echo "check so valid or not..." | |||
| llvm-strip -s _mgb.so | |||
| otool_out=`otool -L _mgb.so` | |||
| if [[ "${otool_out}" =~ "ython" ]]; then | |||
| echo "ERR: invalid _mgb.so which depend on python lib, detail: log" | |||
| echo ${otool_out} | |||
| exit -1 | |||
| else | |||
| echo "valid..." | |||
| fi | |||
| fi | |||
| cp -a python_module/{megengine,setup.py,requires.txt,requires-style.txt,requires-test.txt} staging/ | |||
| cd ${BUILD_DIR}/staging/megengine/_internal | |||
| #FIXME: set lib suffix to dylib may be better, BUT we find after distutils.file_util.copy_file | |||
| #will change to .so at macos even we set suffix to dylib, at the same time, macos also support .so | |||
| llvm-strip -s _mgb.so | |||
| cd ${BUILD_DIR}/staging | |||
| ${PYTHON_DIR}/bin/python3 setup.py bdist_wheel | |||
| cd ${BUILD_DIR}/staging/dist/ | |||
| @@ -14,8 +14,6 @@ function err_env() { | |||
| } | |||
| function append_path_env_and_check() { | |||
| echo "export swig pwd to PATH" | |||
| export PATH=/c/Users/${USER}/swigwin-4.0.2::$PATH | |||
| echo "export vs2019 install path" | |||
| export VS_PATH=/c/Program\ Files\ \(x86\)/Microsoft\ Visual\ Studio/2019/Enterprise | |||
| # for llvm-strip | |||
| @@ -62,7 +60,7 @@ function config_python_env() { | |||
| if [[ -z ${WINDOWS_WHL_WITH_CUDA} ]] | |||
| then | |||
| WINDOWS_WHL_WITH_CUDA="false" | |||
| WINDOWS_WHL_WITH_CUDA="OFF" | |||
| fi | |||
| @@ -74,26 +72,46 @@ CUBLAS_LIB="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/bin/cublas6 | |||
| CURAND_LIB="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/bin/curand64_10.dll" | |||
| CUBLASLT_LIB="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/bin/cublasLt64_10.dll" | |||
| CUDART_LIB="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/bin/cudart64_101.dll" | |||
| # Copy the dependency libraries referenced by the *_LIB variables into the | |||
| # directory given as $1. | |||
| #   $1: destination directory (also stored in REAL_DST, which is not declared | |||
| #       local and therefore stays set after the function returns). | |||
| function depend_real_copy() { | |||
| REAL_DST=$1 | |||
| echo "real copy lib to $1" | |||
| # NOTE(review): ${REAL_DST} is expanded unquoted below, so a destination path | |||
| # containing whitespace would be word-split - confirm callers never pass one. | |||
| cp "${TRT_LIB}" ${REAL_DST} | |||
| cp "${CUDNN_LIB}" ${REAL_DST} | |||
| cp "${CUSOLVER_LIB}" ${REAL_DST} | |||
| cp "${CUBLAS_LIB}" ${REAL_DST} | |||
| cp "${CURAND_LIB}" ${REAL_DST} | |||
| cp "${CUBLASLT_LIB}" ${REAL_DST} | |||
| cp "${CUDART_LIB}" ${REAL_DST} | |||
| } | |||
| function copy_more_dll() { | |||
| # for python whl real use | |||
| CP_DST=${BUILD_DIR}/staging/megengine/_internal/lib | |||
| rm -rf ${CP_DST} | |||
| mkdir ${CP_DST} | |||
| if [ ${BUILD_IMPERATIVE} = "ON" ]; then | |||
| echo "config BUILD_IMPERATIVE core lib dir" | |||
| CP_WHL_DST=${BUILD_DIR}/staging/megengine/core/lib | |||
| else | |||
| echo "config legacy python lib dir" | |||
| CP_WHL_DST=${BUILD_DIR}/staging/megengine/_internal/lib | |||
| fi | |||
| rm -rf ${CP_WHL_DST} | |||
| mkdir ${CP_WHL_DST} | |||
| # workround for cpu-only version import failed, use a | |||
| # empty.file to triger setup.py to create a null empty | |||
| echo "empty" > ${CP_WHL_DST}/empty.file | |||
| if [ ${WINDOWS_WHL_WITH_CUDA} = "true" ]; then | |||
| if [ ${WINDOWS_WHL_WITH_CUDA} = "ON" ]; then | |||
| echo "copy nvidia lib to whl use...." | |||
| cp "${TRT_LIB}" ${CP_DST} | |||
| cp "${CUDNN_LIB}" ${CP_DST} | |||
| cp "${CUSOLVER_LIB}" ${CP_DST} | |||
| cp "${CUBLAS_LIB}" ${CP_DST} | |||
| cp "${CURAND_LIB}" ${CP_DST} | |||
| cp "${CUBLASLT_LIB}" ${CP_DST} | |||
| cp "${CUDART_LIB}" ${CP_DST} | |||
| depend_real_copy ${CP_WHL_DST} | |||
| fi | |||
| } | |||
| if [[ -z ${BUILD_IMPERATIVE} ]] | |||
| then | |||
| BUILD_IMPERATIVE="OFF" | |||
| fi | |||
| function do_build() { | |||
| for ver in ${ALL_PYTHON} | |||
| do | |||
| @@ -118,21 +136,31 @@ function do_build() { | |||
| #force LINK a real PYTHON_LIBRARY file, after test we do not find the symbols conflict with python | |||
| #export EXTRA_CMAKE_ARGS="-DPYTHON_LIBRARY=${PYTHON_LIBRARY} -DPYTHON_INCLUDE_DIR=${PYTHON_INCLUDE_DIR} " | |||
| #config build type to RelWithDebInfo to enable MGB_ENABLE_DEBUG_UTIL etc | |||
| export EXTRA_CMAKE_ARGS=${EXTRA_CMAKE_ARGS}" -DCMAKE_BUILD_TYPE=RelWithDebInfo " | |||
| export EXTRA_CMAKE_ARGS="${EXTRA_CMAKE_ARGS} -DCMAKE_BUILD_TYPE=RelWithDebInfo " | |||
| #call build and install | |||
| #FIXME: cmake does not trigger an update of the python config after | |||
| #PYTHON_LIBRARY and PYTHON_INCLUDE_DIR change, so add -r to remove | |||
| #the build cache when building a new version; this makes the build | |||
| #slower than it would be without -r | |||
| if [ ${WINDOWS_WHL_WITH_CUDA} = "true" ]; then | |||
| BUILD_ARGS=" -t -r" | |||
| if [ ${BUILD_IMPERATIVE} = "ON" ]; then | |||
| echo "build whl with IMPERATIVE python rt" | |||
| BUILD_ARGS="${BUILD_ARGS} -n " | |||
| else | |||
| echo "build whl with legacy python rt" | |||
| fi | |||
| if [ ${WINDOWS_WHL_WITH_CUDA} = "ON" ]; then | |||
| echo "build windows whl with cuda" | |||
| ${SRC_DIR}/scripts/cmake-build/host_build.sh -t -r -c | |||
| BUILD_ARGS="${BUILD_ARGS} -c " | |||
| else | |||
| echo "build windows whl with cpu only" | |||
| ${SRC_DIR}/scripts/cmake-build/host_build.sh -t -r | |||
| fi | |||
| echo "host_build.sh BUILD_ARGS: ${BUILD_ARGS}" | |||
| ${SRC_DIR}/scripts/cmake-build/host_build.sh ${BUILD_ARGS} | |||
| #call setup.py | |||
| BUILD_DIR=${SRC_DIR}/build_dir/host/build/ | |||
| cd ${BUILD_DIR} | |||
| @@ -143,10 +171,27 @@ function do_build() { | |||
| fi | |||
| mkdir -p staging | |||
| if [ ${BUILD_IMPERATIVE} = "ON" ]; then | |||
| echo "build whl with IMPERATIVE python rt" | |||
| cp -a imperative/python/{megengine,setup.py,requires.txt,requires-style.txt,requires-test.txt} staging/ | |||
| cd ${BUILD_DIR}/staging/megengine/core | |||
| rt_file=`ls _imperative_rt.*.pyd` | |||
| echo "rt file is: ${rt_file}" | |||
| if [[ -z ${rt_file} ]] | |||
| then | |||
| echo "ERR: can not find valid rt file" | |||
| exit -1 | |||
| fi | |||
| llvm-strip -s ${rt_file} | |||
| mv ${rt_file} _imperative_rt.pyd | |||
| else | |||
| echo "build whl with legacy python rt" | |||
| cp -a python_module/{megengine,setup.py,requires.txt,requires-style.txt,requires-test.txt} staging/ | |||
| cd ${BUILD_DIR}/staging/megengine/_internal | |||
| llvm-strip -s _mgb.pyd | |||
| fi | |||
| cp -a python_module/{megengine,setup.py,requires.txt,requires-style.txt,requires-test.txt} staging/ | |||
| cd ${BUILD_DIR}/staging/megengine/_internal | |||
| llvm-strip -s _mgb.pyd | |||
| copy_more_dll | |||
| cd ${BUILD_DIR}/staging | |||
| ${PYTHON_DIR}/python3 setup.py bdist_wheel | |||
| @@ -175,5 +220,6 @@ function third_party_prepare() { | |||
| } | |||
| ###################### | |||
| export ALREADY_CONFIG_PYTHON_VER="yes" | |||
| third_party_prepare | |||
| do_build | |||
| @@ -33,6 +33,11 @@ class RNGxorshf { | |||
| uint64_t s[2]; | |||
| public: | |||
| #if __cplusplus >= 201703L | |||
| typedef uint64_t result_type; | |||
| static constexpr uint64_t min() { return 0; } | |||
| static constexpr uint64_t max() { return UINT64_MAX; } | |||
| #endif | |||
| RNGxorshf(uint64_t seed) { | |||
| std::mt19937_64 gen(seed); | |||
| s[0] = gen(); | |||
| @@ -936,8 +941,12 @@ void SeqModifierForSublinearMemory::ActionSearcherSingleCN::search_genetic() { | |||
| } | |||
| } | |||
| m_cur_records = records; | |||
| #if __cplusplus >= 201703L | |||
| std::shuffle(perm.begin(), perm.end(), rng); | |||
| #else | |||
| std::random_shuffle(perm.begin(), perm.end(), | |||
| [&](size_t x) { return rng() % x; }); | |||
| #endif | |||
| for (size_t i = 0; i < length; ++i) { | |||
| invoke_search(mutation(mutation(records[i].first))); | |||
| invoke_search(crossover(records[i].first, records[perm[i]].first)); | |||
| @@ -705,7 +705,12 @@ TEST(TestOprBlas, MatrixInverse) { | |||
| } | |||
| auto ptr = inp[0]->ptr<float>(); | |||
| for (size_t i = 0; i < batch; ++i, ptr += n * n) { | |||
| #if __cplusplus >= 201703L | |||
| std::default_random_engine rng_engine; | |||
| std::shuffle(perm.begin(), perm.end(), rng_engine); | |||
| #else | |||
| std::random_shuffle(perm.begin(), perm.end()); | |||
| #endif | |||
| for (size_t j = 0; j < n; ++j) { | |||
| ptr[j * n + perm[j]] += 5; | |||
| } | |||
| @@ -36,7 +36,12 @@ void run_all_gather(const std::vector<size_t>& axis_size, bool& success, | |||
| sleep_time.push_back(i * 0.05 + 0.1); | |||
| tot_axis_size += axis_size[i]; | |||
| } | |||
| #if __cplusplus >= 201703L | |||
| std::default_random_engine rng_engine; | |||
| std::shuffle(sleep_time.begin(), sleep_time.end(), rng_engine); | |||
| #else | |||
| std::random_shuffle(sleep_time.begin(), sleep_time.end()); | |||
| #endif | |||
| auto constexpr DEVICE_TYPE = CompNode::DeviceType::CUDA; | |||
| size_t nr_dev = std::min<size_t>( | |||
| @@ -18,7 +18,11 @@ endif() | |||
| add_executable(megbrain_test ${SOURCES}) | |||
| target_link_libraries(megbrain_test gtest) | |||
| target_link_libraries(megbrain_test megengine) | |||
| if(MSVC OR WIN32) | |||
| target_link_libraries(megbrain_test megbrain megdnn) | |||
| else() | |||
| target_link_libraries(megbrain_test megengine) | |||
| endif() | |||
| if(CXX_SUPPORT_WCLASS_MEMACCESS) | |||
| if(MGE_WITH_CUDA) | |||
| target_compile_options(megbrain_test PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=-Wno-class-memaccess>" | |||
| @@ -28,10 +32,12 @@ if(CXX_SUPPORT_WCLASS_MEMACCESS) | |||
| endif() | |||
| endif() | |||
| if(APPLE OR ANDROID) | |||
| target_link_libraries(megbrain_test dl) | |||
| else() | |||
| target_link_libraries(megbrain_test dl rt) | |||
| if(UNIX) | |||
| if(APPLE OR ANDROID) | |||
| target_link_libraries(megbrain_test dl) | |||
| else() | |||
| target_link_libraries(megbrain_test dl rt) | |||
| endif() | |||
| endif() | |||
| if (MGE_WITH_DISTRIBUTED) | |||