GitOrigin-RevId: 7f6629ae1f
tags/v1.0.0-rc1
| @@ -697,8 +697,10 @@ endif() | |||||
| if(MGE_WITH_PYTHON_MODULE) | if(MGE_WITH_PYTHON_MODULE) | ||||
| if(MGE_BUILD_IMPERATIVE_RT) | if(MGE_BUILD_IMPERATIVE_RT) | ||||
| add_subdirectory(imperative) | add_subdirectory(imperative) | ||||
| message("-- Enable imperative python wrapper runtime") | |||||
| else() | else() | ||||
| add_subdirectory(python_module) | add_subdirectory(python_module) | ||||
| message("-- Enable legacy python wrapper runtime") | |||||
| endif() | endif() | ||||
| endif() | endif() | ||||
| @@ -342,7 +342,11 @@ template <typename T> | |||||
| struct SafeMultiplies; | struct SafeMultiplies; | ||||
| template <typename T> | template <typename T> | ||||
| #if __cplusplus >= 201703L | |||||
| struct _SafeMultipliesImplUnsigned { | |||||
| #else | |||||
| struct _SafeMultipliesImplUnsigned : public std::binary_function<T, T, T> { | struct _SafeMultipliesImplUnsigned : public std::binary_function<T, T, T> { | ||||
| #endif | |||||
| static MEGDNN_CONSTEXPR size_t nbits = sizeof(T) * 8; | static MEGDNN_CONSTEXPR size_t nbits = sizeof(T) * 8; | ||||
| static size_t clz(unsigned x) { | static size_t clz(unsigned x) { | ||||
| @@ -70,8 +70,10 @@ if (MEG_WITH_ROCM) | |||||
| target_link_libraries (megdnn_test ${MGE_ROCM_LIBS}) | target_link_libraries (megdnn_test ${MGE_ROCM_LIBS}) | ||||
| endif () | endif () | ||||
| if(APPLE OR ANDROID) | |||||
| target_link_libraries(megdnn_test dl) | |||||
| else() | |||||
| target_link_libraries(megdnn_test dl rt) | |||||
| if(UNIX) | |||||
| if(APPLE OR ANDROID) | |||||
| target_link_libraries(megdnn_test dl) | |||||
| else() | |||||
| target_link_libraries(megdnn_test dl rt) | |||||
| endif() | |||||
| endif() | endif() | ||||
| @@ -89,7 +89,7 @@ public: | |||||
| auto ptr = tensor.ptr<int>(); | auto ptr = tensor.ptr<int>(); | ||||
| for (size_t n = 0; n < size; ++n) { | for (size_t n = 0; n < size; ++n) { | ||||
| std::set<int> used; | std::set<int> used; | ||||
| std::random_shuffle(seq.begin(), seq.end()); | |||||
| COMPAT_RANDOM(seq.begin(), seq.end()); | |||||
| for (size_t step = 0; step < stride; ++step) { | for (size_t step = 0; step < stride; ++step) { | ||||
| megdnn_assert(used.size() < m_size); | megdnn_assert(used.size() < m_size); | ||||
| ptr[n * stride + step] = seq[step]; | ptr[n * stride + step] = seq[step]; | ||||
| @@ -75,7 +75,7 @@ Float16PeriodicalRNG::Float16PeriodicalRNG() : m_offset(0) { | |||||
| i2f.i = static_cast<uint16_t>(x); | i2f.i = static_cast<uint16_t>(x); | ||||
| m_sequence.push_back(i2f.f); | m_sequence.push_back(i2f.f); | ||||
| } | } | ||||
| std::random_shuffle(m_sequence.begin(), m_sequence.end()); | |||||
| COMPAT_RANDOM(m_sequence.begin(), m_sequence.end()); | |||||
| } | } | ||||
| Float16PeriodicalRNG::Float16PeriodicalRNG(size_t range) : m_offset(0) { | Float16PeriodicalRNG::Float16PeriodicalRNG(size_t range) : m_offset(0) { | ||||
| @@ -99,7 +99,7 @@ Float16PeriodicalRNG::Float16PeriodicalRNG(size_t range) : m_offset(0) { | |||||
| m_sequence.push_back(i2f.f); | m_sequence.push_back(i2f.f); | ||||
| } | } | ||||
| std::random_shuffle(m_sequence.begin(), m_sequence.end()); | |||||
| COMPAT_RANDOM(m_sequence.begin(), m_sequence.end()); | |||||
| } | } | ||||
| void Float16PeriodicalRNG::gen(const TensorND& tensor) { | void Float16PeriodicalRNG::gen(const TensorND& tensor) { | ||||
| @@ -19,6 +19,16 @@ | |||||
| namespace megdnn { | namespace megdnn { | ||||
| namespace test { | namespace test { | ||||
/*!
 * \brief Shuffle the range [begin, end) in place, independent of the C++
 *      standard the tests are compiled with.
 *
 * std::random_shuffle is not available from C++17 on, so that branch uses
 * std::shuffle instead. The engine is default-constructed on every expansion,
 * i.e. it always starts from the default seed: shuffling equal inputs gives
 * equal outputs within one build.
 *
 * Both branches expand to exactly one statement WITHOUT a trailing semicolon;
 * the caller writes `COMPAT_RANDOM(b, e);`. The do/while(0) wrapper keeps the
 * macro usable as the body of an unbraced if/else.
 *
 * \param begin iterator (or pointer) to the first element to shuffle
 * \param end iterator (or pointer) one past the last element to shuffle
 */
#if __cplusplus >= 201703L
#define COMPAT_RANDOM(begin, end)                     \
    do {                                              \
        std::default_random_engine rng_engine;        \
        std::shuffle((begin), (end), rng_engine);     \
    } while (0)
#else
#define COMPAT_RANDOM(begin, end) std::random_shuffle((begin), (end))
#endif
| class RNG { | class RNG { | ||||
| protected: | protected: | ||||
| class RNGxorshf; | class RNGxorshf; | ||||
| @@ -24,15 +24,16 @@ class ArgmxxRNG final: public RNG { | |||||
| void gen(const TensorND &tensor) override { | void gen(const TensorND &tensor) override { | ||||
| auto offset = tensor.layout.span().low_elem; | auto offset = tensor.layout.span().low_elem; | ||||
| auto nr_elems = tensor.layout.span().dist_elem(); | auto nr_elems = tensor.layout.span().dist_elem(); | ||||
| #define cb(DType) \ | |||||
| if (tensor.layout.dtype == DType()) { \ | |||||
| using ctype = typename DTypeTrait<DType>::ctype; \ | |||||
| auto ptr = tensor.ptr<ctype>(); \ | |||||
| for (size_t i = 0; i < nr_elems; ++i) { \ | |||||
| ptr[offset+i] = i; \ | |||||
| } \ | |||||
| std::random_shuffle(ptr + offset, ptr + offset + nr_elems); \ | |||||
| } | |||||
| #define cb(DType) \ | |||||
| if (tensor.layout.dtype == DType()) { \ | |||||
| using ctype = typename DTypeTrait<DType>::ctype; \ | |||||
| auto ptr = tensor.ptr<ctype>(); \ | |||||
| for (size_t i = 0; i < nr_elems; ++i) { \ | |||||
| ptr[offset + i] = i; \ | |||||
| } \ | |||||
| COMPAT_RANDOM(ptr + offset, ptr + offset + nr_elems); \ | |||||
| } | |||||
| MEGDNN_FOREACH_COMPUTING_DTYPE(cb); | MEGDNN_FOREACH_COMPUTING_DTYPE(cb); | ||||
| #undef cb | #undef cb | ||||
| } | } | ||||
| @@ -32,7 +32,7 @@ class ArgsortRNG final : public RNG { | |||||
| } else { | } else { | ||||
| for (int i = 0; i < n; ++i) | for (int i = 0; i < n; ++i) | ||||
| ptr[i] = static_cast<T>(i - n / 2); | ptr[i] = static_cast<T>(i - n / 2); | ||||
| std::random_shuffle(ptr, ptr + n); | |||||
| COMPAT_RANDOM(ptr, ptr + n); | |||||
| } | } | ||||
| } | } | ||||
| @@ -86,7 +86,7 @@ void run_backward_test(Handle* handle, DType dtype) { | |||||
| for (size_t j = 0; j < n; ++j) { | for (size_t j = 0; j < n; ++j) { | ||||
| ptr[j] = j; | ptr[j] = j; | ||||
| } | } | ||||
| std::random_shuffle(ptr, ptr + n); | |||||
| COMPAT_RANDOM(ptr, ptr + n); | |||||
| ptr += n; | ptr += n; | ||||
| } | } | ||||
| } | } | ||||
| @@ -361,9 +361,8 @@ TEST_F(CUDA, BENCHMARK_RELAYOUT_7) { | |||||
| for (size_t r = 0; r < _dim.size(); r++) | for (size_t r = 0; r < _dim.size(); r++) | ||||
| permutation[r] = r; | permutation[r] = r; | ||||
| for (int nsample = 0; nsample < 50; nsample++) { | for (int nsample = 0; nsample < 50; nsample++) { | ||||
| std::random_shuffle(_dim.begin(), _dim.end()); | |||||
| std::random_shuffle(permutation.begin(), permutation.end()); | |||||
| COMPAT_RANDOM(_dim.begin(), _dim.end()); | |||||
| COMPAT_RANDOM(permutation.begin(), permutation.end()); | |||||
| if (!isTrivial(permutation)) { | if (!isTrivial(permutation)) { | ||||
| run({{_dim[0], _dim[1], _dim[2], _dim[3], _dim[4], _dim[5], | run({{_dim[0], _dim[1], _dim[2], _dim[3], _dim[4], _dim[5], | ||||
| _dim[6]}, | _dim[6]}, | ||||
| @@ -451,9 +450,10 @@ TEST_F(CUDA, BENCHMARK_RELAYOUT_5) { | |||||
| printf("vol %d cur_ratio %lf | %lf\n", vol, cur_ratio, vol_re); | printf("vol %d cur_ratio %lf | %lf\n", vol, cur_ratio, vol_re); | ||||
| // printVec(dim); | // printVec(dim); | ||||
| std::random_shuffle(dim.begin(), dim.end()); | |||||
| COMPAT_RANDOM(dim.begin(), dim.end()); | |||||
| while (isTrivial(permutation)) { | while (isTrivial(permutation)) { | ||||
| std::random_shuffle(permutation.begin(), permutation.end()); | |||||
| COMPAT_RANDOM(permutation.begin(), permutation.end()); | |||||
| } | } | ||||
| run({{dim[0], dim[1], dim[2], dim[3], dim[4]}, dtype::Int32()}, | run({{dim[0], dim[1], dim[2], dim[3], dim[4]}, dtype::Int32()}, | ||||
| @@ -603,8 +603,9 @@ TEST_F(CUDA, BENCHMARK_LAST_CONTIG_ALIGN_TEST) { | |||||
| for (size_t r = 0; r < _dim.size(); r++) | for (size_t r = 0; r < _dim.size(); r++) | ||||
| permutation[r] = r; | permutation[r] = r; | ||||
| for (int nsample = 0; nsample < 20; nsample++) { | for (int nsample = 0; nsample < 20; nsample++) { | ||||
| std::random_shuffle(_dim.begin(), _dim.end() - 1); | |||||
| std::random_shuffle(permutation.begin(), permutation.end() - 1); | |||||
| COMPAT_RANDOM(_dim.begin(), _dim.end() - 1); | |||||
| COMPAT_RANDOM(permutation.begin(), permutation.end() - 1); | |||||
| if (nsample < 5) | if (nsample < 5) | ||||
| _dim[5] = (u.gen_single_val() / 4 + 1) * 4; | _dim[5] = (u.gen_single_val() / 4 + 1) * 4; | ||||
| @@ -24,7 +24,7 @@ using namespace test; | |||||
| TEST_F(CUDA, SLEEP) { | TEST_F(CUDA, SLEEP) { | ||||
| auto opr = this->handle_cuda()->create_operator<Sleep>(); | |||||
| auto opr = this->handle_cuda()->create_operator<megdnn::SleepForward>(); | |||||
| auto run = [&](float time) -> double { | auto run = [&](float time) -> double { | ||||
| opr->param() = {time}; | opr->param() = {time}; | ||||
| @@ -24,16 +24,17 @@ class ArgmxxRNG final: public RNG { | |||||
| void gen(const TensorND &tensor) override { | void gen(const TensorND &tensor) override { | ||||
| auto offset = tensor.layout.span().low_elem; | auto offset = tensor.layout.span().low_elem; | ||||
| auto nr_elems = tensor.layout.span().dist_elem(); | auto nr_elems = tensor.layout.span().dist_elem(); | ||||
| #define cb(DType) \ | |||||
| if (tensor.layout.dtype == DType()) { \ | |||||
| using ctype = typename DTypeTrait<DType>::ctype; \ | |||||
| auto ptr = tensor.ptr<ctype>(); \ | |||||
| for (size_t i = 0; i < nr_elems; ++i) { \ | |||||
| ptr[offset+i] = i; \ | |||||
| } \ | |||||
| std::random_shuffle(ptr + offset, ptr + offset + nr_elems); \ | |||||
| return; \ | |||||
| } | |||||
| #define cb(DType) \ | |||||
| if (tensor.layout.dtype == DType()) { \ | |||||
| using ctype = typename DTypeTrait<DType>::ctype; \ | |||||
| auto ptr = tensor.ptr<ctype>(); \ | |||||
| for (size_t i = 0; i < nr_elems; ++i) { \ | |||||
| ptr[offset + i] = i; \ | |||||
| } \ | |||||
| COMPAT_RANDOM(ptr + offset, ptr + offset + nr_elems); \ | |||||
| return; \ | |||||
| } | |||||
| MEGDNN_FOREACH_COMPUTING_DTYPE_FLOAT(cb); | MEGDNN_FOREACH_COMPUTING_DTYPE_FLOAT(cb); | ||||
| #undef cb | #undef cb | ||||
| megdnn_throw(megdnn_mangle(ssprintf("Unsupported DType: %s", | megdnn_throw(megdnn_mangle(ssprintf("Unsupported DType: %s", | ||||
| @@ -76,7 +76,11 @@ add_custom_target(_version_ld SOURCES ${VERSION_SCRIPT}) | |||||
| add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/pybind11 ${PROJECT_BINARY_DIR}/third_party/pybind11) | add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/pybind11 ${PROJECT_BINARY_DIR}/third_party/pybind11) | ||||
| pybind11_add_module(${MODULE_NAME} NO_EXTRAS ${SRCS}) | pybind11_add_module(${MODULE_NAME} NO_EXTRAS ${SRCS}) | ||||
| target_link_libraries(${MODULE_NAME} PRIVATE gen_op_def megbrain megdnn -Wl,--version-script=${VERSION_SCRIPT}) | |||||
| if (APPLE OR MSVC OR WIN32) | |||||
| target_link_libraries(${MODULE_NAME} PRIVATE gen_op_def megbrain megdnn) | |||||
| else() | |||||
| target_link_libraries(${MODULE_NAME} PRIVATE gen_op_def megbrain megdnn -Wl,--version-script=${VERSION_SCRIPT}) | |||||
| endif() | |||||
| if (MGE_WITH_DISTRIBUTED) | if (MGE_WITH_DISTRIBUTED) | ||||
| message("Imperative configured to link megray") | message("Imperative configured to link megray") | ||||
| target_link_libraries(${MODULE_NAME} PRIVATE megray) | target_link_libraries(${MODULE_NAME} PRIVATE megray) | ||||
| @@ -91,6 +95,10 @@ set_target_properties(${MODULE_NAME} PROPERTIES | |||||
| SUFFIX ${CMAKE_SHARED_LIBRARY_SUFFIX} | SUFFIX ${CMAKE_SHARED_LIBRARY_SUFFIX} | ||||
| LIBRARY_OUTPUT_DIRECTORY ${MEGENGINE_DIR}/${PACKAGE_NAME}/core | LIBRARY_OUTPUT_DIRECTORY ${MEGENGINE_DIR}/${PACKAGE_NAME}/core | ||||
| ) | ) | ||||
| if (APPLE OR MSVC OR WIN32) | |||||
| message("-- overwriting SUFFIX at macos and windows before config by set_target_properties") | |||||
| pybind11_extension(${MODULE_NAME}) | |||||
| endif() | |||||
| add_dependencies(${MODULE_NAME} gen_opr_py _version_ld) | add_dependencies(${MODULE_NAME} gen_opr_py _version_ld) | ||||
| if(MGE_WITH_TEST AND MGE_ENABLE_RTTI) | if(MGE_WITH_TEST AND MGE_ENABLE_RTTI) | ||||
| @@ -8,6 +8,67 @@ | |||||
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
| import os | import os | ||||
| import sys | import sys | ||||
| import platform | |||||
| import ctypes | |||||
| if sys.platform == "win32": | |||||
| lib_path = os.path.join(os.path.dirname(__file__), "core/lib") | |||||
| dll_paths = list(filter(os.path.exists, [lib_path,])) | |||||
| assert len(dll_paths) > 0 | |||||
| kernel32 = ctypes.WinDLL("kernel32.dll", use_last_error=True) | |||||
| has_load_library_attr = hasattr(kernel32, "AddDllDirectory") | |||||
| old_error_mode = kernel32.SetErrorMode(0x0001) | |||||
| kernel32.LoadLibraryW.restype = ctypes.c_void_p | |||||
| if has_load_library_attr: | |||||
| kernel32.AddDllDirectory.restype = ctypes.c_void_p | |||||
| kernel32.LoadLibraryExW.restype = ctypes.c_void_p | |||||
| for dll_path in dll_paths: | |||||
| if sys.version_info >= (3, 8): | |||||
| os.add_dll_directory(dll_path) | |||||
| elif has_load_library_attr: | |||||
| res = kernel32.AddDllDirectory(dll_path) | |||||
| if res is None: | |||||
| err = ctypes.WinError(ctypes.get_last_error()) | |||||
| err.strerror += ' Error adding "{}" to the DLL search PATH.'.format( | |||||
| dll_path | |||||
| ) | |||||
| raise err | |||||
| else: | |||||
| print("WARN: python or OS env have some issue, may load DLL failed!!!") | |||||
| import glob | |||||
| dlls = glob.glob(os.path.join(lib_path, "*.dll")) | |||||
| path_patched = False | |||||
| for dll in dlls: | |||||
| is_loaded = False | |||||
| if has_load_library_attr: | |||||
| res = kernel32.LoadLibraryExW(dll, None, 0x00001100) | |||||
| last_error = ctypes.get_last_error() | |||||
| if res is None and last_error != 126: | |||||
| err = ctypes.WinError(last_error) | |||||
| err.strerror += ' Error loading "{}" or one of its dependencies.'.format( | |||||
| dll | |||||
| ) | |||||
| raise err | |||||
| elif res is not None: | |||||
| is_loaded = True | |||||
| if not is_loaded: | |||||
| if not path_patched: | |||||
| os.environ["PATH"] = ";".join(dll_paths + [os.environ["PATH"]]) | |||||
| path_patched = True | |||||
| res = kernel32.LoadLibraryW(dll) | |||||
| if res is None: | |||||
| err = ctypes.WinError(ctypes.get_last_error()) | |||||
| err.strerror += ' Error loading "{}" or one of its dependencies.'.format( | |||||
| dll | |||||
| ) | |||||
| raise err | |||||
| kernel32.SetErrorMode(old_error_mode) | |||||
| from .core._imperative_rt.utils import _set_fork_exec_path_for_timed_func | from .core._imperative_rt.utils import _set_fork_exec_path_for_timed_func | ||||
| from .device import * | from .device import * | ||||
| @@ -6,10 +6,14 @@ | |||||
| # Unless required by applicable law or agreed to in writing, | # Unless required by applicable law or agreed to in writing, | ||||
| # software distributed under the License is distributed on an | # software distributed under the License is distributed on an | ||||
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
| import resource | |||||
| import platform | |||||
| import sys | import sys | ||||
| import threading | import threading | ||||
| # Windows does not implement the resource module | |||||
| if platform.system() != "Windows": | |||||
| import resource | |||||
| class AlternativeRecursionLimit: | class AlternativeRecursionLimit: | ||||
| r"""A reentrant context manager for setting global recursion limits. | r"""A reentrant context manager for setting global recursion limits. | ||||
| @@ -28,16 +32,24 @@ class AlternativeRecursionLimit: | |||||
| with self.lock: | with self.lock: | ||||
| if self.count == 0: | if self.count == 0: | ||||
| self.orig_py_limit = sys.getrecursionlimit() | self.orig_py_limit = sys.getrecursionlimit() | ||||
| if platform.system() != "Windows": | |||||
| ( | ( | ||||
| self.orig_rlim_stack_soft, | self.orig_rlim_stack_soft, | ||||
| self.orig_rlim_stack_hard, | self.orig_rlim_stack_hard, | ||||
| ) = resource.getrlimit(resource.RLIMIT_STACK) | ) = resource.getrlimit(resource.RLIMIT_STACK) | ||||
| resource.setrlimit( | |||||
| resource.RLIMIT_STACK, | |||||
| (self.orig_rlim_stack_hard, self.orig_rlim_stack_hard), | |||||
| ) | |||||
| # increase recursion limit | |||||
| sys.setrecursionlimit(self.new_py_limit) | |||||
| # FIXME: https://bugs.python.org/issue34602 -- official python3 release builds | |||||
| # on macOS always hit this issue, and not every user builds python3 from source | |||||
| try: | |||||
| resource.setrlimit( | |||||
| resource.RLIMIT_STACK, | |||||
| (self.orig_rlim_stack_hard, self.orig_rlim_stack_hard), | |||||
| ) | |||||
| except ValueError as exc: | |||||
| if platform.system() != "Darwin": | |||||
| raise exc | |||||
| # increase recursion limit | |||||
| sys.setrecursionlimit(self.new_py_limit) | |||||
| self.count += 1 | self.count += 1 | ||||
| def __exit__(self, type, value, traceback): | def __exit__(self, type, value, traceback): | ||||
| @@ -45,10 +57,16 @@ class AlternativeRecursionLimit: | |||||
| self.count -= 1 | self.count -= 1 | ||||
| if self.count == 0: | if self.count == 0: | ||||
| sys.setrecursionlimit(self.orig_py_limit) | sys.setrecursionlimit(self.orig_py_limit) | ||||
| resource.setrlimit( | |||||
| resource.RLIMIT_STACK, | |||||
| (self.orig_rlim_stack_soft, self.orig_rlim_stack_hard), | |||||
| ) | |||||
| if platform.system() != "Windows": | |||||
| try: | |||||
| resource.setrlimit( | |||||
| resource.RLIMIT_STACK, | |||||
| (self.orig_rlim_stack_soft, self.orig_rlim_stack_hard), | |||||
| ) | |||||
| except ValueError as exc: | |||||
| if platform.system() != "Darwin": | |||||
| raise exc | |||||
| _max_recursion_limit_context_manager = AlternativeRecursionLimit(2 ** 31 - 1) | _max_recursion_limit_context_manager = AlternativeRecursionLimit(2 ** 31 - 1) | ||||
| @@ -9,6 +9,7 @@ | |||||
| import os | import os | ||||
| import re | import re | ||||
| import pathlib | import pathlib | ||||
| import platform | |||||
| from distutils.file_util import copy_file | from distutils.file_util import copy_file | ||||
| from setuptools import setup, find_packages, Extension | from setuptools import setup, find_packages, Extension | ||||
| from setuptools.command.build_ext import build_ext as _build_ext | from setuptools.command.build_ext import build_ext as _build_ext | ||||
| @@ -29,7 +30,10 @@ class build_ext(_build_ext): | |||||
| extdir.parent.mkdir(parents=True, exist_ok=True) | extdir.parent.mkdir(parents=True, exist_ok=True) | ||||
| modpath = self.get_ext_fullname(ext.name).split('.') | modpath = self.get_ext_fullname(ext.name).split('.') | ||||
| modpath[-1] += '.so' | |||||
| if platform.system() == 'Windows': | |||||
| modpath[-1] += '.pyd' | |||||
| else: | |||||
| modpath[-1] += '.so' | |||||
| modpath = str(pathlib.Path(*modpath).resolve()) | modpath = str(pathlib.Path(*modpath).resolve()) | ||||
| copy_file(modpath, fullpath, verbose=self.verbose, dry_run=self.dry_run) | copy_file(modpath, fullpath, verbose=self.verbose, dry_run=self.dry_run) | ||||
| @@ -47,6 +51,14 @@ if local_version: | |||||
| __version__ = '{}+{}'.format(__version__, local_version) | __version__ = '{}+{}'.format(__version__, local_version) | ||||
| packages = find_packages(exclude=['test']) | packages = find_packages(exclude=['test']) | ||||
| package_data = [ | |||||
| str(f.relative_to('megengine')) | |||||
| for f in pathlib.Path('megengine', 'core', 'include').glob('**/*') | |||||
| ] | |||||
| package_data += [ | |||||
| str(f.relative_to('megengine')) | |||||
| for f in pathlib.Path('megengine', 'core', 'lib').glob('**/*') | |||||
| ] | |||||
| with open('requires.txt') as f: | with open('requires.txt') as f: | ||||
| requires = f.read().splitlines() | requires = f.read().splitlines() | ||||
| @@ -63,6 +75,9 @@ setup_kwargs = dict( | |||||
| author='Megvii Engine Team', | author='Megvii Engine Team', | ||||
| author_email=email, | author_email=email, | ||||
| packages=packages, | packages=packages, | ||||
| package_data={ | |||||
| 'megengine': package_data, | |||||
| }, | |||||
| ext_modules=[PrecompiledExtesion('megengine.core._imperative_rt')], | ext_modules=[PrecompiledExtesion('megengine.core._imperative_rt')], | ||||
| install_requires=requires, | install_requires=requires, | ||||
| extras_require={ | extras_require={ | ||||
| @@ -9,15 +9,6 @@ | |||||
| #include "megbrain/utils/mempool.h" | #include "megbrain/utils/mempool.h" | ||||
| #include "./numpy_dtypes.h" | #include "./numpy_dtypes.h" | ||||
| /* | |||||
| * demangle typeid, see | |||||
| * http://stackoverflow.com/questions/281818/unmangling-the-result-of-stdtype-infoname | |||||
| */ | |||||
| #ifdef __GNUG__ | |||||
| #include <cstdlib> | |||||
| #include <memory> | |||||
| #include <cxxabi.h> | |||||
| namespace py = pybind11; | namespace py = pybind11; | ||||
| PyTaskDipatcher py_task_q = {}; | PyTaskDipatcher py_task_q = {}; | ||||
| @@ -34,10 +25,18 @@ py::module rel_import(py::str name, py::module m, int level) { | |||||
| return import(name, m.attr("__dict__"), py::arg("level")=level); | return import(name, m.attr("__dict__"), py::arg("level")=level); | ||||
| } | } | ||||
| /* | |||||
| * demangle typeid, see | |||||
| * http://stackoverflow.com/questions/281818/unmangling-the-result-of-stdtype-infoname | |||||
| */ | |||||
| #ifdef __GNUG__ | |||||
| #include <cxxabi.h> | |||||
| #include <cstdlib> | |||||
| #include <memory> | |||||
| namespace { | namespace { | ||||
| std::string demangle_typeid(const char* name) { | std::string demangle_typeid(const char* name) { | ||||
| int status = -4; // some arbitrary value to eliminate the compiler warning | int status = -4; // some arbitrary value to eliminate the compiler warning | ||||
| // enable c++11 by passing the flag -std=c++11 to g++ | // enable c++11 by passing the flag -std=c++11 to g++ | ||||
| @@ -48,7 +47,7 @@ std::string demangle_typeid(const char* name) { | |||||
| return (status==0) ? res.get() : name ; | return (status==0) ? res.get() : name ; | ||||
| } | } | ||||
| } | |||||
| } // namespace | |||||
| #else | #else | ||||
| namespace { | namespace { | ||||
| @@ -1,4 +1,8 @@ | |||||
| #include "utils.h" | #include "utils.h" | ||||
| #ifdef WIN32 | |||||
| #include <stdio.h> | |||||
| #include <windows.h> | |||||
| #endif | |||||
| #include <pybind11/operators.h> | #include <pybind11/operators.h> | ||||
| #include <atomic> | #include <atomic> | ||||
| @@ -8,6 +8,7 @@ | |||||
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
| import multiprocessing as mp | import multiprocessing as mp | ||||
| import os | import os | ||||
| import platform | |||||
| import re | import re | ||||
| import subprocess | import subprocess | ||||
| import sys | import sys | ||||
| @@ -196,6 +197,9 @@ def run_test( | |||||
| @pytest.mark.isolated_distributed | @pytest.mark.isolated_distributed | ||||
| @pytest.mark.skipif( | |||||
| platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM" | |||||
| ) | |||||
| def test_dp_correctness(): | def test_dp_correctness(): | ||||
| model_name = "mnist_model_with_test.mge" | model_name = "mnist_model_with_test.mge" | ||||
| model_path = os.path.join(os.path.dirname(__file__), model_name) | model_path = os.path.join(os.path.dirname(__file__), model_name) | ||||
| @@ -35,7 +35,7 @@ from megengine.functional.distributed import ( | |||||
| platform.system() == "Darwin", reason="do not imp GPU mode at macos now" | platform.system() == "Darwin", reason="do not imp GPU mode at macos now" | ||||
| ) | ) | ||||
| @pytest.mark.skipif( | @pytest.mark.skipif( | ||||
| platform.system() == "Windows", reason="do not imp GPU mode at Windows now" | |||||
| platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM" | |||||
| ) | ) | ||||
| @pytest.mark.isolated_distributed | @pytest.mark.isolated_distributed | ||||
| def test_reduce_sum(): | def test_reduce_sum(): | ||||
| @@ -77,7 +77,7 @@ def test_reduce_sum(): | |||||
| platform.system() == "Darwin", reason="do not imp GPU mode at macos now" | platform.system() == "Darwin", reason="do not imp GPU mode at macos now" | ||||
| ) | ) | ||||
| @pytest.mark.skipif( | @pytest.mark.skipif( | ||||
| platform.system() == "Windows", reason="do not imp GPU mode at Windows now" | |||||
| platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM" | |||||
| ) | ) | ||||
| @pytest.mark.isolated_distributed | @pytest.mark.isolated_distributed | ||||
| def test_broadcast(): | def test_broadcast(): | ||||
| @@ -115,7 +115,7 @@ def test_broadcast(): | |||||
| platform.system() == "Darwin", reason="do not imp GPU mode at macos now" | platform.system() == "Darwin", reason="do not imp GPU mode at macos now" | ||||
| ) | ) | ||||
| @pytest.mark.skipif( | @pytest.mark.skipif( | ||||
| platform.system() == "Windows", reason="do not imp GPU mode at Windows now" | |||||
| platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM" | |||||
| ) | ) | ||||
| @pytest.mark.isolated_distributed | @pytest.mark.isolated_distributed | ||||
| def test_all_gather(): | def test_all_gather(): | ||||
| @@ -154,7 +154,7 @@ def test_all_gather(): | |||||
| platform.system() == "Darwin", reason="do not imp GPU mode at macos now" | platform.system() == "Darwin", reason="do not imp GPU mode at macos now" | ||||
| ) | ) | ||||
| @pytest.mark.skipif( | @pytest.mark.skipif( | ||||
| platform.system() == "Windows", reason="do not imp GPU mode at Windows now" | |||||
| platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM" | |||||
| ) | ) | ||||
| @pytest.mark.isolated_distributed | @pytest.mark.isolated_distributed | ||||
| def test_reduce_scatter_sum(): | def test_reduce_scatter_sum(): | ||||
| @@ -193,7 +193,7 @@ def test_reduce_scatter_sum(): | |||||
| platform.system() == "Darwin", reason="do not imp GPU mode at macos now" | platform.system() == "Darwin", reason="do not imp GPU mode at macos now" | ||||
| ) | ) | ||||
| @pytest.mark.skipif( | @pytest.mark.skipif( | ||||
| platform.system() == "Windows", reason="do not imp GPU mode at Windows now" | |||||
| platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM" | |||||
| ) | ) | ||||
| @pytest.mark.isolated_distributed | @pytest.mark.isolated_distributed | ||||
| def test_all_reduce_sum(): | def test_all_reduce_sum(): | ||||
| @@ -232,7 +232,7 @@ def test_all_reduce_sum(): | |||||
| platform.system() == "Darwin", reason="do not imp GPU mode at macos now" | platform.system() == "Darwin", reason="do not imp GPU mode at macos now" | ||||
| ) | ) | ||||
| @pytest.mark.skipif( | @pytest.mark.skipif( | ||||
| platform.system() == "Windows", reason="do not imp GPU mode at Windows now" | |||||
| platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM" | |||||
| ) | ) | ||||
| @pytest.mark.isolated_distributed | @pytest.mark.isolated_distributed | ||||
| def test_all_reduce_max(): | def test_all_reduce_max(): | ||||
| @@ -271,7 +271,7 @@ def test_all_reduce_max(): | |||||
| platform.system() == "Darwin", reason="do not imp GPU mode at macos now" | platform.system() == "Darwin", reason="do not imp GPU mode at macos now" | ||||
| ) | ) | ||||
| @pytest.mark.skipif( | @pytest.mark.skipif( | ||||
| platform.system() == "Windows", reason="do not imp GPU mode at Windows now" | |||||
| platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM" | |||||
| ) | ) | ||||
| @pytest.mark.isolated_distributed | @pytest.mark.isolated_distributed | ||||
| def test_all_reduce_min(): | def test_all_reduce_min(): | ||||
| @@ -310,7 +310,7 @@ def test_all_reduce_min(): | |||||
| platform.system() == "Darwin", reason="do not imp GPU mode at macos now" | platform.system() == "Darwin", reason="do not imp GPU mode at macos now" | ||||
| ) | ) | ||||
| @pytest.mark.skipif( | @pytest.mark.skipif( | ||||
| platform.system() == "Windows", reason="do not imp GPU mode at Windows now" | |||||
| platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM" | |||||
| ) | ) | ||||
| @pytest.mark.isolated_distributed | @pytest.mark.isolated_distributed | ||||
| def test_gather(): | def test_gather(): | ||||
| @@ -352,7 +352,7 @@ def test_gather(): | |||||
| platform.system() == "Darwin", reason="do not imp GPU mode at macos now" | platform.system() == "Darwin", reason="do not imp GPU mode at macos now" | ||||
| ) | ) | ||||
| @pytest.mark.skipif( | @pytest.mark.skipif( | ||||
| platform.system() == "Windows", reason="do not imp GPU mode at Windows now" | |||||
| platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM" | |||||
| ) | ) | ||||
| @pytest.mark.isolated_distributed | @pytest.mark.isolated_distributed | ||||
| def test_scatter(): | def test_scatter(): | ||||
| @@ -390,7 +390,7 @@ def test_scatter(): | |||||
| platform.system() == "Darwin", reason="do not imp GPU mode at macos now" | platform.system() == "Darwin", reason="do not imp GPU mode at macos now" | ||||
| ) | ) | ||||
| @pytest.mark.skipif( | @pytest.mark.skipif( | ||||
| platform.system() == "Windows", reason="do not imp GPU mode at Windows now" | |||||
| platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM" | |||||
| ) | ) | ||||
| @pytest.mark.isolated_distributed | @pytest.mark.isolated_distributed | ||||
| def test_all_to_all(): | def test_all_to_all(): | ||||
| @@ -430,7 +430,7 @@ def test_all_to_all(): | |||||
| platform.system() == "Darwin", reason="do not imp GPU mode at macos now" | platform.system() == "Darwin", reason="do not imp GPU mode at macos now" | ||||
| ) | ) | ||||
| @pytest.mark.skipif( | @pytest.mark.skipif( | ||||
| platform.system() == "Windows", reason="do not imp GPU mode at Windows now" | |||||
| platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM" | |||||
| ) | ) | ||||
| @pytest.mark.isolated_distributed | @pytest.mark.isolated_distributed | ||||
| def test_io_remote(): | def test_io_remote(): | ||||
| @@ -6,6 +6,7 @@ | |||||
| # Unless required by applicable law or agreed to in writing, | # Unless required by applicable law or agreed to in writing, | ||||
| # software distributed under the License is distributed on an | # software distributed under the License is distributed on an | ||||
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
| import platform | |||||
| import weakref | import weakref | ||||
| import numpy as np | import numpy as np | ||||
| @@ -51,6 +52,9 @@ def save_to(self, name="grad"): | |||||
| @pytest.mark.isolated_distributed | @pytest.mark.isolated_distributed | ||||
| @pytest.mark.skipif( | |||||
| platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM" | |||||
| ) | |||||
| def test_dist_grad(): | def test_dist_grad(): | ||||
| world_size = 2 | world_size = 2 | ||||
| x_np = np.random.rand(10).astype("float32") | x_np = np.random.rand(10).astype("float32") | ||||
| @@ -9,7 +9,17 @@ | |||||
| #include "megbrain/imperative/profiler.h" | #include "megbrain/imperative/profiler.h" | ||||
| #if defined(_MSC_VER) || defined(WIN32) | |||||
| #include <windows.h> | |||||
| #define getpid GetCurrentProcessId | |||||
| #else | |||||
| #include <sys/unistd.h> | #include <sys/unistd.h> | ||||
| #endif | |||||
| #if defined(__APPLE__) || defined(__MACOSX) | |||||
| #include <unistd.h> | |||||
| #endif | |||||
| #include <variant> | #include <variant> | ||||
| #include "megbrain/imperative/ops/opr_attr.h" | #include "megbrain/imperative/ops/opr_attr.h" | ||||
| @@ -16,6 +16,10 @@ | |||||
| #include "megbrain/imperative/ops/opr_attr.h" | #include "megbrain/imperative/ops/opr_attr.h" | ||||
| #include "megbrain/imperative/ops/backward_graph.h" | #include "megbrain/imperative/ops/backward_graph.h" | ||||
| #if __cplusplus >= 201703L | |||||
| #include <optional> | |||||
| #endif | |||||
| namespace mgb { | namespace mgb { | ||||
| namespace imperative { | namespace imperative { | ||||
| @@ -38,8 +38,11 @@ if(CXX_SUPPORT_WCLASS_MEMACCESS) | |||||
| endif() | endif() | ||||
| if(UNIX) | if(UNIX) | ||||
| target_link_libraries(imperative_test dl rt) | |||||
| if(APPLE OR ANDROID) | |||||
| target_link_libraries(imperative_test dl) | |||||
| else() | |||||
| target_link_libraries(imperative_test dl rt) | |||||
| endif() | |||||
| endif() | endif() | ||||
| install(TARGETS imperative_test RUNTIME DESTINATION test) | install(TARGETS imperative_test RUNTIME DESTINATION test) | ||||
| @@ -81,7 +81,10 @@ else() | |||||
| target_link_libraries(mgb megbrain megdnn -Wl,--version-script=${VERSION_SCRIPT}) | target_link_libraries(mgb megbrain megdnn -Wl,--version-script=${VERSION_SCRIPT}) | ||||
| endif() | endif() | ||||
| target_include_directories(mgb PRIVATE ${PYTHON_INCLUDE_DIRS} src/cpp ${CMAKE_CURRENT_BINARY_DIR} ${NUMPY_INCLUDE_DIR}) | target_include_directories(mgb PRIVATE ${PYTHON_INCLUDE_DIRS} src/cpp ${CMAKE_CURRENT_BINARY_DIR} ${NUMPY_INCLUDE_DIR}) | ||||
| target_link_libraries(mgb ${PYTHON_LIBRARIES}) | |||||
| # only windows need link PYTHON_LIBRARIES | |||||
| if(MSVC OR WIN32) | |||||
| target_link_libraries(mgb ${PYTHON_LIBRARIES}) | |||||
| endif() | |||||
| if (MGE_WITH_DISTRIBUTED) | if (MGE_WITH_DISTRIBUTED) | ||||
| target_link_libraries(mgb megray) | target_link_libraries(mgb megray) | ||||
| @@ -30,11 +30,17 @@ | |||||
| 4e: add C:\Program Files\NVIDIA GPU Computing Toolkit\cudnn-10.1-windows10-x64-v7.6.5.32\cuda\bin to system Path env | 4e: add C:\Program Files\NVIDIA GPU Computing Toolkit\cudnn-10.1-windows10-x64-v7.6.5.32\cuda\bin to system Path env | ||||
| 4f: add C:\Program Files\NVIDIA GPU Computing Toolkit\TensorRT-6.0.1.5\lib Path | 4f: add C:\Program Files\NVIDIA GPU Computing Toolkit\TensorRT-6.0.1.5\lib Path | ||||
| if you do not do 4d/4e/4f, the CUDA runtime can not find the dlls | if you do not do 4d/4e/4f, the CUDA runtime can not find the dlls | ||||
| 5: install python3 (DFT 3.8.3) to /c/Users/${USER}/mge_whl_python_env/3.8.3 and | |||||
| put it to PATH env and run python3 -m pip install numpy (if u want to build with training mode or build python whl) | |||||
| 6: install swig from install gui (if u want to build with training mode or build python whl) | |||||
| a: download swig: https://nchc.dl.sourceforge.net/project/swig/swigwin/swigwin-4.0.2/swigwin-4.0.2.zip | |||||
| b: install swig to /c/Users/${USER}/swigwin-4.0.2 | |||||
| c: apply scripts/whl/windows/fix-ptr-define-issue.patch to c/Users/${USER}/swigwin-4.0.2 | |||||
| ``` | ``` | ||||
| ### linux host build | ### linux host build | ||||
| ``` | ``` | ||||
| 1: cmake, which version > 3.14.4 | 1: cmake, which version > 3.14.4 | ||||
| 2: gcc/g++, which version > 6 | |||||
| 2: gcc/g++, which version > 6, (gcc/g++ >= 7, if need build training) | |||||
| 3: install build-essential git git-lfs gfortran libgfortran-6-dev autoconf gnupg flex bison gperf curl | 3: install build-essential git git-lfs gfortran libgfortran-6-dev autoconf gnupg flex bison gperf curl | ||||
| 4: zlib1g-dev gcc-multilib g++-multilib lib32ncurses5-dev libxml2-utils xsltproc unzip libtool: | 4: zlib1g-dev gcc-multilib g++-multilib lib32ncurses5-dev libxml2-utils xsltproc unzip libtool: | ||||
| 5: librdmacm-dev rdmacm-utils python3-dev swig python3-numpy texinfo | 5: librdmacm-dev rdmacm-utils python3-dev swig python3-numpy texinfo | ||||
| @@ -47,6 +53,7 @@ | |||||
| 3: brew install python python3 swig coreutils | 3: brew install python python3 swig coreutils | ||||
| 4: install at least xcode command line tool: https://developer.apple.com/xcode/ | 4: install at least xcode command line tool: https://developer.apple.com/xcode/ | ||||
| 5: about cuda: we do not support CUDA on macos | 5: about cuda: we do not support CUDA on macos | ||||
| 6: python3 -m pip install numpy (if u want to build with training mode or build python whl) | |||||
| ``` | ``` | ||||
| ### cross build for arm-android | ### cross build for arm-android | ||||
| now we support windows/linux/macos cross build to arm-android | now we support windows/linux/macos cross build to arm-android | ||||
| @@ -9,6 +9,7 @@ function usage() { | |||||
| echo "-t : Build with training mode, default inference only" | echo "-t : Build with training mode, default inference only" | ||||
| echo "-m : Build with m32 mode(only for windows build), default m64" | echo "-m : Build with m32 mode(only for windows build), default m64" | ||||
| echo "-r : remove old build dir before make, default off" | echo "-r : remove old build dir before make, default off" | ||||
| echo "-n : enable new python runtime(valid when training mode with -t, default is legacy runtime)" | |||||
| echo "-h : show usage" | echo "-h : show usage" | ||||
| echo "append other cmake config by export EXTRA_CMAKE_ARGS=..." | echo "append other cmake config by export EXTRA_CMAKE_ARGS=..." | ||||
| echo "example: $0 -d" | echo "example: $0 -d" | ||||
| @@ -22,9 +23,10 @@ MGE_WINDOWS_BUILD_ARCH=x64 | |||||
| MGE_WINDOWS_BUILD_MARCH=m64 | MGE_WINDOWS_BUILD_MARCH=m64 | ||||
| MGE_ARCH=x86_64 | MGE_ARCH=x86_64 | ||||
| REMOVE_OLD_BUILD=false | REMOVE_OLD_BUILD=false | ||||
| MGE_BUILD_IMPERATIVE_RT=OFF | |||||
| echo "EXTRA_CMAKE_ARGS: ${EXTRA_CMAKE_ARGS}" | echo "EXTRA_CMAKE_ARGS: ${EXTRA_CMAKE_ARGS}" | ||||
| while getopts "rhdctm" arg | |||||
| while getopts "rhdctmn" arg | |||||
| do | do | ||||
| case $arg in | case $arg in | ||||
| d) | d) | ||||
| @@ -48,11 +50,15 @@ do | |||||
| REMOVE_OLD_BUILD=true | REMOVE_OLD_BUILD=true | ||||
| ;; | ;; | ||||
| m) | m) | ||||
| echo "build for m32(only use for windows)" | |||||
| echo "build for m32(only valid use for windows)" | |||||
| MGE_WINDOWS_BUILD_ARCH=x86 | MGE_WINDOWS_BUILD_ARCH=x86 | ||||
| MGE_WINDOWS_BUILD_MARCH=m32 | MGE_WINDOWS_BUILD_MARCH=m32 | ||||
| MGE_ARCH=i386 | MGE_ARCH=i386 | ||||
| ;; | ;; | ||||
| n) | |||||
| echo "Enable imperative python wrapper runtime" | |||||
| MGE_BUILD_IMPERATIVE_RT=ON | |||||
| ;; | |||||
| ?) | ?) | ||||
| echo "unkonw argument" | echo "unkonw argument" | ||||
| usage | usage | ||||
| @@ -101,6 +107,7 @@ function cmake_build() { | |||||
| cmake \ | cmake \ | ||||
| -DCMAKE_BUILD_TYPE=$BUILD_TYPE \ | -DCMAKE_BUILD_TYPE=$BUILD_TYPE \ | ||||
| -DMGE_INFERENCE_ONLY=$MGE_INFERENCE_ONLY \ | -DMGE_INFERENCE_ONLY=$MGE_INFERENCE_ONLY \ | ||||
| -DMGE_BUILD_IMPERATIVE_RT=${MGE_BUILD_IMPERATIVE_RT} \ | |||||
| -DMGE_WITH_CUDA=$MGE_WITH_CUDA \ | -DMGE_WITH_CUDA=$MGE_WITH_CUDA \ | ||||
| -DCMAKE_INSTALL_PREFIX=$INSTALL_DIR \ | -DCMAKE_INSTALL_PREFIX=$INSTALL_DIR \ | ||||
| ${EXTRA_CMAKE_ARGS} \ | ${EXTRA_CMAKE_ARGS} \ | ||||
| @@ -112,7 +119,7 @@ function cmake_build() { | |||||
| function windows_env_err() { | function windows_env_err() { | ||||
| echo "check windows env failed!!" | echo "check windows env failed!!" | ||||
| echo "please install LLVM/clang-cl/cmake/python at Visual Studio Extensions" | |||||
| echo "please install env refs for: scripts/cmake-build/BUILD_README.md" | |||||
| exit -1 | exit -1 | ||||
| } | } | ||||
| @@ -178,6 +185,25 @@ function prepare_env_for_windows_build() { | |||||
| export CPATH=$CPATH:$NIVIDA_INSTALL_PRE/${TRT_V}/include:$NIVIDA_INSTALL_PRE/CUDA/${CUDA_V}/include:$NIVIDA_INSTALL_PRE/CUDA/${CUDA_V}/include/nvtx3:$PC_CUDNN_INCLUDE_DIRS | export CPATH=$CPATH:$NIVIDA_INSTALL_PRE/${TRT_V}/include:$NIVIDA_INSTALL_PRE/CUDA/${CUDA_V}/include:$NIVIDA_INSTALL_PRE/CUDA/${CUDA_V}/include/nvtx3:$PC_CUDNN_INCLUDE_DIRS | ||||
| export LIBRARY_PATH=$LIBRARY_PATH:$LD_LIBRARY_PATH | export LIBRARY_PATH=$LIBRARY_PATH:$LD_LIBRARY_PATH | ||||
| export INCLUDE=$INCLUDE:$CPATH | export INCLUDE=$INCLUDE:$CPATH | ||||
| # python version will be config by whl build script or ci script, we need | |||||
| # a DFT version for build success when we just call host_build.sh | |||||
| if [[ -z ${ALREADY_CONFIG_PYTHON_VER} ]] | |||||
| then | |||||
| echo "config a default python3" | |||||
| DFT_PYTHON_BIN=/c/Users/${USER}/mge_whl_python_env/3.8.3 | |||||
| if [ ! -f "${DFT_PYTHON_BIN}/python3.exe" ]; then | |||||
| echo "ERR: can not find ${DFT_PYTHON_BIN}/python3.exe , Invalid env" | |||||
| windows_env_err | |||||
| else | |||||
| echo "put python3 to env..." | |||||
| export PATH=${DFT_PYTHON_BIN}:$PATH | |||||
| which python3 | |||||
| fi | |||||
| fi | |||||
| echo "export swig pwd to PATH" | |||||
| export PATH=/c/Users/${USER}/swigwin-4.0.2::$PATH | |||||
| } | } | ||||
| WINDOWS_BUILD_TARGET="Ninja all > build.log" | WINDOWS_BUILD_TARGET="Ninja all > build.log" | ||||
| @@ -218,6 +244,7 @@ function cmake_build_windows() { | |||||
| vcvarsall.bat $MGE_WINDOWS_BUILD_ARCH && cmake -G "Ninja" \ | vcvarsall.bat $MGE_WINDOWS_BUILD_ARCH && cmake -G "Ninja" \ | ||||
| -DMGE_ARCH=$MGE_ARCH \ | -DMGE_ARCH=$MGE_ARCH \ | ||||
| -DMGE_INFERENCE_ONLY=$MGE_INFERENCE_ONLY \ | -DMGE_INFERENCE_ONLY=$MGE_INFERENCE_ONLY \ | ||||
| -DMGE_BUILD_IMPERATIVE_RT=${MGE_BUILD_IMPERATIVE_RT} \ | |||||
| -DMGE_WITH_CUDA=$MGE_WITH_CUDA \ | -DMGE_WITH_CUDA=$MGE_WITH_CUDA \ | ||||
| -DCMAKE_BUILD_TYPE=$BUILD_TYPE \ | -DCMAKE_BUILD_TYPE=$BUILD_TYPE \ | ||||
| -DCMAKE_INSTALL_PREFIX:PATH=$INSTALL_DIR \ | -DCMAKE_INSTALL_PREFIX:PATH=$INSTALL_DIR \ | ||||
| @@ -230,8 +257,18 @@ function cmake_build_windows() { | |||||
| ${WINDOWS_BUILD_TARGET}" | ${WINDOWS_BUILD_TARGET}" | ||||
| } | } | ||||
| if [ ${MGE_BUILD_IMPERATIVE_RT} = "ON" ] && [ ${MGE_INFERENCE_ONLY} = "ON" ]; then | |||||
| echo "ERR: MGE_BUILD_IMPERATIVE_RT(-n) only valid when enable training mode(-t)" | |||||
| echo "pls remove -n or add -t" | |||||
| exit -1 | |||||
| fi | |||||
| if [[ $OS =~ "NT" ]]; then | if [[ $OS =~ "NT" ]]; then | ||||
| if [ ${MGE_ARCH} = "i386" ] && [ ${MGE_INFERENCE_ONLY} = "OFF" ]; then | |||||
| echo "ERR: training mode(-t) only support 64 bit mode" | |||||
| echo "pls remove -t or remove -m" | |||||
| exit -1 | |||||
| fi | |||||
| config_windows_build_target | config_windows_build_target | ||||
| cmake_build_windows $MGE_WITH_CUDA $MGE_INFERENCE_ONLY $BUILD_TYPE | cmake_build_windows $MGE_WITH_CUDA $MGE_INFERENCE_ONLY $BUILD_TYPE | ||||
| else | else | ||||
| @@ -53,10 +53,6 @@ | |||||
| d0: /c/Users/${USER}/mge_whl_python_env/3.8.3/python3.exe -m pip install --upgrade pip | d0: /c/Users/${USER}/mge_whl_python_env/3.8.3/python3.exe -m pip install --upgrade pip | ||||
| d1: /c/Users/${USER}/mge_whl_python_env/3.8.3/python3.exe -m pip install -r python_module/requires-test.txt | d1: /c/Users/${USER}/mge_whl_python_env/3.8.3/python3.exe -m pip install -r python_module/requires-test.txt | ||||
| d2: /c/Users/${USER}/mge_whl_python_env/3.8.3/python3.exe -m pip install numpy wheel requests tqdm tabulate | d2: /c/Users/${USER}/mge_whl_python_env/3.8.3/python3.exe -m pip install numpy wheel requests tqdm tabulate | ||||
| 5: install swig from install gui | |||||
| a: download swig: https://nchc.dl.sourceforge.net/project/swig/swigwin/swigwin-4.0.2/swigwin-4.0.2.zip | |||||
| b: install swig to /c/Users/${USER}/swigwin-4.0.2 | |||||
| c: apply scripts/whl/windows/fix-ptr-define-issue.patch to c/Users/${USER}/swigwin-4.0.2 | |||||
| ``` | ``` | ||||
| # how to build | # how to build | ||||
| @@ -90,6 +86,11 @@ | |||||
| ``` | ``` | ||||
| ALL_PYTHON=3.5.9 ./scripts/whl/macos/macos_build_whl.sh | ALL_PYTHON=3.5.9 ./scripts/whl/macos/macos_build_whl.sh | ||||
| ``` | ``` | ||||
| If you want to build with imperative rt, set env BUILD_IMPERATIVE="ON", eg: | |||||
| ``` | |||||
| ALL_PYTHON=3.5.9 BUILD_IMPERATIVE="ON" ./scripts/whl/macos/macos_build_whl.sh | |||||
| ``` | |||||
| ## build for windows | ## build for windows | ||||
| ``` | ``` | ||||
| ./scripts/whl/windows/windows_build_whl.sh | ./scripts/whl/windows/windows_build_whl.sh | ||||
| @@ -102,5 +103,7 @@ | |||||
| If you want to build windows whl with cuda, also a specific Python version. eg: | If you want to build windows whl with cuda, also a specific Python version. eg: | ||||
| ``` | ``` | ||||
| WINDOWS_WHL_WITH_CUDA="true" ALL_PYTHON=3.5.4 ./scripts/whl/windows/windows_build_whl.sh | |||||
| WINDOWS_WHL_WITH_CUDA="ON" ALL_PYTHON=3.5.4 ./scripts/whl/windows/windows_build_whl.sh | |||||
| ``` | ``` | ||||
| If you want to build with imperative rt, set env BUILD_IMPERATIVE="ON", eg: | |||||
| BUILD_IMPERATIVE="ON" WINDOWS_WHL_WITH_CUDA="ON" ALL_PYTHON=3.5.4 ./scripts/whl/windows/windows_build_whl.sh | |||||
| @@ -65,16 +65,18 @@ function config_python_env() { | |||||
| fi | fi | ||||
| echo ${ver} | echo ${ver} | ||||
| #config a dir to trick cmake find a null pythonlib | |||||
| PYTHON_LIBRARY=${PYTHON_DIR}lib/ | |||||
| if [ "$1" = "3.5.9" ]; then | if [ "$1" = "3.5.9" ]; then | ||||
| PYTHON_INCLUDE_DIR=${PYTHON_DIR}include/python3.5m | PYTHON_INCLUDE_DIR=${PYTHON_DIR}include/python3.5m | ||||
| PYTHON_LIBRARY=${PYTHON_DIR}/lib/libpython3.5m.dylib | |||||
| elif [ "$1" = "3.6.10" ]; then | elif [ "$1" = "3.6.10" ]; then | ||||
| PYTHON_INCLUDE_DIR=${PYTHON_DIR}include/python3.6m | PYTHON_INCLUDE_DIR=${PYTHON_DIR}include/python3.6m | ||||
| PYTHON_LIBRARY=${PYTHON_DIR}/lib/libpython3.6m.dylib | |||||
| elif [ "$1" = "3.7.7" ]; then | elif [ "$1" = "3.7.7" ]; then | ||||
| PYTHON_INCLUDE_DIR=${PYTHON_DIR}include/python3.7m | PYTHON_INCLUDE_DIR=${PYTHON_DIR}include/python3.7m | ||||
| PYTHON_LIBRARY=${PYTHON_DIR}/lib/libpython3.7m.dylib | |||||
| elif [ "$1" = "3.8.3" ]; then | elif [ "$1" = "3.8.3" ]; then | ||||
| PYTHON_INCLUDE_DIR=${PYTHON_DIR}include/python3.8 | PYTHON_INCLUDE_DIR=${PYTHON_DIR}include/python3.8 | ||||
| PYTHON_LIBRARY=${PYTHON_DIR}/lib/libpython3.8.dylib | |||||
| else | else | ||||
| echo "ERR: DO NOT SUPPORT PYTHON VERSION" | echo "ERR: DO NOT SUPPORT PYTHON VERSION" | ||||
| echo "now support list: ${FULL_PYTHON_VER}" | echo "now support list: ${FULL_PYTHON_VER}" | ||||
| @@ -82,6 +84,11 @@ function config_python_env() { | |||||
| fi | fi | ||||
| } | } | ||||
| if [[ -z ${BUILD_IMPERATIVE} ]] | |||||
| then | |||||
| BUILD_IMPERATIVE="OFF" | |||||
| fi | |||||
| function do_build() { | function do_build() { | ||||
| for ver in ${ALL_PYTHON} | for ver in ${ALL_PYTHON} | ||||
| do | do | ||||
| @@ -89,7 +96,7 @@ function do_build() { | |||||
| config_python_env ${ver} | config_python_env ${ver} | ||||
| #check env | #check env | ||||
| if [ ! -d "$PYTHON_LIBRARY" ]; then | |||||
| if [ ! -f "$PYTHON_LIBRARY" ]; then | |||||
| echo "ERR: can not find $PYTHON_LIBRARY , Invalid python package" | echo "ERR: can not find $PYTHON_LIBRARY , Invalid python package" | ||||
| err_env | err_env | ||||
| fi | fi | ||||
| @@ -102,14 +109,20 @@ function do_build() { | |||||
| #append cmake args for config python | #append cmake args for config python | ||||
| export EXTRA_CMAKE_ARGS="-DCMAKE_PREFIX_PATH=${PYTHON_DIR} -DPYTHON_LIBRARY=${PYTHON_LIBRARY} -DPYTHON_INCLUDE_DIR=${PYTHON_INCLUDE_DIR} " | export EXTRA_CMAKE_ARGS="-DCMAKE_PREFIX_PATH=${PYTHON_DIR} -DPYTHON_LIBRARY=${PYTHON_LIBRARY} -DPYTHON_INCLUDE_DIR=${PYTHON_INCLUDE_DIR} " | ||||
| #config build type to RelWithDebInfo to enable MGB_ENABLE_DEBUG_UTIL etc | #config build type to RelWithDebInfo to enable MGB_ENABLE_DEBUG_UTIL etc | ||||
| export EXTRA_CMAKE_ARGS=${EXTRA_CMAKE_ARGS}" -DCMAKE_BUILD_TYPE=RelWithDebInfo " | |||||
| export EXTRA_CMAKE_ARGS="${EXTRA_CMAKE_ARGS} -DCMAKE_BUILD_TYPE=RelWithDebInfo " | |||||
| #call build and install | #call build and install | ||||
| #FIXME: cmake do not trigger update python config, after | #FIXME: cmake do not trigger update python config, after | ||||
| #change PYTHON_LIBRARY and PYTHON_INCLUDE_DIR, so add | #change PYTHON_LIBRARY and PYTHON_INCLUDE_DIR, so add | ||||
| #-r to remove build cache after a new ver build, which | #-r to remove build cache after a new ver build, which | ||||
| #will be more slow build than without -r | #will be more slow build than without -r | ||||
| ${SRC_DIR}/scripts/cmake-build/host_build.sh -t -r | |||||
| if [ ${BUILD_IMPERATIVE} = "ON" ]; then | |||||
| echo "build whl with IMPERATIVE python rt" | |||||
| ${SRC_DIR}/scripts/cmake-build/host_build.sh -t -n -r | |||||
| else | |||||
| echo "build whl with legacy python rt" | |||||
| ${SRC_DIR}/scripts/cmake-build/host_build.sh -t -r | |||||
| fi | |||||
| #call setup.py | #call setup.py | ||||
| BUILD_DIR=${SRC_DIR}/build_dir/host/MGE_WITH_CUDA_OFF/MGE_INFERENCE_ONLY_OFF/Release/build/ | BUILD_DIR=${SRC_DIR}/build_dir/host/MGE_WITH_CUDA_OFF/MGE_INFERENCE_ONLY_OFF/Release/build/ | ||||
| @@ -121,12 +134,47 @@ function do_build() { | |||||
| fi | fi | ||||
| mkdir -p staging | mkdir -p staging | ||||
| if [ ${BUILD_IMPERATIVE} = "ON" ]; then | |||||
| echo "build whl with IMPERATIVE python rt" | |||||
| cp -a imperative/python/{megengine,setup.py,requires.txt,requires-style.txt,requires-test.txt} staging/ | |||||
| cd ${BUILD_DIR}/staging/megengine/core | |||||
| rt_file=`ls _imperative_rt.*.so` | |||||
| echo "rt file is: ${rt_file}" | |||||
| if [[ -z ${rt_file} ]] | |||||
| then | |||||
| echo "ERR: can not find valid rt file" | |||||
| exit -1 | |||||
| fi | |||||
| llvm-strip -s ${rt_file} | |||||
| mv ${rt_file} _imperative_rt.so | |||||
| echo "check so valid or not..." | |||||
| otool_out=`otool -L _imperative_rt.so` | |||||
| if [[ "${otool_out}" =~ "ython" ]]; then | |||||
| echo "ERR: invalid _imperative_rt.so which depend on python lib, detail: log" | |||||
| echo ${otool_out} | |||||
| exit -1 | |||||
| else | |||||
| echo "valid..." | |||||
| fi | |||||
| else | |||||
| echo "build whl with legacy python rt" | |||||
| cp -a python_module/{megengine,setup.py,requires.txt,requires-style.txt,requires-test.txt} staging/ | |||||
| cd ${BUILD_DIR}/staging/megengine/_internal | |||||
| #FIXME: set lib suffix to dylib may be better, BUT we find after distutils.file_util.copy_file | |||||
| #will change to .so at macos even we set suffix to dylib, at the same time, macos also support .so | |||||
| echo "check so valid or not..." | |||||
| llvm-strip -s _mgb.so | |||||
| otool_out=`otool -L _mgb.so` | |||||
| if [[ "${otool_out}" =~ "ython" ]]; then | |||||
| echo "ERR: invalid _mgb.so which depend on python lib, detail: log" | |||||
| echo ${otool_out} | |||||
| exit -1 | |||||
| else | |||||
| echo "valid..." | |||||
| fi | |||||
| fi | |||||
| cp -a python_module/{megengine,setup.py,requires.txt,requires-style.txt,requires-test.txt} staging/ | |||||
| cd ${BUILD_DIR}/staging/megengine/_internal | |||||
| #FIXME: set lib suffix to dylib may be better, BUT we find after distutils.file_util.copy_file | |||||
| #will change to .so at macos even we set suffix to dylib, at the same time, macos also support .so | |||||
| llvm-strip -s _mgb.so | |||||
| cd ${BUILD_DIR}/staging | cd ${BUILD_DIR}/staging | ||||
| ${PYTHON_DIR}/bin/python3 setup.py bdist_wheel | ${PYTHON_DIR}/bin/python3 setup.py bdist_wheel | ||||
| cd ${BUILD_DIR}/staging/dist/ | cd ${BUILD_DIR}/staging/dist/ | ||||
| @@ -14,8 +14,6 @@ function err_env() { | |||||
| } | } | ||||
| function append_path_env_and_check() { | function append_path_env_and_check() { | ||||
| echo "export swig pwd to PATH" | |||||
| export PATH=/c/Users/${USER}/swigwin-4.0.2::$PATH | |||||
| echo "export vs2019 install path" | echo "export vs2019 install path" | ||||
| export VS_PATH=/c/Program\ Files\ \(x86\)/Microsoft\ Visual\ Studio/2019/Enterprise | export VS_PATH=/c/Program\ Files\ \(x86\)/Microsoft\ Visual\ Studio/2019/Enterprise | ||||
| # for llvm-strip | # for llvm-strip | ||||
| @@ -62,7 +60,7 @@ function config_python_env() { | |||||
| if [[ -z ${WINDOWS_WHL_WITH_CUDA} ]] | if [[ -z ${WINDOWS_WHL_WITH_CUDA} ]] | ||||
| then | then | ||||
| WINDOWS_WHL_WITH_CUDA="false" | |||||
| WINDOWS_WHL_WITH_CUDA="OFF" | |||||
| fi | fi | ||||
| @@ -74,26 +72,46 @@ CUBLAS_LIB="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/bin/cublas6 | |||||
| CURAND_LIB="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/bin/curand64_10.dll" | CURAND_LIB="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/bin/curand64_10.dll" | ||||
| CUBLASLT_LIB="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/bin/cublasLt64_10.dll" | CUBLASLT_LIB="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/bin/cublasLt64_10.dll" | ||||
| CUDART_LIB="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/bin/cudart64_101.dll" | CUDART_LIB="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/bin/cudart64_101.dll" | ||||
| function depend_real_copy() { | |||||
| REAL_DST=$1 | |||||
| echo "real copy lib to $1" | |||||
| cp "${TRT_LIB}" ${REAL_DST} | |||||
| cp "${CUDNN_LIB}" ${REAL_DST} | |||||
| cp "${CUSOLVER_LIB}" ${REAL_DST} | |||||
| cp "${CUBLAS_LIB}" ${REAL_DST} | |||||
| cp "${CURAND_LIB}" ${REAL_DST} | |||||
| cp "${CUBLASLT_LIB}" ${REAL_DST} | |||||
| cp "${CUDART_LIB}" ${REAL_DST} | |||||
| } | |||||
| function copy_more_dll() { | function copy_more_dll() { | ||||
| # for python whl real use | # for python whl real use | ||||
| CP_DST=${BUILD_DIR}/staging/megengine/_internal/lib | |||||
| rm -rf ${CP_DST} | |||||
| mkdir ${CP_DST} | |||||
| if [ ${BUILD_IMPERATIVE} = "ON" ]; then | |||||
| echo "config BUILD_IMPERATIVE core lib dir" | |||||
| CP_WHL_DST=${BUILD_DIR}/staging/megengine/core/lib | |||||
| else | |||||
| echo "config legacy python lib dir" | |||||
| CP_WHL_DST=${BUILD_DIR}/staging/megengine/_internal/lib | |||||
| fi | |||||
| rm -rf ${CP_WHL_DST} | |||||
| mkdir ${CP_WHL_DST} | |||||
| # workaround for cpu-only version import failure, use an | |||||
| # empty.file to trigger setup.py to create a null empty | |||||
| echo "empty" > ${CP_WHL_DST}/empty.file | |||||
| if [ ${WINDOWS_WHL_WITH_CUDA} = "true" ]; then | |||||
| if [ ${WINDOWS_WHL_WITH_CUDA} = "ON" ]; then | |||||
| echo "copy nvidia lib to whl use...." | echo "copy nvidia lib to whl use...." | ||||
| cp "${TRT_LIB}" ${CP_DST} | |||||
| cp "${CUDNN_LIB}" ${CP_DST} | |||||
| cp "${CUSOLVER_LIB}" ${CP_DST} | |||||
| cp "${CUBLAS_LIB}" ${CP_DST} | |||||
| cp "${CURAND_LIB}" ${CP_DST} | |||||
| cp "${CUBLASLT_LIB}" ${CP_DST} | |||||
| cp "${CUDART_LIB}" ${CP_DST} | |||||
| depend_real_copy ${CP_WHL_DST} | |||||
| fi | fi | ||||
| } | } | ||||
| if [[ -z ${BUILD_IMPERATIVE} ]] | |||||
| then | |||||
| BUILD_IMPERATIVE="OFF" | |||||
| fi | |||||
| function do_build() { | function do_build() { | ||||
| for ver in ${ALL_PYTHON} | for ver in ${ALL_PYTHON} | ||||
| do | do | ||||
| @@ -118,21 +136,31 @@ function do_build() { | |||||
| #force LINK a real PYTHON_LIBRARY file, after test we do not find the symbols conflict with python | #force LINK a real PYTHON_LIBRARY file, after test we do not find the symbols conflict with python | ||||
| #export EXTRA_CMAKE_ARGS="-DPYTHON_LIBRARY=${PYTHON_LIBRARY} -DPYTHON_INCLUDE_DIR=${PYTHON_INCLUDE_DIR} " | #export EXTRA_CMAKE_ARGS="-DPYTHON_LIBRARY=${PYTHON_LIBRARY} -DPYTHON_INCLUDE_DIR=${PYTHON_INCLUDE_DIR} " | ||||
| #config build type to RelWithDebInfo to enable MGB_ENABLE_DEBUG_UTIL etc | #config build type to RelWithDebInfo to enable MGB_ENABLE_DEBUG_UTIL etc | ||||
| export EXTRA_CMAKE_ARGS=${EXTRA_CMAKE_ARGS}" -DCMAKE_BUILD_TYPE=RelWithDebInfo " | |||||
| export EXTRA_CMAKE_ARGS="${EXTRA_CMAKE_ARGS} -DCMAKE_BUILD_TYPE=RelWithDebInfo " | |||||
| #call build and install | #call build and install | ||||
| #FIXME: cmake do not trigger update python config, after | #FIXME: cmake do not trigger update python config, after | ||||
| #change PYTHON_LIBRARY and PYTHON_INCLUDE_DIR, so add | #change PYTHON_LIBRARY and PYTHON_INCLUDE_DIR, so add | ||||
| #-r to remove build cache after a new ver build, which | #-r to remove build cache after a new ver build, which | ||||
| #will be more slow build than without -r | #will be more slow build than without -r | ||||
| if [ ${WINDOWS_WHL_WITH_CUDA} = "true" ]; then | |||||
| BUILD_ARGS=" -t -r" | |||||
| if [ ${BUILD_IMPERATIVE} = "ON" ]; then | |||||
| echo "build whl with IMPERATIVE python rt" | |||||
| BUILD_ARGS="${BUILD_ARGS} -n " | |||||
| else | |||||
| echo "build whl with legacy python rt" | |||||
| fi | |||||
| if [ ${WINDOWS_WHL_WITH_CUDA} = "ON" ]; then | |||||
| echo "build windows whl with cuda" | echo "build windows whl with cuda" | ||||
| ${SRC_DIR}/scripts/cmake-build/host_build.sh -t -r -c | |||||
| BUILD_ARGS="${BUILD_ARGS} -c " | |||||
| else | else | ||||
| echo "build windows whl with cpu only" | echo "build windows whl with cpu only" | ||||
| ${SRC_DIR}/scripts/cmake-build/host_build.sh -t -r | |||||
| fi | fi | ||||
| echo "host_build.sh BUILD_ARGS: ${BUILD_ARGS}" | |||||
| ${SRC_DIR}/scripts/cmake-build/host_build.sh ${BUILD_ARGS} | |||||
| #call setup.py | #call setup.py | ||||
| BUILD_DIR=${SRC_DIR}/build_dir/host/build/ | BUILD_DIR=${SRC_DIR}/build_dir/host/build/ | ||||
| cd ${BUILD_DIR} | cd ${BUILD_DIR} | ||||
| @@ -143,10 +171,27 @@ function do_build() { | |||||
| fi | fi | ||||
| mkdir -p staging | mkdir -p staging | ||||
| if [ ${BUILD_IMPERATIVE} = "ON" ]; then | |||||
| echo "build whl with IMPERATIVE python rt" | |||||
| cp -a imperative/python/{megengine,setup.py,requires.txt,requires-style.txt,requires-test.txt} staging/ | |||||
| cd ${BUILD_DIR}/staging/megengine/core | |||||
| rt_file=`ls _imperative_rt.*.pyd` | |||||
| echo "rt file is: ${rt_file}" | |||||
| if [[ -z ${rt_file} ]] | |||||
| then | |||||
| echo "ERR: can not find valid rt file" | |||||
| exit -1 | |||||
| fi | |||||
| llvm-strip -s ${rt_file} | |||||
| mv ${rt_file} _imperative_rt.pyd | |||||
| else | |||||
| echo "build whl with legacy python rt" | |||||
| cp -a python_module/{megengine,setup.py,requires.txt,requires-style.txt,requires-test.txt} staging/ | |||||
| cd ${BUILD_DIR}/staging/megengine/_internal | |||||
| llvm-strip -s _mgb.pyd | |||||
| fi | |||||
| cp -a python_module/{megengine,setup.py,requires.txt,requires-style.txt,requires-test.txt} staging/ | |||||
| cd ${BUILD_DIR}/staging/megengine/_internal | |||||
| llvm-strip -s _mgb.pyd | |||||
| copy_more_dll | copy_more_dll | ||||
| cd ${BUILD_DIR}/staging | cd ${BUILD_DIR}/staging | ||||
| ${PYTHON_DIR}/python3 setup.py bdist_wheel | ${PYTHON_DIR}/python3 setup.py bdist_wheel | ||||
| @@ -175,5 +220,6 @@ function third_party_prepare() { | |||||
| } | } | ||||
| ###################### | ###################### | ||||
| export ALREADY_CONFIG_PYTHON_VER="yes" | |||||
| third_party_prepare | third_party_prepare | ||||
| do_build | do_build | ||||
| @@ -33,6 +33,11 @@ class RNGxorshf { | |||||
| uint64_t s[2]; | uint64_t s[2]; | ||||
| public: | public: | ||||
| #if __cplusplus >= 201703L | |||||
| typedef uint64_t result_type; | |||||
| static constexpr uint64_t min() { return 0; } | |||||
| static constexpr uint64_t max() { return UINT64_MAX; } | |||||
| #endif | |||||
| RNGxorshf(uint64_t seed) { | RNGxorshf(uint64_t seed) { | ||||
| std::mt19937_64 gen(seed); | std::mt19937_64 gen(seed); | ||||
| s[0] = gen(); | s[0] = gen(); | ||||
| @@ -936,8 +941,12 @@ void SeqModifierForSublinearMemory::ActionSearcherSingleCN::search_genetic() { | |||||
| } | } | ||||
| } | } | ||||
| m_cur_records = records; | m_cur_records = records; | ||||
| #if __cplusplus >= 201703L | |||||
| std::shuffle(perm.begin(), perm.end(), rng); | |||||
| #else | |||||
| std::random_shuffle(perm.begin(), perm.end(), | std::random_shuffle(perm.begin(), perm.end(), | ||||
| [&](size_t x) { return rng() % x; }); | [&](size_t x) { return rng() % x; }); | ||||
| #endif | |||||
| for (size_t i = 0; i < length; ++i) { | for (size_t i = 0; i < length; ++i) { | ||||
| invoke_search(mutation(mutation(records[i].first))); | invoke_search(mutation(mutation(records[i].first))); | ||||
| invoke_search(crossover(records[i].first, records[perm[i]].first)); | invoke_search(crossover(records[i].first, records[perm[i]].first)); | ||||
| @@ -705,7 +705,12 @@ TEST(TestOprBlas, MatrixInverse) { | |||||
| } | } | ||||
| auto ptr = inp[0]->ptr<float>(); | auto ptr = inp[0]->ptr<float>(); | ||||
| for (size_t i = 0; i < batch; ++i, ptr += n * n) { | for (size_t i = 0; i < batch; ++i, ptr += n * n) { | ||||
| #if __cplusplus >= 201703L | |||||
| std::default_random_engine rng_engine; | |||||
| std::shuffle(perm.begin(), perm.end(), rng_engine); | |||||
| #else | |||||
| std::random_shuffle(perm.begin(), perm.end()); | std::random_shuffle(perm.begin(), perm.end()); | ||||
| #endif | |||||
| for (size_t j = 0; j < n; ++j) { | for (size_t j = 0; j < n; ++j) { | ||||
| ptr[j * n + perm[j]] += 5; | ptr[j * n + perm[j]] += 5; | ||||
| } | } | ||||
| @@ -36,7 +36,12 @@ void run_all_gather(const std::vector<size_t>& axis_size, bool& success, | |||||
| sleep_time.push_back(i * 0.05 + 0.1); | sleep_time.push_back(i * 0.05 + 0.1); | ||||
| tot_axis_size += axis_size[i]; | tot_axis_size += axis_size[i]; | ||||
| } | } | ||||
| #if __cplusplus >= 201703L | |||||
| std::default_random_engine rng_engine; | |||||
| std::shuffle(sleep_time.begin(), sleep_time.end(), rng_engine); | |||||
| #else | |||||
| std::random_shuffle(sleep_time.begin(), sleep_time.end()); | std::random_shuffle(sleep_time.begin(), sleep_time.end()); | ||||
| #endif | |||||
| auto constexpr DEVICE_TYPE = CompNode::DeviceType::CUDA; | auto constexpr DEVICE_TYPE = CompNode::DeviceType::CUDA; | ||||
| size_t nr_dev = std::min<size_t>( | size_t nr_dev = std::min<size_t>( | ||||
| @@ -18,7 +18,11 @@ endif() | |||||
| add_executable(megbrain_test ${SOURCES}) | add_executable(megbrain_test ${SOURCES}) | ||||
| target_link_libraries(megbrain_test gtest) | target_link_libraries(megbrain_test gtest) | ||||
| target_link_libraries(megbrain_test megengine) | |||||
| if(MSVC OR WIN32) | |||||
| target_link_libraries(megbrain_test megbrain megdnn) | |||||
| else() | |||||
| target_link_libraries(megbrain_test megengine) | |||||
| endif() | |||||
| if(CXX_SUPPORT_WCLASS_MEMACCESS) | if(CXX_SUPPORT_WCLASS_MEMACCESS) | ||||
| if(MGE_WITH_CUDA) | if(MGE_WITH_CUDA) | ||||
| target_compile_options(megbrain_test PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=-Wno-class-memaccess>" | target_compile_options(megbrain_test PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=-Wno-class-memaccess>" | ||||
| @@ -28,10 +32,12 @@ if(CXX_SUPPORT_WCLASS_MEMACCESS) | |||||
| endif() | endif() | ||||
| endif() | endif() | ||||
| if(APPLE OR ANDROID) | |||||
| target_link_libraries(megbrain_test dl) | |||||
| else() | |||||
| target_link_libraries(megbrain_test dl rt) | |||||
| if(UNIX) | |||||
| if(APPLE OR ANDROID) | |||||
| target_link_libraries(megbrain_test dl) | |||||
| else() | |||||
| target_link_libraries(megbrain_test dl rt) | |||||
| endif() | |||||
| endif() | endif() | ||||
| if (MGE_WITH_DISTRIBUTED) | if (MGE_WITH_DISTRIBUTED) | ||||