GitOrigin-RevId: 3bb0b46311
tags/v1.8.0
@@ -1145,6 +1145,14 @@ if(TARGET _imperative_rt)
     COMMAND ${CMAKE_COMMAND} -E create_symlink
       ${CMAKE_CURRENT_BINARY_DIR}/imperative/python/${PACKAGE_NAME}/version.py
       ${CMAKE_CURRENT_SOURCE_DIR}/imperative/python/${PACKAGE_NAME}/version.py
+    COMMAND ${CMAKE_COMMAND} -E create_symlink
+      ${CMAKE_CURRENT_SOURCE_DIR}/src/custom/include
+      ${CMAKE_CURRENT_SOURCE_DIR}/imperative/python/${PACKAGE_NAME}/core/include
+    COMMAND ${CMAKE_COMMAND} -E make_directory
+      ${CMAKE_CURRENT_SOURCE_DIR}/imperative/python/${PACKAGE_NAME}/core/lib
+    COMMAND ${CMAKE_COMMAND} -E create_symlink
+      ${CMAKE_CURRENT_BINARY_DIR}/src/$<TARGET_FILE_NAME:${MGE_SHARED_LIB}>
+      ${CMAKE_CURRENT_SOURCE_DIR}/imperative/python/${PACKAGE_NAME}/core/lib/$<TARGET_FILE_NAME:${MGE_SHARED_LIB}>
     DEPENDS _imperative_rt
     VERBATIM
   )
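These new rules make the custom-op headers and the megengine_shared library visible inside the Python package for develop builds. A minimal sanity check, assuming a develop install from this source tree (not part of this diff):

    import os

    import megengine

    pkg_dir = os.path.dirname(os.path.abspath(megengine.__file__))
    # core/include is symlinked to src/custom/include; core/lib holds megengine_shared
    assert os.path.isdir(os.path.join(pkg_dir, "core", "include"))
    assert os.path.isdir(os.path.join(pkg_dir, "core", "lib"))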
@@ -67,8 +67,11 @@ add_custom_command(
   COMMAND ${CMAKE_COMMAND} -E copy ${PROJECT_SOURCE_DIR}/LICENSE ${PROJECT_SOURCE_DIR}/ACKNOWLEDGMENTS ${PROJECT_BINARY_DIR}
   COMMAND ${CMAKE_COMMAND} -E remove -f ${CMAKE_CURRENT_SOURCE_DIR}/python/megengine/core/$<TARGET_FILE_NAME:${MODULE_NAME}> # clean develop
   COMMAND ${CMAKE_COMMAND} -E remove -f ${CMAKE_CURRENT_SOURCE_DIR}/python/megengine/version.py # clean develop
+  COMMAND ${CMAKE_COMMAND} -E remove -f ${CMAKE_CURRENT_SOURCE_DIR}/python/megengine/core/include # clean develop
+  COMMAND ${CMAKE_COMMAND} -E remove -f ${CMAKE_CURRENT_SOURCE_DIR}/python/megengine/core/lib # clean develop
   COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_SOURCE_DIR}/python/megengine ${CMAKE_CURRENT_BINARY_DIR}/python/megengine
   COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_SOURCE_DIR}/python/test ${CMAKE_CURRENT_BINARY_DIR}/python/test
+  COMMAND ${CMAKE_COMMAND} -E copy_directory ${PROJECT_SOURCE_DIR}/src/custom/include ${CMAKE_CURRENT_BINARY_DIR}/python/megengine/core/include
   COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/python/setup.py ${CMAKE_CURRENT_BINARY_DIR}/python/setup.py
   COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/python/requires.txt ${CMAKE_CURRENT_BINARY_DIR}/python/requires.txt
   COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/python/requires-style.txt ${CMAKE_CURRENT_BINARY_DIR}/python/requires-style.txt
@@ -7,11 +7,14 @@
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+import os
+
 from .._imperative_rt.ops._custom import (
     _get_custom_op_list,
     _install,
     _make_custom_op,
     _uninstall,
+    get_custom_op_abi_tag,
 )

 __all__ = ["load"]
@@ -25,8 +28,16 @@ def _gen_custom_op_maker(custom_op_name):

 def load(lib_path):
-    op_in_this_lib = _install(lib_path[0:-3], lib_path)
+    lib_path = os.path.abspath(lib_path)
+    lib_name = os.path.splitext(lib_path)[0]
+    op_in_this_lib = _install(lib_name, lib_path)
     for op in op_in_this_lib:
         op_maker = _gen_custom_op_maker(op)
         globals()[op] = op_maker
         __all__.append(op)
+
+
+def unload(lib_path):
+    lib_path = os.path.abspath(lib_path)
+    lib_name = os.path.splitext(lib_path)[0]
+    _uninstall(lib_name)
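With `unload` added, a custom-op library can now be registered and later removed by path. A usage sketch (the library path is hypothetical):

    from megengine.core.ops import custom

    custom.load("/path/to/my_ops.so")    # installs every op found in the library
    print(custom._get_custom_op_list())  # the new op names now appear here
    custom.unload("/path/to/my_ops.so")  # removes them again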
@@ -0,0 +1,909 @@
# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
import collections
import ctypes
import glob
import os
import re
import subprocess
import sys
import time
from typing import List, Optional, Union

from ..core.ops.custom import load
from ..logger import get_logger


def _get_win_folder_with_ctypes(csidl_name):
    csidl_const = {
        "CSIDL_APPDATA": 26,
        "CSIDL_COMMON_APPDATA": 35,
        "CSIDL_LOCAL_APPDATA": 28,
    }[csidl_name]

    buf = ctypes.create_unicode_buffer(1024)
    ctypes.windll.shell32.SHGetFolderPathW(None, csidl_const, None, 0, buf)

    # Downgrade to short path name if it has high-bit chars. See
    # <http://bugs.activestate.com/show_bug.cgi?id=85099>.
    has_high_char = False
    for c in buf:
        if ord(c) > 255:
            has_high_char = True
            break
    if has_high_char:
        buf2 = ctypes.create_unicode_buffer(1024)
        if ctypes.windll.kernel32.GetShortPathNameW(buf.value, buf2, 1024):
            buf = buf2

    return buf.value


system = sys.platform
if system == "win32":
    _get_win_folder = _get_win_folder_with_ctypes

PLAT_TO_VCVARS = {
    "win-amd64": "x86_amd64",
}

logger = get_logger()
# environment variables
ev_custom_op_root_dir = "MGE_CUSTOM_OP_DIR"
ev_cuda_root_dir = "CUDA_ROOT_DIR"
ev_cudnn_root_dir = "CUDNN_ROOT_DIR"

# operating system
IS_WINDOWS = system == "win32"
IS_LINUX = system == "linux"
IS_MACOS = system == "darwin"

MGE_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
MGE_INC_PATH = os.path.join(MGE_PATH, "core", "include")
MGE_LIB_PATH = os.path.join(MGE_PATH, "core", "lib")
MGE_ABI_VER = 0

# minimum compiler versions
MINIMUM_GCC_VERSION = (5, 0, 0)
MINIMUM_CLANG_CL_VERSION = (12, 0, 1)

# compile flags
COMMON_MSVC_FLAGS = [
    "/MD",
    "/wd4002",
    "/wd4819",
    "/EHsc",
]

MSVC_IGNORE_CUDAFE_WARNINGS = [
    "field_without_dll_interface",
]

COMMON_NVCC_FLAGS = []


# Find the CUDA install path.
def _find_cuda_root_dir() -> Optional[str]:
    cuda_root_dir = os.environ.get(ev_cuda_root_dir)
    if cuda_root_dir is None:
        try:
            which = "where" if IS_WINDOWS else "which"
            with open(os.devnull, "w") as devnull:
                nvcc = (
                    subprocess.check_output([which, "nvcc"], stderr=devnull)
                    .decode()
                    .rstrip("\r\n")
                )
                cuda_root_dir = os.path.dirname(os.path.dirname(nvcc))
        except Exception:
            if IS_WINDOWS:
                cuda_root_dir = os.environ.get("CUDA_PATH", None)
                if cuda_root_dir is None:
                    cuda_root_dirs = glob.glob(
                        "C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v*.*"
                    )
                    cuda_root_dir = cuda_root_dirs[0] if cuda_root_dirs else ""
            else:
                cuda_root_dir = "/usr/local/cuda"
            if not os.path.exists(cuda_root_dir):
                cuda_root_dir = None
    return cuda_root_dir


def _find_cudnn_root_dir() -> Optional[str]:
    return os.environ.get(ev_cudnn_root_dir)


CUDA_ROOT_DIR = _find_cuda_root_dir()
CUDNN_ROOT_DIR = _find_cudnn_root_dir()
#####################################################################
# Phase 1
#####################################################################
def _is_cuda_file(path: str) -> bool:
    valid_ext = [".cu", ".cuh"]
    return os.path.splitext(path)[1] in valid_ext


# Return the full path of the user-specific cache dir for this application.
# Typical user cache directories are:
#     Mac OS X: ~/Library/Caches/<AppName>
#     Unix:     ~/.cache/<AppName>    (XDG default)
#     Windows:  C:\Users\<username>\AppData\Local\<AppAuthor>\<AppName>\Cache
def _get_user_cache_dir(appname=None, appauthor=None, version=None, opinion=True):
    if system == "win32":
        appauthor = appname if appauthor is None else appauthor
        path = os.path.normpath(_get_win_folder("CSIDL_LOCAL_APPDATA"))
        if appname:
            if appauthor is not False:
                path = os.path.join(path, appauthor)
            else:
                path = os.path.join(path, appname)
            if opinion:
                path = os.path.join(path, "Cache")
    elif system == "darwin":
        path = os.path.expanduser("~/Library/Caches")
        if appname:
            path = os.path.join(path, appname)
    else:
        path = os.getenv("XDG_CACHE_HOME", os.path.expanduser("~/.cache"))
        if appname:
            path = os.path.join(path, appname)
    if appname and version:
        path = os.path.join(path, version)
    return path


# Return the root folder under which custom ops will be built.
def _get_default_build_root() -> str:
    return os.path.realpath(_get_user_cache_dir(appname="mge_custom_op"))


def _get_build_dir(name: str) -> str:
    custom_op_root_dir = os.environ.get(ev_custom_op_root_dir)
    if custom_op_root_dir is None:
        custom_op_root_dir = _get_default_build_root()
    build_dir = os.path.join(custom_op_root_dir, name)
    return build_dir
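For illustration, build directories resolve like this (the op name and override path are hypothetical):

    print(_get_build_dir("my_op"))
    # default: <user cache dir>/mge_custom_op/my_op, e.g. ~/.cache/mge_custom_op/my_op on Linux

    os.environ["MGE_CUSTOM_OP_DIR"] = "/tmp/my_ops"
    print(_get_build_dir("my_op"))  # -> /tmp/my_ops/my_op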
#####################################################################
# Phase 2
#####################################################################
def update_hash(seed, value):
    # using boost::hash_combine, see
    # https://www.boost.org/doc/libs/1_35_0/doc/html/boost/hash_combine_id241013.html
    return seed ^ (hash(value) + 0x9E3779B9 + (seed << 6) + (seed >> 2))


def hash_source_files(hash_value, source_files):
    for filename in source_files:
        with open(filename) as file:
            hash_value = update_hash(hash_value, file.read())
    return hash_value


def hash_build_args(hash_value, build_args):
    for group in build_args:
        for arg in group:
            hash_value = update_hash(hash_value, arg)
    return hash_value


Entry = collections.namedtuple("Entry", "version, hash")


class Versioner(object):
    def __init__(self):
        self.entries = {}

    def get_version(self, name):
        entry = self.entries.get(name)
        return None if entry is None else entry.version

    def bump_version_if_changed(
        self, name, sources, build_args, build_dir, with_cuda, with_cudnn, abi_tag
    ):
        hash_value = 0
        hash_value = hash_source_files(hash_value, sources)
        hash_value = hash_build_args(hash_value, build_args)
        hash_value = update_hash(hash_value, build_dir)
        hash_value = update_hash(hash_value, with_cuda)
        hash_value = update_hash(hash_value, with_cudnn)
        hash_value = update_hash(hash_value, abi_tag)

        entry = self.entries.get(name)
        if entry is None:
            self.entries[name] = entry = Entry(0, hash_value)
        elif hash_value != entry.hash:
            self.entries[name] = entry = Entry(entry.version + 1, hash_value)
        return entry.version


custom_op_versioner = Versioner()


def version_check(
    name, sources, build_args, build_dir, with_cuda, with_cudnn, abi_tag,
):
    old_version = custom_op_versioner.get_version(name)
    version = custom_op_versioner.bump_version_if_changed(
        name, sources, build_args, build_dir, with_cuda, with_cudnn, abi_tag,
    )
    return version, old_version
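A small sketch of how the versioner behaves within one process: identical inputs keep the version, while any change to the flags (or sources, build dir, CUDA/cuDNN usage, ABI tag) bumps it, which in turn changes the emitted library name below.

    v = Versioner()
    ver_a = v.bump_version_if_changed("my_op", [], [["-O2"]], "/tmp/b", False, False, 0)
    ver_b = v.bump_version_if_changed("my_op", [], [["-O2"]], "/tmp/b", False, False, 0)
    assert ver_a == ver_b == 0  # nothing changed, so the version is stable
    ver_c = v.bump_version_if_changed("my_op", [], [["-O3"]], "/tmp/b", False, False, 0)
    assert ver_c == 1           # changed cflags, so the version is bumped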
#####################################################################
# Phase 3
#####################################################################
def _check_ninja_availability():
    try:
        subprocess.check_output("ninja --version".split())
    except Exception:
        raise RuntimeError(
            "Ninja is required to build custom ops; please install ninja and update your PATH"
        )


def _mge_is_built_from_src():
    file_path = os.path.abspath(__file__)
    return "site-packages" not in file_path


def _accepted_compilers_for_platform():
    if IS_WINDOWS:
        return ["clang-cl"]
    if IS_MACOS:
        return ["clang++", "clang"]
    if IS_LINUX:
        return ["g++", "gcc", "gnu-c++", "gnu-cc"]


# Verify that the compiler is the expected one for the current platform.
def _check_compiler_existed_for_platform(compiler: str) -> bool:
    # There is no suitable command like `which` on Windows, so probe clang-cl directly.
    if IS_WINDOWS:
        try:
            subprocess.check_output(
                ["clang-cl", "--version"], stderr=subprocess.STDOUT
            )
            return True
        except Exception:
            return False

    # Use os.path.realpath to resolve any symlinks, in particular from "c++" to e.g. "g++".
    which = subprocess.check_output(["which", compiler], stderr=subprocess.STDOUT)
    compiler_path = os.path.realpath(which.decode().strip())
    if any(name in compiler_path for name in _accepted_compilers_for_platform()):
        return True

    version_string = subprocess.check_output(
        [compiler, "-v"], stderr=subprocess.STDOUT
    ).decode()
    if sys.platform.startswith("linux"):
        pattern = re.compile("^COLLECT_GCC=(.*)$", re.MULTILINE)
        results = re.findall(pattern, version_string)
        if len(results) != 1:
            return False
        compiler_path = os.path.realpath(results[0].strip())
        return any(name in compiler_path for name in _accepted_compilers_for_platform())
    if sys.platform.startswith("darwin"):
        return version_string.startswith("Apple clang")
    return False


# Verify that the given compiler is ABI-compatible with MegEngine.
def _check_compiler_abi_compatibility(compiler: str):
    # If MegEngine was built from source, we assume the user compiles
    # the custom op with the same compiler.
    if _mge_is_built_from_src() or os.environ.get("MGE_CHECK_ABI", "1") == "0":
        return True

    # [TODO] There is no particular minimum version we need for clang, so we're good here.
    if sys.platform.startswith("darwin"):
        return True

    try:
        if sys.platform.startswith("linux"):
            minimum_required_version = MINIMUM_GCC_VERSION
            versionstr = subprocess.check_output(
                [compiler, "-dumpfullversion", "-dumpversion"]
            )
            version = versionstr.decode().strip().split(".")
        else:
            minimum_required_version = MINIMUM_CLANG_CL_VERSION
            compiler_info = subprocess.check_output(
                [compiler, "--version"], stderr=subprocess.STDOUT
            )
            match = re.search(r"(\d+)\.(\d+)\.(\d+)", compiler_info.decode().strip())
            version = (0, 0, 0) if match is None else match.groups()
    except Exception:
        _, error, _ = sys.exc_info()
        logger.warning(
            "Error checking compiler version for {}: {}".format(compiler, error)
        )
        return False

    return tuple(map(int, version)) >= minimum_required_version
def _check_compiler_compatibility():
    # We use clang-cl on Windows; see https://clang.llvm.org/docs/UsersManual.html#clang-cl
    compiler = (
        os.environ.get("CXX", "clang-cl")
        if IS_WINDOWS
        else os.environ.get("CXX", "c++")
    )

    existed = _check_compiler_existed_for_platform(compiler)
    if not existed:
        log_str = (
            "Cannot find a compiler compatible with the one MegEngine was "
            "built with for this platform, which is {mge_compiler} on "
            "{platform}. Please use {mge_compiler} to compile your extension. "
            "Alternatively, you may compile MegEngine from source using "
            "{user_compiler}, and then you can also use {user_compiler} to compile "
            "your extension."
        ).format(
            user_compiler=compiler,
            mge_compiler=_accepted_compilers_for_platform()[0],
            platform=sys.platform,
        )
        logger.warning(log_str)
        return False

    compatible = _check_compiler_abi_compatibility(compiler)
    if not compatible:
        log_str = (
            "Your compiler version may be ABI-incompatible with MegEngine! "
            "Please use a compiler that is ABI-compatible with GCC 5.0 on Linux "
            "and LLVM/Clang 12.0 on Windows."
        )
        logger.warning(log_str)
    return True
#####################################################################
# Phase 4
#####################################################################
# Quote command-line arguments for DOS/Windows conventions.
def _nt_quote_args(args: Optional[List[str]]) -> List[str]:
    # cover None-type
    if not args:
        return []
    return ['"{}"'.format(arg) if " " in arg else arg for arg in args]


# For now, users must specify the GPU arch flags themselves.
def _get_cuda_arch_flags(cflags: Optional[List[str]] = None) -> List[str]:
    return []


def _setup_sys_includes(with_cuda: bool, with_cudnn: bool):
    includes = [os.path.join(MGE_INC_PATH)]
    if with_cuda:
        includes.append(os.path.join(CUDA_ROOT_DIR, "include"))
    if with_cudnn:
        includes.append(os.path.join(CUDNN_ROOT_DIR, "include"))
    return includes


def _setup_includes(extra_include_paths: List[str], with_cuda: bool, with_cudnn: bool):
    user_includes = [os.path.abspath(path) for path in extra_include_paths]
    system_includes = _setup_sys_includes(with_cuda, with_cudnn)
    if IS_WINDOWS:
        user_includes += system_includes
        system_includes.clear()
    return user_includes, system_includes


def _setup_common_cflags(user_includes: List[str], system_includes: List[str]):
    common_cflags = []
    common_cflags += ["-I{}".format(include) for include in user_includes]
    common_cflags += ["-isystem {}".format(include) for include in system_includes]
    if not IS_WINDOWS:
        common_cflags += ["-D_GLIBCXX_USE_CXX11_ABI={}".format(MGE_ABI_VER)]
    return common_cflags


def _setup_cuda_cflags(cflags: List[str], extra_cuda_cflags: List[str]):
    cuda_flags = cflags + COMMON_NVCC_FLAGS + _get_cuda_arch_flags()
    if IS_WINDOWS:
        for flag in COMMON_MSVC_FLAGS:
            cuda_flags = ["-Xcompiler", flag] + cuda_flags
        for ignore_warning in MSVC_IGNORE_CUDAFE_WARNINGS:
            cuda_flags = ["-Xcudafe", "--diag_suppress=" + ignore_warning] + cuda_flags
        cuda_flags = _nt_quote_args(cuda_flags)
        cuda_flags += _nt_quote_args(extra_cuda_cflags)
    else:
        cuda_flags += ["--compiler-options", '"-fPIC"']
        cuda_flags += extra_cuda_cflags
        if not any(flag.startswith("-std=") for flag in cuda_flags):
            cuda_flags.append("-std=c++14")
        if os.getenv("CC") is not None:
            cuda_flags = ["-ccbin", os.getenv("CC")] + cuda_flags
    return cuda_flags


def _setup_ldflags(
    extra_ldflags: List[str], with_cuda: bool, with_cudnn: bool
) -> List[str]:
    ldflags = extra_ldflags
    if IS_WINDOWS:
        ldflags.append(os.path.join(MGE_LIB_PATH, "megengine_shared.lib"))
        if with_cuda:
            ldflags.append(os.path.join(CUDA_ROOT_DIR, "lib", "x64", "cudart.lib"))
        if with_cudnn:
            ldflags.append(os.path.join(CUDNN_ROOT_DIR, "lib", "x64", "cudnn.lib"))
    else:
        ldflags.append("-lmegengine_shared -L{}".format(MGE_LIB_PATH))
        ldflags.append("-Wl,-rpath,{}".format(MGE_LIB_PATH))
        if with_cuda:
            ldflags.append("-lcudart")
            ldflags.append("-L{}".format(os.path.join(CUDA_ROOT_DIR, "lib64")))
            ldflags.append("-Wl,-rpath,{}".format(os.path.join(CUDA_ROOT_DIR, "lib64")))
        if with_cudnn:
            ldflags.append("-L{}".format(os.path.join(CUDNN_ROOT_DIR, "lib64")))
            ldflags.append(
                "-Wl,-rpath,{}".format(os.path.join(CUDNN_ROOT_DIR, "lib64"))
            )
    return ldflags


def _add_shared_flag(ldflags: List[str]):
    ldflags += ["/LD" if IS_WINDOWS else "-shared"]
    return ldflags
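As an illustration, on Linux the helpers above compose flags roughly as follows (the extra include path is hypothetical):

    user_inc, sys_inc = _setup_includes(["./include"], with_cuda=False, with_cudnn=False)
    cflags = _setup_common_cflags(user_inc, sys_inc)
    # -> ["-I/abs/path/include", "-isystem <MGE_INC_PATH>", "-D_GLIBCXX_USE_CXX11_ABI=0"]

    ldflags = _add_shared_flag(_setup_ldflags([], with_cuda=False, with_cudnn=False))
    # -> ["-lmegengine_shared -L<MGE_LIB_PATH>", "-Wl,-rpath,<MGE_LIB_PATH>", "-shared"]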
#####################################################################
# Phase 5
#####################################################################
def _obj_file_path(src_file_path: str):
    file_name = os.path.splitext(os.path.basename(src_file_path))[0]
    if _is_cuda_file(src_file_path):
        target = "{}.cuda.o".format(file_name)
    else:
        target = "{}.o".format(file_name)
    return target


def _dump_ninja_file(
    path,
    cflags,
    post_cflags,
    cuda_cflags,
    cuda_post_cflags,
    sources,
    objects,
    ldflags,
    library_target,
    with_cuda,
):
    def sanitize_flags(flags):
        return [] if flags is None else [flag.strip() for flag in flags]

    cflags = sanitize_flags(cflags)
    post_cflags = sanitize_flags(post_cflags)
    cuda_cflags = sanitize_flags(cuda_cflags)
    cuda_post_cflags = sanitize_flags(cuda_post_cflags)
    ldflags = sanitize_flags(ldflags)

    assert len(sources) == len(objects)
    assert len(sources) > 0

    if IS_WINDOWS:
        compiler = os.environ.get("CXX", "clang-cl")
    else:
        compiler = os.environ.get("CXX", "c++")

    # Version 1.3 is required for the `deps` directive.
    config = ["ninja_required_version = 1.3"]
    config.append("cxx = {}".format(compiler))
    if with_cuda:
        nvcc = os.path.join(CUDA_ROOT_DIR, "bin", "nvcc")
        config.append("nvcc = {}".format(nvcc))

    flags = ["cflags = {}".format(" ".join(cflags))]
    flags.append("post_cflags = {}".format(" ".join(post_cflags)))
    if with_cuda:
        flags.append("cuda_cflags = {}".format(" ".join(cuda_cflags)))
        flags.append("cuda_post_cflags = {}".format(" ".join(cuda_post_cflags)))
    flags.append("ldflags = {}".format(" ".join(ldflags)))

    # Turn into absolute paths so we can emit them into the ninja build
    # file wherever it is.
    sources = [os.path.abspath(file) for file in sources]

    # See https://ninja-build.org/build.ninja.html for reference.
    compile_rule = ["rule compile"]
    if IS_WINDOWS:
        compile_rule.append(
            "  command = clang-cl /showIncludes $cflags -c $in /Fo$out $post_cflags"
        )
        compile_rule.append("  deps = msvc")
    else:
        compile_rule.append(
            "  command = $cxx -MMD -MF $out.d $cflags -c $in -o $out $post_cflags"
        )
        compile_rule.append("  depfile = $out.d")
        compile_rule.append("  deps = gcc")

    if with_cuda:
        cuda_compile_rule = ["rule cuda_compile"]
        nvcc_gendeps = ""
        cuda_compile_rule.append(
            "  command = $nvcc {} $cuda_cflags -c $in -o $out $cuda_post_cflags".format(
                nvcc_gendeps
            )
        )

    # Emit one build rule per source to enable incremental build.
    build = []
    for source_file, object_file in zip(sources, objects):
        is_cuda_source = _is_cuda_file(source_file) and with_cuda
        rule = "cuda_compile" if is_cuda_source else "compile"
        if IS_WINDOWS:
            source_file = source_file.replace(":", "$:")
            object_file = object_file.replace(":", "$:")
        source_file = source_file.replace(" ", "$ ")
        object_file = object_file.replace(" ", "$ ")
        build.append("build {}: {} {}".format(object_file, rule, source_file))

    if library_target is not None:
        link_rule = ["rule link"]
        if IS_WINDOWS:
            link_rule.append("  command = clang-cl $in /nologo $ldflags /out:$out")
        else:
            link_rule.append("  command = $cxx $in $ldflags -o $out")
        link = ["build {}: link {}".format(library_target, " ".join(objects))]
        default = ["default {}".format(library_target)]
    else:
        link_rule, link, default = [], [], []

    # 'Blocks' should be separated by newlines, for visual benefit.
    blocks = [config, flags, compile_rule]
    if with_cuda:
        blocks.append(cuda_compile_rule)
    blocks += [link_rule, build, link, default]
    with open(path, "w") as build_file:
        for block in blocks:
            lines = "\n".join(block)
            build_file.write("{}\n\n".format(lines))
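For a single C++ source on Linux without CUDA, the emitted file looks roughly like the sketch below (hypothetical names, trimmed for brevity):

    _dump_ninja_file(
        path="build.ninja", cflags=["-fPIC", "-std=c++14"], post_cflags=None,
        cuda_cflags=None, cuda_post_cflags=None, sources=["my_op.cpp"],
        objects=["my_op.o"], ldflags=["-shared"], library_target="my_op_v0.so",
        with_cuda=False,
    )
    # build.ninja then contains, roughly:
    #   ninja_required_version = 1.3
    #   cxx = c++
    #
    #   cflags = -fPIC -std=c++14
    #   post_cflags =
    #   ldflags = -shared
    #
    #   rule compile
    #     command = $cxx -MMD -MF $out.d $cflags -c $in -o $out $post_cflags
    #     depfile = $out.d
    #     deps = gcc
    #
    #   rule link
    #     command = $cxx $in $ldflags -o $out
    #
    #   build my_op.o: compile /abs/path/my_op.cpp
    #
    #   build my_op_v0.so: link my_op.o
    #
    #   default my_op_v0.so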
class FileBaton:
    def __init__(self, lock_file_path, wait_seconds=0.1):
        self.lock_file_path = lock_file_path
        self.wait_seconds = wait_seconds
        self.fd = None

    def try_acquire(self):
        try:
            self.fd = os.open(self.lock_file_path, os.O_CREAT | os.O_EXCL)
            return True
        except FileExistsError:
            return False

    def wait(self):
        while os.path.exists(self.lock_file_path):
            time.sleep(self.wait_seconds)

    def release(self):
        if self.fd is not None:
            os.close(self.fd)
        os.remove(self.lock_file_path)
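The baton serializes concurrent builds of the same op across processes; ``build()`` below uses it exactly as in this sketch (lock path hypothetical):

    baton = FileBaton(os.path.join("/tmp/mge_custom_op/my_op", "lock"))
    if baton.try_acquire():
        try:
            ...  # winner: emit build.ninja and run ninja
        finally:
            baton.release()
    else:
        baton.wait()  # everyone else: poll until the lock file disappears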
#####################################################################
# Phase 6
#####################################################################
def _build_with_ninja(build_dir: str, verbose: bool, error_prefix: str):
    command = ["ninja", "-v"]
    env = os.environ.copy()
    try:
        sys.stdout.flush()
        sys.stderr.flush()
        stdout_fileno = 1
        subprocess.run(
            command,
            stdout=stdout_fileno if verbose else subprocess.PIPE,
            stderr=subprocess.STDOUT,
            cwd=build_dir,
            check=True,
            env=env,
        )
    except subprocess.CalledProcessError as e:
        # dump the generated build.ninja to help diagnose the failure
        with open(os.path.join(build_dir, "build.ninja")) as f:
            print(f.read())
        _, error, _ = sys.exc_info()
        message = error_prefix
        if hasattr(error, "output") and error.output:
            message += ": {}".format(error.output.decode())
        raise RuntimeError(message) from e
def build(
    name: str,
    sources: Union[str, List[str]],
    extra_cflags: Union[str, List[str]] = [],
    extra_cuda_cflags: Union[str, List[str]] = [],
    extra_ldflags: Union[str, List[str]] = [],
    extra_include_paths: Union[str, List[str]] = [],
    with_cuda: Optional[bool] = None,
    build_dir: Optional[str] = None,
    verbose: bool = False,
    abi_tag: Optional[int] = None,
) -> str:
    r"""Build a custom op with Ninja, just-in-time (JIT).

    To build the custom op, a Ninja build file is emitted, which is used to
    compile the given sources into a dynamic library.

    By default, the directory to which the build file is emitted and the
    resulting library is compiled to is ``<tmp>/mge_custom_op/<name>``, where
    ``<tmp>`` is the temporary folder on the current platform and ``<name>``
    is the name of the custom op. This location can be overridden in two ways.
    First, if the ``MGE_CUSTOM_OP_DIR`` environment variable is set, it
    replaces ``<tmp>/mge_custom_op`` and all custom ops will be compiled
    into subfolders of this directory. Second, if the ``build_dir``
    argument to this function is supplied, it overrides the entire path, i.e.
    the library will be compiled into that folder directly.

    To compile the sources, the default system compiler (``c++``) is used,
    which can be overridden by setting the ``CXX`` environment variable. To pass
    additional arguments to the compilation process, ``extra_cflags`` or
    ``extra_ldflags`` can be provided. For example, to compile your custom op
    with optimizations, pass ``extra_cflags=['-O3']``. You can also use
    ``extra_cflags`` to pass further include directories.

    CUDA support with mixed compilation is provided. Simply pass CUDA source
    files (``.cu`` or ``.cuh``) along with other sources. Such files will be
    detected and compiled with nvcc rather than the C++ compiler. This includes
    passing the CUDA lib64 directory as a library directory, and linking
    ``cudart``. You can pass additional flags to nvcc via
    ``extra_cuda_cflags``, just like with ``extra_cflags`` for C++. Various
    heuristics for finding the CUDA install directory are used, which usually
    work fine. If not, setting the ``CUDA_ROOT_DIR`` environment variable is the
    safest option. If you use cuDNN, please also set the ``CUDNN_ROOT_DIR``
    environment variable.

    Args:
        name: the name of the custom op to build.
        sources: a list of relative or absolute paths to C++ source files.
        extra_cflags: optional list of compiler flags to forward to the build.
        extra_cuda_cflags: optional list of compiler flags to forward to nvcc
            when building CUDA sources.
        extra_ldflags: optional list of linker flags to forward to the build.
        extra_include_paths: optional list of include directories to forward
            to the build.
        with_cuda: determines whether CUDA headers and libraries are added to
            the build. If set to ``None`` (default), this value is
            automatically determined based on the existence of ``.cu`` or
            ``.cuh`` files in ``sources``. Set it to ``True`` to force CUDA
            headers and libraries to be included.
        build_dir: optional path to use as the build workspace.
        verbose: if ``True``, turns on verbose logging of build steps.
        abi_tag: determines the value of the macro ``_GLIBCXX_USE_CXX11_ABI``
            passed to gcc; should be ``0`` or ``1``.

    Returns:
        the path of the compiled dynamic library.
    """
    # phase 1: prepare config
    if abi_tag is not None:
        global MGE_ABI_VER
        MGE_ABI_VER = abi_tag

    def strlist(args, name):
        assert isinstance(args, str) or isinstance(
            args, list
        ), "{} must be str or list[str]".format(name)
        if isinstance(args, str):
            return [args]
        for arg in args:
            assert isinstance(arg, str)
        args = [arg.strip() for arg in args]
        return args

    sources = strlist(sources, "sources")
    extra_cflags = strlist(extra_cflags, "extra_cflags")
    extra_cuda_cflags = strlist(extra_cuda_cflags, "extra_cuda_cflags")
    extra_ldflags = strlist(extra_ldflags, "extra_ldflags")
    extra_include_paths = strlist(extra_include_paths, "extra_include_paths")

    with_cuda = any(map(_is_cuda_file, sources)) if with_cuda is None else with_cuda
    with_cudnn = any("cudnn" in f for f in extra_ldflags)

    if CUDA_ROOT_DIR is None and with_cuda:
        print(
            "No CUDA runtime found; please set {}=/path/to/your/cuda_root_dir".format(
                ev_cuda_root_dir
            )
        )
    if CUDNN_ROOT_DIR is None and with_cudnn:
        print(
            "Cannot find the root directory of cuDNN; please set {}=/path/to/your/cudnn_root_dir".format(
                ev_cudnn_root_dir
            )
        )

    build_dir = os.path.abspath(
        _get_build_dir(name) if build_dir is None else build_dir
    )
    if not os.path.exists(build_dir):
        os.makedirs(build_dir, exist_ok=True)
    if verbose:
        print("Using {} to build megengine custom op".format(build_dir))

    # phase 2: version check
    version, old_version = version_check(
        name,
        sources,
        [extra_cflags, extra_cuda_cflags, extra_ldflags, extra_include_paths],
        build_dir,
        with_cuda,
        with_cudnn,
        abi_tag,
    )
    if verbose:
        if version != old_version and old_version is not None:
            print(
                "Input conditions of custom op {} have changed, bumping to version {}".format(
                    name, version
                )
            )
        print("Building custom op {} with version {}".format(name, version))
    if version == old_version:
        if verbose:
            print(
                "No modifications detected for {}, skipping build step...".format(name)
            )
        # return the library produced by the previous build of this version
        versioned_name = "{}_v{}".format(name, version)
        return os.path.join(build_dir, versioned_name + (".dll" if IS_WINDOWS else ".so"))
| name = "{}_v{}".format(name, version) | |||||
| # phase 3: compiler and ninja check | |||||
| _check_ninja_availability() | |||||
| _check_compiler_comatibility() | |||||
| # phase 4: setup the compile flags | |||||
| user_includes, system_includes = _setup_includes( | |||||
| extra_include_paths, with_cuda, with_cudnn | |||||
| ) | |||||
| common_cflags = _setup_common_cflags(user_includes, system_includes) | |||||
| cuda_cflags = ( | |||||
| _setup_cuda_cflags(common_cflags, extra_cuda_cflags) if with_cuda else None | |||||
| ) | |||||
| ldflags = _setup_ldflags(extra_ldflags, with_cuda, with_cudnn) | |||||
| if IS_WINDOWS: | |||||
| cflags = common_cflags + COMMON_MSVC_FLAGS + extra_cflags | |||||
| cflags = _nt_quote_args(cflags) | |||||
| else: | |||||
| cflags = common_cflags + ["-fPIC", "-std=c++14"] + extra_cflags | |||||
| ldflags = _add_shared_flag(ldflags) | |||||
| if sys.platform.startswith("darwin"): | |||||
| ldflags.append("-undefined dynamic_lookup") | |||||
| elif IS_WINDOWS: | |||||
| ldflags += ["/link"] | |||||
| ldflags = _nt_quote_args(ldflags) | |||||
| baton = FileBaton(os.path.join(build_dir, "lock")) | |||||
| if baton.try_acquire(): | |||||
| try: | |||||
| # phase 5: generate ninja build file | |||||
| objs = [_obj_file_path(src) for src in sources] | |||||
| name += ".dll" if IS_WINDOWS else ".so" | |||||
| build_file_path = os.path.join(build_dir, "build.ninja") | |||||
| if verbose: | |||||
| print("Emitting ninja build file {}".format(build_file_path)) | |||||
| _dump_ninja_file( | |||||
| path=build_file_path, | |||||
| cflags=cflags, | |||||
| post_cflags=None, | |||||
| cuda_cflags=cuda_cflags, | |||||
| cuda_post_cflags=None, | |||||
| sources=sources, | |||||
| objects=objs, | |||||
| ldflags=ldflags, | |||||
| library_target=name, | |||||
| with_cuda=with_cuda, | |||||
| ) | |||||
| # phase 6: build with ninja | |||||
| if verbose: | |||||
| print( | |||||
| "Compiling and linking your custom op {}".format( | |||||
| os.path.join(build_dir, name) | |||||
| ) | |||||
| ) | |||||
| _build_with_ninja(build_dir, verbose, "compiling error") | |||||
| finally: | |||||
| baton.release() | |||||
| else: | |||||
| baton.wait() | |||||
| return os.path.join(build_dir, name) | |||||
def build_and_load(
    name: str,
    sources: Union[str, List[str]],
    extra_cflags: Union[str, List[str]] = [],
    extra_cuda_cflags: Union[str, List[str]] = [],
    extra_ldflags: Union[str, List[str]] = [],
    extra_include_paths: Union[str, List[str]] = [],
    with_cuda: Optional[bool] = None,
    build_dir: Optional[str] = None,
    verbose: bool = False,
    abi_tag: Optional[int] = None,
) -> str:
    r"""Build a custom op with Ninja, just-in-time (JIT), and load it.

    Same as ``build()``, but also loads the built dynamic library.

    Args:
        same as ``build()``.

    Returns:
        the path of the compiled dynamic library.
    """
    lib_path = build(
        name,
        sources,
        extra_cflags,
        extra_cuda_cflags,
        extra_ldflags,
        extra_include_paths,
        with_cuda,
        build_dir,
        verbose,
        abi_tag,
    )
    if verbose:
        print("Loading the compiled custom op {}".format(lib_path))
    load(lib_path)
    return lib_path
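End to end, compiling and registering an op then reduces to a single call. A hedged sketch (file name hypothetical; add ``.cu`` sources to trigger the CUDA path):

    from megengine.utils import custom_op_tools

    lib_path = custom_op_tools.build_and_load(
        "elem_add",
        sources=["elem_add.cpp"],
        extra_cflags=["-O3"],
        verbose=True,
    )
    print(lib_path)  # e.g. ~/.cache/mge_custom_op/elem_add/elem_add_v0.so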
@@ -766,6 +766,13 @@ void init_custom(pybind11::module m) {
     m.def("_install", &install_custom);
     m.def("_uninstall", &uninstall_custom);
     m.def("_get_custom_op_list", &get_custom_op_list);
+    m.def("get_custom_op_abi_tag", [](void) -> int {
+        int ret = 0;
+#ifdef _GLIBCXX_USE_CXX11_ABI
+        ret = _GLIBCXX_USE_CXX11_ABI;
+#endif
+        return ret;
+    });
 
     static PyMethodDef method_def = {
 #ifdef METH_FASTCALL
@@ -0,0 +1,140 @@
/**
 * \file imperative/python/test/unit/core/custom_opsrc/elem_add.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */
| #include "megbrain/custom/custom.h" | |||||
| CUSTOM_OP_REG_BEGIN(ElemAddSmooth) | |||||
| void forward_device_infer( | |||||
| const std::vector<Device>& inputs, const Param& params, | |||||
| std::vector<Device>& outputs) { | |||||
| outputs[0] = inputs[0]; | |||||
| } | |||||
| void forward_shape_infer( | |||||
| const std::vector<Shape>& inputs, const Param& params, | |||||
| std::vector<Shape>& outputs) { | |||||
| outputs[0] = inputs[0]; | |||||
| } | |||||
| void forward_dtype_infer( | |||||
| const std::vector<DType>& inputs, const Param& params, | |||||
| std::vector<DType>& outputs) { | |||||
| outputs[0] = inputs[0]; | |||||
| } | |||||
| void forward_format_infer( | |||||
| const std::vector<Format>& inputs, const Param& params, | |||||
| std::vector<Format>& outputs) { | |||||
| outputs[0] = inputs[0]; | |||||
| } | |||||
| template <typename scalar_t> | |||||
| void forward_kernel( | |||||
| const scalar_t* input0, const scalar_t* input1, scalar_t* output, size_t len, | |||||
| float smooth) { | |||||
| for (size_t i = 0; i < len; ++i) { | |||||
| output[i] = input0[i] + input1[i]; | |||||
| if (output[i] < 0) | |||||
| output[i] += smooth; | |||||
| else | |||||
| output[i] -= smooth; | |||||
| } | |||||
| } | |||||
| void forward_compute( | |||||
| const std::vector<Tensor>& inputs, const Param& params, | |||||
| std::vector<Tensor>& outputs) { | |||||
| DISPATCH_SIGN_INT_AND_FLOAT_TYPES( | |||||
| outputs[0].dtype(), "forward_compute", ([&]() { | |||||
| forward_kernel<scalar_t>( | |||||
| inputs[0].data<scalar_t>(), inputs[1].data<scalar_t>(), | |||||
| outputs[0].data<scalar_t>(), outputs[0].size(), | |||||
| params["smooth"].as<float>()); | |||||
| })); | |||||
| } | |||||
| CUSTOM_OP_REG(ElemAddSmoothForward) | |||||
| .set_description( | |||||
| "Custom ElemAdd Operator With a Smooth Parameter, " | |||||
| "which is used to verify the CPU kernel") | |||||
| .add_input("lhs") | |||||
| .add_input("rhs") | |||||
| .add_output("output") | |||||
| .add_param("smooth", 0.f) | |||||
| .set_device_infer(forward_device_infer) | |||||
| .set_shape_infer(forward_shape_infer) | |||||
| .set_dtype_infer(forward_dtype_infer) | |||||
| .set_format_infer(forward_format_infer) | |||||
| .set_compute(forward_compute); | |||||
| void backward_device_infer( | |||||
| const std::vector<Device>& ograds, const Param& params, | |||||
| std::vector<Device>& igrads) { | |||||
| igrads[0] = ograds[0]; | |||||
| igrads[1] = ograds[0]; | |||||
| } | |||||
| void backward_shape_infer( | |||||
| const std::vector<Shape>& ograds, const Param& params, | |||||
| std::vector<Shape>& igrads) { | |||||
| igrads[0] = ograds[0]; | |||||
| igrads[1] = ograds[0]; | |||||
| } | |||||
| void backward_dtype_infer( | |||||
| const std::vector<DType>& ograds, const Param& params, | |||||
| std::vector<DType>& igrads) { | |||||
| igrads[0] = ograds[0]; | |||||
| igrads[1] = ograds[0]; | |||||
| } | |||||
| void backward_format_infer( | |||||
| const std::vector<Format>& ograds, const Param& params, | |||||
| std::vector<Format>& igrads) { | |||||
| igrads[0] = ograds[0]; | |||||
| igrads[1] = ograds[0]; | |||||
| } | |||||
| template <typename scalar_t> | |||||
| void backward_kernel( | |||||
| const scalar_t* ograd, scalar_t* igrad0, scalar_t* igrad1, size_t len) { | |||||
| for (size_t i = 0; i < len; ++i) { | |||||
| igrad0[i] = ograd[i]; | |||||
| igrad1[i] = ograd[i]; | |||||
| } | |||||
| } | |||||
| void backward_compute( | |||||
| const std::vector<Tensor>& ograds, const Param& params, | |||||
| std::vector<Tensor>& igrads) { | |||||
| DISPATCH_SIGN_INT_AND_FLOAT_TYPES( | |||||
| igrads[0].dtype(), "backward_compute", ([&]() { | |||||
| backward_kernel<scalar_t>( | |||||
| ograds[0].data<scalar_t>(), igrads[0].data<scalar_t>(), | |||||
| igrads[1].data<scalar_t>(), igrads[0].size()); | |||||
| })); | |||||
| } | |||||
| CUSTOM_OP_REG(ElemAddSmoothBackward) | |||||
| .set_description( | |||||
| "Custom ElemAdd Operator With a Smooth Parameter, " | |||||
| "which is used to verify the CPU kernel") | |||||
| .add_input("ograd") | |||||
| .add_output("igrad_lhs") | |||||
| .add_output("igrad_rhs") | |||||
| .set_device_infer(backward_device_infer) | |||||
| .set_shape_infer(backward_shape_infer) | |||||
| .set_dtype_infer(backward_dtype_infer) | |||||
| .set_format_infer(backward_format_infer) | |||||
| .set_compute(backward_compute); | |||||
| CUSTOM_OP_REG_END(ElemAddSmooth) | |||||
@@ -0,0 +1,65 @@
/**
 * \file imperative/python/test/unit/core/custom_opsrc/matmul_scale.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */
| #include "./matmul_scale.h" | |||||
| #include "megbrain/custom/custom.h" | |||||
| CUSTOM_OP_REG_BEGIN(MatMulScale) | |||||
| void forward_shape_infer( | |||||
| const std::vector<Shape>& inputs, const Param& params, | |||||
| std::vector<Shape>& outputs) { | |||||
| outputs[0] = {inputs[0][0], inputs[1][1]}; | |||||
| } | |||||
| void forward_compute( | |||||
| const std::vector<Tensor>& inputs, const Param& params, | |||||
| std::vector<Tensor>& outputs) { | |||||
| matmul_forward_helper( | |||||
| inputs[0], inputs[1], outputs[0], inputs[0].shape()[0], | |||||
| inputs[0].shape()[1], inputs[1].shape()[1], params["scale"].as<float>()); | |||||
| } | |||||
| CUSTOM_OP_REG(MatMulScaleForward) | |||||
| .add_inputs(2) | |||||
| .add_outputs(1) | |||||
| .add_param("scale", 1.0f) | |||||
| .set_shape_infer(forward_shape_infer) | |||||
| .set_compute("cuda", forward_compute); | |||||
| void backward_shape_infer( | |||||
| const std::vector<Shape>& ograd_and_inputs, const Param& params, | |||||
| std::vector<Shape>& outputs) { | |||||
| outputs[0] = ograd_and_inputs[1]; | |||||
| outputs[1] = ograd_and_inputs[2]; | |||||
| } | |||||
| void backward_compute( | |||||
| const std::vector<Tensor>& ograd_and_inputs, const Param& params, | |||||
| std::vector<Tensor>& igrads) { | |||||
| matmul_backward_lhs_helper( | |||||
| ograd_and_inputs[2], ograd_and_inputs[0], igrads[0], | |||||
| ograd_and_inputs[1].shape()[0], ograd_and_inputs[1].shape()[1], | |||||
| ograd_and_inputs[2].shape()[1], params["scale"].as<float>()); | |||||
| matmul_backward_rhs_helper( | |||||
| ograd_and_inputs[1], ograd_and_inputs[0], igrads[1], | |||||
| ograd_and_inputs[1].shape()[0], ograd_and_inputs[1].shape()[1], | |||||
| ograd_and_inputs[2].shape()[1], params["scale"].as<float>()); | |||||
| } | |||||
| CUSTOM_OP_REG(MatMulScaleBackward) | |||||
| .add_inputs(3) | |||||
| .add_outputs(2) | |||||
| .add_param("scale", 1.0f) | |||||
| .set_shape_infer(backward_shape_infer) | |||||
| .set_compute("cuda", backward_compute); | |||||
| CUSTOM_OP_REG_END(MatMulScale) | |||||
@@ -0,0 +1,97 @@
/**
 * \file imperative/python/test/unit/core/custom_opsrc/matmul_scale.cu
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */
| #include <cuda.h> | |||||
| #include <cuda_runtime.h> | |||||
| #include <stdio.h> | |||||
| #include "./matmul_scale.h" | |||||
| using namespace custom; | |||||
| // matmul_forward for Mat_mxk * Mat_k*n | |||||
| template <typename T> | |||||
| __global__ void matmul_forward_naive( | |||||
| const T* lhs, const T* rhs, T* res, size_t M, size_t K, size_t N, float scale) { | |||||
| int row = blockIdx.y * blockDim.y + threadIdx.y; | |||||
| int col = blockIdx.x * blockDim.x + threadIdx.x; | |||||
| T acc = 0; | |||||
| for (int i = 0; i < K; ++i) | |||||
| acc += lhs[row * K + i] * rhs[i * N + col]; | |||||
| res[row * N + col] = acc * scale; | |||||
| } | |||||
| // matmul_backward_lhs for Mat_mxk * Mat_k*n = Mat_mxn | |||||
| // that is Mat_mxn * Mat_nxk | |||||
| template <typename T> | |||||
| __global__ void matmul_backward_lhs_naive( | |||||
| const T* rhs, const T* ograd, T* lhs_grad, size_t M, size_t K, size_t N, | |||||
| float scale) { | |||||
| int row = blockIdx.y * blockDim.y + threadIdx.y; | |||||
| int col = blockIdx.x * blockDim.x + threadIdx.x; | |||||
| T acc = 0; | |||||
| for (int i = 0; i < N; ++i) | |||||
| acc += ograd[row * N + i] * rhs[col * N + i]; | |||||
| lhs_grad[row * K + col] = acc / scale; | |||||
| } | |||||
| // matmul_backward_rhs for Mat_mxk * Mat_k*n = Mat_mxn | |||||
| // that is Mat_kxm * Mat_mxn | |||||
| template <typename T> | |||||
| __global__ void matmul_backward_rhs_naive( | |||||
| const T* lhs, const T* ograd, T* rhs_grad, size_t M, size_t K, size_t N, | |||||
| float scale) { | |||||
| int row = blockIdx.y * blockDim.y + threadIdx.y; | |||||
| int col = blockIdx.x * blockDim.x + threadIdx.x; | |||||
| T acc = 0; | |||||
| for (int i = 0; i < M; ++i) | |||||
| acc += lhs[i * K + row] * ograd[i * N + col]; | |||||
| rhs_grad[row * N + col] = acc / scale; | |||||
| } | |||||
| void matmul_forward_helper( | |||||
| const Tensor& lhs, const Tensor& rhs, Tensor& res, size_t M, size_t K, size_t N, | |||||
| float scale) { | |||||
| dim3 block(1, 1); | |||||
| dim3 grid(N / block.x, M / block.y); | |||||
| DISPATCH_INT_AND_FLOAT_TYPES(res.dtype(), "matmul_forward", ([&]() { | |||||
| matmul_forward_naive<scalar_t><<<grid, block>>>( | |||||
| lhs.data<scalar_t>(), rhs.data<scalar_t>(), | |||||
| res.data<scalar_t>(), M, K, N, scale); | |||||
| })); | |||||
| } | |||||
| void matmul_backward_lhs_helper( | |||||
| const Tensor& rhs, const Tensor& ograd, Tensor& lhs_grad, size_t M, size_t K, | |||||
| size_t N, float scale) { | |||||
| dim3 block(1, 1); | |||||
| dim3 grid(K / block.x, M / block.y); | |||||
| DISPATCH_INT_AND_FLOAT_TYPES( | |||||
| lhs_grad.dtype(), "matmul_backward_lhs", ([&]() { | |||||
| matmul_backward_lhs_naive<scalar_t><<<grid, block>>>( | |||||
| rhs.data<scalar_t>(), ograd.data<scalar_t>(), | |||||
| lhs_grad.data<scalar_t>(), M, K, N, scale); | |||||
| })); | |||||
| } | |||||
| void matmul_backward_rhs_helper( | |||||
| const Tensor& lhs, const Tensor& ograd, Tensor& rhs_grad, size_t M, size_t K, | |||||
| size_t N, float scale) { | |||||
| dim3 block(1, 1); | |||||
| dim3 grid(N / block.x, K / block.y); | |||||
| DISPATCH_INT_AND_FLOAT_TYPES( | |||||
| rhs_grad.dtype(), "matmul_backward_rhs", ([&]() { | |||||
| matmul_backward_rhs_naive<scalar_t><<<grid, block>>>( | |||||
| lhs.data<scalar_t>(), ograd.data<scalar_t>(), | |||||
| rhs_grad.data<scalar_t>(), M, K, N, scale); | |||||
| })); | |||||
| } | |||||
@@ -0,0 +1,24 @@
/**
 * \file imperative/python/test/unit/core/custom_opsrc/matmul_scale.h
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */
| #include "megbrain/custom/custom.h" | |||||
| using Tensor = custom::Tensor; | |||||
| void matmul_forward_helper( | |||||
| const Tensor& lhs, const Tensor& rhs, Tensor& res, size_t M, size_t K, size_t N, | |||||
| float scale); | |||||
| void matmul_backward_lhs_helper( | |||||
| const Tensor& rhs, const Tensor& ograd, Tensor& lhs_grad, size_t M, size_t K, | |||||
| size_t N, float scale); | |||||
| void matmul_backward_rhs_helper( | |||||
| const Tensor& lhs, const Tensor& ograd, Tensor& rhs_grad, size_t M, size_t K, | |||||
| size_t N, float scale); | |||||
@@ -0,0 +1,111 @@
# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
import os
import platform
import shutil
import sys

import numpy as np
import pytest

import megengine
import megengine.functional as F
import megengine.optimizer as optim
from megengine import jit
from megengine.autodiff import Function, GradManager
from megengine.core._imperative_rt.core2 import apply
from megengine.core.ops import custom
from megengine.device import get_device_count
from megengine.module import Conv2d, Linear, Module
from megengine.random import normal
from megengine.tensor import Parameter, Tensor
from megengine.utils import custom_op_tools


def compare(ref, real):
    if ref.shape != real.shape:
        real = real.T
    np.testing.assert_allclose(ref, real, rtol=1e-3, atol=1e-5)


def build_and_clean(test_func):
    def wrapper():
        cur_dir_path = os.path.dirname(os.path.abspath(__file__))
        build_path = os.path.join(cur_dir_path, "custom_opsrc", "build")
        mgb_root_path = os.path.dirname(
            os.path.dirname(
                os.path.dirname(os.path.dirname(os.path.dirname(cur_dir_path)))
            )
        )
        extra_include_paths = [os.path.join(mgb_root_path, "src", "custom", "include")]

        extra_ld_flags = []
        if sys.platform != "win32":
            ld_path = os.environ.get("LD_LIBRARY_PATH")
            if ld_path is not None:
                ld_dirs = ld_path.split(":")
                for ld_dir in ld_dirs:
                    if os.path.exists(ld_dir) and os.path.isdir(ld_dir):
                        for lib in os.listdir(ld_dir):
                            if "megengine_shared" in lib:
                                extra_ld_flags += [
                                    "-L{} -Wl,-rpath,{}".format(ld_dir, ld_dir)
                                ]
                                break

        if get_device_count("gpu") > 0:
            custom_opsrc = [
                os.path.join(cur_dir_path, "custom_opsrc", "matmul_scale.cpp"),
                os.path.join(cur_dir_path, "custom_opsrc", "matmul_scale.cu"),
            ]
        else:
            custom_opsrc = [os.path.join(cur_dir_path, "custom_opsrc", "elem_add.cpp")]

        lib_path = custom_op_tools.build_and_load(
            "test_op",
            custom_opsrc,
            extra_include_paths=extra_include_paths,
            extra_ldflags=extra_ld_flags,
            build_dir=build_path,
            verbose=False,
            abi_tag=custom.get_custom_op_abi_tag(),
        )
        test_func()
        custom.unload(lib_path)
        if os.path.exists(build_path):
            shutil.rmtree(build_path)

    return wrapper


@pytest.mark.skipif(
    get_device_count("gpu") > 0, reason="elem_add operator is only supported on CPU"
)
@build_and_clean
def test_custom_op_cpu_build():
    assert "ElemAddSmoothForward" in custom._get_custom_op_list()
    assert "ElemAddSmoothBackward" in custom._get_custom_op_list()
    assert hasattr(custom, "ElemAddSmoothForward")
    assert hasattr(custom, "ElemAddSmoothBackward")


@pytest.mark.skipif(
    platform.system() == "Darwin",
    reason="GPU kernels are only supported on Linux and Windows",
)
@pytest.mark.skipif(
    get_device_count("gpu") < 1, reason="matmul_scale operator is only supported on GPU"
)
@build_and_clean
def test_custom_op_gpu_build():
    assert "MatMulScaleForward" in custom._get_custom_op_list()
    assert "MatMulScaleBackward" in custom._get_custom_op_list()
    assert hasattr(custom, "MatMulScaleForward")
    assert hasattr(custom, "MatMulScaleBackward")
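The tests above only assert that the ops were registered; actually running one is a short step further. A sketch (not part of this diff), assuming the generated op maker accepts its params as keyword arguments:

    @build_and_clean
    def run_elem_add_smooth():
        op = custom.ElemAddSmoothForward(smooth=0.2)
        lhs = Tensor(np.random.uniform(size=(4, 8)).astype("float32"))
        rhs = Tensor(np.random.uniform(size=(4, 8)).astype("float32"))
        (out,) = apply(op, lhs, rhs)
        # inputs are non-negative here, so the kernel subtracts the smooth term
        compare(lhs.numpy() + rhs.numpy() - 0.2, out.numpy())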
@@ -171,6 +171,7 @@ function do_build()
     mkdir -p staging
     cp -a imperative/python/{megengine,setup.py,requires.txt,requires-style.txt,requires-test.txt} staging/
+    cp -a ${SRC_DIR}/src/custom/include staging/megengine/core/include/
 
     cd ${BUILD_DIR}/staging/megengine/core
     rt_file=`ls _imperative_rt.*.so`
     echo "rt file is: ${rt_file}"
| @@ -151,6 +151,7 @@ do | |||||
| rm -rf staging | rm -rf staging | ||||
| mkdir -p staging | mkdir -p staging | ||||
| cp -a imperative/python/{megengine,setup.py,requires.txt,requires-style.txt,requires-test.txt} staging/ | cp -a imperative/python/{megengine,setup.py,requires.txt,requires-style.txt,requires-test.txt} staging/ | ||||
| cp -a ${SRC_DIR}/src/custom/include/megbrain staging/megengine/core/include | |||||
| cd ${BUILD_DIR}/staging/megengine/core | cd ${BUILD_DIR}/staging/megengine/core | ||||
| mkdir -p lib/ucx | mkdir -p lib/ucx | ||||
| @@ -77,11 +77,13 @@ CUBLAS_LIB="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/bin/cublas6 | |||||
| CURAND_LIB="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/bin/curand64_10.dll" | CURAND_LIB="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/bin/curand64_10.dll" | ||||
| CUBLASLT_LIB="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/bin/cublasLt64_10.dll" | CUBLASLT_LIB="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/bin/cublasLt64_10.dll" | ||||
| CUDART_LIB="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/bin/cudart64_101.dll" | CUDART_LIB="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/bin/cudart64_101.dll" | ||||
| MGE_EXPORT_LIB="${SRC_DIR}/build_dir/host/build/src/megengine_shared.dll" | |||||
| MGE_EXPORT_DLL="${SRC_DIR}/build_dir/host/build/src/megengine_shared.dll" | |||||
| MGE_EXPORT_LIB="${SRC_DIR}/build_dir/host/build/src/megengine_shared.lib" | |||||
| function depend_real_copy() { | function depend_real_copy() { | ||||
| REAL_DST=$1 | REAL_DST=$1 | ||||
| echo "real copy lib to $1" | echo "real copy lib to $1" | ||||
| cp "${MGE_EXPORT_DLL}" ${REAL_DST} | |||||
| cp "${MGE_EXPORT_LIB}" ${REAL_DST} | cp "${MGE_EXPORT_LIB}" ${REAL_DST} | ||||
| if [ ${BUILD_WHL_CPU_ONLY} = "OFF" ]; then | if [ ${BUILD_WHL_CPU_ONLY} = "OFF" ]; then | ||||
| @@ -190,6 +192,7 @@ function do_build() { | |||||
| rm -rf staging | rm -rf staging | ||||
| mkdir -p staging | mkdir -p staging | ||||
| cp -a imperative/python/{megengine,setup.py,requires.txt,requires-style.txt,requires-test.txt} staging/ | cp -a imperative/python/{megengine,setup.py,requires.txt,requires-style.txt,requires-test.txt} staging/ | ||||
| cp -a ${SRC_DIR}/src/custom/include/megbrain staging/megengine/core/include/ | |||||
| cd ${BUILD_DIR}/staging/megengine/core | cd ${BUILD_DIR}/staging/megengine/core | ||||
| rt_file=`ls _imperative_rt.*.pyd` | rt_file=`ls _imperative_rt.*.pyd` | ||||
| echo "rt file is: ${rt_file}" | echo "rt file is: ${rt_file}" | ||||
| @@ -1,3 +1,8 @@ | |||||
| # force define a SHARED target for the whl build: when building for APPLE | |||||
| # we force BUILD_SHARED_LIBS=OFF, as required by Xcode | |||||
| set(MGE_SHARED_LIB megengine_shared) | |||||
| set(MGE_SHARED_LIB ${MGE_SHARED_LIB} PARENT_SCOPE) | |||||
| if(MGE_WITH_JIT_MLIR) | if(MGE_WITH_JIT_MLIR) | ||||
| add_subdirectory(jit/include/megbrain/jit/mlir/ir) | add_subdirectory(jit/include/megbrain/jit/mlir/ir) | ||||
| endif() | endif() | ||||
| @@ -206,32 +211,30 @@ set (_VER_FILE ${PROJECT_SOURCE_DIR}/src/version.ld) | |||||
| # Build as SHARED or STATIC depending on BUILD_SHARED_LIBS=ON/OFF | # Build as SHARED or STATIC depending on BUILD_SHARED_LIBS=ON/OFF | ||||
| add_library(megengine) | add_library(megengine) | ||||
| # force define a SHARED target for whl, caused by when build for APPLE | |||||
| # we will force set BUILD_SHARED_LIBS=OFF for xcode needed | |||||
| add_library(megengine_shared SHARED) | |||||
| add_library(${MGE_SHARED_LIB} SHARED) | |||||
| target_link_libraries(megengine PRIVATE ${MGE_CUDA_LIBS}) | target_link_libraries(megengine PRIVATE ${MGE_CUDA_LIBS}) | ||||
| target_link_libraries(megengine PUBLIC megbrain megdnn) | target_link_libraries(megengine PUBLIC megbrain megdnn) | ||||
| target_link_libraries(megengine_shared PUBLIC megbrain megdnn) | |||||
| target_link_libraries(megengine_shared PRIVATE ${MGE_CUDA_LIBS}) | |||||
| target_link_libraries(${MGE_SHARED_LIB} PUBLIC megbrain megdnn) | |||||
| target_link_libraries(${MGE_SHARED_LIB} PRIVATE ${MGE_CUDA_LIBS}) | |||||
| if (UNIX AND NOT APPLE) | if (UNIX AND NOT APPLE) | ||||
| target_link_options(megengine PRIVATE -Wl,--no-undefined -Wl,--version-script=${_VER_FILE}) | target_link_options(megengine PRIVATE -Wl,--no-undefined -Wl,--version-script=${_VER_FILE}) | ||||
| set_target_properties(megengine PROPERTIES LINK_DEPENDS ${_VER_FILE}) | set_target_properties(megengine PROPERTIES LINK_DEPENDS ${_VER_FILE}) | ||||
| target_link_options(megengine_shared PRIVATE -Wl,--no-undefined -Wl,--version-script=${_VER_FILE}) | |||||
| set_target_properties(megengine_shared PROPERTIES LINK_DEPENDS ${_VER_FILE}) | |||||
| target_link_options(${MGE_SHARED_LIB} PRIVATE -Wl,--no-undefined -Wl,--version-script=${_VER_FILE}) | |||||
| set_target_properties(${MGE_SHARED_LIB} PROPERTIES LINK_DEPENDS ${_VER_FILE}) | |||||
| endif() | endif() | ||||
| if(WIN32 OR MSVC) | if(WIN32 OR MSVC) | ||||
| target_compile_definitions(megbrain PRIVATE MGE_DLL_EXPORT) | target_compile_definitions(megbrain PRIVATE MGE_DLL_EXPORT) | ||||
| target_compile_definitions(megdnn PRIVATE MGE_DLL_EXPORT) | target_compile_definitions(megdnn PRIVATE MGE_DLL_EXPORT) | ||||
| target_compile_definitions(megengine PRIVATE MGE_DLL_EXPORT) | target_compile_definitions(megengine PRIVATE MGE_DLL_EXPORT) | ||||
| target_compile_definitions(megengine_shared PRIVATE MGE_DLL_EXPORT) | |||||
| target_compile_definitions(${MGE_SHARED_LIB} PRIVATE MGE_DLL_EXPORT) | |||||
| # please do not use WINDOWS_EXPORT_ALL_SYMBOLS, as the symbol count exceeds 65535 when building with CUDA | # please do not use WINDOWS_EXPORT_ALL_SYMBOLS, as the symbol count exceeds 65535 when building with CUDA | ||||
| #set_target_properties(megengine PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS TRUE) | #set_target_properties(megengine PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS TRUE) | ||||
| #set_target_properties(megengine_shared PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS TRUE) | |||||
| #set_target_properties(${MGE_SHARED_LIB} PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS TRUE) | |||||
| endif() | endif() | ||||
| if (MGE_WITH_DISTRIBUTED) | if (MGE_WITH_DISTRIBUTED) | ||||
| message(VERBOSE "megengine configured to link megray") | message(VERBOSE "megengine configured to link megray") | ||||
| target_link_libraries(megengine PUBLIC megray) | target_link_libraries(megengine PUBLIC megray) | ||||
| target_link_libraries(megengine_shared PUBLIC megray) | |||||
| target_link_libraries(${MGE_SHARED_LIB} PUBLIC megray) | |||||
| endif() | endif() | ||||
| # Do not export targets if MGE_WITH_DISTRIBUTED is on. MegRay is not ready | # Do not export targets if MGE_WITH_DISTRIBUTED is on. MegRay is not ready | ||||
| # for this. | # for this. | ||||
| @@ -18,12 +18,31 @@ | |||||
| #ifndef _WIN32 | #ifndef _WIN32 | ||||
| #include <dlfcn.h> | #include <dlfcn.h> | ||||
| #else | |||||
| #include <windows.h> | |||||
| #endif | #endif | ||||
| using namespace mgb; | using namespace mgb; | ||||
| namespace custom { | namespace custom { | ||||
| #ifdef _WIN32 | |||||
| #define RTLD_LAZY 0 | |||||
| void* dlopen(const char* file, int) { | |||||
| return static_cast<void*>(LoadLibrary(file)); | |||||
| } | |||||
| int dlclose(void* handle) { | |||||
| return static_cast<int>(FreeLibrary(static_cast<HMODULE>(handle))); | |||||
| } | |||||
| const char* dlerror(void) { | |||||
| static char win_err_info[] = "no dlerror info on Windows"; | |||||
| return win_err_info; | |||||
| } | |||||
| #endif | |||||
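With the shim above, the loader can call dlopen/dlclose uniformly on every platform. For illustration, here is a minimal sketch of the RAII pattern CustomLib applies below: the raw handle is owned by a unique_ptr whose deleter is dlclose, so the library is released even if a loading-time check throws. load_example and its path argument are hypothetical; on POSIX this assumes only <dlfcn.h>, on Windows the wrappers above.

    #include <dlfcn.h>  // POSIX; on Windows the shim above provides the same names
    #include <memory>
    #include <stdexcept>
    #include <string>

    void load_example(const std::string& path) {
        // the unique_ptr owns the library handle; dlclose runs on scope exit
        std::unique_ptr<void, int (*)(void*)> handle(
                dlopen(path.c_str(), RTLD_LAZY), dlclose);
        if (!handle)
            throw std::runtime_error(dlerror());
        // ... ops registered by the library's static initializers are now visible ...
    }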
| CustomOpManager* CustomOpManager::inst(void) { | CustomOpManager* CustomOpManager::inst(void) { | ||||
| static CustomOpManager op_manager; | static CustomOpManager op_manager; | ||||
| return &op_manager; | return &op_manager; | ||||
| @@ -127,7 +146,6 @@ std::vector<RunTimeId> CustomOpManager::op_id_list(void) { | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| #ifndef _WIN32 | |||||
| CustomLib::CustomLib(const std::string& path, int mode = RTLD_LAZY) | CustomLib::CustomLib(const std::string& path, int mode = RTLD_LAZY) | ||||
| : m_handle(nullptr, [](void* handle) { dlclose(handle); }) { | : m_handle(nullptr, [](void* handle) { dlclose(handle); }) { | ||||
| auto op_list_before_load = CustomOpManager::inst()->op_name_list(); | auto op_list_before_load = CustomOpManager::inst()->op_name_list(); | ||||
| @@ -146,12 +164,6 @@ CustomLib::CustomLib(const std::string& path, int mode = RTLD_LAZY) | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| #else | |||||
| CustomLib::CustomLib(const std::string& path, int mode = 0) | |||||
| : m_handle(nullptr, [](void* handle) {}) { | |||||
| mgb_assert(false, "custom op is only supported on Linux now"); | |||||
| } | |||||
| #endif | |||||
| const std::vector<std::string>& CustomLib::ops_in_lib(void) const { | const std::vector<std::string>& CustomLib::ops_in_lib(void) const { | ||||
| return m_ops; | return m_ops; | ||||
| @@ -16,7 +16,8 @@ | |||||
| #include "tensor.h" | #include "tensor.h" | ||||
| namespace custom { | namespace custom { | ||||
| std::shared_ptr<CustomOp> op_insert(std::string opname, uint32_t version); | |||||
| MGE_WIN_DECLSPEC_FUC std::shared_ptr<CustomOp> op_insert( | |||||
| std::string opname, uint32_t version); | |||||
| } | } | ||||
| #define CUSTOM_OP_REG(OpName) \ | #define CUSTOM_OP_REG(OpName) \ | ||||
| @@ -32,27 +32,26 @@ namespace custom { | |||||
| using RunTimeId = uint64_t; | using RunTimeId = uint64_t; | ||||
| class ArgInfo { | |||||
| class MGE_WIN_DECLSPEC_FUC ArgInfo { | |||||
| CUSTOM_PIMPL_CLS_DECL(ArgInfo); | CUSTOM_PIMPL_CLS_DECL(ArgInfo); | ||||
| MGE_WIN_DECLSPEC_FUC ArgInfo( | |||||
| const std::string& name, const std::string& desc, | |||||
| ArgInfo(const std::string& name, const std::string& desc, | |||||
| const std::unordered_set<std::string>& dtypes, const int& ndim, | const std::unordered_set<std::string>& dtypes, const int& ndim, | ||||
| const std::string& mem_stgy); | const std::string& mem_stgy); | ||||
| MGE_WIN_DECLSPEC_FUC const std::string& name(void) const; | |||||
| MGE_WIN_DECLSPEC_FUC const std::string& desc(void) const; | |||||
| MGE_WIN_DECLSPEC_FUC const std::unordered_set<std::string>& dtypes(void) const; | |||||
| MGE_WIN_DECLSPEC_FUC int ndim(void) const; | |||||
| MGE_WIN_DECLSPEC_FUC const std::string& mem_strategy(void) const; | |||||
| const std::string& name(void) const; | |||||
| const std::string& desc(void) const; | |||||
| const std::unordered_set<std::string>& dtypes(void) const; | |||||
| int ndim(void) const; | |||||
| const std::string& mem_strategy(void) const; | |||||
| MGE_WIN_DECLSPEC_FUC std::string str() const; | |||||
| std::string str() const; | |||||
| }; | }; | ||||
| class CustomOp { | |||||
| class MGE_WIN_DECLSPEC_FUC CustomOp { | |||||
| std::unique_ptr<void, void_deleter> m_impl; | std::unique_ptr<void, void_deleter> m_impl; | ||||
| public: | public: | ||||
| MGE_WIN_DECLSPEC_FUC CustomOp(const std::string& op_type, uint32_t version); | |||||
| CustomOp(const std::string& op_type, uint32_t version); | |||||
| PREVENT_COPY_AND_ASSIGN(CustomOp); | PREVENT_COPY_AND_ASSIGN(CustomOp); | ||||
| using DeviceInferFuncPtr = | using DeviceInferFuncPtr = | ||||
| @@ -71,70 +70,65 @@ public: | |||||
| void (*)(const std::vector<Tensor>&, const Param&, std::vector<Tensor>&); | void (*)(const std::vector<Tensor>&, const Param&, std::vector<Tensor>&); | ||||
| // write for forward | // write for forward | ||||
| MGE_WIN_DECLSPEC_FUC CustomOp& set_device_infer(DeviceInferFuncPtr func); | |||||
| MGE_WIN_DECLSPEC_FUC CustomOp& set_shape_infer(ShapeInferFuncPtr func); | |||||
| MGE_WIN_DECLSPEC_FUC CustomOp& set_dtype_infer(DTypeInferFuncPtr func); | |||||
| MGE_WIN_DECLSPEC_FUC CustomOp& set_format_infer(FormatInferFuncPtr func); | |||||
| MGE_WIN_DECLSPEC_FUC CustomOp& set_preprocess(PreprocessFuncPtr func); | |||||
| MGE_WIN_DECLSPEC_FUC CustomOp& set_preprocess( | |||||
| const std::string& device, PreprocessFuncPtr func); | |||||
| MGE_WIN_DECLSPEC_FUC CustomOp& set_postprocess(PostprocessFuncPtr func); | |||||
| MGE_WIN_DECLSPEC_FUC CustomOp& set_postprocess( | |||||
| const std::string& device, PostprocessFuncPtr func); | |||||
| MGE_WIN_DECLSPEC_FUC CustomOp& set_compute(ComputeFuncPtr func); | |||||
| MGE_WIN_DECLSPEC_FUC CustomOp& set_compute( | |||||
| const std::string& device, ComputeFuncPtr func); | |||||
| MGE_WIN_DECLSPEC_FUC CustomOp& set_description(const std::string& op_desc); | |||||
| MGE_WIN_DECLSPEC_FUC CustomOp& add_input( | |||||
| CustomOp& set_device_infer(DeviceInferFuncPtr func); | |||||
| CustomOp& set_shape_infer(ShapeInferFuncPtr func); | |||||
| CustomOp& set_dtype_infer(DTypeInferFuncPtr func); | |||||
| CustomOp& set_format_infer(FormatInferFuncPtr func); | |||||
| CustomOp& set_preprocess(PreprocessFuncPtr func); | |||||
| CustomOp& set_preprocess(const std::string& device, PreprocessFuncPtr func); | |||||
| CustomOp& set_postprocess(PostprocessFuncPtr func); | |||||
| CustomOp& set_postprocess(const std::string& device, PostprocessFuncPtr func); | |||||
| CustomOp& set_compute(ComputeFuncPtr func); | |||||
| CustomOp& set_compute(const std::string& device, ComputeFuncPtr func); | |||||
| CustomOp& set_description(const std::string& op_desc); | |||||
| CustomOp& add_input( | |||||
| const std::string& name, const std::string& desc, | const std::string& name, const std::string& desc, | ||||
| const std::initializer_list<std::string>& legal_dtypes = {"float32"}, | const std::initializer_list<std::string>& legal_dtypes = {"float32"}, | ||||
| int dims = -1, const std::string& mem_stgy = "default"); | int dims = -1, const std::string& mem_stgy = "default"); | ||||
| MGE_WIN_DECLSPEC_FUC CustomOp& add_output( | |||||
| CustomOp& add_output( | |||||
| const std::string& name, const std::string& desc, | const std::string& name, const std::string& desc, | ||||
| const std::initializer_list<std::string>& legal_dtypes = {"float32"}, | const std::initializer_list<std::string>& legal_dtypes = {"float32"}, | ||||
| int dims = -1, const std::string& mem_stgy = "default"); | int dims = -1, const std::string& mem_stgy = "default"); | ||||
| MGE_WIN_DECLSPEC_FUC CustomOp& add_input( | |||||
| CustomOp& add_input( | |||||
| const std::string& name, | const std::string& name, | ||||
| const std::initializer_list<std::string>& legal_dtypes = {"float32"}, | const std::initializer_list<std::string>& legal_dtypes = {"float32"}, | ||||
| int dims = -1, const std::string& mem_stgy = "default"); | int dims = -1, const std::string& mem_stgy = "default"); | ||||
| MGE_WIN_DECLSPEC_FUC CustomOp& add_output( | |||||
| CustomOp& add_output( | |||||
| const std::string& name, | const std::string& name, | ||||
| const std::initializer_list<std::string>& legal_dtypes = {"float32"}, | const std::initializer_list<std::string>& legal_dtypes = {"float32"}, | ||||
| int dims = -1, const std::string& mem_stgy = "default"); | int dims = -1, const std::string& mem_stgy = "default"); | ||||
| MGE_WIN_DECLSPEC_FUC CustomOp& add_inputs(const size_t& input_num); | |||||
| MGE_WIN_DECLSPEC_FUC CustomOp& add_outputs(const size_t& output_num); | |||||
| MGE_WIN_DECLSPEC_FUC CustomOp& add_param( | |||||
| const std::string& name, const ParamVal& default_val); | |||||
| MGE_WIN_DECLSPEC_FUC CustomOp& add_param( | |||||
| CustomOp& add_inputs(const size_t& input_num); | |||||
| CustomOp& add_outputs(const size_t& output_num); | |||||
| CustomOp& add_param(const std::string& name, const ParamVal& default_val); | |||||
| CustomOp& add_param( | |||||
| const std::string& name, const std::string& desc, | const std::string& name, const std::string& desc, | ||||
| const ParamVal& default_val); | const ParamVal& default_val); | ||||
| // read | // read | ||||
| MGE_WIN_DECLSPEC_FUC std::string op_type(void) const; | |||||
| MGE_WIN_DECLSPEC_FUC std::string op_desc(void) const; | |||||
| MGE_WIN_DECLSPEC_FUC RunTimeId runtime_id(void) const; | |||||
| MGE_WIN_DECLSPEC_FUC size_t input_num(void) const; | |||||
| MGE_WIN_DECLSPEC_FUC size_t output_num(void) const; | |||||
| MGE_WIN_DECLSPEC_FUC std::string str(void) const; | |||||
| MGE_WIN_DECLSPEC_FUC const ParamInfo& param_info(void) const; | |||||
| MGE_WIN_DECLSPEC_FUC ArgInfo input_info(size_t idx) const; | |||||
| MGE_WIN_DECLSPEC_FUC ArgInfo output_info(size_t idx) const; | |||||
| MGE_WIN_DECLSPEC_FUC const std::vector<ArgInfo>& inputs_info(void) const; | |||||
| MGE_WIN_DECLSPEC_FUC const std::vector<ArgInfo>& outputs_info(void) const; | |||||
| std::string op_type(void) const; | |||||
| std::string op_desc(void) const; | |||||
| RunTimeId runtime_id(void) const; | |||||
| size_t input_num(void) const; | |||||
| size_t output_num(void) const; | |||||
| std::string str(void) const; | |||||
| const ParamInfo& param_info(void) const; | |||||
| ArgInfo input_info(size_t idx) const; | |||||
| ArgInfo output_info(size_t idx) const; | |||||
| const std::vector<ArgInfo>& inputs_info(void) const; | |||||
| const std::vector<ArgInfo>& outputs_info(void) const; | |||||
| // use | // use | ||||
| MGE_WIN_DECLSPEC_FUC std::vector<Device> infer_output_device( | |||||
| std::vector<Device> infer_output_device( | |||||
| const std::vector<Device>&, const Param&) const; | const std::vector<Device>&, const Param&) const; | ||||
| MGE_WIN_DECLSPEC_FUC std::vector<Shape> infer_output_shape( | |||||
| std::vector<Shape> infer_output_shape( | |||||
| const std::vector<Shape>&, const Param&) const; | const std::vector<Shape>&, const Param&) const; | ||||
| MGE_WIN_DECLSPEC_FUC std::vector<DType> infer_output_dtype( | |||||
| std::vector<DType> infer_output_dtype( | |||||
| const std::vector<DType>&, const Param&) const; | const std::vector<DType>&, const Param&) const; | ||||
| MGE_WIN_DECLSPEC_FUC std::vector<Format> infer_output_format( | |||||
| std::vector<Format> infer_output_format( | |||||
| const std::vector<Format>&, const Param&) const; | const std::vector<Format>&, const Param&) const; | ||||
| MGE_WIN_DECLSPEC_FUC void compute( | |||||
| const std::vector<Tensor>&, const Param&, std::vector<Tensor>&) const; | |||||
| void compute(const std::vector<Tensor>&, const Param&, std::vector<Tensor>&) const; | |||||
| }; | }; | ||||
| } // namespace custom | } // namespace custom | ||||
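For context, the class above is the builder interface an op author programs against. The sketch below registers a hypothetical AddBiasForward op through it; the kernel body, the names, and the "bias" param are invented, and it assumes CUSTOM_OP_REG evaluates to a CustomOp the setters can chain on, as the op_insert declaration earlier suggests — this is not a verbatim MegEngine example.

    // hypothetical elementwise op built with the declarations above
    void add_bias_compute(
            const std::vector<custom::Tensor>& inputs, const custom::Param& params,
            std::vector<custom::Tensor>& outputs) {
        const float* in = static_cast<const float*>(inputs[0].data());
        float* out = static_cast<float*>(outputs[0].data());
        float bias = params["bias"].as<float>();
        for (size_t i = 0; i < inputs[0].size(); ++i)  // size(): element count
            out[i] = in[i] + bias;
    }

    CUSTOM_OP_REG(AddBiasForward)
            .add_input("inp", "input tensor", {"float32"}, 2)
            .add_output("out", "output tensor", {"float32"}, 2)
            .add_param("bias", 0.0f)
            .set_compute(add_bias_compute);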
| @@ -23,7 +23,7 @@ class ParamInfoImpl; | |||||
| class ParamImpl; | class ParamImpl; | ||||
| // Schema of a param element | // Schema of a param element | ||||
| class ParamSchema { | |||||
| class MGE_WIN_DECLSPEC_FUC ParamSchema { | |||||
| CUSTOM_PIMPL_CLS_DECL(ParamSchema); | CUSTOM_PIMPL_CLS_DECL(ParamSchema); | ||||
| ParamSchema( | ParamSchema( | ||||
| const std::string& name, const ParamVal& value, | const std::string& name, const ParamVal& value, | ||||
| @@ -36,7 +36,7 @@ class ParamSchema { | |||||
| std::string str(void) const; | std::string str(void) const; | ||||
| }; | }; | ||||
| class ParamInfo { | |||||
| class MGE_WIN_DECLSPEC_FUC ParamInfo { | |||||
| CUSTOM_PIMPL_CLS_DECL(ParamInfo); | CUSTOM_PIMPL_CLS_DECL(ParamInfo); | ||||
| void set_tag(const std::string&); | void set_tag(const std::string&); | ||||
| @@ -46,16 +46,16 @@ class ParamInfo { | |||||
| const std::vector<ParamSchema>& meta(void) const; | const std::vector<ParamSchema>& meta(void) const; | ||||
| }; | }; | ||||
| class Param { | |||||
| class MGE_WIN_DECLSPEC_FUC Param { | |||||
| CUSTOM_PIMPL_CLS_DECL(Param); | CUSTOM_PIMPL_CLS_DECL(Param); | ||||
| MGE_WIN_DECLSPEC_FUC Param(const ParamInfo&); | |||||
| MGE_WIN_DECLSPEC_FUC ParamVal& operator[](const std::string&); | |||||
| MGE_WIN_DECLSPEC_FUC const ParamVal& operator[](const std::string&) const; | |||||
| MGE_WIN_DECLSPEC_FUC const std::unordered_map<std::string, ParamVal>& raw() const; | |||||
| MGE_WIN_DECLSPEC_FUC bool exist(const std::string& name) const; | |||||
| MGE_WIN_DECLSPEC_FUC std::string to_bytes(void) const; | |||||
| MGE_WIN_DECLSPEC_FUC void from_bytes(const std::string&); | |||||
| Param(const ParamInfo&); | |||||
| ParamVal& operator[](const std::string&); | |||||
| const ParamVal& operator[](const std::string&) const; | |||||
| const std::unordered_map<std::string, ParamVal>& raw() const; | |||||
| bool exist(const std::string& name) const; | |||||
| std::string to_bytes(void) const; | |||||
| void from_bytes(const std::string&); | |||||
| }; | }; | ||||
| MGE_WIN_DECLSPEC_FUC bool operator==(const Param&, const Param&); | MGE_WIN_DECLSPEC_FUC bool operator==(const Param&, const Param&); | ||||
| @@ -169,21 +169,21 @@ std::string vec2str(const std::vector<T>& vec) { | |||||
| * Con1: users need to set the type explicitly at class template instantiation | * Con1: users need to set the type explicitly at class template instantiation | ||||
| * Con2: ParamVal<int> cannot be assigned to ParamVal<double> | * Con2: ParamVal<int> cannot be assigned to ParamVal<double> | ||||
| */ | */ | ||||
| class ParamVal { | |||||
| class MGE_WIN_DECLSPEC_FUC ParamVal { | |||||
| std::unique_ptr<void, void_deleter> m_ptr; | std::unique_ptr<void, void_deleter> m_ptr; | ||||
| ParamDynType m_type; | ParamDynType m_type; | ||||
| public: | public: | ||||
| template <typename T> | template <typename T> | ||||
| MGE_WIN_DECLSPEC_FUC ParamVal(const T& val); | |||||
| ParamVal(const T& val); | |||||
| template <typename T> | template <typename T> | ||||
| MGE_WIN_DECLSPEC_FUC ParamVal(const std::initializer_list<T>& val); | |||||
| ParamVal(const std::initializer_list<T>& val); | |||||
| MGE_WIN_DECLSPEC_FUC ParamVal(); | |||||
| MGE_WIN_DECLSPEC_FUC ParamVal(const char* str); | |||||
| MGE_WIN_DECLSPEC_FUC ParamVal(const std::initializer_list<const char*>& strs); | |||||
| MGE_WIN_DECLSPEC_FUC ParamVal(const std::vector<const char*>& strs); | |||||
| MGE_WIN_DECLSPEC_FUC ParamVal(const ParamVal& rhs); | |||||
| ParamVal(); | |||||
| ParamVal(const char* str); | |||||
| ParamVal(const std::initializer_list<const char*>& strs); | |||||
| ParamVal(const std::vector<const char*>& strs); | |||||
| ParamVal(const ParamVal& rhs); | |||||
| template <typename T> | template <typename T> | ||||
| ParamVal& operator=(const T& rhs); | ParamVal& operator=(const T& rhs); | ||||
| @@ -196,30 +196,39 @@ public: | |||||
| ParamVal& operator=(const ParamVal& rhs); | ParamVal& operator=(const ParamVal& rhs); | ||||
| template <typename T> | template <typename T> | ||||
| MGE_WIN_DECLSPEC_FUC const T& as(void) const; | |||||
| const T& as(void) const; | |||||
| template <typename T> | template <typename T> | ||||
| MGE_WIN_DECLSPEC_FUC T& as(void); | |||||
| MGE_WIN_DECLSPEC_FUC const void* raw_ptr(void) const; | |||||
| MGE_WIN_DECLSPEC_FUC void* raw_ptr(void); | |||||
| MGE_WIN_DECLSPEC_FUC ParamDynType type(void) const; | |||||
| MGE_WIN_DECLSPEC_FUC std::string str(void) const; | |||||
| MGE_WIN_DECLSPEC_FUC size_t size(void) const; | |||||
| MGE_WIN_DECLSPEC_FUC static std::string to_bytes(const ParamVal& value); | |||||
| MGE_WIN_DECLSPEC_FUC static ParamVal from_bytes( | |||||
| const std::string& bytes, size_t& offset); | |||||
| friend ParamVal operator+(const ParamVal& lhs, const ParamVal& rhs); | |||||
| friend ParamVal operator-(const ParamVal& lhs, const ParamVal& rhs); | |||||
| friend ParamVal operator*(const ParamVal& lhs, const ParamVal& rhs); | |||||
| friend ParamVal operator/(const ParamVal& lhs, const ParamVal& rhs); | |||||
| friend bool operator==(const ParamVal& lhs, const ParamVal& rhs); | |||||
| friend bool operator!=(const ParamVal& lhs, const ParamVal& rhs); | |||||
| friend bool operator>(const ParamVal& lhs, const ParamVal& rhs); | |||||
| friend bool operator<(const ParamVal& lhs, const ParamVal& rhs); | |||||
| friend bool operator>=(const ParamVal& lhs, const ParamVal& rhs); | |||||
| friend bool operator<=(const ParamVal& lhs, const ParamVal& rhs); | |||||
| T& as(void); | |||||
| const void* raw_ptr(void) const; | |||||
| void* raw_ptr(void); | |||||
| ParamDynType type(void) const; | |||||
| std::string str(void) const; | |||||
| size_t size(void) const; | |||||
| static std::string to_bytes(const ParamVal& value); | |||||
| static ParamVal from_bytes(const std::string& bytes, size_t& offset); | |||||
| MGE_WIN_DECLSPEC_FUC friend ParamVal operator+( | |||||
| const ParamVal& lhs, const ParamVal& rhs); | |||||
| MGE_WIN_DECLSPEC_FUC friend ParamVal operator-( | |||||
| const ParamVal& lhs, const ParamVal& rhs); | |||||
| MGE_WIN_DECLSPEC_FUC friend ParamVal operator*( | |||||
| const ParamVal& lhs, const ParamVal& rhs); | |||||
| MGE_WIN_DECLSPEC_FUC friend ParamVal operator/( | |||||
| const ParamVal& lhs, const ParamVal& rhs); | |||||
| MGE_WIN_DECLSPEC_FUC friend bool operator==( | |||||
| const ParamVal& lhs, const ParamVal& rhs); | |||||
| MGE_WIN_DECLSPEC_FUC friend bool operator!=( | |||||
| const ParamVal& lhs, const ParamVal& rhs); | |||||
| MGE_WIN_DECLSPEC_FUC friend bool operator>( | |||||
| const ParamVal& lhs, const ParamVal& rhs); | |||||
| MGE_WIN_DECLSPEC_FUC friend bool operator<( | |||||
| const ParamVal& lhs, const ParamVal& rhs); | |||||
| MGE_WIN_DECLSPEC_FUC friend bool operator>=( | |||||
| const ParamVal& lhs, const ParamVal& rhs); | |||||
| MGE_WIN_DECLSPEC_FUC friend bool operator<=( | |||||
| const ParamVal& lhs, const ParamVal& rhs); | |||||
| }; | }; | ||||
| ParamVal operator+(const ParamVal& lhs, const ParamVal& rhs); | ParamVal operator+(const ParamVal& lhs, const ParamVal& rhs); | ||||
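A short sketch of the value semantics these declarations give ParamVal (the names and values are invented):

    custom::ParamVal scale = 2.5f;            // template ctor, float payload
    custom::ParamVal name = "matmul_scale";   // const char* ctor, string payload
    custom::ParamVal twice = scale + scale;   // dynamically typed arithmetic
    float f = twice.as<float>();              // typed read-back: 5.0f
    std::string bytes = custom::ParamVal::to_bytes(scale);
    size_t offset = 0;
    custom::ParamVal back = custom::ParamVal::from_bytes(bytes, offset);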
| @@ -30,9 +30,9 @@ namespace custom { | |||||
| #define CUSTOM_DEVICE_TYPE_ENUM_DECL(custom_type, builtin_type, builtin_str) \ | #define CUSTOM_DEVICE_TYPE_ENUM_DECL(custom_type, builtin_type, builtin_str) \ | ||||
| custom_type, | custom_type, | ||||
| class Device { | |||||
| MGE_WIN_DECLSPEC_FUC const void* impl() const; | |||||
| MGE_WIN_DECLSPEC_FUC Device(const void* impl); | |||||
| class MGE_WIN_DECLSPEC_FUC Device { | |||||
| const void* impl() const; | |||||
| Device(const void* impl); | |||||
| CUSTOM_PIMPL_CLS_DECL(Device); | CUSTOM_PIMPL_CLS_DECL(Device); | ||||
| public: | public: | ||||
| @@ -40,19 +40,19 @@ public: | |||||
| CUSTOM_FOR_EACH_DEVICE_TYPE(CUSTOM_DEVICE_TYPE_ENUM_DECL) | CUSTOM_FOR_EACH_DEVICE_TYPE(CUSTOM_DEVICE_TYPE_ENUM_DECL) | ||||
| }; | }; | ||||
| MGE_WIN_DECLSPEC_FUC Device(const std::string& device); | |||||
| MGE_WIN_DECLSPEC_FUC Device(const char* device); | |||||
| MGE_WIN_DECLSPEC_FUC Device(DeviceEnum device); | |||||
| Device(const std::string& device); | |||||
| Device(const char* device); | |||||
| Device(DeviceEnum device); | |||||
| MGE_WIN_DECLSPEC_FUC std::string str(void) const; | |||||
| MGE_WIN_DECLSPEC_FUC DeviceEnum enumv(void) const; | |||||
| std::string str(void) const; | |||||
| DeviceEnum enumv(void) const; | |||||
| MGE_WIN_DECLSPEC_FUC static bool is_legal(const std::string& device); | |||||
| MGE_WIN_DECLSPEC_FUC static bool is_legal(DeviceEnum device); | |||||
| MGE_WIN_DECLSPEC_FUC static std::vector<std::string> legal_devices(void); | |||||
| static bool is_legal(const std::string& device); | |||||
| static bool is_legal(DeviceEnum device); | |||||
| static std::vector<std::string> legal_devices(void); | |||||
| friend class Tensor; | friend class Tensor; | ||||
| friend bool operator==(const Device& lhs, const Device& rhs); | |||||
| MGE_WIN_DECLSPEC_FUC friend bool operator==(const Device& lhs, const Device& rhs); | |||||
| CUSTOM_DATA_ADAPTOR_FRIEND_DECL; | CUSTOM_DATA_ADAPTOR_FRIEND_DECL; | ||||
| }; | }; | ||||
| @@ -60,23 +60,23 @@ using DeviceEnum = Device::DeviceEnum; | |||||
| bool operator==(const Device& lhs, const Device& rhs); | bool operator==(const Device& lhs, const Device& rhs); | ||||
| class Shape { | |||||
| MGE_WIN_DECLSPEC_FUC const void* impl() const; | |||||
| MGE_WIN_DECLSPEC_FUC Shape(const void* impl); | |||||
| class MGE_WIN_DECLSPEC_FUC Shape { | |||||
| const void* impl() const; | |||||
| Shape(const void* impl); | |||||
| CUSTOM_PIMPL_CLS_DECL(Shape); | CUSTOM_PIMPL_CLS_DECL(Shape); | ||||
| public: | public: | ||||
| MGE_WIN_DECLSPEC_FUC Shape(const std::vector<size_t>& rhs); | |||||
| MGE_WIN_DECLSPEC_FUC Shape(const std::initializer_list<size_t>& rhs); | |||||
| Shape(const std::vector<size_t>& rhs); | |||||
| Shape(const std::initializer_list<size_t>& rhs); | |||||
| size_t& operator[](size_t idx); | size_t& operator[](size_t idx); | ||||
| size_t operator[](size_t idx) const; | size_t operator[](size_t idx) const; | ||||
| MGE_WIN_DECLSPEC_FUC void ndim(size_t dim); | |||||
| MGE_WIN_DECLSPEC_FUC size_t ndim(void) const; | |||||
| void ndim(size_t dim); | |||||
| size_t ndim(void) const; | |||||
| friend class Tensor; | friend class Tensor; | ||||
| friend bool operator==(const Shape& lhs, const Shape& rhs); | |||||
| MGE_WIN_DECLSPEC_FUC friend bool operator==(const Shape& lhs, const Shape& rhs); | |||||
| CUSTOM_DATA_ADAPTOR_FRIEND_DECL; | CUSTOM_DATA_ADAPTOR_FRIEND_DECL; | ||||
| }; | }; | ||||
| @@ -104,9 +104,9 @@ using bfloat16_t = uint16_t; | |||||
| #define CUSTOM_DTYPE_ENUM_DECL(custom_type, builtin_type, ctype) custom_type, | #define CUSTOM_DTYPE_ENUM_DECL(custom_type, builtin_type, ctype) custom_type, | ||||
| class DType { | |||||
| MGE_WIN_DECLSPEC_FUC const void* impl() const; | |||||
| MGE_WIN_DECLSPEC_FUC DType(const void* impl); | |||||
| class MGE_WIN_DECLSPEC_FUC DType { | |||||
| const void* impl() const; | |||||
| DType(const void* impl); | |||||
| CUSTOM_PIMPL_CLS_DECL(DType); | CUSTOM_PIMPL_CLS_DECL(DType); | ||||
| public: | public: | ||||
| @@ -114,27 +114,33 @@ public: | |||||
| CUSTOM_FOR_EACH_TENSOR_DATA_TYPE(CUSTOM_DTYPE_ENUM_DECL) | CUSTOM_FOR_EACH_TENSOR_DATA_TYPE(CUSTOM_DTYPE_ENUM_DECL) | ||||
| }; | }; | ||||
| MGE_WIN_DECLSPEC_FUC DType(const std::string& dtype); | |||||
| MGE_WIN_DECLSPEC_FUC DType(const char* dtype); | |||||
| MGE_WIN_DECLSPEC_FUC DType( | |||||
| const std::string& dtype, float scale, uint8_t zero_point = 0); | |||||
| MGE_WIN_DECLSPEC_FUC DType(const char* dtype, float scale, uint8_t zero_point = 0); | |||||
| MGE_WIN_DECLSPEC_FUC DType(DTypeEnum dtype); | |||||
| MGE_WIN_DECLSPEC_FUC DType(DTypeEnum dtype, float scale, uint8_t zero_point = 0); | |||||
| MGE_WIN_DECLSPEC_FUC std::string str(void) const; | |||||
| MGE_WIN_DECLSPEC_FUC DTypeEnum enumv() const; | |||||
| MGE_WIN_DECLSPEC_FUC float scale(void) const; | |||||
| MGE_WIN_DECLSPEC_FUC uint8_t zero_point(void) const; | |||||
| DType(const std::string& dtype); | |||||
| DType(const char* dtype); | |||||
| DType(const std::string& dtype, float scale, uint8_t zero_point = 0); | |||||
| DType(const char* dtype, float scale, uint8_t zero_point = 0); | |||||
| DType(DTypeEnum dtype); | |||||
| DType(DTypeEnum dtype, float scale, uint8_t zero_point = 0); | |||||
| std::string str(void) const; | |||||
| DTypeEnum enumv() const; | |||||
| float scale(void) const; | |||||
| uint8_t zero_point(void) const; | |||||
| template <typename T> | template <typename T> | ||||
| MGE_WIN_DECLSPEC_FUC bool is_compatible(void) const; | |||||
| bool is_compatible(void) const; | |||||
| MGE_WIN_DECLSPEC_FUC static bool is_legal(const std::string& dtype); | |||||
| MGE_WIN_DECLSPEC_FUC static bool is_legal(const DTypeEnum& dtype); | |||||
| MGE_WIN_DECLSPEC_FUC static std::vector<std::string> legal_dtypes(void); | |||||
| static bool is_legal(const std::string& dtype); | |||||
| static bool is_legal(const DTypeEnum& dtype); | |||||
| static std::vector<std::string> legal_dtypes(void); | |||||
| friend class Tensor; | friend class Tensor; | ||||
| friend bool operator==(const DType& lhs, const DType& rhs); | |||||
| MGE_WIN_DECLSPEC_FUC friend bool operator==(const DType& lhs, const DType& rhs); | |||||
| MGE_WIN_DECLSPEC_FUC friend bool operator==( | |||||
| const DType& lhs, const std::string& rhs); | |||||
| MGE_WIN_DECLSPEC_FUC friend bool operator==(const DType& lhs, const char* rhs); | |||||
| MGE_WIN_DECLSPEC_FUC friend bool operator==( | |||||
| const std::string& lhs, const DType& rhs); | |||||
| MGE_WIN_DECLSPEC_FUC friend bool operator==(const char* lhs, const DType& rhs); | |||||
| CUSTOM_DATA_ADAPTOR_FRIEND_DECL; | CUSTOM_DATA_ADAPTOR_FRIEND_DECL; | ||||
| }; | }; | ||||
| @@ -180,45 +186,45 @@ bool operator==(const DType& lhs, const char* rhs); | |||||
| bool operator==(const std::string& lhs, const DType& rhs); | bool operator==(const std::string& lhs, const DType& rhs); | ||||
| bool operator==(const char* lhs, const DType& rhs); | bool operator==(const char* lhs, const DType& rhs); | ||||
| class Format { | |||||
| MGE_WIN_DECLSPEC_FUC const void* impl() const; | |||||
| MGE_WIN_DECLSPEC_FUC Format(const void* impl); | |||||
| class MGE_WIN_DECLSPEC_FUC Format { | |||||
| const void* impl() const; | |||||
| Format(const void* impl); | |||||
| CUSTOM_PIMPL_CLS_DECL(Format); | CUSTOM_PIMPL_CLS_DECL(Format); | ||||
| public: | public: | ||||
| MGE_WIN_DECLSPEC_FUC Format(const std::string& format); | |||||
| MGE_WIN_DECLSPEC_FUC Format(const char* format); | |||||
| Format(const std::string& format); | |||||
| Format(const char* format); | |||||
| MGE_WIN_DECLSPEC_FUC std::string str(void) const; | |||||
| MGE_WIN_DECLSPEC_FUC bool is_default(void) const; | |||||
| std::string str(void) const; | |||||
| bool is_default(void) const; | |||||
| friend class Tensor; | friend class Tensor; | ||||
| CUSTOM_DATA_ADAPTOR_FRIEND_DECL; | CUSTOM_DATA_ADAPTOR_FRIEND_DECL; | ||||
| }; | }; | ||||
| class Tensor { | |||||
| class MGE_WIN_DECLSPEC_FUC Tensor { | |||||
| void* m_tensor; | void* m_tensor; | ||||
| MGE_WIN_DECLSPEC_FUC const void* impl(void) const; | |||||
| MGE_WIN_DECLSPEC_FUC Tensor(const void* impl); | |||||
| const void* impl(void) const; | |||||
| Tensor(const void* impl); | |||||
| MGE_WIN_DECLSPEC_FUC const size_t* shapes_raw(void) const; | |||||
| MGE_WIN_DECLSPEC_FUC const ptrdiff_t* strides_raw(void) const; | |||||
| const size_t* shapes_raw(void) const; | |||||
| const ptrdiff_t* strides_raw(void) const; | |||||
| public: | public: | ||||
| Tensor() = delete; | Tensor() = delete; | ||||
| MGE_WIN_DECLSPEC_FUC Tensor(const Tensor& rhs); | |||||
| MGE_WIN_DECLSPEC_FUC Tensor& operator=(const Tensor& rhs); | |||||
| MGE_WIN_DECLSPEC_FUC Shape shape(void) const; | |||||
| MGE_WIN_DECLSPEC_FUC DType dtype(void) const; | |||||
| MGE_WIN_DECLSPEC_FUC Format format(void) const; | |||||
| MGE_WIN_DECLSPEC_FUC Device device(void) const; | |||||
| MGE_WIN_DECLSPEC_FUC size_t size(void) const; | |||||
| MGE_WIN_DECLSPEC_FUC std::vector<ptrdiff_t> stride(void) const; | |||||
| MGE_WIN_DECLSPEC_FUC float scale(void) const; | |||||
| MGE_WIN_DECLSPEC_FUC uint8_t zero_point(void) const; | |||||
| Tensor(const Tensor& rhs); | |||||
| Tensor& operator=(const Tensor& rhs); | |||||
| Shape shape(void) const; | |||||
| DType dtype(void) const; | |||||
| Format format(void) const; | |||||
| Device device(void) const; | |||||
| size_t size(void) const; | |||||
| std::vector<ptrdiff_t> stride(void) const; | |||||
| float scale(void) const; | |||||
| uint8_t zero_point(void) const; | |||||
| void* data(void); | void* data(void); | ||||
| const void* data(void) const; | const void* data(void) const; | ||||
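Combined with Shape, DType and Device above, these accessors suffice for a simple kernel body. A minimal sketch, assuming size() returns the element count and the tensor holds contiguous float32 data; fill_ones is invented, and custom_assert is the helper from the utils header patched below.

    void fill_ones(custom::Tensor& t) {
        custom_assert(
                t.dtype() == "float32", "expected float32, got %s",
                t.dtype().str().c_str());
        float* ptr = static_cast<float*>(t.data());
        for (size_t i = 0; i < t.size(); ++i)
            ptr[i] = 1.0f;
    }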
| @@ -19,10 +19,19 @@ | |||||
| namespace custom { | namespace custom { | ||||
| void assert_failed_log( | |||||
| #ifndef MGE_WIN_DECLSPEC_FUC | |||||
| #ifdef _WIN32 | |||||
| #define MGE_WIN_DECLSPEC_FUC __declspec(dllexport) | |||||
| #else | |||||
| #define MGE_WIN_DECLSPEC_FUC | |||||
| #endif | |||||
| #endif | |||||
| MGE_WIN_DECLSPEC_FUC void assert_failed_log( | |||||
| const char* file, int line, const char* func, const char* expr, | const char* file, int line, const char* func, const char* expr, | ||||
| const char* msg_fmt, ...); | const char* msg_fmt, ...); | ||||
| #ifndef _WIN32 | |||||
| #define custom_expect(expr, msg...) \ | #define custom_expect(expr, msg...) \ | ||||
| if (!(expr)) { \ | if (!(expr)) { \ | ||||
| assert_failed_log(__FILE__, __LINE__, __PRETTY_FUNCTION__, #expr, ##msg); \ | assert_failed_log(__FILE__, __LINE__, __PRETTY_FUNCTION__, #expr, ##msg); \ | ||||
| @@ -33,8 +42,22 @@ void assert_failed_log( | |||||
| assert_failed_log(__FILE__, __LINE__, __PRETTY_FUNCTION__, #expr, ##msg); \ | assert_failed_log(__FILE__, __LINE__, __PRETTY_FUNCTION__, #expr, ##msg); \ | ||||
| } \ | } \ | ||||
| assert((expr)) | assert((expr)) | ||||
| #else | |||||
| #define custom_expect(expr, ...) \ | |||||
| if (!(expr)) { \ | |||||
| assert_failed_log( \ | |||||
| __FILE__, __LINE__, __PRETTY_FUNCTION__, #expr, __VA_ARGS__); \ | |||||
| } | |||||
| #define custom_assert(expr, ...) \ | |||||
| if (!(expr)) { \ | |||||
| assert_failed_log( \ | |||||
| __FILE__, __LINE__, __PRETTY_FUNCTION__, #expr, __VA_ARGS__); \ | |||||
| } \ | |||||
| assert((expr)) | |||||
| #endif | |||||
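One subtlety worth noting: the POSIX branch uses the GNU msg... / ##msg form, so the message may be omitted entirely, while the __VA_ARGS__ form here requires at least one trailing argument. A usage sketch with invented values:

    // custom_expect only logs on failure; custom_assert logs and then
    // aborts in debug builds via assert()
    custom_expect(workspace != nullptr, "workspace not allocated");
    custom_assert(ndim > 0 && ndim <= 7, "bad ndim: %d", ndim);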
| class UnImpleWarnLog { | |||||
| class MGE_WIN_DECLSPEC_FUC UnImpleWarnLog { | |||||
| public: | public: | ||||
| UnImpleWarnLog( | UnImpleWarnLog( | ||||
| const std::string& func, const std::string& attr, const std::string& val); | const std::string& func, const std::string& attr, const std::string& val); | ||||
| @@ -54,9 +77,9 @@ void impl_deleter(void* ptr) { | |||||
| std::unique_ptr<void, void_deleter> m_impl; \ | std::unique_ptr<void, void_deleter> m_impl; \ | ||||
| \ | \ | ||||
| public: \ | public: \ | ||||
| MGE_WIN_DECLSPEC_FUC Cls(); \ | |||||
| MGE_WIN_DECLSPEC_FUC Cls(const Cls& rhs); \ | |||||
| MGE_WIN_DECLSPEC_FUC Cls& operator=(const Cls& rhs) | |||||
| Cls(); \ | |||||
| Cls(const Cls& rhs); \ | |||||
| Cls& operator=(const Cls& rhs) | |||||
| #define CUSTOM_PIMPL_CLS_DEFINE(Cls) \ | #define CUSTOM_PIMPL_CLS_DEFINE(Cls) \ | ||||
| Cls::Cls() : m_impl(new Cls##Impl(), impl_deleter<Cls##Impl>) {} \ | Cls::Cls() : m_impl(new Cls##Impl(), impl_deleter<Cls##Impl>) {} \ | ||||