diff --git a/mindspore_serving/ccsrc/worker/inference/mindspore_model_wrap.cc b/mindspore_serving/ccsrc/worker/inference/mindspore_model_wrap.cc index bbece03..6c1e2ec 100644 --- a/mindspore_serving/ccsrc/worker/inference/mindspore_model_wrap.cc +++ b/mindspore_serving/ccsrc/worker/inference/mindspore_model_wrap.cc @@ -153,6 +153,8 @@ std::shared_ptr MindSporeModelWrap::TransformModelContext(const std::ma mindspore::ModelContext::SetPrecisionMode(context, value); } else if (key == "acl_option.op_select_impl_mode") { mindspore::ModelContext::SetOpSelectImplMode(context, value); + } else if (key == "gpu_option.enable_trt_infer") { + mindspore::ModelContext::SetGpuTrtInferMode(context, value); } } return context; @@ -397,6 +399,5 @@ ApiBufferTensorWrap::ApiBufferTensorWrap() = default; ApiBufferTensorWrap::ApiBufferTensorWrap(const mindspore::MSTensor &tensor) : tensor_(tensor) {} ApiBufferTensorWrap::~ApiBufferTensorWrap() = default; - } // namespace serving } // namespace mindspore diff --git a/mindspore_serving/worker/_check_version.py b/mindspore_serving/worker/_check_version.py index 1296fec..a927a37 100644 --- a/mindspore_serving/worker/_check_version.py +++ b/mindspore_serving/worker/_check_version.py @@ -15,7 +15,10 @@ """version and config check""" import os import sys +import subprocess from pathlib import Path +import numpy as np +from packaging import version from mindspore_serving import log as logger @@ -163,6 +166,120 @@ class AscendEnvChecker: "Can not find opp path (need by mindspore-ascend), please check if you have set env ASCEND_OPP_PATH, " "you can reference to the installation guidelines https://www.mindspore.cn/install") +class GPUEnvChecker(): + """GPU environment check.""" + + def __init__(self): + self.version = ["10.1"] + # env + self.path = os.getenv("PATH") + self.ld_lib_path = os.getenv("LD_LIBRARY_PATH") + + # check + self.v = "0" + self.cuda_lib_path = self._get_lib_path("libcu") + self.cuda_bin_path = self._get_bin_path("cuda") + + def 
_get_bin_path(self, bin_name): + """Get bin path by bin name.""" + if bin_name == "cuda": + return self._get_cuda_bin_path() + return [] + + def _get_cuda_bin_path(self): + """Get cuda bin path by lib path.""" + path_list = [] + for path in self.cuda_lib_path: + path = os.path.abspath(path.strip()+"/bin/") + if Path(path).is_dir(): + path_list.append(path) + return np.unique(path_list) + + def _get_nvcc_version(self, is_set_env): + """Get cuda version by nvcc command.""" + nvcc_result = subprocess.run(["nvcc --version | grep release"], + timeout=3, text=True, capture_output=True, check=False, shell=True) + if nvcc_result.returncode: + if not is_set_env: + for path in self.cuda_bin_path: + if Path(path + "/nvcc").is_file(): + os.environ['PATH'] = path + ":" + os.environ['PATH'] + return self._get_nvcc_version(True) + return "" + result = nvcc_result.stdout + for line in result.split('\n'): + if line: + return line.strip().split("release")[1].split(",")[0].strip() + return "" + + def check_env(self): + """Check cuda version.""" + version_match = False + for path in self.cuda_lib_path: + version_file = path + "/version.txt" + if not Path(version_file).is_file(): + continue + if self._check_version(version_file): + version_match = True + break + if not version_match: + if self.v == "0": + logger.warning("Cuda version file version.txt is not found, please confirm that the correct " + "cuda version has been installed, you can refer to the " + "installation guidelines: https://www.mindspore.cn/install") + else: + logger.warning(f"MindSpore version {__version__} and cuda version {self.v} does not match, " + "please refer to the installation guide for version matching " + "information: https://www.mindspore.cn/install") + nvcc_version = self._get_nvcc_version(False) + if nvcc_version and (nvcc_version not in self.version): + logger.warning(f"MindSpore version {__version__} and nvcc(cuda bin) version {nvcc_version} " + "does not match, please refer to the installation guide 
logger.warning(f"{lib_name} so(need by mindspore-gpu) is not found, please confirm that " f"_c_expression.so depends on {lib_name}, " f"and _c_expression.so in directory:{current_path}")
elif device_type == "Cpu": pass diff --git a/mindspore_serving/worker/register/__init__.py b/mindspore_serving/worker/register/__init__.py index b76bcb3..1c14c3e 100644 --- a/mindspore_serving/worker/register/__init__.py +++ b/mindspore_serving/worker/register/__init__.py @@ -14,7 +14,7 @@ # ============================================================================ """MindSpore Serving Worker, for servable config.""" -from .servable import declare_servable, AclOptions +from .servable import declare_servable, AclOptions, GpuOptions from .method import register_method, call_preprocess, call_servable, call_postprocess from .method import call_preprocess_pipeline, call_postprocess_pipeline @@ -22,6 +22,7 @@ __all__ = [] __all__.extend([ "declare_servable", "AclOptions", + "GpuOptions", 'register_method', 'call_preprocess', 'call_preprocess_pipeline', diff --git a/mindspore_serving/worker/register/servable.py b/mindspore_serving/worker/register/servable.py index 97eb23d..30cebac 100644 --- a/mindspore_serving/worker/register/servable.py +++ b/mindspore_serving/worker/register/servable.py @@ -29,7 +29,7 @@ def declare_servable(servable_file, model_format, with_batch_dim=True, options=N model_format (str): Model format, "OM" or "MindIR", case ignored. with_batch_dim (bool): Whether the first shape dim of the inputs and outputs of model is batch dim, default True. - options (None, AclOptions, map): Options of model, currently AclOptions works. + options (None, AclOptions, GpuOptions, map): Options of model, currently AclOptions, GpuOptions works. without_batch_dim_inputs (None, int, tuple or list of int): Index of inputs that without batch dim when with_batch_dim is True. 
options (None, AclOptions, GpuOptions, map): Options of model, currently AclOptions and GpuOptions work.
RuntimeError: Gpu option is invalid, or value is not bool.
+void ModelContext::SetGpuTrtInferMode(const std::shared_ptr &context, + const std::vector &gpu_trt_infer_mode) { + MS_EXCEPTION_IF_NULL(context); + if (context->data == nullptr) { + context->data = std::make_shared(); + MS_EXCEPTION_IF_NULL(context->data); + } + context->data->params[kModelOptionGpuTrtInferMode] = CharToString(gpu_trt_infer_mode); +} + +std::vector ModelContext::GetGpuTrtInferModeChar(const std::shared_ptr &context) { + MS_EXCEPTION_IF_NULL(context); + const std::string &ref = GetValue(context, kModelOptionGpuTrtInferMode); + return StringToChar(ref); +} } // namespace mindspore diff --git a/tests/ut/stub/include/api/context.h b/tests/ut/stub/include/api/context.h index 90dfa40..5b2c1ca 100644 --- a/tests/ut/stub/include/api/context.h +++ b/tests/ut/stub/include/api/context.h @@ -72,6 +72,9 @@ struct MS_API ModelContext : public Context { const std::string &op_select_impl_mode); static inline std::string GetOpSelectImplMode(const std::shared_ptr &context); + static inline void SetGpuTrtInferMode(const std::shared_ptr &context, const std::string &gpu_trt_infer_mode); + static inline std::string GetGpuTrtInferMode(const std::shared_ptr &context); + private: // api without std::string static void SetInsertOpConfigPath(const std::shared_ptr &context, const std::vector &cfg_path); @@ -89,6 +92,9 @@ struct MS_API ModelContext : public Context { static void SetOpSelectImplMode(const std::shared_ptr &context, const std::vector &op_select_impl_mode); static std::vector GetOpSelectImplModeChar(const std::shared_ptr &context); + + static void SetGpuTrtInferMode(const std::shared_ptr &context, const std::vector &gpu_trt_infer_mode); + static std::vector GetGpuTrtInferModeChar(const std::shared_ptr &context); }; void GlobalContext::SetGlobalDeviceTarget(const std::string &device_target) { @@ -131,5 +137,12 @@ void ModelContext::SetOpSelectImplMode(const std::shared_ptr &context, std::string ModelContext::GetOpSelectImplMode(const std::shared_ptr &context) { 
return CharToString(GetOpSelectImplModeChar(context)); } + +void ModelContext::SetGpuTrtInferMode(const std::shared_ptr &context, const std::string &gpu_trt_infer_mode) { + SetGpuTrtInferMode(context, StringToChar(gpu_trt_infer_mode)); +} +std::string ModelContext::GetGpuTrtInferMode(const std::shared_ptr &context) { + return CharToString(GetGpuTrtInferModeChar(context)); +} } // namespace mindspore #endif // MINDSPORE_INCLUDE_API_CONTEXT_H