From: @wilfchen Reviewed-by: @xu-yfei,@xu-yfei,@zhangyinxia Signed-off-by: @xu-yfei,@xu-yfeitags/v1.2.0
| @@ -153,6 +153,8 @@ std::shared_ptr<Context> MindSporeModelWrap::TransformModelContext(const std::ma | |||
| mindspore::ModelContext::SetPrecisionMode(context, value); | |||
| } else if (key == "acl_option.op_select_impl_mode") { | |||
| mindspore::ModelContext::SetOpSelectImplMode(context, value); | |||
| } else if (key == "gpu_option.enable_trt_infer") { | |||
| mindspore::ModelContext::SetGpuTrtInferMode(context, value); | |||
| } | |||
| } | |||
| return context; | |||
| @@ -397,6 +399,5 @@ ApiBufferTensorWrap::ApiBufferTensorWrap() = default; | |||
| ApiBufferTensorWrap::ApiBufferTensorWrap(const mindspore::MSTensor &tensor) : tensor_(tensor) {} | |||
| ApiBufferTensorWrap::~ApiBufferTensorWrap() = default; | |||
| } // namespace serving | |||
| } // namespace mindspore | |||
| @@ -15,7 +15,10 @@ | |||
| """version and config check""" | |||
| import os | |||
| import sys | |||
| import subprocess | |||
| from pathlib import Path | |||
| import numpy as np | |||
| from packaging import version | |||
| from mindspore_serving import log as logger | |||
| @@ -163,6 +166,120 @@ class AscendEnvChecker: | |||
| "Can not find opp path (need by mindspore-ascend), please check if you have set env ASCEND_OPP_PATH, " | |||
| "you can reference to the installation guidelines https://www.mindspore.cn/install") | |||
class GPUEnvChecker:
    """
    Advisory check of the CUDA environment used by the GPU backend.

    The checker only emits warnings through `logger`; it never raises on a
    mismatching or missing CUDA installation. CUDA library directories are
    discovered with `ldd` on the `_c_expression*.so*` file located next to this
    module, and the nvcc version is read from `nvcc --version`.
    """

    def __init__(self):
        # CUDA "major.minor" versions accepted by this build.
        self.version = ["10.1"]
        # env
        self.path = os.getenv("PATH")
        self.ld_lib_path = os.getenv("LD_LIBRARY_PATH")
        # check
        # "0" is the sentinel meaning "no CUDA version has been read from version.txt yet".
        self.v = "0"
        self.cuda_lib_path = self._get_lib_path("libcu")
        self.cuda_bin_path = self._get_bin_path("cuda")

    @staticmethod
    def _run_shell(command):
        """
        Run `command` through the shell with a 3 second timeout.

        Args:
            command (str): Shell command line, may contain pipes and globs.

        Returns:
            subprocess.CompletedProcess, or None when the command timed out or could not be started.
        """
        try:
            return subprocess.run(command, timeout=3, text=True, capture_output=True, check=False, shell=True)
        except (subprocess.TimeoutExpired, OSError) as e:
            # The environment check is best effort, a hanging tool must not abort the caller.
            logger.warning(f"Run command '{command}' failed: {e}")
            return None

    def _get_bin_path(self, bin_name):
        """Get bin path by bin name, only "cuda" is supported, other names give an empty list."""
        if bin_name == "cuda":
            return self._get_cuda_bin_path()
        return []

    def _get_cuda_bin_path(self):
        """Get the existing `<cuda root>/bin` directories derived from the detected cuda lib paths."""
        path_list = []
        for path in self.cuda_lib_path:
            path = os.path.abspath(path.strip() + "/bin/")
            if Path(path).is_dir():
                path_list.append(path)
        return np.unique(path_list)

    def _get_nvcc_version(self, is_set_env):
        """
        Get cuda version by nvcc command.

        Args:
            is_set_env (bool): True when PATH has already been extended with a detected cuda bin
                directory, which stops a second retry.

        Returns:
            str, the release number printed by nvcc (text between "release" and the next comma),
            or "" when nvcc cannot be run.
        """
        nvcc_result = self._run_shell("nvcc --version | grep release")
        if nvcc_result is None:
            return ""
        if nvcc_result.returncode:
            if not is_set_env:
                for path in self.cuda_bin_path:
                    if Path(path + "/nvcc").is_file():
                        # nvcc is not reachable through PATH: prepend the detected bin directory
                        # and retry exactly once.
                        current = os.environ.get('PATH', '')
                        os.environ['PATH'] = (str(path) + ":" + current) if current else str(path)
                        return self._get_nvcc_version(True)
            return ""
        for line in nvcc_result.stdout.split('\n'):
            if line:
                return line.strip().split("release")[1].split(",")[0].strip()
        return ""

    def check_env(self):
        """Check cuda version (version.txt and nvcc) against the supported versions, warn on mismatch."""
        # NOTE(review): __version__ must be defined at module scope; its definition is not visible
        # in this part of the file - confirm.
        version_match = False
        for path in self.cuda_lib_path:
            version_file = path + "/version.txt"
            if not Path(version_file).is_file():
                continue
            if self._check_version(version_file):
                version_match = True
                break
        if not version_match:
            if self.v == "0":
                logger.warning("Cuda version file version.txt is not found, please confirm that the correct "
                               "cuda version has been installed, you can refer to the "
                               "installation guidelines: https://www.mindspore.cn/install")
            else:
                logger.warning(f"MindSpore version {__version__} and cuda version {self.v} does not match, "
                               "please refer to the installation guide for version matching "
                               "information: https://www.mindspore.cn/install")
        nvcc_version = self._get_nvcc_version(False)
        if nvcc_version and (nvcc_version not in self.version):
            logger.warning(f"MindSpore version {__version__} and nvcc(cuda bin) version {nvcc_version} "
                           "does not match, please refer to the installation guide for version matching "
                           "information: https://www.mindspore.cn/install")

    def _check_version(self, version_file):
        """
        Check cuda version by version.txt.

        Returns:
            bool, True when the "major.minor" part of the version in `version_file` is supported.
            An unparsable version string counts as not supported.
        """
        v = self._read_version(version_file)
        try:
            parsed = version.parse(v)
            v_str = str(parsed.major) + "." + str(parsed.minor)
        except (version.InvalidVersion, AttributeError):
            # Depending on the packaging release, a malformed version either raises
            # InvalidVersion or yields an object without major/minor.
            return False
        return v_str in self.version

    def _get_lib_path(self, lib_name):
        """
        Get gpu lib path by ldd command.

        Args:
            lib_name (str): Prefix of the library names to look for in the ldd output, e.g. "libcu".

        Returns:
            Unique parent directories of the directories holding the matched libraries, empty
            when ldd fails or nothing matches.
        """
        path_list = []
        current_path = os.path.split(os.path.realpath(__file__))[0]
        ldd_result = self._run_shell("ldd " + current_path + "/_c_expression*.so* | grep " + lib_name)
        if ldd_result is None:
            return path_list
        if ldd_result.returncode:
            logger.warning(f"{lib_name} so(need by mindspore-gpu) is not found, please confirm that "
                           f"_c_expression.so depend on {lib_name}, "
                           f"and _c_expression.so in directory:{current_path}")
            return path_list
        for line in ldd_result.stdout.split('\n'):
            # ldd prints "<name> => <resolved path> (<address>)" or "<name> => not found".
            path = line.partition("=>")[2]
            if "not found" in path.lower():
                logger.warning(f"Cuda {self.version} version(need by mindspore-gpu) is not found, please confirm "
                               "that the path of cuda is set to the env LD_LIBRARY_PATH, please refer to the "
                               "installation guidelines: https://www.mindspore.cn/install")
                continue
            # Keep the directory part in front of the library file name, then step one level up.
            path = path.partition(lib_name)[0]
            if path:
                path_list.append(os.path.abspath(path.strip() + "../"))
        return np.unique(path_list)

    def _read_version(self, file_path):
        """
        Get gpu version info in version.txt.

        Stores the text following "CUDA Version" (surrounding whitespace removed) in `self.v`.

        Returns:
            str, `self.v`; left unchanged when the file has no "CUDA Version" line.
        """
        with open(file_path, 'r') as f:
            for line in f:
                if line.startswith("CUDA Version"):
                    self.v = line.strip().split("CUDA Version")[1].strip()
                    return self.v
        return self.v
| def check_version_and_env_config(device_type): | |||
| """check version and env config""" | |||
| @@ -173,7 +290,8 @@ def check_version_and_env_config(device_type): | |||
| except ImportError as e: | |||
| env_checker.check_env(e) | |||
| elif device_type == "Gpu": | |||
| pass | |||
| env_checker = GPUEnvChecker() | |||
| env_checker.check_env() | |||
| elif device_type == "Cpu": | |||
| pass | |||
| @@ -14,7 +14,7 @@ | |||
| # ============================================================================ | |||
| """MindSpore Serving Worker, for servable config.""" | |||
| from .servable import declare_servable, AclOptions | |||
| from .servable import declare_servable, AclOptions, GpuOptions | |||
| from .method import register_method, call_preprocess, call_servable, call_postprocess | |||
| from .method import call_preprocess_pipeline, call_postprocess_pipeline | |||
| @@ -22,6 +22,7 @@ __all__ = [] | |||
| __all__.extend([ | |||
| "declare_servable", | |||
| "AclOptions", | |||
| "GpuOptions", | |||
| 'register_method', | |||
| 'call_preprocess', | |||
| 'call_preprocess_pipeline', | |||
| @@ -29,7 +29,7 @@ def declare_servable(servable_file, model_format, with_batch_dim=True, options=N | |||
| model_format (str): Model format, "OM" or "MindIR", case ignored. | |||
| with_batch_dim (bool): Whether the first shape dim of the inputs and outputs of model is batch dim, | |||
| default True. | |||
| options (None, AclOptions, map): Options of model, currently AclOptions works. | |||
| options (None, AclOptions, GpuOptions, map): Options of model, currently AclOptions, GpuOptions works. | |||
| without_batch_dim_inputs (None, int, tuple or list of int): Index of inputs that without batch dim | |||
| when with_batch_dim is True. | |||
| Raises: | |||
| @@ -59,7 +59,7 @@ def declare_servable(servable_file, model_format, with_batch_dim=True, options=N | |||
| for k, w in options.items(): | |||
| check_type.check_str("options key", k) | |||
| check_type.check_str(k + " value", w) | |||
| elif isinstance(options, AclOptions): | |||
| elif isinstance(options, _Options): | |||
| # pylint: disable=protected-access | |||
| options = options._as_options_map() | |||
| elif options is not None: | |||
| @@ -74,7 +74,17 @@ def declare_servable(servable_file, model_format, with_batch_dim=True, options=N | |||
| f", options: {options}, without_batch_dim_inputs: {without_batch_dim_inputs}") | |||
| class AclOptions: | |||
| class _Options: | |||
| """ Abstract base class used to build a Options class. """ | |||
| def __init__(self, **kwargs): | |||
| """ Initialize Options""" | |||
| def _as_options_map(self): | |||
| """Transfer Options to dict of str,str""" | |||
| class AclOptions(_Options): | |||
| """ | |||
| Helper class to set acl options. | |||
| @@ -101,6 +111,7 @@ class AclOptions: | |||
| """ | |||
| def __init__(self, **kwargs): | |||
| super(AclOptions, self).__init__() | |||
| self.insert_op_cfg_path = "" | |||
| self.input_format = "" | |||
| self.input_shape = "" | |||
| @@ -192,7 +203,6 @@ class AclOptions: | |||
| def set_op_select_impl_mode(self, val): | |||
| """Set option 'op_select_impl_mode', which means model precision mode, and the value can be "high_performance" | |||
| or "high_precision", default "high_performance". | |||
| Args: | |||
| val (str): Value of option 'op_select_impl_mode',which can be "high_performance" or "high_precision", | |||
| default "high_performance". | |||
| @@ -221,3 +231,47 @@ class AclOptions: | |||
| if self.op_select_impl_mode: | |||
| options['acl_option.op_select_impl_mode'] = self.op_select_impl_mode | |||
| return options | |||
class GpuOptions(_Options):
    """
    Helper class to set gpu options.

    Args:
        enable_trt_infer (bool): Whether enable inference with TensorRT.

    Raises:
        RuntimeError: Gpu option is invalid, or value is not bool.

    Examples:
        >>> from mindspore_serving.worker import register
        >>> options = register.GpuOptions(enable_trt_infer=True)
        >>> register.declare_servable(servable_file="deeptext.mindir", model_format="MindIR", options=options)
    """

    def __init__(self, **kwargs):
        super().__init__()
        self.enable_trt_infer = False
        # Each supported keyword is routed to the setter that validates it.
        setters = {"enable_trt_infer": self.set_trt_infer_mode}
        for name, value in kwargs.items():
            setter = setters.get(name)
            if setter is None:
                raise RuntimeError("Set gpu option failed, unsupported option " + name)
            setter(value)

    def set_trt_infer_mode(self, val):
        """Set option 'enable_trt_infer'

        Args:
            val (bool): Value of option 'enable_trt_infer'.

        Raises:
            RuntimeError: The type of value is not bool.
        """
        # Validation is delegated to check_type.check_bool before the value is stored.
        check_type.check_bool('enable_trt_infer', val)
        self.enable_trt_infer = val

    def _as_options_map(self):
        """Transfer GpuOptions to dict of str,str; the option is only emitted when it is enabled."""
        if not self.enable_trt_infer:
            return {}
        return {'gpu_option.enable_trt_infer': str(self.enable_trt_infer)}
| @@ -29,6 +29,8 @@ constexpr auto kModelOptionOutputType = "mindspore.option.output_type"; // "FP3 | |||
| constexpr auto kModelOptionPrecisionMode = "mindspore.option.precision_mode"; | |||
| // "force_fp16", "allow_fp32_to_fp16", "must_keep_origin_dtype" or "allow_mix_precision", default as "force_fp16" | |||
| constexpr auto kModelOptionOpSelectImplMode = "mindspore.option.op_select_impl_mode"; | |||
| // "False": Inference with native backend, "True": Inference with Tensor-RT engine, default as "False" | |||
| constexpr auto kModelOptionGpuTrtInferMode = "mindspore.option.gpu_trt_infer_mode"; | |||
| namespace mindspore { | |||
| struct Context::Data { | |||
| @@ -182,4 +184,20 @@ std::vector<char> ModelContext::GetOpSelectImplModeChar(const std::shared_ptr<Co | |||
| const std::string &ref = GetValue<std::string>(context, kModelOptionOpSelectImplMode); | |||
| return StringToChar(ref); | |||
| } | |||
| void ModelContext::SetGpuTrtInferMode(const std::shared_ptr<Context> &context, | |||
| const std::vector<char> &gpu_trt_infer_mode) { | |||
| MS_EXCEPTION_IF_NULL(context); | |||
| if (context->data == nullptr) { | |||
| context->data = std::make_shared<Data>(); | |||
| MS_EXCEPTION_IF_NULL(context->data); | |||
| } | |||
| context->data->params[kModelOptionGpuTrtInferMode] = CharToString(gpu_trt_infer_mode); | |||
| } | |||
| std::vector<char> ModelContext::GetGpuTrtInferModeChar(const std::shared_ptr<Context> &context) { | |||
| MS_EXCEPTION_IF_NULL(context); | |||
| const std::string &ref = GetValue<std::string>(context, kModelOptionGpuTrtInferMode); | |||
| return StringToChar(ref); | |||
| } | |||
| } // namespace mindspore | |||
| @@ -72,6 +72,9 @@ struct MS_API ModelContext : public Context { | |||
| const std::string &op_select_impl_mode); | |||
| static inline std::string GetOpSelectImplMode(const std::shared_ptr<Context> &context); | |||
| static inline void SetGpuTrtInferMode(const std::shared_ptr<Context> &context, const std::string &gpu_trt_infer_mode); | |||
| static inline std::string GetGpuTrtInferMode(const std::shared_ptr<Context> &context); | |||
| private: | |||
| // api without std::string | |||
| static void SetInsertOpConfigPath(const std::shared_ptr<Context> &context, const std::vector<char> &cfg_path); | |||
| @@ -89,6 +92,9 @@ struct MS_API ModelContext : public Context { | |||
| static void SetOpSelectImplMode(const std::shared_ptr<Context> &context, | |||
| const std::vector<char> &op_select_impl_mode); | |||
| static std::vector<char> GetOpSelectImplModeChar(const std::shared_ptr<Context> &context); | |||
| static void SetGpuTrtInferMode(const std::shared_ptr<Context> &context, const std::vector<char> &gpu_trt_infer_mode); | |||
| static std::vector<char> GetGpuTrtInferModeChar(const std::shared_ptr<Context> &context); | |||
| }; | |||
| void GlobalContext::SetGlobalDeviceTarget(const std::string &device_target) { | |||
| @@ -131,5 +137,12 @@ void ModelContext::SetOpSelectImplMode(const std::shared_ptr<Context> &context, | |||
| std::string ModelContext::GetOpSelectImplMode(const std::shared_ptr<Context> &context) { | |||
| return CharToString(GetOpSelectImplModeChar(context)); | |||
| } | |||
| void ModelContext::SetGpuTrtInferMode(const std::shared_ptr<Context> &context, const std::string &gpu_trt_infer_mode) { | |||
| SetGpuTrtInferMode(context, StringToChar(gpu_trt_infer_mode)); | |||
| } | |||
| std::string ModelContext::GetGpuTrtInferMode(const std::shared_ptr<Context> &context) { | |||
| return CharToString(GetGpuTrtInferModeChar(context)); | |||
| } | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_INCLUDE_API_CONTEXT_H | |||