!181 gpu serving

From: @wilfchen Reviewed-by: @xu-yfei,@xu-yfei,@zhangyinxia Signed-off-by: @xu-yfei,@xu-yfei
5 years ago · 07ed95c751
--- a/mindspore_serving/ccsrc/worker/inference/mindspore_model_wrap.cc
+++ b/mindspore_serving/ccsrc/worker/inference/mindspore_model_wrap.cc
@@ -153,6 +153,8 @@ std::shared_ptr<Context> MindSporeModelWrap::TransformModelContext(const std::ma
      mindspore::ModelContext::SetPrecisionMode(context, value);
    } else if (key == "acl_option.op_select_impl_mode") {
      mindspore::ModelContext::SetOpSelectImplMode(context, value);
    } else if (key == "gpu_option.enable_trt_infer") {
      mindspore::ModelContext::SetGpuTrtInferMode(context, value);
    }
  }
  return context;
@@ -397,6 +399,5 @@ ApiBufferTensorWrap::ApiBufferTensorWrap() = default;
 // Builds a wrapper around an existing MSTensor: the tensor_ member is initialized from `tensor`.
 ApiBufferTensorWrap::ApiBufferTensorWrap(const mindspore::MSTensor &tensor) : tensor_(tensor) {}

 // Defaulted destructor: no custom cleanup code is written here.
 ApiBufferTensorWrap::~ApiBufferTensorWrap() = default;

 }  // namespace serving
 }  // namespace mindspore
--- a/mindspore_serving/worker/_check_version.py
+++ b/mindspore_serving/worker/_check_version.py
@@ -15,7 +15,10 @@
 """version and config check"""
 import os
 import sys
 import subprocess
 from pathlib import Path
 import numpy as np
 from packaging import version
 from mindspore_serving import log as logger


@@ -163,6 +166,120 @@ class AscendEnvChecker:
                "Can not find opp path (need by mindspore-ascend), please check if you have set env ASCEND_OPP_PATH, "
                "you can reference to the installation guidelines https://www.mindspore.cn/install")

 class GPUEnvChecker():
    """GPU environment check.

    Inspects the CUDA installation referenced by the GPU shared library (via ``ldd``) and by
    ``nvcc``, and logs a warning when the detected CUDA version is not one of ``self.version``.
    The check is advisory: mismatches are reported through ``logger.warning`` only.
    """

    def __init__(self):
        # CUDA versions ("major.minor") that are accepted by the checks below.
        self.version = ["10.1"]
        # env
        self.path = os.getenv("PATH")
        self.ld_lib_path = os.getenv("LD_LIBRARY_PATH")

        # check
        # Last CUDA version string read from a version.txt; "0" means none has been read.
        self.v = "0"
        self.cuda_lib_path = self._get_lib_path("libcu")
        self.cuda_bin_path = self._get_bin_path("cuda")

    def _get_bin_path(self, bin_name):
        """Get bin path by bin name.

        Args:
            bin_name (str): Name of the tool family; only "cuda" is supported.

        Returns:
            Sequence of bin directories, or an empty list for an unsupported name.
        """
        if bin_name == "cuda":
            return self._get_cuda_bin_path()
        return []

    def _get_cuda_bin_path(self):
        """Get cuda bin path by lib path.

        Returns:
            Sorted, de-duplicated array of existing ``<cuda root>/bin`` directories.
        """
        path_list = []
        for path in self.cuda_lib_path:
            path = os.path.abspath(path.strip() + "/bin/")
            if Path(path).is_dir():
                path_list.append(path)
        return np.unique(path_list)

    def _get_nvcc_version(self, is_set_env):
        """Get cuda version by nvcc command.

        Args:
            is_set_env (bool): True when PATH has already been extended with a cuda bin
                directory, which stops any further retry.

        Returns:
            str, the release version reported by nvcc (e.g. "10.1"), or "" when it cannot
            be determined.
        """
        try:
            nvcc_result = subprocess.run(["nvcc --version | grep release"],
                                         timeout=3, text=True, capture_output=True, check=False, shell=True)
        except (subprocess.TimeoutExpired, OSError):
            # The check is advisory: a hanging or unlaunchable nvcc must not abort start-up.
            return ""
        if nvcc_result.returncode:
            if not is_set_env:
                for path in self.cuda_bin_path:
                    if Path(path + "/nvcc").is_file():
                        # Retry once with the discovered cuda bin directory in front of PATH.
                        old_path = os.environ.get('PATH')
                        if old_path:
                            os.environ['PATH'] = str(path) + ":" + old_path
                        else:
                            os.environ['PATH'] = str(path)
                        return self._get_nvcc_version(True)
            return ""
        for line in nvcc_result.stdout.split('\n'):
            if line:
                # The line looks like "... release 10.1, V10.1.243"; keep the text between
                # "release" and the following comma.
                return line.strip().split("release")[1].split(",")[0].strip()
        return ""

    def check_env(self):
        """Check cuda version.

        Looks for a ``version.txt`` under every detected cuda root, then compares the nvcc
        version; every mismatch is reported as a warning.
        """
        # NOTE(review): `__version__` is not among the imports visible in this part of the
        # file - confirm it is defined at module level, otherwise the warnings below fail.
        version_match = False
        for path in self.cuda_lib_path:
            version_file = path + "/version.txt"
            if not Path(version_file).is_file():
                continue
            if self._check_version(version_file):
                version_match = True
                break
        if not version_match:
            if self.v == "0":
                logger.warning("Cuda version file version.txt is not found, please confirm that the correct "
                               "cuda version has been installed, you can refer to the "
                               "installation guidelines: https://www.mindspore.cn/install")
            else:
                logger.warning(f"MindSpore version {__version__} and cuda version {self.v} does not match, "
                               "please refer to the installation guide for version matching "
                               "information: https://www.mindspore.cn/install")
        nvcc_version = self._get_nvcc_version(False)
        if nvcc_version and (nvcc_version not in self.version):
            logger.warning(f"MindSpore version {__version__} and nvcc(cuda bin) version {nvcc_version} "
                           "does not match, please refer to the installation guide for version matching "
                           "information: https://www.mindspore.cn/install")

    def _check_version(self, version_file):
        """Check cuda version by version.txt.

        Args:
            version_file (str): Path of a cuda ``version.txt`` file.

        Returns:
            bool, whether the "major.minor" version in the file is in ``self.version``.
        """
        v = self._read_version(version_file)
        v = version.parse(v)
        v_str = str(v.major) + "." + str(v.minor)
        return v_str in self.version

    def _get_lib_path(self, lib_name):
        """Get gpu lib path by ldd command.

        Args:
            lib_name (str): Library name prefix to look for in the ldd output, e.g. "libcu".

        Returns:
            Sorted, de-duplicated array of the parent directories of the directories that
            contain the matching libraries; an empty list when ldd fails.
        """
        path_list = []
        current_path = os.path.split(os.path.realpath(__file__))[0]
        try:
            ldd_result = subprocess.run(["ldd " + current_path + "/_c_expression*.so* | grep " + lib_name],
                                        timeout=3, text=True, capture_output=True, check=False, shell=True)
        except (subprocess.TimeoutExpired, OSError):
            # Treat a hanging or unlaunchable ldd like a failed lookup instead of crashing.
            ldd_result = None
        if ldd_result is None or ldd_result.returncode:
            logger.warning(f"{lib_name} so(need by mindspore-gpu) is not found, please confirm that "
                           f"_c_expression.so depend on {lib_name}, "
                           f"and _c_expression.so in directory:{current_path}")
            return path_list
        result = ldd_result.stdout
        for i in result.split('\n'):
            # ldd lines look like "libcudart.so.10.1 => /usr/local/cuda/lib64/libcudart.so.10.1 (0x...)".
            path = i.partition("=>")[2]
            if "not found" in path.lower():
                logger.warning(f"Cuda {self.version} version(need by mindspore-gpu) is not found, please confirm "
                               "that the path of cuda is set to the env LD_LIBRARY_PATH, please refer to the "
                               "installation guidelines: https://www.mindspore.cn/install")
                continue
            # Keep the directory part before the library file name, then step one level up.
            path = path.partition(lib_name)[0]
            if path:
                path_list.append(os.path.abspath(path.strip() + "../"))
        return np.unique(path_list)

    def _read_version(self, file_path):
        """Get gpu version info in version.txt.

        Args:
            file_path (str): Path of a cuda ``version.txt`` file.

        Returns:
            str, the text following "CUDA Version" on the first matching line (surrounding
            whitespace removed), or the previous value of ``self.v`` when no line matches.
        """
        with open(file_path, 'r') as f:
            for line in f:
                if line.startswith("CUDA Version"):
                    self.v = line.strip().split("CUDA Version")[1].strip()
                    return self.v
        return self.v


 def check_version_and_env_config(device_type):
    """check version and env config"""
@@ -173,7 +290,8 @@ def check_version_and_env_config(device_type):
        except ImportError as e:
            env_checker.check_env(e)
    elif device_type == "Gpu":
        pass
        env_checker = GPUEnvChecker()
        env_checker.check_env()
    elif device_type == "Cpu":
        pass

--- a/mindspore_serving/worker/register/init.py
+++ b/mindspore_serving/worker/register/init.py
@@ -14,7 +14,7 @@
 # ============================================================================
 """MindSpore Serving Worker, for servable config."""

 from .servable import declare_servable, AclOptions
 from .servable import declare_servable, AclOptions, GpuOptions
 from .method import register_method, call_preprocess, call_servable, call_postprocess
 from .method import call_preprocess_pipeline, call_postprocess_pipeline

@@ -22,6 +22,7 @@ __all__ = []
 __all__.extend([
    "declare_servable",
    "AclOptions",
    "GpuOptions",
    'register_method',
    'call_preprocess',
    'call_preprocess_pipeline',
--- a/mindspore_serving/worker/register/servable.py
+++ b/mindspore_serving/worker/register/servable.py
@@ -29,7 +29,7 @@ def declare_servable(servable_file, model_format, with_batch_dim=True, options=N
        model_format (str): Model format, "OM" or "MindIR", case ignored.
        with_batch_dim (bool): Whether the first shape dim of the inputs and outputs of model is batch dim,
             default True.
        options (None, AclOptions, map): Options of model, currently AclOptions works.
        options (None, AclOptions, GpuOptions, map): Options of model, currently AclOptions and GpuOptions work.
        without_batch_dim_inputs (None, int, tuple or list of int): Index of inputs that without batch dim
            when with_batch_dim is True.
    Raises:
@@ -59,7 +59,7 @@ def declare_servable(servable_file, model_format, with_batch_dim=True, options=N
        for k, w in options.items():
            check_type.check_str("options key", k)
            check_type.check_str(k + " value", w)
    elif isinstance(options, AclOptions):
    elif isinstance(options, _Options):
        # pylint: disable=protected-access
        options = options._as_options_map()
    elif options is not None:
@@ -74,7 +74,17 @@ def declare_servable(servable_file, model_format, with_batch_dim=True, options=N
                f", options: {options}, without_batch_dim_inputs: {without_batch_dim_inputs}")


 class AclOptions:
 class _Options:
    """ Abstract base class used to build a Options class. """

    def __init__(self, **kwargs):
        """ Initialize Options"""

    def _as_options_map(self):
        """Transfer Options to dict of str,str"""


 class AclOptions(_Options):
    """
    Helper class to set acl options.

@@ -101,6 +111,7 @@ class AclOptions:
    """

    def __init__(self, **kwargs):
        super(AclOptions, self).__init__()
        self.insert_op_cfg_path = ""
        self.input_format = ""
        self.input_shape = ""
@@ -192,7 +203,6 @@ class AclOptions:
    def set_op_select_impl_mode(self, val):
        """Set option 'op_select_impl_mode', which means model precision mode, and the value can be "high_performance"
        or "high_precision",  default "high_performance".

        Args:
            val (str): Value of option 'op_select_impl_mode', which can be "high_performance" or "high_precision",
                default "high_performance".
@@ -221,3 +231,47 @@ class AclOptions:
        if self.op_select_impl_mode:
            options['acl_option.op_select_impl_mode'] = self.op_select_impl_mode
        return options


 class GpuOptions(_Options):
    """
    Helper class to set gpu options.

    Args:
        enable_trt_infer (bool): Whether to enable inference with TensorRT. Default: False.

    Raises:
        RuntimeError: An unsupported gpu option is given, or the option value fails its type check.

    Examples:
        >>> from mindspore_serving.worker import register
        >>> options = register.GpuOptions(enable_trt_infer=True)
        >>> register.declare_servable(servable_file="deeptext.mindir", model_format="MindIR", options=options)
    """

    def __init__(self, **kwargs):
        super().__init__()
        self.enable_trt_infer = False
        # Each supported keyword is routed to the setter that validates and stores it.
        setters = {"enable_trt_infer": self.set_trt_infer_mode}
        for name, value in kwargs.items():
            setter = setters.get(name)
            if setter is None:
                raise RuntimeError("Set gpu option failed, unsupported option " + name)
            setter(value)

    def set_trt_infer_mode(self, val):
        """Set option 'enable_trt_infer'.

        Args:
            val (bool): Value of option 'enable_trt_infer'.

        Raises:
            RuntimeError: The type of value is not bool.
        """
        # Validate first so that an invalid value never replaces the stored one.
        check_type.check_bool('enable_trt_infer', val)
        self.enable_trt_infer = val

    def _as_options_map(self):
        """Export the gpu options as a dict of str to str.

        The 'gpu_option.enable_trt_infer' entry is present only when the option is enabled.
        """
        if not self.enable_trt_infer:
            return {}
        return {'gpu_option.enable_trt_infer': str(self.enable_trt_infer)}
--- a/tests/ut/stub/cxx_api/context.cc
+++ b/tests/ut/stub/cxx_api/context.cc
@@ -29,6 +29,8 @@ constexpr auto kModelOptionOutputType = "mindspore.option.output_type";  // "FP3
 constexpr auto kModelOptionPrecisionMode = "mindspore.option.precision_mode";
 // "force_fp16", "allow_fp32_to_fp16", "must_keep_origin_dtype" or "allow_mix_precision", default as "force_fp16"
 constexpr auto kModelOptionOpSelectImplMode = "mindspore.option.op_select_impl_mode";
 // Key under which the GPU TensorRT inference mode string is stored in the context parameters.
 // "False": Inference with native backend, "True": Inference with Tensor-RT engine, default as "False"
 constexpr auto kModelOptionGpuTrtInferMode = "mindspore.option.gpu_trt_infer_mode";

 namespace mindspore {
 struct Context::Data {
@@ -182,4 +184,20 @@ std::vector<char> ModelContext::GetOpSelectImplModeChar(const std::shared_ptr<Co
  const std::string &ref = GetValue<std::string>(context, kModelOptionOpSelectImplMode);
  return StringToChar(ref);
 }

 // Stores the GPU TensorRT inference mode (given as characters) in the parameters of `context`,
 // creating the context's data holder first when it does not exist yet.
 void ModelContext::SetGpuTrtInferMode(const std::shared_ptr<Context> &context,
                                       const std::vector<char> &gpu_trt_infer_mode) {
  MS_EXCEPTION_IF_NULL(context);
  if (!context->data) {
    // Lazily allocate the holder so the parameter map below is always available.
    context->data = std::make_shared<Data>();
    MS_EXCEPTION_IF_NULL(context->data);
  }
  auto &params = context->data->params;
  params[kModelOptionGpuTrtInferMode] = CharToString(gpu_trt_infer_mode);
 }

 // Reads the GPU TensorRT inference mode string from `context` and returns it as characters.
 std::vector<char> ModelContext::GetGpuTrtInferModeChar(const std::shared_ptr<Context> &context) {
  MS_EXCEPTION_IF_NULL(context);
  return StringToChar(GetValue<std::string>(context, kModelOptionGpuTrtInferMode));
 }
 }  // namespace mindspore
--- a/tests/ut/stub/include/api/context.h
+++ b/tests/ut/stub/include/api/context.h
@@ -72,6 +72,9 @@ struct MS_API ModelContext : public Context {
                                         const std::string &op_select_impl_mode);
  static inline std::string GetOpSelectImplMode(const std::shared_ptr<Context> &context);

  // std::string accessors for the GPU TensorRT inference mode option. Their inline definitions in this
  // header convert to/from std::vector<char> and forward to the overloads taking/returning std::vector<char>.
  static inline void SetGpuTrtInferMode(const std::shared_ptr<Context> &context, const std::string &gpu_trt_infer_mode);
  static inline std::string GetGpuTrtInferMode(const std::shared_ptr<Context> &context);

 private:
  // api without std::string
  static void SetInsertOpConfigPath(const std::shared_ptr<Context> &context, const std::vector<char> &cfg_path);
@@ -89,6 +92,9 @@ struct MS_API ModelContext : public Context {
  static void SetOpSelectImplMode(const std::shared_ptr<Context> &context,
                                  const std::vector<char> &op_select_impl_mode);
  static std::vector<char> GetOpSelectImplModeChar(const std::shared_ptr<Context> &context);

  // std::vector<char> variants of the GPU TensorRT inference mode accessors (no std::string in the signature).
  static void SetGpuTrtInferMode(const std::shared_ptr<Context> &context, const std::vector<char> &gpu_trt_infer_mode);
  static std::vector<char> GetGpuTrtInferModeChar(const std::shared_ptr<Context> &context);
 };

 void GlobalContext::SetGlobalDeviceTarget(const std::string &device_target) {
@@ -131,5 +137,12 @@ void ModelContext::SetOpSelectImplMode(const std::shared_ptr<Context> &context,
 // Returns the op-select implementation mode of `context` as a std::string.
 std::string ModelContext::GetOpSelectImplMode(const std::shared_ptr<Context> &context) {
  // Fetch through the std::vector<char> accessor, then convert the characters to a string.
  const auto mode_chars = GetOpSelectImplModeChar(context);
  return CharToString(mode_chars);
 }

 // Sets the GPU TensorRT inference mode of `context` from a std::string.
 void ModelContext::SetGpuTrtInferMode(const std::shared_ptr<Context> &context, const std::string &gpu_trt_infer_mode) {
  // Convert to characters first, then hand over to the std::vector<char> overload.
  const auto mode_chars = StringToChar(gpu_trt_infer_mode);
  SetGpuTrtInferMode(context, mode_chars);
 }
 // Returns the GPU TensorRT inference mode of `context` as a std::string.
 std::string ModelContext::GetGpuTrtInferMode(const std::shared_ptr<Context> &context) {
  // Fetch through the std::vector<char> accessor, then convert the characters to a string.
  const auto mode_chars = GetGpuTrtInferModeChar(context);
  return CharToString(mode_chars);
 }
 }  // namespace mindspore
 #endif  // MINDSPORE_INCLUDE_API_CONTEXT_H