Browse Source

Serving, bugfix 0301

tags/v1.2.0
xuyongfei 5 years ago
parent
commit
bb7a7236a8
15 changed files with 206 additions and 72 deletions
  1. +2
    -1
      mindspore_serving/ccsrc/python/serving_py.cc
  2. +15
    -0
      mindspore_serving/ccsrc/python/worker/worker_py.cc
  3. +2
    -0
      mindspore_serving/ccsrc/python/worker/worker_py.h
  4. +31
    -0
      mindspore_serving/ccsrc/worker/inference/inference.cc
  5. +1
    -0
      mindspore_serving/ccsrc/worker/inference/inference.h
  6. +34
    -29
      mindspore_serving/ccsrc/worker/inference/mindspore_model_wrap.cc
  7. +1
    -0
      mindspore_serving/ccsrc/worker/inference/mindspore_model_wrap.h
  8. +2
    -18
      mindspore_serving/ccsrc/worker/local_servable/local_sevable.cc
  9. +29
    -6
      mindspore_serving/worker/_check_version.py
  10. +1
    -0
      mindspore_serving/worker/_worker.py
  11. +45
    -14
      mindspore_serving/worker/distributed/agent_startup.py
  12. +8
    -2
      mindspore_serving/worker/init_mindspore.py
  13. +2
    -1
      requirements_test.txt
  14. +2
    -1
      setup.py
  15. +31
    -0
      tests/ut/stub/stub_inference.cc

+ 2
- 1
mindspore_serving/ccsrc/python/serving_py.cc View File

@@ -163,7 +163,8 @@ void PyRegWorker(pybind11::module *m_ptr) {
.def_static("push_preprocess_result", &PyWorker::PushPreprocessPyResult)
.def_static("push_preprocess_failed", &PyWorker::PushPreprocessPyFailed)
.def_static("push_postprocess_result", &PyWorker::PushPostprocessPyResult)
.def_static("push_postprocess_failed", &PyWorker::PushPostprocessPyFailed);
.def_static("push_postprocess_failed", &PyWorker::PushPostprocessPyFailed)
.def_static("get_device_type", &PyWorker::GetDeviceType);

py::class_<ServableContext, std::shared_ptr<ServableContext>>(m, "Context_")
.def(py::init<>())


+ 15
- 0
mindspore_serving/ccsrc/python/worker/worker_py.cc View File

@@ -25,6 +25,7 @@
#include "worker/distributed_worker/distributed_servable.h"
#include "worker/grpc/worker_server.h"
#include "worker/distributed_worker/distributed_process/distributed_server.h"
#include "worker/inference/inference.h"

namespace mindspore::serving {

@@ -220,4 +221,18 @@ void PyWorker::StopAndClear() {

int PyWorker::GetBatchSize() { return Worker::GetInstance().GetBatchSize(); }

std::string PyWorker::GetDeviceType() {
  // Report the device family ("Ascend", "Gpu" or "Cpu") that the currently
  // loaded inference backend supports; empty string when none is supported.
  const auto device_type = InferenceLoader::Instance().GetSupportDeviceType(kDeviceTypeNotSpecified, kUnknownType);
  switch (device_type) {
    case kDeviceTypeAscend:
    case kDeviceTypeAscendMS:
    case kDeviceTypeAscendCL:
      return "Ascend";
    case kDeviceTypeGpu:
      return "Gpu";
    case kDeviceTypeCpu:
      return "Cpu";
    default:
      return std::string();
  }
}

} // namespace mindspore::serving

+ 2
- 0
mindspore_serving/ccsrc/python/worker/worker_py.h View File

@@ -56,6 +56,8 @@ class MS_API PyWorker {
static void PushPostprocessPyResult(const py::tuple &output_batch);
static void PushPostprocessPyFailed(int count);

static std::string GetDeviceType();

private:
static void OnEndStartServable(const std::string &servable_directory, const std::string &servable_name,
uint32_t spec_version_number, uint32_t started_version_number);


+ 31
- 0
mindspore_serving/ccsrc/worker/inference/inference.cc View File

@@ -113,4 +113,35 @@ Status InferenceLoader::LoadMindSporeModelWrap() {
}
return SUCCESS;
}

DeviceType InferenceLoader::GetSupportDeviceType(DeviceType device_type, ModelType model_type) {
  // Resolve the concrete device type the loaded inference backend can serve.
  // device_type: requested device; kDeviceTypeNotSpecified means probe all.
  // model_type: model format; kUnknownType defaults to MindIR.
  // Returns the first supported candidate, or kDeviceTypeNotSpecified if none.
  auto mindspore_infer = CreateMindSporeInfer();
  if (mindspore_infer == nullptr) {
    MSI_LOG_ERROR << "Create MindSpore infer failed";
    return kDeviceTypeNotSpecified;
  }
  if (model_type == kUnknownType) {
    model_type = kMindIR;
  }
  // Single probe loop shared by all branches (the original repeated it three
  // times and misleadingly named a list containing GPU "ascend_list").
  auto first_supported = [&](std::initializer_list<DeviceType> candidates) {
    for (auto item : candidates) {
      if (mindspore_infer->CheckModelSupport(item, model_type)) {
        return item;
      }
    }
    return kDeviceTypeNotSpecified;
  };
  if (device_type == kDeviceTypeNotSpecified) {
    // No preference: try Ascend runtimes first, then GPU.
    return first_supported({kDeviceTypeAscendCL, kDeviceTypeAscendMS, kDeviceTypeGpu});
  }
  if (device_type == kDeviceTypeAscend) {
    // Generic "Ascend" maps to whichever Ascend runtime is available.
    return first_supported({kDeviceTypeAscendCL, kDeviceTypeAscendMS});
  }
  return first_supported({device_type});
}
} // namespace mindspore::serving

+ 1
- 0
mindspore_serving/ccsrc/worker/inference/inference.h View File

@@ -135,6 +135,7 @@ class MS_API InferenceLoader {
~InferenceLoader();
static InferenceLoader &Instance();
std::shared_ptr<InferenceBase> CreateMindSporeInfer();
DeviceType GetSupportDeviceType(DeviceType device_type, ModelType model_type);

private:
typedef InferenceBase *(*CreateInferHandle)();


+ 34
- 29
mindspore_serving/ccsrc/worker/inference/mindspore_model_wrap.cc View File

@@ -78,13 +78,9 @@ Status MindSporeModelWrap::LoadModelFromFile(serving::DeviceType device_type, ui
const std::string &file_name, ModelType model_type, bool with_batch_dim,
const std::vector<int> &without_batch_dim_inputs,
const std::map<std::string, std::string> &other_options) {
std::string device_type_str;
if (device_type == kDeviceTypeAscendMS) {
device_type_str = mindspore::kDeviceTypeAscend910;
} else if (device_type == kDeviceTypeAscendCL) {
device_type_str = mindspore::kDeviceTypeAscend310;
} else {
MSI_LOG_EXCEPTION << "Only support Ascend310 or Ascend910 in MindSporeModelWrap";
std::string ms_device_type = GetMsDeviceType(device_type);
if (ms_device_type.empty()) {
return INFER_STATUS_LOG_ERROR(FAILED) << "Invalid device type " << device_type;
}
auto ms_model_type = GetMsModelType(model_type);
if (ms_model_type == mindspore::kUnknownType) {
@@ -93,26 +89,26 @@ Status MindSporeModelWrap::LoadModelFromFile(serving::DeviceType device_type, ui

std::shared_ptr<mindspore::Model> model = nullptr;
try {
mindspore::GlobalContext::SetGlobalDeviceTarget(device_type_str);
mindspore::GlobalContext::SetGlobalDeviceTarget(ms_device_type);
mindspore::GlobalContext::SetGlobalDeviceID(device_id);
auto graph = mindspore::Serialization::LoadModel(file_name, ms_model_type);
auto context = TransformModelContext(other_options);
model = std::make_shared<mindspore::Model>(mindspore::GraphCell(graph), context);
} catch (std::runtime_error &ex) {
return INFER_STATUS_LOG_ERROR(FAILED)
<< "Load model from file failed, model file: " << file_name << ", device_type: '" << device_type_str
<< "Load model from file failed, model file: " << file_name << ", device_type: '" << ms_device_type
<< "', device_id: " << device_id << ", model type: " << model_type << ", options: " << other_options;
}
mindspore::Status status = model->Build();
if (!status.IsOk()) {
return INFER_STATUS_LOG_ERROR(FAILED)
<< "Load model from file failed, model file: " << file_name << ", device_type: '" << device_type_str
<< "Load model from file failed, model file: " << file_name << ", device_type: '" << ms_device_type
<< "', device_id: " << device_id << ", model type: " << model_type << ", options: " << other_options
<< ", build error detail: " << status.ToString();
}
ApiModelInfo api_model_info;
api_model_info.model = model;
api_model_info.device_type = device_type_str;
api_model_info.device_type = ms_device_type;
api_model_info.device_id = device_id;
api_model_info.with_batch_dim = with_batch_dim;
api_model_info.without_batch_dim_inputs = without_batch_dim_inputs;
@@ -122,7 +118,7 @@ Status MindSporeModelWrap::LoadModelFromFile(serving::DeviceType device_type, ui
}
GetModelBatchSize(&api_model_info);
model_ = api_model_info;
MSI_LOG_INFO << "Load model from file success, model file: " << file_name << ", device_type: '" << device_type_str
MSI_LOG_INFO << "Load model from file success, model file: " << file_name << ", device_type: '" << ms_device_type
<< "', device_id: " << device_id << ", model type: " << model_type << ", options: " << other_options;
return SUCCESS;
}
@@ -346,24 +342,15 @@ std::vector<serving::TensorInfo> MindSporeModelWrap::GetOutputInfos() const { re
ssize_t MindSporeModelWrap::GetBatchSize() const { return model_.batch_size; }

bool MindSporeModelWrap::CheckModelSupport(DeviceType device_type, ModelType model_type) const {
std::string device_type_str;
switch (device_type) {
case kDeviceTypeAscendMS:
if (model_type != kMindIR) {
return false;
}
device_type_str = mindspore::kDeviceTypeAscend910;
break;
case kDeviceTypeAscendCL:
if (model_type != kMindIR && model_type != kOM) {
return false;
}
device_type_str = mindspore::kDeviceTypeAscend310;
break;
default:
return false;
std::string ms_device_type = GetMsDeviceType(device_type);
if (ms_device_type.empty()) {
return false;
}
auto ms_model_type = GetMsModelType(model_type);
if (ms_model_type == mindspore::kUnknownType) {
return false;
}
return mindspore::Model::CheckModelSupport(device_type_str, GetMsModelType(model_type));
return mindspore::Model::CheckModelSupport(ms_device_type, ms_model_type);
}

mindspore::ModelType MindSporeModelWrap::GetMsModelType(serving::ModelType model_type) {
@@ -387,6 +374,24 @@ mindspore::ModelType MindSporeModelWrap::GetMsModelType(serving::ModelType model
return ms_model_type;
}

std::string MindSporeModelWrap::GetMsDeviceType(serving::DeviceType device_type) {
  // Translate a serving device type into the MindSpore API device-target
  // string. Unsupported types yield an empty string, which callers treat as
  // "invalid device type".
  if (device_type == kDeviceTypeAscendMS) {
    return mindspore::kDeviceTypeAscend910;
  }
  if (device_type == kDeviceTypeAscendCL) {
    return mindspore::kDeviceTypeAscend310;
  }
  if (device_type == kDeviceTypeGpu) {
    return mindspore::kDeviceTypeGPU;
  }
  return std::string();
}

ApiBufferTensorWrap::ApiBufferTensorWrap() = default;

ApiBufferTensorWrap::ApiBufferTensorWrap(const mindspore::MSTensor &tensor) : tensor_(tensor) {}


+ 1
- 0
mindspore_serving/ccsrc/worker/inference/mindspore_model_wrap.h View File

@@ -79,6 +79,7 @@ class MindSporeModelWrap : public InferenceBase {
std::shared_ptr<Context> TransformModelContext(const std::map<std::string, std::string> &other_options);
void GetModelBatchSize(ApiModelInfo *model_info);
static mindspore::ModelType GetMsModelType(serving::ModelType model_type);
static std::string GetMsDeviceType(serving::DeviceType device_type);
};

class ApiBufferTensorWrap : public TensorBase {


+ 2
- 18
mindspore_serving/ccsrc/worker/local_servable/local_sevable.cc View File

@@ -200,27 +200,11 @@ Status LocalModelServable::InitDevice(ModelType model_type, const std::map<std::
Status status;
auto context = ServableContext::Instance();
DeviceType device_type = ServableContext::Instance()->GetDeviceType();
auto get_support_device_type = [this, device_type, model_type]() {
std::vector<DeviceType> support_device_list;
if (device_type == kDeviceTypeNotSpecified || device_type == kDeviceTypeAscend) {
auto ascend_list = {kDeviceTypeAscendCL, kDeviceTypeAscendMS};
for (auto item : ascend_list) {
if (session_->CheckModelSupport(item, model_type)) {
return item;
}
}
} else if (device_type == kDeviceTypeAscendCL || device_type == kDeviceTypeAscendMS) {
if (session_->CheckModelSupport(device_type, model_type)) {
return device_type;
}
}
return kDeviceTypeNotSpecified;
};
auto support_device_type = get_support_device_type();
auto support_device_type = InferenceLoader::Instance().GetSupportDeviceType(device_type, model_type);
if (support_device_type == kDeviceTypeNotSpecified) {
return INFER_STATUS_LOG_ERROR(FAILED)
<< "Not support device type " << device_type << " and model type " << model_type
<< ". Ascend 910 supports MindIR model and Ascend 310 supports OM, MindIR model";
<< ". Ascend 910, Ascend 310 and GPU supports MindIR model, and Ascend 310 supports OM model";
}
context->SetDeviceType(support_device_type);
return SUCCESS;


+ 29
- 6
mindspore_serving/worker/_check_version.py View File

@@ -127,6 +127,19 @@ class AscendEnvChecker:
logger.warning(f"No such directory: {self.op_path}, Please check if Ascend 910 AI software package is "
f"installed correctly.")

def try_set_env_lib(self):
    """Best-effort: prepend the TBE path to LD_LIBRARY_PATH when `te` is missing.

    Unlike the warning-emitting env check, this silently does nothing when
    the `te` package imports fine or the TBE directory does not exist.
    """
    try:
        # pylint: disable=unused-import
        import te
    # pylint: disable=broad-except
    except Exception:
        if Path(self.tbe_path).is_dir():
            current = os.getenv('LD_LIBRARY_PATH')
            if current:
                os.environ['LD_LIBRARY_PATH'] = self.tbe_path + ":" + current
            else:
                os.environ['LD_LIBRARY_PATH'] = self.tbe_path

def _check_env(self):
"""ascend dependence path check"""
if self.path is None or self.path_check not in self.path:
@@ -151,11 +164,21 @@ class AscendEnvChecker:
"you can reference to the installation guidelines https://www.mindspore.cn/install")


def check_version_and_env_config():
def check_version_and_env_config(device_type):
    """Check version and configure the environment for the given device type.

    Only "Ascend" needs environment setup; "Gpu" and "Cpu" require nothing.
    """
    if device_type == "Ascend":
        checker = AscendEnvChecker()
        try:
            checker.set_env()
        except ImportError as err:
            checker.check_env(err)
    elif device_type in ("Gpu", "Cpu"):
        # No extra environment configuration is needed for these backends.
        pass


try:
env_checker.set_env()
except ImportError as e:
env_checker.check_env(e)
def check_version_and_try_set_env_lib():
    """Check version and best-effort set LD_LIBRARY_PATH via the Ascend checker."""
    AscendEnvChecker().try_set_env_lib()

+ 1
- 0
mindspore_serving/worker/_worker.py View File

@@ -197,6 +197,7 @@ def start_servable_in_master(servable_directory, servable_name, version_number=0
"Ascend" means the device type can be Ascend910 or Ascend310, etc.
"Davinci" has the same meaning as "Ascend".
None means the device type is determined by the MindSpore environment.
device_id (int): The id of the device the model loads into and runs in.

Examples:
>>> import os


+ 45
- 14
mindspore_serving/worker/distributed/agent_startup.py View File

@@ -18,7 +18,9 @@ import os
import time
import sys
import traceback
import signal
from multiprocessing import Process, Pipe
import psutil

from mindspore_serving._mindspore_serving import ExitSignalHandle_
from mindspore_serving._mindspore_serving import WorkerAgent_, AgentStartUpConfig_
@@ -163,23 +165,52 @@ def _send_exit_msg_to_children(send_pipe_list, subprocess_list):
logger.warning(f"Child {index} is not alive")
index += 1

wait_seconds = 10
for i in range(wait_seconds):
all_exit = True
for process in subprocess_list:
if process.is_alive():
logger.warning(f"There are still child processes that have not exited and will be forcibly killed in "
f"{wait_seconds - i} seconds.")
time.sleep(1)
all_exit = False
break
if all_exit:
logger.info(f"All Child process exited")
return
def wait_exit(wait_seconds, msg):
for i in range(wait_seconds):
all_exit = True
for process in subprocess_list:
if process.is_alive():
logger.warning(f"There are still child processes that have not exited and {msg} in "
f"{wait_seconds - i} seconds.")
time.sleep(1)
all_exit = False
break
if all_exit:
logger.info(f"All Child process exited")
return True
return False

if wait_exit(3, "SIGINT will be sent"):
return
# Send signal SIGINT
for index, process in enumerate(subprocess_list):
if process.is_alive():
logger.warning(f"Send signal SIGINT to {index}")
try:
child_process = psutil.Process(process.pid)
children_of_child = child_process.children(recursive=True)
for item in children_of_child:
os.kill(item.pid, signal.SIGINT)
# pylint: disable=broad-except
except Exception as e:
logger.warning(f"Get exception when send signal SIGINT to children of child {index}, exception: {e}")
os.kill(process.pid, signal.SIGINT)

if wait_exit(10, "will be forcibly killed"):
return

for index, process in enumerate(subprocess_list):
if process.is_alive():
logger.warning(f"Kill Child process {index}")
process.kill()
try:
child_process = psutil.Process(process.pid)
children_of_child = child_process.children(recursive=True)
for item in children_of_child:
os.kill(item.pid, signal.SIGKILL)
# pylint: disable=broad-except
except Exception as e:
logger.warning(f"Get exception when send signal SIGINT to children of child {index}, exception: {e}")
os.kill(process.pid, signal.SIGKILL)


def _start_listening_child_processes(p_recv_pipe, send_pipe_list, subprocess_list):


+ 8
- 2
mindspore_serving/worker/init_mindspore.py View File

@@ -16,7 +16,8 @@
import os
import importlib
from mindspore_serving import log as logger
from ._check_version import check_version_and_env_config
from mindspore_serving._mindspore_serving import Worker_
from ._check_version import check_version_and_env_config, check_version_and_try_set_env_lib

_flag_set_mindspore_cxx_env = False

@@ -50,5 +51,10 @@ def init_mindspore_cxx_env():
if _flag_set_mindspore_cxx_env:
return
_flag_set_mindspore_cxx_env = True
check_version_and_env_config()
check_version_and_try_set_env_lib() # try set env LD_LIBRARY_PATH
_set_mindspore_cxx_env()
device_type = Worker_.get_device_type()
if not device_type:
logger.warning("Failed to get device type")
return
check_version_and_env_config(device_type)

+ 2
- 1
requirements_test.txt View File

@@ -1,4 +1,5 @@
numpy>=1.17.0
protobuf>=3.8.0
grpcio>=1.27.3
requests>=2.22.0
requests>=2.22.0
psutil >= 5.8.0

+ 2
- 1
setup.py View File

@@ -98,7 +98,8 @@ build_dependencies()
required_package = [
'numpy >= 1.17.0',
'protobuf >= 3.8.0',
'grpcio >= 1.27.3'
'grpcio >= 1.27.3',
'psutil >= 5.8.0'
]

package_data = {


+ 31
- 0
tests/ut/stub/stub_inference.cc View File

@@ -32,4 +32,35 @@ std::shared_ptr<InferenceBase> InferenceLoader::CreateMindSporeInfer() {
}

Status InferenceLoader::LoadMindSporeModelWrap() { return SUCCESS; }

DeviceType InferenceLoader::GetSupportDeviceType(DeviceType device_type, ModelType model_type) {
  // Stub mirror of the production lookup: resolve which concrete device type
  // the (stubbed) inference backend reports as supported.
  // Returns the first supported candidate, or kDeviceTypeNotSpecified if none.
  auto mindspore_infer = CreateMindSporeInfer();
  if (mindspore_infer == nullptr) {
    MSI_LOG_ERROR << "Create MindSpore infer failed";
    return kDeviceTypeNotSpecified;
  }
  if (model_type == kUnknownType) {
    model_type = kMindIR;
  }
  // One shared probe loop; the original repeated it and called a list that
  // includes GPU "ascend_list".
  auto first_supported = [&](std::initializer_list<DeviceType> candidates) {
    for (auto item : candidates) {
      if (mindspore_infer->CheckModelSupport(item, model_type)) {
        return item;
      }
    }
    return kDeviceTypeNotSpecified;
  };
  if (device_type == kDeviceTypeNotSpecified) {
    return first_supported({kDeviceTypeAscendCL, kDeviceTypeAscendMS, kDeviceTypeGpu});
  }
  if (device_type == kDeviceTypeAscend) {
    return first_supported({kDeviceTypeAscendCL, kDeviceTypeAscendMS});
  }
  return first_supported({device_type});
}
} // namespace mindspore::serving

Loading…
Cancel
Save