Merge pull request !4622 from hexia/http_init
@@ -0,0 +1,11 @@
mindspore_add_pkg(libevent
        VER 2.1.12
        LIBS event event_pthreads
        URL https://github.com/libevent/libevent/releases/download/release-2.1.12-stable/libevent-2.1.12-stable.tar.gz
        MD5 b5333f021f880fe76490d8a799cd79f4
        CMAKE_OPTION -DCMAKE_BUILD_TYPE:STRING=Release -DBUILD_TESTING=OFF)

include_directories(${libevent_INC})

add_library(mindspore::event ALIAS libevent::event)
add_library(mindspore::event_pthreads ALIAS libevent::event_pthreads)
@@ -29,6 +29,8 @@ if ("${MS_BUILD_GRPC}")
    include(${CMAKE_SOURCE_DIR}/cmake/external_libs/zlib.cmake)
    # build gRPC
    include(${CMAKE_SOURCE_DIR}/cmake/external_libs/grpc.cmake)
    # build event
    include(${CMAKE_SOURCE_DIR}/cmake/external_libs/libevent.cmake)
endif()
include(${CMAKE_SOURCE_DIR}/cmake/external_libs/pybind11.cmake)
@@ -163,6 +163,13 @@ if (ENABLE_GPU)
        )
endif ()

if (ENABLE_SERVING OR ENABLE_TESTCASES)
    file(GLOB_RECURSE LIBEVENT_LIB_LIST
            ${libevent_LIBPATH}/libevent*
            ${libevent_LIBPATH}/libevent_pthreads*
    )
endif ()

if (NOT ENABLE_GE)
    if (ENABLE_D)
        if (DEFINED ENV{ASCEND_CUSTOM_PATH})
@@ -191,6 +198,7 @@ if (NOT ENABLE_GE)
            ${CMAKE_SOURCE_DIR}/graphengine/third_party/prebuild/${CMAKE_HOST_SYSTEM_PROCESSOR}/libslog.so
            ${CMAKE_SOURCE_DIR}/graphengine/third_party/prebuild/${CMAKE_HOST_SYSTEM_PROCESSOR}/liberror_manager.so
            ${CMAKE_SOURCE_DIR}/build/graphengine/libc_sec.so
            ${LIBEVENT_LIB_LIST}
            DESTINATION ${INSTALL_LIB_DIR}
            COMPONENT mindspore
        )
@@ -273,4 +281,10 @@ if (ENABLE_SERVING)
        DESTINATION ${INSTALL_LIB_DIR}
        COMPONENT mindspore
    )
    install(
        FILES ${LIBEVENT_LIB_LIST}
        DESTINATION ${INSTALL_LIB_DIR}
        COMPONENT mindspore
    )
endif ()
@@ -23,6 +23,7 @@
#include <sstream>
#include <memory>
#include <iostream>
#include <chrono>

#ifndef ENABLE_ACL
#include "mindspore/core/utils/log_adapter.h"
@@ -101,7 +102,18 @@ class LogWriter {
#endif  // ENABLE_ACL
#define MSI_TIME_STAMP_START(name) auto time_start_##name = std::chrono::steady_clock::now();
#define MSI_TIME_STAMP_END(name)                                                                              \
  {                                                                                                           \
    auto time_end_##name = std::chrono::steady_clock::now();                                                  \
    auto time_cost = std::chrono::duration<double, std::milli>(time_end_##name - time_start_##name).count(); \
    MSI_LOG_INFO << #name " Time Cost # " << time_cost << " ms ---------------------";                       \
  }
#define INFER_STATUS(code) inference::Status(code) < inference::LogStream()
#define ERROR_INFER_STATUS(status, type, msg) \
  MSI_LOG_ERROR << msg;                       \
  status = inference::Status(type, msg)

}  // namespace mindspore::inference
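For reference, a minimal usage sketch of the timing macro pair added above; it mirrors how the serving code brackets a call to measure and log its wall-clock cost. TimedPredict and DoPredict are placeholder names, not part of this patch:

#include "include/infer_log.h"

void DoPredict();  // placeholder for any call worth timing

void TimedPredict() {
  MSI_TIME_STAMP_START(Predict)
  DoPredict();
  MSI_TIME_STAMP_END(Predict)  // logs "Predict Time Cost # <elapsed> ms ---------------------"
}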
@@ -74,6 +74,10 @@ class MS_API InferSession {
      const RequestBase & /*request*/, ReplyBase & /*reply*/) {
    return FAILED;
  }
  virtual Status GetModelInputsInfo(uint32_t graph_id, std::vector<inference::InferTensor> *tensor_list) const {
    Status status(SUCCESS);
    return status;
  }

  static std::shared_ptr<InferSession> CreateSession(const std::string &device, uint32_t device_id);
};
@@ -211,5 +211,31 @@ std::string AscendInferenceSession::InputsInfo(const std::vector<ParameterPtr> &
  return graph + " " + actual;
}

void AscendInferenceSession::GetModelInputsInfo(uint32_t graph_id, std::vector<tensor::TensorPtr> *inputs) const {
  MS_LOG(INFO) << "Start get model inputs, graph id : " << graph_id;
  auto kernel_graph = GetGraph(graph_id);
  MS_EXCEPTION_IF_NULL(kernel_graph);
  auto kernel_graph_inputs = kernel_graph->inputs();
  vector<ParameterPtr> paras;
  // find parameters of graph inputs
  for (size_t i = 0; i < kernel_graph_inputs.size(); ++i) {
    if (!kernel_graph_inputs[i]->isa<Parameter>()) {
      MS_LOG(ERROR) << "Kernel graph inputs have anfnode which is not Parameter.";
      continue;
    }
    auto parameter = kernel_graph_inputs[i]->cast<ParameterPtr>();
    if (!AnfAlgo::IsParameterWeight(parameter)) {
      vector<int> input_shape;
      auto parameter_shape = AnfAlgo::GetOutputDeviceShape(parameter, 0);
      (void)std::transform(parameter_shape.begin(), parameter_shape.end(), std::back_inserter(input_shape),
                           [](const size_t dim) { return static_cast<int>(dim); });
      auto kernel_build_info = AnfAlgo::GetSelectKernelBuildInfo(parameter);
      auto data_type = kernel_build_info->GetOutputDeviceType(0);
      auto ms_tensor = std::make_shared<tensor::Tensor>(data_type, input_shape);
      inputs->push_back(ms_tensor);
    }
  }
}
}  // namespace session
}  // namespace mindspore
@@ -45,6 +45,7 @@ class AscendInferenceSession : public AscendSession {
  template <typename T>
  std::string PrintInputShape(std::vector<T> shape) const;
  std::string InputsInfo(const std::vector<ParameterPtr> &paras, const std::vector<tensor::TensorPtr> &inputs) const;
  void GetModelInputsInfo(uint32_t graph_id, std::vector<tensor::TensorPtr> *inputs) const override;
};
MS_REG_SESSION(kDavinciInferenceDevice, AscendInferenceSession);
}  // namespace session
@@ -224,7 +224,7 @@ Status MSInferSession::ExecuteModel(uint32_t model_id, const RequestBase &reques
  for (const auto &tensor : outputs) {
    auto out_tensor = reply.add();
    if (out_tensor == nullptr) {
      MS_LOG(ERROR) << "Execute Model " << model_id << " Failed, add output tensor failed";
      MS_LOG(ERROR) << "Execute Model " << model_id << " Failed add output tensor failed";
      return FAILED;
    }
    MSTensor2ServingTensor(tensor, *out_tensor);
@@ -378,4 +378,18 @@ Status MSInferSession::CheckModelInputs(uint32_t graph_id, const std::vector<ten
  return SUCCESS;
}
Status MSInferSession::GetModelInputsInfo(uint32_t model_id, std::vector<inference::InferTensor> *tensor_list) const {
  vector<tensor::TensorPtr> inputs;
  session_impl_->GetModelInputsInfo(model_id, &inputs);
  if (inputs.size() == 0) {
    MS_LOG(ERROR) << "The model inputs are empty";
    return FAILED;
  }
  for (const auto &tensor : inputs) {
    InferTensor infer_tensor = InferTensor();
    MSTensor2ServingTensor(tensor, infer_tensor);
    tensor_list->push_back(infer_tensor);
  }
  return SUCCESS;
}
}  // namespace mindspore::inference
@@ -43,6 +43,7 @@ class MSInferSession : public InferSession {
  Status LoadModelFromFile(const std::string &file_name, uint32_t &model_id) override;
  Status UnloadModel(uint32_t model_id) override;
  Status ExecuteModel(uint32_t model_id, const RequestBase &inputs, ReplyBase &outputs) override;
  Status GetModelInputsInfo(uint32_t graph_id, std::vector<inference::InferTensor> *tensor_list) const override;

 private:
  std::shared_ptr<session::SessionBasic> session_impl_ = nullptr;
@@ -97,6 +97,7 @@ class SessionBasic {
                       std::string *error_msg) const {
    return true;
  }
  virtual void GetModelInputsInfo(uint32_t graph_id, std::vector<tensor::TensorPtr> *inputs) const {}
#ifdef ENABLE_DEBUGGER
  // set debugger
@@ -93,7 +93,10 @@ if (ENABLE_ACL)
endif ()
include_directories(${CMAKE_BINARY_DIR})
add_executable(ms_serving ${SERVING_SRC})

# libevent
target_link_libraries(ms_serving mindspore::event mindspore::event_pthreads)

target_link_libraries(ms_serving ${_REFLECTION} ${_GRPC_GRPCPP} ${_PROTOBUF_LIBPROTOBUF} pthread)
if (ENABLE_D)
@@ -0,0 +1,423 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include <map>
#include <vector>
#include <string>
#include <nlohmann/json.hpp>
#include "serving/ms_service.pb.h"
#include "util/status.h"
#include "core/session.h"
#include "core/http_process.h"

using ms_serving::MSService;
using ms_serving::PredictReply;
using ms_serving::PredictRequest;
using nlohmann::json;

namespace mindspore {
namespace serving {

const int BUF_MAX = 0x1FFFFF;
static constexpr char HTTP_DATA[] = "data";
static constexpr char HTTP_TENSOR[] = "tensor";
enum HTTP_TYPE { TYPE_DATA = 0, TYPE_TENSOR };
enum HTTP_DATA_TYPE { HTTP_DATA_NONE, HTTP_DATA_INT, HTTP_DATA_FLOAT };
static const std::map<HTTP_DATA_TYPE, ms_serving::DataType> http_to_infer_map{
  {HTTP_DATA_NONE, ms_serving::MS_UNKNOWN},
  {HTTP_DATA_INT, ms_serving::MS_INT32},
  {HTTP_DATA_FLOAT, ms_serving::MS_FLOAT32}};
Status GetPostMessage(struct evhttp_request *req, std::string *buf) {
  Status status(SUCCESS);
  size_t post_size = evbuffer_get_length(req->input_buffer);
  if (post_size == 0) {
    ERROR_INFER_STATUS(status, INVALID_INPUTS, "http message invalid");
    return status;
  } else {
    size_t copy_len = post_size > BUF_MAX ? BUF_MAX : post_size;
    buf->resize(copy_len);
    memcpy(buf->data(), evbuffer_pullup(req->input_buffer, -1), copy_len);
    return status;
  }
}
Status CheckRequestValid(struct evhttp_request *http_request) {
  Status status(SUCCESS);
  switch (evhttp_request_get_command(http_request)) {
    case EVHTTP_REQ_POST:
      return status;
    default:
      ERROR_INFER_STATUS(status, INVALID_INPUTS, "http message only support POST right now");
      return status;
  }
}

void ErrorMessage(struct evhttp_request *req, Status status) {
  json error_json = {{"error_message", status.StatusMessage()}};
  std::string out_error_str = error_json.dump();
  struct evbuffer *retbuff = evbuffer_new();
  evbuffer_add(retbuff, out_error_str.data(), out_error_str.size());
  evhttp_send_reply(req, HTTP_OK, "Client", retbuff);
  evbuffer_free(retbuff);
}
Status CheckMessageValid(const json &message_info, HTTP_TYPE *type) {
  Status status(SUCCESS);
  int count = 0;
  if (message_info.find(HTTP_DATA) != message_info.end()) {
    *type = TYPE_DATA;
    count++;
  }
  if (message_info.find(HTTP_TENSOR) != message_info.end()) {
    *type = TYPE_TENSOR;
    count++;
  }
  if (count != 1) {
    ERROR_INFER_STATUS(status, INVALID_INPUTS, "http message must have only one type of (data, tensor)");
    return status;
  }
  return status;
}
Status GetDataFromJson(const json &json_data, std::string *data, HTTP_DATA_TYPE *type) {
  Status status(SUCCESS);
  if (json_data.is_number_integer()) {
    if (*type == HTTP_DATA_NONE) {
      *type = HTTP_DATA_INT;
    } else if (*type != HTTP_DATA_INT) {
      ERROR_INFER_STATUS(status, INVALID_INPUTS, "the input data type should be consistent");
      return status;
    }
    auto s_data = json_data.get<int32_t>();
    data->append(reinterpret_cast<char *>(&s_data), sizeof(int32_t));
    MSI_LOG(INFO) << "data size " << data->size();
  } else if (json_data.is_number_float()) {
    if (*type == HTTP_DATA_NONE) {
      *type = HTTP_DATA_FLOAT;
    } else if (*type != HTTP_DATA_FLOAT) {
      ERROR_INFER_STATUS(status, INVALID_INPUTS, "the input data type should be consistent");
      return status;
    }
    auto s_data = json_data.get<float>();
    data->append(reinterpret_cast<char *>(&s_data), sizeof(float));
    MSI_LOG(INFO) << "data size " << data->size();
  } else {
    ERROR_INFER_STATUS(status, INVALID_INPUTS, "the input data type should be int or float");
    return status;
  }
  return SUCCESS;
}
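// RecusiveGetTensor walks a nested JSON array depth-first: each level records its dimension in *shape
// (and checks that sibling arrays keep the same size), and the innermost level appends the scalar
// values to *data via GetDataFromJson.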
Status RecusiveGetTensor(const json &json_data, size_t depth, std::vector<int> *shape, std::string *data,
                         HTTP_DATA_TYPE *type) {
  Status status(SUCCESS);
  if (depth >= 10) {
    ERROR_INFER_STATUS(status, INVALID_INPUTS, "the tensor shape dims is larger than 10");
    return status;
  }
  if (!json_data.is_array()) {
    ERROR_INFER_STATUS(status, INVALID_INPUTS, "the tensor is constructed illegally");
    return status;
  }
  int cur_dim = json_data.size();
  if (shape->size() <= depth) {
    shape->push_back(cur_dim);
  } else if ((*shape)[depth] != cur_dim) {
    return INFER_STATUS(INVALID_INPUTS) << "the tensor shape is constructed illegally";
  }
  if (json_data.at(0).is_array()) {
    for (const auto &item : json_data) {
      status = RecusiveGetTensor(item, depth + 1, shape, data, type);
      if (status != SUCCESS) {
        return status;
      }
    }
  } else {
    // last dim, read the data
    for (auto item : json_data) {
      status = GetDataFromJson(item, data, type);
      if (status != SUCCESS) {
        return status;
      }
    }
  }
  return status;
}
Status TransDataToPredictRequest(const json &message_info, PredictRequest *request) {
  Status status = SUCCESS;
  auto tensors = message_info.find(HTTP_DATA);
  if (tensors == message_info.end()) {
    ERROR_INFER_STATUS(status, INVALID_INPUTS, "http message do not have data type");
    return status;
  }
  if (tensors->size() == 0) {
    ERROR_INFER_STATUS(status, INVALID_INPUTS, "the input tensor list is null");
    return status;
  }
  for (const auto &tensor : *tensors) {
    std::string msg_data;
    HTTP_DATA_TYPE type{HTTP_DATA_NONE};
    if (!tensor.is_array()) {
      ERROR_INFER_STATUS(status, INVALID_INPUTS, "the tensor is constructed illegally");
      return status;
    }
    if (tensor.size() == 0) {
      ERROR_INFER_STATUS(status, INVALID_INPUTS, "the input tensor is null");
      return status;
    }
    for (const auto &tensor_data : tensor) {
      status = GetDataFromJson(tensor_data, &msg_data, &type);
      if (status != SUCCESS) {
        return status;
      }
    }
    auto iter = http_to_infer_map.find(type);
    if (iter == http_to_infer_map.end()) {
      ERROR_INFER_STATUS(status, INVALID_INPUTS, "the input type is not supported right now");
      return status;
    }
    auto infer_tensor = request->add_data();
    infer_tensor->set_tensor_type(iter->second);
    infer_tensor->set_data(msg_data.data(), msg_data.size());
  }
  // get model required shape
  std::vector<inference::InferTensor> tensor_list;
  status = Session::Instance().GetModelInputsInfo(tensor_list);
  if (status != SUCCESS) {
    ERROR_INFER_STATUS(status, FAILED, "get model inputs info failed");
    return status;
  }
  if (request->data_size() != static_cast<int64_t>(tensor_list.size())) {
    ERROR_INFER_STATUS(status, INVALID_INPUTS, "the inputs number is not equal to model required");
    return status;
  }
  for (int i = 0; i < request->data_size(); i++) {
    for (size_t j = 0; j < tensor_list[i].shape().size(); ++j) {
      request->mutable_data(i)->mutable_tensor_shape()->add_dims(tensor_list[i].shape()[j]);
    }
  }
  return SUCCESS;
}

Status TransTensorToPredictRequest(const json &message_info, PredictRequest *request) {
  Status status(SUCCESS);
  auto tensors = message_info.find(HTTP_TENSOR);
  if (tensors == message_info.end()) {
    ERROR_INFER_STATUS(status, INVALID_INPUTS, "http message do not have tensor type");
    return status;
  }
  for (const auto &tensor : *tensors) {
    std::vector<int> shape;
    std::string msg_data;
    HTTP_DATA_TYPE type{HTTP_DATA_NONE};
    status = RecusiveGetTensor(tensor, 0, &shape, &msg_data, &type);
    if (status != SUCCESS) {
      return status;
    }
    MSI_LOG(INFO) << shape << ", data = " << msg_data.size();
    auto iter = http_to_infer_map.find(type);
    if (iter == http_to_infer_map.end()) {
      ERROR_INFER_STATUS(status, INVALID_INPUTS, "the input type is not supported right now");
      return status;
    }
    auto infer_tensor = request->add_data();
    infer_tensor->set_tensor_type(iter->second);
    infer_tensor->set_data(msg_data.data(), msg_data.size());
    for (const auto dim : shape) {
      infer_tensor->mutable_tensor_shape()->add_dims(dim);
    }
  }
  return status;
}

Status TransHTTPMsgToPredictRequest(struct evhttp_request *http_request, PredictRequest *request, HTTP_TYPE *type) {
  Status status = CheckRequestValid(http_request);
  if (status != SUCCESS) {
    return status;
  }
  std::string post_message;
  status = GetPostMessage(http_request, &post_message);
  if (status != SUCCESS) {
    return status;
  }
  json message_info;
  try {
    message_info = nlohmann::json::parse(post_message);
  } catch (nlohmann::json::exception &e) {
    std::string json_exception = e.what();
    std::string error_message = "Illegal JSON format." + json_exception;
    ERROR_INFER_STATUS(status, INVALID_INPUTS, error_message);
    return status;
  }
  status = CheckMessageValid(message_info, type);
  if (status != SUCCESS) {
    return status;
  }
  switch (*type) {
    case TYPE_DATA:
      status = TransDataToPredictRequest(message_info, request);
      break;
    case TYPE_TENSOR:
      status = TransTensorToPredictRequest(message_info, request);
      break;
    default:
      ERROR_INFER_STATUS(status, INVALID_INPUTS, "http message must have only one type of (data, tensor)");
      return status;
  }
  return status;
}
Status GetJsonFromTensor(const ms_serving::Tensor &tensor, int len, int *pos, json *out_json) {
  Status status(SUCCESS);
  switch (tensor.tensor_type()) {
    case ms_serving::MS_INT32: {
      std::vector<int> result_tensor;
      for (int j = 0; j < len; j++) {
        int val;
        memcpy(&val, reinterpret_cast<const int *>(tensor.data().data()) + *pos + j, sizeof(int));
        result_tensor.push_back(val);
      }
      *out_json = result_tensor;
      *pos += len;
      break;
    }
    case ms_serving::MS_FLOAT32: {
      std::vector<float> result_tensor;
      for (int j = 0; j < len; j++) {
        float val;
        memcpy(&val, reinterpret_cast<const float *>(tensor.data().data()) + *pos + j, sizeof(float));
        result_tensor.push_back(val);
      }
      *out_json = result_tensor;
      *pos += len;
      break;
    }
    default:
      MSI_LOG(ERROR) << "the result type is not supported in restful api, type is " << tensor.tensor_type();
      ERROR_INFER_STATUS(status, FAILED, "reply have unsupported type");
  }
  return status;
}
Status TransPredictReplyToData(const PredictReply &reply, json *out_json) {
  Status status(SUCCESS);
  for (int i = 0; i < reply.result_size(); i++) {
    json tensor_json;
    int num = 1;
    for (auto j = 0; j < reply.result(i).tensor_shape().dims_size(); j++) {
      num *= reply.result(i).tensor_shape().dims(j);
    }
    int pos = 0;
    status = GetJsonFromTensor(reply.result(i), num, &pos, &tensor_json);
    if (status != SUCCESS) {
      return status;
    }
    (*out_json)["data"].push_back(tensor_json);
  }
  return status;
}

Status RecusiveGetJson(const ms_serving::Tensor &tensor, int depth, int *pos, json *out_json) {
  Status status(SUCCESS);
  if (depth >= 10) {
    ERROR_INFER_STATUS(status, FAILED, "result tensor shape dims is larger than 10");
    return status;
  }
  if (depth == tensor.tensor_shape().dims_size() - 1) {
    status = GetJsonFromTensor(tensor, tensor.tensor_shape().dims(depth), pos, out_json);
    if (status != SUCCESS) {
      return status;
    }
  } else {
    for (int i = 0; i < tensor.tensor_shape().dims(depth); i++) {
      json tensor_json;
      status = RecusiveGetJson(tensor, depth + 1, pos, &tensor_json);
      if (status != SUCCESS) {
        return status;
      }
      out_json->push_back(tensor_json);
    }
  }
  return status;
}
Status TransPredictReplyToTensor(const PredictReply &reply, json *out_json) {
  Status status(SUCCESS);
  for (int i = 0; i < reply.result_size(); i++) {
    json tensor_json;
    int pos = 0;
    status = RecusiveGetJson(reply.result(i), 0, &pos, &tensor_json);
    if (status != SUCCESS) {
      return status;
    }
    (*out_json)["tensor"].push_back(tensor_json);
  }
  return status;
}

Status TransPredictReplyToHTTPMsg(const PredictReply &reply, const HTTP_TYPE &type, struct evbuffer *buf) {
  Status status(SUCCESS);
  json out_json;
  switch (type) {
    case TYPE_DATA:
      status = TransPredictReplyToData(reply, &out_json);
      break;
    case TYPE_TENSOR:
      status = TransPredictReplyToTensor(reply, &out_json);
      break;
    default:
      ERROR_INFER_STATUS(status, FAILED, "http message must have only one type of (data, tensor)");
      return status;
  }
  std::string out_str = out_json.dump();
  evbuffer_add(buf, out_str.data(), out_str.size());
  return status;
}
void http_handler_msg(struct evhttp_request *req, void *arg) {
  std::cout << "in handle" << std::endl;
  PredictRequest request;
  PredictReply reply;
  HTTP_TYPE type;
  auto status = TransHTTPMsgToPredictRequest(req, &request, &type);
  if (status != SUCCESS) {
    ErrorMessage(req, status);
    MSI_LOG(ERROR) << "restful trans to request failed";
    return;
  }
  MSI_TIME_STAMP_START(Predict)
  status = Session::Instance().Predict(request, reply);
  MSI_TIME_STAMP_END(Predict)
  if (status != SUCCESS) {
    ErrorMessage(req, status);
    MSI_LOG(ERROR) << "restful predict failed";
    return;
  }
  struct evbuffer *retbuff = evbuffer_new();
  status = TransPredictReplyToHTTPMsg(reply, type, retbuff);
  if (status != SUCCESS) {
    ErrorMessage(req, status);
    MSI_LOG(ERROR) << "restful trans to reply failed";
    evbuffer_free(retbuff);
    return;
  }
  evhttp_send_reply(req, HTTP_OK, "Client", retbuff);
  evbuffer_free(retbuff);
}

}  // namespace serving
}  // namespace mindspore
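For reference, a minimal sketch (not part of this patch) of the two JSON request bodies http_handler_msg accepts in a POST, built with the same nlohmann::json library the file already uses. The values are illustrative only, mirroring the 2x2 float32 tensor_add example used in the tests:

#include <nlohmann/json.hpp>

// "data": one flat value list per input tensor; the shapes are filled in from the loaded model.
nlohmann::json ExampleDataMessage() {
  return nlohmann::json::parse(R"({"data": [[1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0]]})");
}

// "tensor": nested arrays; RecusiveGetTensor derives each input's shape from the nesting.
nlohmann::json ExampleTensorMessage() {
  return nlohmann::json::parse(R"({"tensor": [[[1.0, 1.0], [1.0, 1.0]], [[1.0, 1.0], [1.0, 1.0]]]})");
}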
@@ -0,0 +1,29 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_SERVING_HTTP_PROCESS_H
#define MINDSPORE_SERVING_HTTP_PROCESS_H

#include <evhttp.h>
#include <event.h>
#include <event2/http.h>
#include <event2/http_struct.h>

namespace mindspore {
namespace serving {

void http_handler_msg(struct evhttp_request *req, void *arg);

}  // namespace serving
}  // namespace mindspore
#endif  // MINDSPORE_SERVING_HTTP_PROCESS_H
@@ -14,23 +14,25 @@
 * limitations under the License.
 */
#include "core/server.h"
#include <evhttp.h>
#include <event.h>
#include <event2/thread.h>
#include <grpcpp/grpcpp.h>
#include <grpcpp/health_check_service_interface.h>
#include <grpcpp/ext/proto_server_reflection_plugin.h>
#include <future>
#include <memory>
#include <string>
#include <map>
#include <vector>
#include <utility>
#include <memory>
#include <future>
#include <chrono>
#include "include/infer_log.h"
#include "serving/ms_service.grpc.pb.h"
#include "core/util/option_parser.h"
#include "core/version_control/version_controller.h"
#include "core/util/file_system_operation.h"
#include "core/session.h"
#include "core/serving_tensor.h"
#include "core/http_process.h"

using ms_serving::MSService;
using ms_serving::PredictReply;
@@ -39,93 +41,6 @@ using ms_serving::PredictRequest;

namespace mindspore {
namespace serving {

#define MSI_TIME_STAMP_START(name) auto time_start_##name = std::chrono::steady_clock::now();
#define MSI_TIME_STAMP_END(name)                                                                              \
  {                                                                                                           \
    auto time_end_##name = std::chrono::steady_clock::now();                                                  \
    auto time_cost = std::chrono::duration<double, std::milli>(time_end_##name - time_start_##name).count(); \
    MSI_LOG_INFO << #name " Time Cost # " << time_cost << " ms ---------------------";                       \
  }

Status Session::CreatDeviceSession(const std::string &device, uint32_t device_id) {
  session_ = inference::InferSession::CreateSession(device, device_id);
  if (session_ == nullptr) {
    MSI_LOG(ERROR) << "Creat Session Failed";
    return FAILED;
  }
  device_type_ = device;
  return SUCCESS;
}

Session &Session::Instance() {
  static Session instance;
  return instance;
}

Status Session::Predict(const PredictRequest &request, PredictReply &reply) {
  if (!model_loaded_) {
    MSI_LOG(ERROR) << "the model has not loaded";
    return FAILED;
  }
  if (session_ == nullptr) {
    MSI_LOG(ERROR) << "the inference session has not be initialized";
    return FAILED;
  }
  std::lock_guard<std::mutex> lock(mutex_);
  MSI_LOG(INFO) << "run Predict";

  if (request.images_size() > 0) {
    ServingImagesRequest serving_images(request);
    ServingRequest serving_request(request);
    ServingReply serving_reply(reply);
    Status ret = session_->ExecuteModel(graph_id_, serving_images, serving_request, serving_reply);
    if (ret != SUCCESS) {
      MSI_LOG(ERROR) << "execute model with images return failed";
      return ret;
    }
  } else if (request.data_size() > 0) {
    ServingRequest serving_request(request);
    ServingReply serving_reply(reply);
    Status ret = session_->ExecuteModel(graph_id_, serving_request, serving_reply);
    if (ret != SUCCESS) {
      MSI_LOG(ERROR) << "execute model with datas return failed";
      return ret;
    }
  }
  MSI_LOG(INFO) << "run Predict finished";
  return SUCCESS;
}

Status Session::Warmup(const MindSporeModelPtr model) {
  if (session_ == nullptr) {
    MSI_LOG(ERROR) << "The CreatDeviceSession should be called, before warmup";
    return FAILED;
  }
  std::lock_guard<std::mutex> lock(mutex_);
  std::string file_name = model->GetModelPath() + '/' + model->GetModelName();
  model_loaded_ = false;
  MSI_TIME_STAMP_START(LoadModelFromFile)
  auto ret = session_->LoadModelFromFile(file_name, graph_id_);
  MSI_TIME_STAMP_END(LoadModelFromFile)
  if (ret != SUCCESS) {
    MSI_LOG(ERROR) << "Load graph model failed, file name is " << file_name.c_str();
    return ret;
  }
  model_loaded_ = true;
  MSI_LOG(INFO) << "Session Warmup finished";
  return SUCCESS;
}

Status Session::Clear() {
  if (session_ != nullptr) {
    session_->UnloadModel(graph_id_);
    session_->FinalizeEnv();
    session_ = nullptr;
  }
  return SUCCESS;
}

namespace {
static const uint32_t uint32max = 0x7FFFFFFF;
std::promise<void> exit_requested;
@@ -179,6 +94,7 @@ Status Server::BuildAndStart() {
  signal(SIGINT, HandleSignal);
  signal(SIGTERM, HandleSignal);
  Status res;

  auto option_args = Options::Instance().GetArgs();
  std::string server_address = "0.0.0.0:" + std::to_string(option_args->grpc_port);
  std::string model_path = option_args->model_path;
@@ -198,6 +114,26 @@ Status Server::BuildAndStart() {
    ClearEnv();
    return res;
  }

  // init http server
  struct evhttp *http_server = NULL;
  struct event_base *eb = NULL;
  int32_t http_port = option_args->rest_api_port;
  std::string http_addr = "0.0.0.0";
  event_init();
  evthread_use_pthreads();
  eb = event_base_new();
  http_server = evhttp_new(eb);
  // http_server = evhttp_start(http_addr.c_str(), http_port);
  if (http_server == NULL) {
    MSI_LOG(ERROR) << "http server start failed.";
    return FAILED;
  }
  if (evhttp_bind_socket_with_handle(http_server, http_addr.c_str(), http_port) == nullptr) {
    MSI_LOG(ERROR) << "http server bind " << http_addr << ":" << http_port << " failed.";
    return FAILED;
  }
  evhttp_set_timeout(http_server, 5);
  evhttp_set_gencb(http_server, http_handler_msg, NULL);
  // grpc server
  MSServiceImpl ms_service;
  grpc::EnableDefaultHealthCheckService(true);
  grpc::reflection::InitProtoReflectionServerBuilderPlugin();
@@ -214,14 +150,23 @@ Status Server::BuildAndStart() {
    ClearEnv();
    return FAILED;
  }
  auto grpc_server_run = [&server]() { server->Wait(); };
  std::thread serving_thread(grpc_server_run);
  MSI_LOG(INFO) << "MS Serving listening on " << server_address;
  auto grpc_server_run = [&server, &server_address]() {
    MSI_LOG(INFO) << "MS Serving grpc listening on " << server_address;
    server->Wait();
  };
  auto http_server_run = [&eb, &http_addr, &http_port]() {
    MSI_LOG(INFO) << "MS Serving restful listening on " << http_addr << ":" << http_port;
    event_base_dispatch(eb);
  };
  std::thread grpc_thread(grpc_server_run);
  std::thread restful_thread(http_server_run);
  auto exit_future = exit_requested.get_future();
  exit_future.wait();
  ClearEnv();
  server->Shutdown();
  serving_thread.join();
  event_base_loopexit(eb, NULL);
  grpc_thread.join();
  restful_thread.join();
  return SUCCESS;
}
}  // namespace serving
@@ -16,46 +16,10 @@
#ifndef MINDSPORE_SERVER_H
#define MINDSPORE_SERVER_H

#include <string>
#include <mutex>
#include <vector>
#include <memory>
#include "util/status.h"
#include "version_control/model.h"
#include "include/inference.h"
#include "serving/ms_service.pb.h"
#include "serving/ms_service.grpc.pb.h"

namespace mindspore {
namespace serving {

using ms_serving::PredictReply;
using ms_serving::PredictRequest;
using inference::Status;
using inference::SUCCESS;
using inference::FAILED;
using inference::INVALID_INPUTS;

class Session {
 public:
  static Session &Instance();
  Status CreatDeviceSession(const std::string &device, uint32_t device_id);
  // Status Predict(const inference::MultiTensor &inputs, inference::MultiTensor &output);
  Status Predict(const PredictRequest &request, PredictReply &reply);
  Status Warmup(const MindSporeModelPtr model);
  Status Clear();

 private:
  Session() = default;
  ~Session() = default;
  int sesseion_id_{0};
  std::shared_ptr<inference::InferSession> session_{nullptr};
  bool model_loaded_ = false;
  uint32_t graph_id_{0};
  std::mutex mutex_;
  std::string device_type_;
};

class Server {
 public:
  Server() = default;
@@ -0,0 +1,136 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "core/session.h"
#include <grpcpp/grpcpp.h>
#include <string>
#include <map>
#include <vector>
#include <utility>
#include <memory>
#include <chrono>

#include "include/infer_log.h"
#include "serving/ms_service.grpc.pb.h"
#include "core/util/option_parser.h"
#include "core/version_control/version_controller.h"
#include "core/util/file_system_operation.h"
#include "core/serving_tensor.h"

using ms_serving::MSService;
using ms_serving::PredictReply;
using ms_serving::PredictRequest;

namespace mindspore {
namespace serving {

Status Session::CreatDeviceSession(const std::string &device, uint32_t device_id) {
  session_ = inference::InferSession::CreateSession(device, device_id);
  if (session_ == nullptr) {
    MSI_LOG(ERROR) << "Creat Session Failed";
    return FAILED;
  }
  device_type_ = device;
  return SUCCESS;
}

Session &Session::Instance() {
  static Session instance;
  return instance;
}

Status Session::Predict(const PredictRequest &request, PredictReply &reply) {
  if (!model_loaded_) {
    MSI_LOG(ERROR) << "the model has not loaded";
    return FAILED;
  }
  if (session_ == nullptr) {
    MSI_LOG(ERROR) << "the inference session has not be initialized";
    return FAILED;
  }
  std::lock_guard<std::mutex> lock(mutex_);
  MSI_LOG(INFO) << "run Predict";

  if (request.images_size() > 0) {
    ServingImagesRequest serving_images(request);
    ServingRequest serving_request(request);
    ServingReply serving_reply(reply);
    Status ret = session_->ExecuteModel(graph_id_, serving_images, serving_request, serving_reply);
    if (ret != SUCCESS) {
      MSI_LOG(ERROR) << "execute model with images return failed";
      return ret;
    }
  } else if (request.data_size() > 0) {
    ServingRequest serving_request(request);
    ServingReply serving_reply(reply);
    Status ret = session_->ExecuteModel(graph_id_, serving_request, serving_reply);
    if (ret != SUCCESS) {
      MSI_LOG(ERROR) << "execute model with datas return failed";
      return ret;
    }
  }
  MSI_LOG(INFO) << "run Predict finished";
  return SUCCESS;
}

Status Session::Warmup(const MindSporeModelPtr model) {
  if (session_ == nullptr) {
    MSI_LOG(ERROR) << "The CreatDeviceSession should be called, before warmup";
    return FAILED;
  }
  std::lock_guard<std::mutex> lock(mutex_);
  std::string file_name = model->GetModelPath() + '/' + model->GetModelName();
  model_loaded_ = false;
  MSI_TIME_STAMP_START(LoadModelFromFile)
  auto ret = session_->LoadModelFromFile(file_name, graph_id_);
  MSI_TIME_STAMP_END(LoadModelFromFile)
  if (ret != SUCCESS) {
    MSI_LOG(ERROR) << "Load graph model failed, file name is " << file_name.c_str();
    return ret;
  }
  model_loaded_ = true;
  MSI_LOG(INFO) << "Session Warmup finished";
  return SUCCESS;
}

Status Session::Clear() {
  if (session_ != nullptr) {
    session_->UnloadModel(graph_id_);
    session_->FinalizeEnv();
    session_ = nullptr;
  }
  return SUCCESS;
}

Status Session::GetModelInputsInfo(std::vector<inference::InferTensor> &tensor_list) {
  if (!model_loaded_) {
    MSI_LOG(ERROR) << "the model has not loaded";
    return FAILED;
  }
  if (session_ == nullptr) {
    MSI_LOG(ERROR) << "the inference session has not be initialized";
    return FAILED;
  }
  std::lock_guard<std::mutex> lock(mutex_);
  Status ret = session_->GetModelInputsInfo(graph_id_, &tensor_list);
  if (ret != SUCCESS) {
    MSI_LOG(ERROR) << "get model inputs info failed";
  }
  return ret;
}

}  // namespace serving
}  // namespace mindspore
@@ -0,0 +1,62 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_SERVING_SESSION_H
#define MINDSPORE_SERVING_SESSION_H

#include <string>
#include <mutex>
#include <vector>
#include <memory>
#include "util/status.h"
#include "version_control/model.h"
#include "include/inference.h"
#include "serving/ms_service.pb.h"
#include "serving/ms_service.grpc.pb.h"

namespace mindspore {
namespace serving {

using inference::FAILED;
using inference::INVALID_INPUTS;
using inference::Status;
using inference::SUCCESS;
using ms_serving::PredictReply;
using ms_serving::PredictRequest;

class Session {
 public:
  static Session &Instance();
  Status CreatDeviceSession(const std::string &device, uint32_t device_id);
  // Status Predict(const inference::MultiTensor &inputs, inference::MultiTensor &output);
  Status Predict(const PredictRequest &request, PredictReply &reply);
  Status Warmup(const MindSporeModelPtr model);
  Status Clear();
  Status GetModelInputsInfo(std::vector<inference::InferTensor> &tensor_list);

 private:
  Session() = default;
  ~Session() = default;
  int sesseion_id_{0};
  std::shared_ptr<inference::InferSession> session_{nullptr};
  bool model_loaded_ = false;
  uint32_t graph_id_{0};
  std::mutex mutex_;
  std::string device_type_;
};

}  // namespace serving
}  // namespace mindspore
#endif  // MINDSPORE_SERVING_SESSION_H
@@ -160,6 +160,8 @@ void Options::CreateOptions() {
  std::vector<Option> options = {
    Option("port", &args_->grpc_port,
           "[Optional] Port to listen on for gRPC API, default is 5500, range from 1 to 65535"),
    Option("rest_api_port", &args_->rest_api_port,
           "[Optional] Port to listen on for RESTful API, default is 5501, range from 1 to 65535"),
    Option("model_name", &args_->model_name, "[Required] model name "),
    Option("model_path", &args_->model_path, "[Required] the path of the model files"),
    Option("device_id", &args_->device_id, "[Optional] the device id, default is 0, range from 0 to 7"),
@@ -184,6 +186,14 @@ bool Options::CheckOptions() {
    std::cout << "the port should be in [1~65535]" << std::endl;
    return false;
  }
  if (args_->rest_api_port < 1 || args_->rest_api_port > 65535) {
    std::cout << "the rest_api_port should be in [1~65535]" << std::endl;
    return false;
  }
  if (args_->rest_api_port == args_->grpc_port) {
    std::cout << "the rest_api_port and grpc port should not be same" << std::endl;
    return false;
  }
  return true;
}
@@ -24,6 +24,7 @@ namespace mindspore {
namespace serving {
struct Arguments {
  int32_t grpc_port = 5500;
  int32_t rest_api_port = 5501;
  std::string grpc_socket_path;
  std::string ssl_config_file;
  int32_t poll_model_wait_seconds = 1;
@@ -21,7 +21,7 @@
#include <memory>
#include "util/file_system_operation.h"
#include "include/infer_log.h"
#include "core/server.h"
#include "core/session.h"

namespace mindspore {
namespace serving {
@@ -30,10 +30,9 @@ class Net(nn.Cell):
    def construct(self, x_, y_):
        return self.add(x_, y_)

x = np.ones(4).astype(np.float32)
y = np.ones(4).astype(np.float32)

def export_net():
    x = np.ones([2, 2]).astype(np.float32)
    y = np.ones([2, 2]).astype(np.float32)
    add = Net()
    output = add(Tensor(x), Tensor(y))
    export(add, Tensor(x), Tensor(y), file_name='tensor_add.mindir', file_format='MINDIR')
| request = ms_service_pb2.PredictRequest() | request = ms_service_pb2.PredictRequest() | ||||
| x = request.data.add() | x = request.data.add() | ||||
| x.tensor_shape.dims.extend([4]) | |||||
| x.tensor_shape.dims.extend([2, 2]) | |||||
| x.tensor_type = ms_service_pb2.MS_FLOAT32 | x.tensor_type = ms_service_pb2.MS_FLOAT32 | ||||
| x.data = (np.ones([4]).astype(np.float32)).tobytes() | |||||
| x.data = (np.ones([2, 2]).astype(np.float32)).tobytes() | |||||
| y = request.data.add() | y = request.data.add() | ||||
| y.tensor_shape.dims.extend([4]) | |||||
| y.tensor_shape.dims.extend([2, 2]) | |||||
| y.tensor_type = ms_service_pb2.MS_FLOAT32 | y.tensor_type = ms_service_pb2.MS_FLOAT32 | ||||
| y.data = (np.ones([4]).astype(np.float32)).tobytes() | |||||
| y.data = (np.ones([2, 2]).astype(np.float32)).tobytes() | |||||
| try: | try: | ||||
| result = stub.Predict(request) | result = stub.Predict(request) | ||||
@@ -61,13 +61,13 @@ start_service()
        echo "$2 faile to start."
    fi

    result=`grep -E 'MS Serving listening on 0.0.0.0:5500|MS Serving listening on 0.0.0.0:5501' $2_service.log | wc -l`
    result=`grep -E 'MS Serving grpc listening on 0.0.0.0:5500|MS Serving listening on 0.0.0.0:5501' $2_service.log | wc -l`
    count=0
    while [[ ${result} -ne 1 && ${count} -lt 150 ]]
    do
        sleep 1
        count=$(($count+1))
        result=`grep -E 'MS Serving listening on 0.0.0.0:5500|MS Serving listening on 0.0.0.0:5501' $2_service.log | wc -l`
        result=`grep -E 'MS Serving grpc listening on 0.0.0.0:5500|MS Serving listening on 0.0.0.0:5501' $2_service.log | wc -l`
    done

    if [ ${count} -eq 150 ]
@@ -185,7 +185,7 @@ if (ENABLE_GE)
endif()

if (CMAKE_SYSTEM_NAME MATCHES "Linux")
    target_link_libraries(ut_tests PRIVATE mindspore::gtest mindspore_gvar ${PYTHON_LIBRARIES} pthread util dl)
    target_link_libraries(ut_tests PRIVATE mindspore::gtest mindspore::event mindspore::event_pthreads mindspore_gvar ${PYTHON_LIBRARIES} pthread util dl)
    if (ENABLE_MINDDATA)
        target_link_libraries(ut_tests PRIVATE _c_dataengine _c_mindrecord)
    endif()
@@ -87,4 +87,3 @@ include_directories(${CMAKE_CURRENT_BINARY_DIR})
include_directories(${CMAKE_CURRENT_BINARY_DIR}/../)

add_library(ut_serving_obj OBJECT ${SERVING_SRC_TEST})
@@ -19,6 +19,7 @@
#include "common/common_test.h"
#include "serving/core/server.h"
#include "serving/core/session.h"
#include "include/inference.h"
#include "include/infer_tensor.h"
#include "serving/core/serving_tensor.h"