
server.cc 6.8 kB

/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "core/server.h"
#include <grpcpp/grpcpp.h>
#include <grpcpp/health_check_service_interface.h>
#include <grpcpp/ext/proto_server_reflection_plugin.h>
#include <csignal>
#include <string>
#include <map>
#include <vector>
#include <utility>
#include <memory>
#include <mutex>
#include <thread>
#include <future>
#include <chrono>
#include "include/infer_log.h"
#include "serving/ms_service.grpc.pb.h"
#include "core/util/option_parser.h"
#include "core/version_control/version_controller.h"
#include "core/util/file_system_operation.h"
#include "core/serving_tensor.h"

using ms_serving::MSService;
using ms_serving::PredictReply;
using ms_serving::PredictRequest;

namespace mindspore {
namespace serving {

#define MSI_TIME_STAMP_START(name) auto time_start_##name = std::chrono::steady_clock::now();
#define MSI_TIME_STAMP_END(name)                                                                              \
  {                                                                                                           \
    auto time_end_##name = std::chrono::steady_clock::now();                                                  \
    auto time_cost = std::chrono::duration<double, std::milli>(time_end_##name - time_start_##name).count(); \
    MSI_LOG_INFO << #name " Time Cost " << time_cost << "ms ---------------------";                          \
  }
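
// Usage note: wrap a code section between MSI_TIME_STAMP_START(name) and MSI_TIME_STAMP_END(name)
// to log its wall-clock duration in milliseconds, as done below for LoadModelFromFile and Predict.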

Status Session::CreatDeviceSession(const std::string &device, uint32_t device_id) {
  session_ = inference::InferSession::CreateSession(device, device_id);
  if (session_ == nullptr) {
    MSI_LOG(ERROR) << "Create session failed";
    return FAILED;
  }
  device_type_ = device;
  return SUCCESS;
}

Session &Session::Instance() {
  static Session instance;
  return instance;
}

Status Session::Predict(const PredictRequest &request, PredictReply &reply) {
  if (!model_loaded_) {
    MSI_LOG(ERROR) << "the model has not been loaded";
    return FAILED;
  }
  if (session_ == nullptr) {
    MSI_LOG(ERROR) << "the inference session has not been initialized";
    return FAILED;
  }
  std::lock_guard<std::mutex> lock(mutex_);
  MSI_LOG(INFO) << "run Predict";
  ServingRequest serving_request(request);
  ServingReply serving_reply(reply);
  auto ret = session_->ExecuteModel(graph_id_, serving_request, serving_reply);
  MSI_LOG(INFO) << "run Predict finished";
  if (!ret) {
    MSI_LOG(ERROR) << "execute model failed";
    return FAILED;
  }
  return SUCCESS;
}

Status Session::Warmup(const MindSporeModelPtr model) {
  if (session_ == nullptr) {
    MSI_LOG(ERROR) << "CreatDeviceSession should be called before Warmup";
    return FAILED;
  }
  std::lock_guard<std::mutex> lock(mutex_);
  std::string file_name = model->GetModelPath() + '/' + model->GetModelName();
  model_loaded_ = false;
  MSI_TIME_STAMP_START(LoadModelFromFile)
  auto ret = session_->LoadModelFromFile(file_name, graph_id_);
  MSI_TIME_STAMP_END(LoadModelFromFile)
  if (!ret) {
    MSI_LOG(ERROR) << "Load graph model failed, file name is " << file_name.c_str();
    return FAILED;
  }
  model_loaded_ = true;
  MSI_LOG(INFO) << "Session Warmup finished";
  return SUCCESS;
}

Status Session::Clear() {
  if (session_ != nullptr) {
    session_->UnloadModel(graph_id_);
    session_->FinalizeEnv();
    session_ = nullptr;
  }
  return SUCCESS;
}

namespace {
static const uint32_t uint32max = 0x7FFFFFFF;
std::promise<void> exit_requested;

void ClearEnv() {
  Session::Instance().Clear();
  // inference::ExitInference();
}
void HandleSignal(int sig) { exit_requested.set_value(); }
}  // namespace
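
// Shutdown flow: HandleSignal, registered for SIGINT/SIGTERM in Server::BuildAndStart, fulfills the
// exit_requested promise; BuildAndStart blocks on the matching future, then clears the session and
// shuts the gRPC server down.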

// Service implementation
class MSServiceImpl final : public MSService::Service {
  grpc::Status Predict(grpc::ServerContext *context, const PredictRequest *request, PredictReply *reply) override {
    std::lock_guard<std::mutex> lock(mutex_);
    MSI_TIME_STAMP_START(Predict)
    auto res = Session::Instance().Predict(*request, *reply);
    MSI_TIME_STAMP_END(Predict)
    if (res != SUCCESS) {
      return grpc::Status::CANCELLED;
    }
    MSI_LOG(INFO) << "Finish call service Predict";
    return grpc::Status::OK;
  }

  grpc::Status Test(grpc::ServerContext *context, const PredictRequest *request, PredictReply *reply) override {
    MSI_LOG(INFO) << "TestService call";
    return grpc::Status::OK;
  }

  std::mutex mutex_;
};
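
// Note: Predict requests are serialized twice, first by MSServiceImpl::mutex_ above and then by
// Session::mutex_ inside Session::Predict, so at most one inference runs at a time.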

Status Server::BuildAndStart() {
  // handle exit signals
  signal(SIGINT, HandleSignal);
  signal(SIGTERM, HandleSignal);
  Status res;
  auto option_args = Options::Instance().GetArgs();
  std::string server_address = "0.0.0.0:" + std::to_string(option_args->grpc_port);
  std::string model_path = option_args->model_path;
  std::string model_name = option_args->model_name;
  std::string device_type = option_args->device_type;
  auto device_id = option_args->device_id;
  res = Session::Instance().CreatDeviceSession(device_type, device_id);
  if (res != SUCCESS) {
    MSI_LOG(ERROR) << "create session failed";
    ClearEnv();
    return res;
  }
  VersionController version_controller(option_args->poll_model_wait_seconds, model_path, model_name);
  res = version_controller.Run();
  if (res != SUCCESS) {
    MSI_LOG(ERROR) << "load model failed";
    ClearEnv();
    return res;
  }
  MSServiceImpl ms_service;
  grpc::EnableDefaultHealthCheckService(true);
  grpc::reflection::InitProtoReflectionServerBuilderPlugin();
  // Do not allow the listening port to be reused
  auto option = grpc::MakeChannelArgumentOption(GRPC_ARG_ALLOW_REUSEPORT, 0);
  grpc::ServerBuilder serverBuilder;
  serverBuilder.SetOption(std::move(option));
  serverBuilder.SetMaxMessageSize(uint32max);
  serverBuilder.AddListeningPort(server_address, grpc::InsecureServerCredentials());
  serverBuilder.RegisterService(&ms_service);
  std::unique_ptr<grpc::Server> server(serverBuilder.BuildAndStart());
  if (server == nullptr) {
    MSI_LOG(ERROR) << "Failed to create the serving server";
    ClearEnv();
    return FAILED;
  }
  auto grpc_server_run = [&server]() { server->Wait(); };
  std::thread serving_thread(grpc_server_run);
  MSI_LOG(INFO) << "MS Serving listening on " << server_address;
  auto exit_future = exit_requested.get_future();
  exit_future.wait();
  ClearEnv();
  server->Shutdown();
  serving_thread.join();
  return SUCCESS;
}

}  // namespace serving
}  // namespace mindspore
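
For reference, a minimal client for this service could look like the sketch below. It is a hypothetical example, not part of server.cc: it relies only on what the file above shows (the ms_serving::MSService service with a Predict RPC and the generated stubs from serving/ms_service.grpc.pb.h), while the address and port, and how the PredictRequest tensors are filled, depend on the deployment and on ms_service.proto, which is not included here.

// client_sketch.cc - hypothetical client, not part of this repository file.
// Assumes the generated gRPC stubs from serving/ms_service.grpc.pb.h are available.
#include <grpcpp/grpcpp.h>
#include <iostream>
#include <memory>
#include "serving/ms_service.grpc.pb.h"

int main() {
  // The server listens on 0.0.0.0:<grpc_port>; the port used here is only a placeholder.
  auto channel = grpc::CreateChannel("localhost:5500", grpc::InsecureChannelCredentials());
  auto stub = ms_serving::MSService::NewStub(channel);

  ms_serving::PredictRequest request;  // input tensors must be filled per ms_service.proto
  ms_serving::PredictReply reply;
  grpc::ClientContext context;

  grpc::Status status = stub->Predict(&context, request, &reply);
  if (!status.ok()) {
    std::cerr << "Predict failed: " << status.error_message() << std::endl;
    return 1;
  }
  std::cout << "Predict succeeded" << std::endl;
  return 0;
}

Since the server returns grpc::Status::CANCELLED whenever Session::Predict fails, a client should always check status.ok() as above rather than inspecting the reply alone.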