diff --git a/mindspore/ccsrc/ps/core/abstract_node.cc b/mindspore/ccsrc/ps/core/abstract_node.cc index 0e867dd767..105cbd7357 100644 --- a/mindspore/ccsrc/ps/core/abstract_node.cc +++ b/mindspore/ccsrc/ps/core/abstract_node.cc @@ -390,11 +390,9 @@ bool AbstractNode::Disconnect(const std::shared_ptr &client, const ui auto meta = std::make_shared(); meta->set_cmd(NodeCommand::FINISH); - FinishMessage finish_message; - finish_message.set_node_id(node_info_.node_id_); + std::string finish_message = node_info_.node_id_; - if (!SendMessageSync(client, meta, Protos::PROTOBUF, finish_message.SerializeAsString().data(), - finish_message.ByteSizeLong())) { + if (!SendMessageSync(client, meta, Protos::RAW, finish_message.data(), finish_message.length())) { MS_LOG(WARNING) << "The node role:" << CommUtil::NodeRoleToString(node_info_.node_role_) << " the node id:" << node_info_.node_id_ << " send Finish Message timeout!"; } diff --git a/mindspore/ccsrc/ps/core/comm_util.cc b/mindspore/ccsrc/ps/core/comm_util.cc index 5bac87180c..8824d06a88 100644 --- a/mindspore/ccsrc/ps/core/comm_util.cc +++ b/mindspore/ccsrc/ps/core/comm_util.cc @@ -142,9 +142,6 @@ bool CommUtil::Retry(const std::function &func, size_t max_attempts, siz void CommUtil::LogCallback(int severity, const char *msg) { switch (severity) { - case EVENT_LOG_DEBUG: - MS_LOG(DEBUG) << kLibeventLogPrefix << msg; - break; case EVENT_LOG_MSG: MS_LOG(INFO) << kLibeventLogPrefix << msg; break; @@ -155,7 +152,6 @@ void CommUtil::LogCallback(int severity, const char *msg) { MS_LOG(ERROR) << kLibeventLogPrefix << msg; break; default: - MS_LOG(WARNING) << kLibeventLogPrefix << msg; break; } } diff --git a/mindspore/ccsrc/ps/core/node_manager.cc b/mindspore/ccsrc/ps/core/node_manager.cc index ab3dfee8de..db4267d246 100644 --- a/mindspore/ccsrc/ps/core/node_manager.cc +++ b/mindspore/ccsrc/ps/core/node_manager.cc @@ -110,7 +110,7 @@ void NodeManager::UpdateClusterState() { } } if (!timeout_nodes_info_.empty()) { - is_cluster_timeout_ = true; + is_node_timeout_ = true; for (auto it = timeout_nodes_info_.begin(); it != timeout_nodes_info_.end(); ++it) { finish_nodes_id_.insert(it->first); } @@ -138,9 +138,7 @@ void NodeManager::CheckClusterTimeout() { } } -void NodeManager::AddFinishNode(const FinishMessage &finish_message) { - finish_nodes_id_.insert(finish_message.node_id()); -} +void NodeManager::AddFinishNode(const std::string &finish_message) { finish_nodes_id_.insert(finish_message); } std::unordered_map NodeManager::nodes_info() { return nodes_info_; } diff --git a/mindspore/ccsrc/ps/core/node_manager.h b/mindspore/ccsrc/ps/core/node_manager.h index 615f9ef357..81f7584d0f 100644 --- a/mindspore/ccsrc/ps/core/node_manager.h +++ b/mindspore/ccsrc/ps/core/node_manager.h @@ -61,7 +61,7 @@ class NodeManager { std::vector FetchServersMeta(); void UpdateClusterState(); void CheckClusterTimeout(); - void AddFinishNode(const FinishMessage &finish_message); + void AddFinishNode(const std::string &finish_message); std::unordered_map nodes_info(); bool is_cluster_ready(); bool is_cluster_finish(); diff --git a/mindspore/ccsrc/ps/core/scheduler_node.cc b/mindspore/ccsrc/ps/core/scheduler_node.cc index 378e8399ca..926cb474ca 100644 --- a/mindspore/ccsrc/ps/core/scheduler_node.cc +++ b/mindspore/ccsrc/ps/core/scheduler_node.cc @@ -139,10 +139,9 @@ void SchedulerNode::ProcessFinish(std::shared_ptr server, std::shared MS_EXCEPTION_IF_NULL(conn); MS_EXCEPTION_IF_NULL(meta); MS_EXCEPTION_IF_NULL(data); - FinishMessage finish_message; - finish_message.ParseFromArray(data, size); - node_manager_.AddFinishNode(finish_message); - MS_LOG(INFO) << "Process finish message from node id:" << finish_message.node_id(); + auto finish_message = std::make_unique(reinterpret_cast(data), size); + node_manager_.AddFinishNode(*finish_message); + MS_LOG(INFO) << "Process finish message from node id:" << *finish_message; server->SendMessage(conn, meta, Protos::PROTOBUF, data, size); } diff --git a/mindspore/ccsrc/ps/core/server_node.cc b/mindspore/ccsrc/ps/core/server_node.cc index 6f50e3eb05..baaffce57e 100644 --- a/mindspore/ccsrc/ps/core/server_node.cc +++ b/mindspore/ccsrc/ps/core/server_node.cc @@ -18,11 +18,6 @@ namespace mindspore { namespace ps { namespace core { -ServerNode::~ServerNode() { - MS_LOG(INFO) << "Stop server node!"; - Stop(); -} - bool ServerNode::Start(const uint32_t &timeout) { MS_LOG(INFO) << "Start server node!"; Initialize(); diff --git a/mindspore/ccsrc/ps/core/server_node.h b/mindspore/ccsrc/ps/core/server_node.h index eb12747765..09f40c5c8c 100644 --- a/mindspore/ccsrc/ps/core/server_node.h +++ b/mindspore/ccsrc/ps/core/server_node.h @@ -36,7 +36,7 @@ namespace core { class ServerNode : public AbstractNode { public: ServerNode() : server_(nullptr), server_thread_(nullptr) {} - ~ServerNode() override; + ~ServerNode() override = default; bool Start(const uint32_t &timeout = ClusterMetadata::instance()->cluster_available_timeout()) override; bool Stop() override; diff --git a/mindspore/ccsrc/ps/core/worker_node.cc b/mindspore/ccsrc/ps/core/worker_node.cc index 391c8dc546..63584e95cd 100644 --- a/mindspore/ccsrc/ps/core/worker_node.cc +++ b/mindspore/ccsrc/ps/core/worker_node.cc @@ -20,10 +20,6 @@ namespace mindspore { namespace ps { namespace core { -WorkerNode::~WorkerNode() { - MS_LOG(INFO) << "Stop worker node!"; - Stop(); -} bool WorkerNode::Start(const uint32_t &timeout) { MS_LOG(INFO) << "Starting worker node!"; Initialize(); diff --git a/mindspore/ccsrc/ps/core/worker_node.h b/mindspore/ccsrc/ps/core/worker_node.h index c9dc027081..19490b79fd 100644 --- a/mindspore/ccsrc/ps/core/worker_node.h +++ b/mindspore/ccsrc/ps/core/worker_node.h @@ -35,7 +35,7 @@ namespace core { class WorkerNode : public AbstractNode { public: WorkerNode() = default; - ~WorkerNode() override; + ~WorkerNode() override = default; bool Start(const uint32_t &timeout = ClusterMetadata::instance()->cluster_available_timeout()) override; bool Stop() override;