Browse Source

update log

tags/v1.2.0-rc1
chendongsheng 4 years ago
parent
commit
c6a28cadf8
4 changed files with 17 additions and 5 deletions
  1. +2
    -2
      mindspore/ccsrc/ps/core/abstract_node.cc
  2. +1
    -1
      mindspore/ccsrc/ps/core/node_manager.cc
  3. +13
    -2
      mindspore/ccsrc/ps/parameter_server.cc
  4. +1
    -0
      mindspore/ccsrc/ps/parameter_server.h

+ 2
- 2
mindspore/ccsrc/ps/core/abstract_node.cc View File

@@ -392,8 +392,8 @@ bool AbstractNode::Disconnect(const std::shared_ptr<TcpClient> &client, const ui

if (!SendMessageSync(client, meta, Protos::PROTOBUF, finish_message.SerializeAsString().data(),
finish_message.ByteSizeLong())) {
MS_LOG(ERROR) << "The node role:" << CommUtil::NodeRoleToString(node_info_.node_role_)
<< " the node id:" << node_info_.node_id_ << " send Finish Message timeout!";
MS_LOG(WARNING) << "The node role:" << CommUtil::NodeRoleToString(node_info_.node_role_)
<< " the node id:" << node_info_.node_id_ << " send Finish Message timeout!";
}
return WaitForDisconnect(timeout);
}


+ 1
- 1
mindspore/ccsrc/ps/core/node_manager.cc View File

@@ -95,7 +95,7 @@ void NodeManager::UpdateClusterState() {
timeout_nodes_info_.clear();
for (auto it = heartbeats_.begin(); it != heartbeats_.end(); ++it) {
if (it->second.tv_sec + ClusterMetadata::instance()->heartbeat_timeout() < current_time.tv_sec) {
MS_LOG(ERROR) << "The node id:" << it->first << " is timeout!";
MS_LOG(WARNING) << "The node id:" << it->first << " is timeout!";
timeout_nodes_info_[it->first] = nodes_info_[it->first];
}
}


+ 13
- 2
mindspore/ccsrc/ps/parameter_server.cc View File

@@ -493,15 +493,26 @@ void ParameterServer::ServerHandler::Init() {
handlers_[kFinalizeCmd] = &ServerHandler::HandleFinalize;
handlers_[kPushCmd] = &ServerHandler::HandlePushReq;
handlers_[kPullCmd] = &ServerHandler::HandlePullReq;
commands_[kInitWeightsCmd] = "kInitWeightsCmd";
commands_[kInitWeightToOptimIdCmd] = "kInitWeightToOptimIdCmd";
commands_[kInitOptimInputsShapeCmd] = "kInitOptimInputsShapeCmd";
commands_[kInitEmbeddingsCmd] = "kInitEmbeddingsCmd";
commands_[kCheckReadyForPushCmd] = "kCheckReadyForPushCmd";
commands_[kCheckReadyForPullCmd] = "kCheckReadyForPullCmd";
commands_[kEmbeddingLookupCmd] = "kEmbeddingLookupCmd";
commands_[kUpdateEmbeddingsCmd] = "kUpdateEmbeddingsCmd";
commands_[kFinalizeCmd] = "kFinalizeCmd";
commands_[kPushCmd] = "kPushCmd";
commands_[kPullCmd] = "kPullCmd";
}

void ParameterServer::ServerHandler::operator()(std::shared_ptr<core::TcpConnection> conn,
std::shared_ptr<core::MessageMeta> meta, DataPtr data, size_t size) {
auto output = std::make_shared<std::vector<unsigned char>>();
MS_LOG(INFO) << "The command is:" << meta->user_cmd();
if (handlers_.count(meta->user_cmd()) == 0) {
if (commands_.count(meta->user_cmd()) == 0) {
MS_LOG(EXCEPTION) << "The command:" << meta->user_cmd() << " is not supported!";
}
MS_LOG(INFO) << "The command is:" << commands_[meta->user_cmd()];

auto &handler_ptr = handlers_[meta->user_cmd()];
(this->*handler_ptr)(data, size, output);


+ 1
- 0
mindspore/ccsrc/ps/parameter_server.h View File

@@ -112,6 +112,7 @@ class ParameterServer {
ParameterServer *ps_;
typedef void (ServerHandler::*RequestHandler)(DataPtr data, size_t size, VectorPtr res);
std::unordered_map<int, RequestHandler> handlers_;
std::unordered_map<int, std::string> commands_;
std::unordered_map<Key, bool> init_weights_;
std::unordered_map<Key, bool> init_weight_to_optim_;
std::unordered_map<Key, bool> init_optim_info_;


Loading…
Cancel
Save