From 31a265ce55a05dc341b4e991571b5e13b8901e85 Mon Sep 17 00:00:00 2001 From: Jesse Lee Date: Mon, 21 Dec 2020 14:27:59 -0500 Subject: [PATCH] Fix cookie mismatch issue --- .../minddata/dataset/engine/cache/cache_client.cc | 8 +++++--- .../minddata/dataset/engine/cache/cache_server.cc | 13 +++++++++++-- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/mindspore/ccsrc/minddata/dataset/engine/cache/cache_client.cc b/mindspore/ccsrc/minddata/dataset/engine/cache/cache_client.cc index d2f50bf254..72243a4d09 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/cache/cache_client.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/cache/cache_client.cc @@ -219,9 +219,11 @@ Status CacheClient::CreateCache(uint32_t tree_crc, bool generate_id) { // Check the state of the server. For non-mappable case where there is a build phase and a fetch phase, we should // skip the build phase. lck.Unlock(); // GetStat will grab the mutex again. So unlock it to prevent deadlock. - CacheServiceStat stat{}; - RETURN_IF_NOT_OK(GetStat(&stat)); - if (stat.cache_service_state == static_cast(CacheServiceState::kFetchPhase)) { + int8_t out; + RETURN_IF_NOT_OK(GetState(&out)); + auto cache_state = static_cast(out); + if (cache_state == CacheServiceState::kFetchPhase || + (cache_state == CacheServiceState::kBuildPhase && cookie_.empty())) { return Status(StatusCode::kDuplicateKey, __LINE__, __FILE__, "Not an error and we should bypass the build phase"); } } else { diff --git a/mindspore/ccsrc/minddata/dataset/engine/cache/cache_server.cc b/mindspore/ccsrc/minddata/dataset/engine/cache/cache_server.cc index 652d4033e4..257994728b 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/cache/cache_server.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/cache/cache_server.cc @@ -248,7 +248,8 @@ Status CacheServer::CreateService(CacheRequest *rq, CacheReply *reply) { } else { duplicate = true; client_id = it->second->num_clients_.fetch_add(1); - MS_LOG(INFO) << "Duplicate request for " + std::to_string(connection_id) + " to create cache service"; + MS_LOG(INFO) << "Duplicate request from client " + std::to_string(client_id) + " for " + + std::to_string(connection_id) + " to create cache service"; } // Shuffle the worker threads. But we need to release the locks or we will deadlock when calling // the following function @@ -357,7 +358,15 @@ Status CacheServer::FastCacheRow(CacheRequest *rq, CacheReply *reply) { rc = cs->FastCacheRow(src, &id); reply->set_result(std::to_string(id)); } else { - rc = Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "Cookie mismatch"); + auto state = cs->GetState(); + if (state != CacheServiceState::kFetchPhase) { + rc = Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, + "Cache service is not in fetch phase. The current phase is " + + std::to_string(static_cast(state)) + ". Client id: " + std::to_string(client_id)); + } else { + rc = Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, + "Cookie mismatch. Client id: " + std::to_string(client_id)); + } } } // Return the block to the shared memory only if it is not internal request.