Browse Source

!10379 Fix the PS cache data channel deadlock

From: @limingqi107
Reviewed-by: @kisnwang,@cristoval
Signed-off-by: @cristoval
tags/v1.1.0
mindspore-ci-bot Gitee 5 years ago
parent
commit
c8691f937b
4 changed files with 15 additions and 3 deletions
  1. +2
    -2
      mindspore/ccsrc/ps/ps_cache/ps_data/ps_data_channel.cc
  2. +1
    -1
      mindspore/ccsrc/ps/ps_cache/ps_data/ps_data_channel.h
  3. +11
    -0
      mindspore/ccsrc/ps/ps_cache/ps_data/ps_data_prefetch.cc
  4. +1
    -0
      mindspore/ccsrc/ps/ps_cache/ps_data/ps_data_prefetch.h

+ 2
- 2
mindspore/ccsrc/ps/ps_cache/ps_data/ps_data_channel.cc View File

@@ -32,8 +32,8 @@ void PsDataChannel::TryLockChannel() {
current_data_step_++;
}

void PsDataChannel::TryWakeChannel() {
if ((current_graph_step_ != 0) && (current_graph_step_ % step_num_ == 0)) {
void PsDataChannel::TryWakeChannel(bool force_wake) {
if (force_wake || ((current_graph_step_ != 0) && (current_graph_step_ % step_num_ == 0))) {
MS_LOG(INFO) << "Wake up channel:" << channel_name_;
std::lock_guard<std::mutex> locker(channel_mutex_);
channel_open_ = true;


+ 1
- 1
mindspore/ccsrc/ps/ps_cache/ps_data/ps_data_channel.h View File

@@ -38,7 +38,7 @@ class PsDataChannel {
// Returns the stored data_size_ value.
size_t data_size() const { return data_size_; }
// Drops the channel's data pointer by setting data_ to nullptr.
void ResetData() { data_ = nullptr; }
// Stores step_num_, which TryWakeChannel uses as the modulus for current_graph_step_.
// NOTE(review): a step_num of 0 would make that modulo undefined — confirm callers never pass 0.
void set_step_num(size_t step_num) { step_num_ = step_num; }
void TryWakeChannel();
void TryWakeChannel(bool force_wake = false);

private:
void TryLockChannel();


+ 11
- 0
mindspore/ccsrc/ps/ps_cache/ps_data/ps_data_prefetch.cc View File

@@ -116,6 +116,7 @@ size_t PsDataPrefetch::data_size(const std::string &channel_name) const {

// Signals finalization: clears need_wait_, force-wakes every data channel, then
// notifies one waiter on data_prefetch_ and one on data_process_.
void PsDataPrefetch::NotifyFinalize() {
// Cleared before any waiter is notified below; presumably waiters re-check this flag — verify.
need_wait_ = false;
// Opens every registered channel regardless of its step counter (force_wake = true).
WakeAllChannel();
// NOTE(review): data_prefetch_ / data_process_ are presumably condition variables;
// notify_one wakes at most one waiter each — confirm there is never more than one.
data_prefetch_.notify_one();
data_process_.notify_one();
}
@@ -128,5 +129,15 @@ bool PsDataPrefetch::TryWakeChannel(const std::string &channel_name) {
channel->TryWakeChannel();
return true;
}

// Force-wakes every channel registered in ps_data_channel_map_ by calling
// TryWakeChannel(true) on it, which bypasses the per-channel step check.
// Invoked from NotifyFinalize; presumably so that no thread stays blocked on a
// closed channel during shutdown — verify against the channel's wait logic.
void PsDataPrefetch::WakeAllChannel() {
  for (const auto &entry : ps_data_channel_map_) {
    // Bind by reference: copying the shared_ptr would cost an atomic refcount update.
    const auto &channel = entry.second;
    if (channel == nullptr) {
      // Skip the empty slot but keep going: returning here would leave all
      // remaining channels un-woken.
      continue;
    }
    channel->TryWakeChannel(true);
  }
}
} // namespace ps
} // namespace mindspore

+ 1
- 0
mindspore/ccsrc/ps/ps_cache/ps_data/ps_data_prefetch.h View File

@@ -50,6 +50,7 @@ class EXPORT PsDataPrefetch {
PsDataPrefetch(const PsDataPrefetch &) = delete;
PsDataPrefetch &operator=(const PsDataPrefetch &) = delete;
std::shared_ptr<PsDataChannel> ps_data_channel(const std::string &channel_name) const;
void WakeAllChannel();
std::map<std::string, std::shared_ptr<PsDataChannel>> ps_data_channel_map_;
bool cache_enable_;
bool data_ready_;


Loading…
Cancel
Save