diff --git a/mindspore/ccsrc/backend/optimizer/somas/somas.cc b/mindspore/ccsrc/backend/optimizer/somas/somas.cc index a0a88511cc..d5acfe5573 100644 --- a/mindspore/ccsrc/backend/optimizer/somas/somas.cc +++ b/mindspore/ccsrc/backend/optimizer/somas/somas.cc @@ -37,6 +37,7 @@ namespace mindspore { namespace somas { +constexpr auto kGapSize = 512; std::map tensor_type_name_map = {{kCommon, "Common"}, {kOutputOnly, "OutputOnly"}, {kWorkspace, "Workspace"}, @@ -44,7 +45,6 @@ std::map tensor_type_name_map = {{kCommon, "Common"}, {kSummaryInput, "SummaryInput"}, {kRefNodeInput, "RefNodeInput"}, {kRefNodeOutput, "RefNodeOutput"}, - {kGap, "Gap"}, {kUnknown, "Unknown"}}; bool Somas::Allocate(const session::KernelGraph *graph) { @@ -451,62 +451,37 @@ void Somas::UnReuseNodeProcess(const session::KernelGraph *graph) { } } -SomasTensorPtr Somas::CreateGapTensor(size_t gap_tensor_id) { - // real size 512 and lifelong_ - const size_t gap_size = 512; - auto gap_tensor = std::make_shared(gap_tensor_id++, nullptr, nullptr, gap_size, kLifeLongNone); - gap_tensor->type_ = kGap; - gap_tensor->aligned_size_ = gap_size; - tensors_map_[gap_tensor->GetId()] = gap_tensor; - tensors_list_.push_back(gap_tensor); - return gap_tensor; -} - void Somas::GenContiguousList(const session::KernelGraph *graph) { MS_EXCEPTION_IF_NULL(graph); - size_t gap_tensor_id = tensors_list_.size(); for (const auto &node : nodes_list_) { MS_EXCEPTION_IF_NULL(node); if (node->GetType() != kCommunicationNode) { continue; } + + // Contiguous input if ((!node->input_tensors_.empty()) && (!node->input_tensors_[0]->contiguous_)) { + node->input_tensors_[0]->aligned_size_ += kGapSize; + node->input_tensors_[node->input_tensors_.size() - 1]->aligned_size_ += kGapSize; std::vector inputs; - auto input_before_gap = CreateGapTensor(gap_tensor_id); - input_before_gap->contiguous_ = true; - gap_tensor_id++; - inputs.push_back(input_before_gap->GetId()); - for (const auto &input_tensor : node->input_tensors_) { comm_input_total_size_ += input_tensor->aligned_size_; input_tensor->contiguous_ = true; inputs.push_back(input_tensor->GetId()); } - - auto input_after_gap = CreateGapTensor(gap_tensor_id); - gap_tensor_id++; - input_after_gap->contiguous_ = true; - inputs.push_back(input_after_gap->GetId()); contiguous_tensors_list_.push_back(inputs); } + // Contiguous output if ((!node->output_tensors_.empty()) && (!node->output_tensors_[0]->contiguous_)) { + node->output_tensors_[0]->aligned_size_ += kGapSize; + node->output_tensors_[node->output_tensors_.size() - 1]->aligned_size_ += kGapSize; std::vector outputs; - auto output_before_gap = CreateGapTensor(gap_tensor_id); - gap_tensor_id++; - output_before_gap->contiguous_ = true; - outputs.push_back(output_before_gap->GetId()); - for (const auto &output_tensor : node->output_tensors_) { comm_output_total_size_ += output_tensor->aligned_size_; output_tensor->contiguous_ = true; outputs.push_back(output_tensor->GetId()); } - - auto output_after_gap = CreateGapTensor(gap_tensor_id); - gap_tensor_id++; - output_after_gap->contiguous_ = true; - outputs.push_back(output_after_gap->GetId()); contiguous_tensors_list_.push_back(outputs); } } @@ -553,9 +528,6 @@ void Somas::PreprocessingConflicts() { // Atomic: fix any issues on saved lifetimes of tensors for (auto tensor : tensors_list_) { MS_EXCEPTION_IF_NULL(tensor); - if (tensor->IsGap()) { - continue; - } for (auto node : tensor->destinations_) { MS_EXCEPTION_IF_NULL(node); MS_EXCEPTION_IF_NULL(tensor->GetSourceNode()); @@ -581,6 +553,7 @@ void Somas::ComputeConflictPairs() { MS_LOG(INFO) << "End Preprocessing Conflicts"; MS_LOG(INFO) << "Start Conflict Computing (Bitset Model)"; + std::sort(nodes_list_.begin(), nodes_list_.end(), NodeSort); // Loop to add edges within each stream (node order within stream) @@ -651,16 +624,18 @@ void Somas::ComputeConflictPairs() { } for (size_t i = 0; i < tensors_list_.size(); i++) { + auto t0 = tensors_list_[i]; + if (t0->IsLifelong() || t0->IsRefOverlap() || t0->GetAlignedSize() == 0) { + continue; + } + size_t t0_src_node = t0->GetSourceNode()->GetId(); for (size_t j = i + 1; j < tensors_list_.size(); j++) { - auto t0 = tensors_list_[i]; auto t1 = tensors_list_[j]; - if (t0 == t1 || t0->IsGap() || t1->IsGap() || t0->IsLifelong() || t1->IsLifelong() || t0->IsRefOverlap() || - t1->IsRefOverlap() || t0->GetAlignedSize() == 0 || t1->GetAlignedSize() == 0) { + if (t0 == t1 || t1->IsLifelong() || t1->IsRefOverlap() || t1->GetAlignedSize() == 0) { continue; } - size_t t0_src_node = t0->GetSourceNode()->GetId(); size_t t1_src_node = t1->GetSourceNode()->GetId(); if (t0_src_node == t1_src_node) { continue; @@ -879,54 +854,6 @@ bool Somas::Assign(const session::KernelGraph *graph) { tensor1->num_constraints_ = count_constraints; } - // Preprocessing contiguous gaps - MS_LOG(INFO) << "Start Contiguous Gaps Preprocessing"; - for (auto contiguous_list : contiguous_tensors_list_) { - if (contiguous_list.size() < 3) { - MS_LOG(ERROR) << "contiguous_list should have at least one input and two gap, now it is " - << contiguous_list.size(); - } - size_t front_gap_id = contiguous_list[0]; - size_t back_gap_id = contiguous_list[contiguous_list.size() - 1]; - - SomasTensorPtr front_gap = tensors_map_[front_gap_id]; - SomasTensorPtr back_gap = tensors_map_[back_gap_id]; - MS_EXCEPTION_IF_NULL(front_gap); - MS_EXCEPTION_IF_NULL(back_gap); - - // Update conflicts to conflicts of neighbour - size_t front_neighbour_id = contiguous_list[1]; - size_t back_neighbour_id = contiguous_list[contiguous_list.size() - 2]; - for (SomasTensorPtr tensor : tensors_list_) { - MS_EXCEPTION_IF_NULL(tensor); - if (tensor_relation[tensor->GetId()].IsBitTrue(front_neighbour_id) == false) { - tensor_relation[tensor->GetId()].SetBitFalse(front_gap_id); - tensor_relation[front_gap_id].SetBitFalse(tensor->GetId()); - } else { - tensor_relation[tensor->GetId()].SetBitTrue(front_gap_id); - tensor_relation[front_gap_id].SetBitTrue(tensor->GetId()); - } - if (tensor_relation[tensor->GetId()].IsBitTrue(back_neighbour_id) == false) { - tensor_relation[tensor->GetId()].SetBitFalse(back_gap_id); - tensor_relation[back_gap_id].SetBitFalse(tensor->GetId()); - } else { - tensor_relation[tensor->GetId()].SetBitTrue(back_gap_id); - tensor_relation[back_gap_id].SetBitTrue(tensor->GetId()); - } - } - SomasTensorPtr front_neighbour = tensors_map_[front_neighbour_id]; - SomasTensorPtr back_neighbour = tensors_map_[back_neighbour_id]; - MS_EXCEPTION_IF_NULL(front_neighbour); - MS_EXCEPTION_IF_NULL(back_neighbour); - front_gap->num_constraints_ = front_neighbour->num_constraints_; - front_gap->lifetime_.start_ = front_neighbour->lifetime_.end_; - front_gap->lifetime_.end_ = front_neighbour->lifetime_.end_; - back_gap->num_constraints_ = back_neighbour->num_constraints_; - back_gap->lifetime_.start_ = back_neighbour->lifetime_.end_; - back_gap->lifetime_.end_ = back_neighbour->lifetime_.end_; - } - MS_LOG(INFO) << "End Contiguous Gaps Preprocessing"; - // Prepare solver info MS_LOG(INFO) << "Start Loop to create solver info"; for (auto tensor : tensors_list_) { @@ -977,6 +904,11 @@ bool Somas::Assign(const session::KernelGraph *graph) { } MS_LOG(INFO) << "\nEnd Solving Postprocessing for Ref Node"; + // Contiguous gaps postprocessing + for (auto list : contiguous_tensors_list_) { + tensors_map_[list[0]]->offset_ += kGapSize; + } + // Set mem_offset_ value by solver result mem_offset_ = static_cast(somas_solver_->GetMaxOffset()); @@ -1108,12 +1040,11 @@ void Somas::DumpOfflineIR(const string filename) { } for (auto tensor : tensors_list_) { - if (tensor->IsGap()) continue; - if (tensor->IsOutputOnly()) { + if (tensor->IsOutputOnly() || tensor->type_ == TensorType::kRefNodeOutput) { ofs << "Somas EDGE ERROR src=n" << tensor->GetSourceNode()->GetId() << ", srcstm=" << tensor->GetSourceStream()->GetId() << ", dst=nc" << ", dststm=nc" - << ", workspace=0, size=" << tensor->GetAlignedSize() + << ", workspace=0, size=" << tensor->GetOriginalSize() << ", lifelong=" << static_cast(tensor->lifelong_value_) << ", tid=" << tensor->GetId() << ", start=" << tensor->lifetime_.start_ << ", end=" << tensor->lifetime_.end_ << std::endl; } else { @@ -1126,24 +1057,15 @@ void Somas::DumpOfflineIR(const string filename) { ofs << "Somas EDGE src=n" << tensor->GetSourceNode()->GetId() << ", srcstm=" << tensor->GetSourceStream()->GetId() << ", dst=n" << dest_info.first << ", dststm=" << dest_info.second << ", workspace=" << static_cast(tensor->type_ == kWorkspace) - << ", size=" << tensor->GetAlignedSize() << ", lifelong=" << static_cast(tensor->lifelong_value_) + << ", size=" << tensor->GetOriginalSize() << ", lifelong=" << static_cast(tensor->lifelong_value_) << ", tid=" << tensor->GetId() << ", start=" << tensor->lifetime_.start_ << ", end=" << tensor->lifetime_.end_ << std::endl; } } } for (vector tList : contiguous_tensors_list_) { - ofs << "Somas CONTIGUOUS "; - // ignore front and back gaps - for (size_t i = 1; i < tList.size() - 1; ++i) { - if (tensors_map_[tList[i]]->IsGap()) { - ofs << "INPUT"; - break; - } - if (i == tList.size() - 2) ofs << "OUTPUT"; - } + ofs << "Somas CONTIGUOUS"; for (size_t tid : tList) { - if (tensors_map_[tid]->IsGap()) continue; ofs << " " << tid; } ofs << std::endl; diff --git a/mindspore/ccsrc/backend/optimizer/somas/somas_tensor.cc b/mindspore/ccsrc/backend/optimizer/somas/somas_tensor.cc index a0da3a85c1..a61e5a3eb3 100644 --- a/mindspore/ccsrc/backend/optimizer/somas/somas_tensor.cc +++ b/mindspore/ccsrc/backend/optimizer/somas/somas_tensor.cc @@ -43,8 +43,8 @@ SomasTensor::SomasTensor(size_t id, SomasNodePtr source_node, SomasStreamPtr sou } SomasSolverTensorDescPtr SomasTensor::GetSolverTensorDesc() { - if (type_ == kGap) { // ignore lifelong_ value for gaps given to solver, and pass with original_size_ - solver_tensor_desc_->Update(id_, original_size_, offset_, false, num_constraints_); + if (contiguous_) { + solver_tensor_desc_->Update(id_, aligned_size_, offset_, false, num_constraints_); } else { solver_tensor_desc_->Update(id_, aligned_size_, offset_, lifelong_value_ == kLifeLongGraphAll, num_constraints_); } diff --git a/mindspore/ccsrc/backend/optimizer/somas/somas_tensor.h b/mindspore/ccsrc/backend/optimizer/somas/somas_tensor.h index 9e39d83ebc..e2e59d9b9c 100644 --- a/mindspore/ccsrc/backend/optimizer/somas/somas_tensor.h +++ b/mindspore/ccsrc/backend/optimizer/somas/somas_tensor.h @@ -49,7 +49,6 @@ enum TensorType { kSummaryInput, kRefNodeInput, kRefNodeOutput, - kGap, kUnknown }; @@ -105,7 +104,6 @@ class SomasTensor { bool IsSemiLifelongStart() { return lifelong_value_ == kLifeLongGraphStart; } bool IsSemiLifelongEnd() { return lifelong_value_ == kLifeLongGraphEnd; } bool IsRefOverlap() { return ref_overlap_; } - bool IsGap() { return type_ == kGap; } // Computing functions void SetOffset() {