|
|
|
@@ -37,6 +37,7 @@ |
|
|
|
|
|
|
|
namespace mindspore { |
|
|
|
namespace somas { |
|
|
|
constexpr auto kGapSize = 512; |
|
|
|
std::map<TensorType, std::string> tensor_type_name_map = {{kCommon, "Common"}, |
|
|
|
{kOutputOnly, "OutputOnly"}, |
|
|
|
{kWorkspace, "Workspace"}, |
|
|
|
@@ -44,7 +45,6 @@ std::map<TensorType, std::string> tensor_type_name_map = {{kCommon, "Common"}, |
|
|
|
{kSummaryInput, "SummaryInput"}, |
|
|
|
{kRefNodeInput, "RefNodeInput"}, |
|
|
|
{kRefNodeOutput, "RefNodeOutput"}, |
|
|
|
{kGap, "Gap"}, |
|
|
|
{kUnknown, "Unknown"}}; |
|
|
|
|
|
|
|
bool Somas::Allocate(const session::KernelGraph *graph) { |
|
|
|
@@ -451,62 +451,37 @@ void Somas::UnReuseNodeProcess(const session::KernelGraph *graph) { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
// Creates a fixed-size "gap" placeholder tensor used to pad the front/back of
// a contiguous tensor list. The gap owns no kernel output (null source node
// and stream) and is never lifelong.
//
// @param gap_tensor_id  id to assign to the new gap tensor; the caller is
//                       responsible for advancing its own counter afterwards.
// @return the newly created tensor, already registered in tensors_map_ and
//         appended to tensors_list_.
SomasTensorPtr Somas::CreateGapTensor(size_t gap_tensor_id) {
  // Use the file-level kGapSize constant (512 bytes) instead of a duplicated
  // local magic number; gaps are always exactly one gap-size large.
  auto gap_tensor = std::make_shared<SomasTensor>(gap_tensor_id, nullptr, nullptr, kGapSize, kLifeLongNone);
  gap_tensor->type_ = kGap;
  // aligned_size_ is normally derived from the original size; for gaps it is
  // fixed to kGapSize directly.
  gap_tensor->aligned_size_ = kGapSize;
  // Register the gap in both lookup structures so later passes (conflict
  // computation, offset postprocessing) can find it by id.
  tensors_map_[gap_tensor->GetId()] = gap_tensor;
  tensors_list_.push_back(gap_tensor);
  return gap_tensor;
}
|
|
|
|
|
|
|
void Somas::GenContiguousList(const session::KernelGraph *graph) { |
|
|
|
MS_EXCEPTION_IF_NULL(graph); |
|
|
|
size_t gap_tensor_id = tensors_list_.size(); |
|
|
|
for (const auto &node : nodes_list_) { |
|
|
|
MS_EXCEPTION_IF_NULL(node); |
|
|
|
if (node->GetType() != kCommunicationNode) { |
|
|
|
continue; |
|
|
|
} |
|
|
|
|
|
|
|
// Contiguous input |
|
|
|
if ((!node->input_tensors_.empty()) && (!node->input_tensors_[0]->contiguous_)) { |
|
|
|
node->input_tensors_[0]->aligned_size_ += kGapSize; |
|
|
|
node->input_tensors_[node->input_tensors_.size() - 1]->aligned_size_ += kGapSize; |
|
|
|
std::vector<size_t> inputs; |
|
|
|
auto input_before_gap = CreateGapTensor(gap_tensor_id); |
|
|
|
input_before_gap->contiguous_ = true; |
|
|
|
gap_tensor_id++; |
|
|
|
inputs.push_back(input_before_gap->GetId()); |
|
|
|
|
|
|
|
for (const auto &input_tensor : node->input_tensors_) { |
|
|
|
comm_input_total_size_ += input_tensor->aligned_size_; |
|
|
|
input_tensor->contiguous_ = true; |
|
|
|
inputs.push_back(input_tensor->GetId()); |
|
|
|
} |
|
|
|
|
|
|
|
auto input_after_gap = CreateGapTensor(gap_tensor_id); |
|
|
|
gap_tensor_id++; |
|
|
|
input_after_gap->contiguous_ = true; |
|
|
|
inputs.push_back(input_after_gap->GetId()); |
|
|
|
contiguous_tensors_list_.push_back(inputs); |
|
|
|
} |
|
|
|
|
|
|
|
// Contiguous output |
|
|
|
if ((!node->output_tensors_.empty()) && (!node->output_tensors_[0]->contiguous_)) { |
|
|
|
node->output_tensors_[0]->aligned_size_ += kGapSize; |
|
|
|
node->output_tensors_[node->output_tensors_.size() - 1]->aligned_size_ += kGapSize; |
|
|
|
std::vector<size_t> outputs; |
|
|
|
auto output_before_gap = CreateGapTensor(gap_tensor_id); |
|
|
|
gap_tensor_id++; |
|
|
|
output_before_gap->contiguous_ = true; |
|
|
|
outputs.push_back(output_before_gap->GetId()); |
|
|
|
|
|
|
|
for (const auto &output_tensor : node->output_tensors_) { |
|
|
|
comm_output_total_size_ += output_tensor->aligned_size_; |
|
|
|
output_tensor->contiguous_ = true; |
|
|
|
outputs.push_back(output_tensor->GetId()); |
|
|
|
} |
|
|
|
|
|
|
|
auto output_after_gap = CreateGapTensor(gap_tensor_id); |
|
|
|
gap_tensor_id++; |
|
|
|
output_after_gap->contiguous_ = true; |
|
|
|
outputs.push_back(output_after_gap->GetId()); |
|
|
|
contiguous_tensors_list_.push_back(outputs); |
|
|
|
} |
|
|
|
} |
|
|
|
@@ -553,9 +528,6 @@ void Somas::PreprocessingConflicts() { |
|
|
|
// Atomic: fix any issues on saved lifetimes of tensors |
|
|
|
for (auto tensor : tensors_list_) { |
|
|
|
MS_EXCEPTION_IF_NULL(tensor); |
|
|
|
if (tensor->IsGap()) { |
|
|
|
continue; |
|
|
|
} |
|
|
|
for (auto node : tensor->destinations_) { |
|
|
|
MS_EXCEPTION_IF_NULL(node); |
|
|
|
MS_EXCEPTION_IF_NULL(tensor->GetSourceNode()); |
|
|
|
@@ -581,6 +553,7 @@ void Somas::ComputeConflictPairs() { |
|
|
|
MS_LOG(INFO) << "End Preprocessing Conflicts"; |
|
|
|
|
|
|
|
MS_LOG(INFO) << "Start Conflict Computing (Bitset Model)"; |
|
|
|
|
|
|
|
std::sort(nodes_list_.begin(), nodes_list_.end(), NodeSort); |
|
|
|
|
|
|
|
// Loop to add edges within each stream (node order within stream) |
|
|
|
@@ -651,16 +624,18 @@ void Somas::ComputeConflictPairs() { |
|
|
|
} |
|
|
|
|
|
|
|
for (size_t i = 0; i < tensors_list_.size(); i++) { |
|
|
|
auto t0 = tensors_list_[i]; |
|
|
|
if (t0->IsLifelong() || t0->IsRefOverlap() || t0->GetAlignedSize() == 0) { |
|
|
|
continue; |
|
|
|
} |
|
|
|
size_t t0_src_node = t0->GetSourceNode()->GetId(); |
|
|
|
for (size_t j = i + 1; j < tensors_list_.size(); j++) { |
|
|
|
auto t0 = tensors_list_[i]; |
|
|
|
auto t1 = tensors_list_[j]; |
|
|
|
|
|
|
|
if (t0 == t1 || t0->IsGap() || t1->IsGap() || t0->IsLifelong() || t1->IsLifelong() || t0->IsRefOverlap() || |
|
|
|
t1->IsRefOverlap() || t0->GetAlignedSize() == 0 || t1->GetAlignedSize() == 0) { |
|
|
|
if (t0 == t1 || t1->IsLifelong() || t1->IsRefOverlap() || t1->GetAlignedSize() == 0) { |
|
|
|
continue; |
|
|
|
} |
|
|
|
|
|
|
|
size_t t0_src_node = t0->GetSourceNode()->GetId(); |
|
|
|
size_t t1_src_node = t1->GetSourceNode()->GetId(); |
|
|
|
if (t0_src_node == t1_src_node) { |
|
|
|
continue; |
|
|
|
@@ -879,54 +854,6 @@ bool Somas::Assign(const session::KernelGraph *graph) { |
|
|
|
tensor1->num_constraints_ = count_constraints; |
|
|
|
} |
|
|
|
|
|
|
|
// Preprocessing contiguous gaps |
|
|
|
MS_LOG(INFO) << "Start Contiguous Gaps Preprocessing"; |
|
|
|
for (auto contiguous_list : contiguous_tensors_list_) { |
|
|
|
if (contiguous_list.size() < 3) { |
|
|
|
MS_LOG(ERROR) << "contiguous_list should have at least one input and two gap, now it is " |
|
|
|
<< contiguous_list.size(); |
|
|
|
} |
|
|
|
size_t front_gap_id = contiguous_list[0]; |
|
|
|
size_t back_gap_id = contiguous_list[contiguous_list.size() - 1]; |
|
|
|
|
|
|
|
SomasTensorPtr front_gap = tensors_map_[front_gap_id]; |
|
|
|
SomasTensorPtr back_gap = tensors_map_[back_gap_id]; |
|
|
|
MS_EXCEPTION_IF_NULL(front_gap); |
|
|
|
MS_EXCEPTION_IF_NULL(back_gap); |
|
|
|
|
|
|
|
// Update conflicts to conflicts of neighbour |
|
|
|
size_t front_neighbour_id = contiguous_list[1]; |
|
|
|
size_t back_neighbour_id = contiguous_list[contiguous_list.size() - 2]; |
|
|
|
for (SomasTensorPtr tensor : tensors_list_) { |
|
|
|
MS_EXCEPTION_IF_NULL(tensor); |
|
|
|
if (tensor_relation[tensor->GetId()].IsBitTrue(front_neighbour_id) == false) { |
|
|
|
tensor_relation[tensor->GetId()].SetBitFalse(front_gap_id); |
|
|
|
tensor_relation[front_gap_id].SetBitFalse(tensor->GetId()); |
|
|
|
} else { |
|
|
|
tensor_relation[tensor->GetId()].SetBitTrue(front_gap_id); |
|
|
|
tensor_relation[front_gap_id].SetBitTrue(tensor->GetId()); |
|
|
|
} |
|
|
|
if (tensor_relation[tensor->GetId()].IsBitTrue(back_neighbour_id) == false) { |
|
|
|
tensor_relation[tensor->GetId()].SetBitFalse(back_gap_id); |
|
|
|
tensor_relation[back_gap_id].SetBitFalse(tensor->GetId()); |
|
|
|
} else { |
|
|
|
tensor_relation[tensor->GetId()].SetBitTrue(back_gap_id); |
|
|
|
tensor_relation[back_gap_id].SetBitTrue(tensor->GetId()); |
|
|
|
} |
|
|
|
} |
|
|
|
SomasTensorPtr front_neighbour = tensors_map_[front_neighbour_id]; |
|
|
|
SomasTensorPtr back_neighbour = tensors_map_[back_neighbour_id]; |
|
|
|
MS_EXCEPTION_IF_NULL(front_neighbour); |
|
|
|
MS_EXCEPTION_IF_NULL(back_neighbour); |
|
|
|
front_gap->num_constraints_ = front_neighbour->num_constraints_; |
|
|
|
front_gap->lifetime_.start_ = front_neighbour->lifetime_.end_; |
|
|
|
front_gap->lifetime_.end_ = front_neighbour->lifetime_.end_; |
|
|
|
back_gap->num_constraints_ = back_neighbour->num_constraints_; |
|
|
|
back_gap->lifetime_.start_ = back_neighbour->lifetime_.end_; |
|
|
|
back_gap->lifetime_.end_ = back_neighbour->lifetime_.end_; |
|
|
|
} |
|
|
|
MS_LOG(INFO) << "End Contiguous Gaps Preprocessing"; |
|
|
|
|
|
|
|
// Prepare solver info |
|
|
|
MS_LOG(INFO) << "Start Loop to create solver info"; |
|
|
|
for (auto tensor : tensors_list_) { |
|
|
|
@@ -977,6 +904,11 @@ bool Somas::Assign(const session::KernelGraph *graph) { |
|
|
|
} |
|
|
|
MS_LOG(INFO) << "\nEnd Solving Postprocessing for Ref Node"; |
|
|
|
|
|
|
|
// Contiguous gaps postprocessing |
|
|
|
for (auto list : contiguous_tensors_list_) { |
|
|
|
tensors_map_[list[0]]->offset_ += kGapSize; |
|
|
|
} |
|
|
|
|
|
|
|
// Set mem_offset_ value by solver result |
|
|
|
mem_offset_ = static_cast<size_t>(somas_solver_->GetMaxOffset()); |
|
|
|
|
|
|
|
@@ -1108,12 +1040,11 @@ void Somas::DumpOfflineIR(const string filename) { |
|
|
|
} |
|
|
|
|
|
|
|
for (auto tensor : tensors_list_) { |
|
|
|
if (tensor->IsGap()) continue; |
|
|
|
if (tensor->IsOutputOnly()) { |
|
|
|
if (tensor->IsOutputOnly() || tensor->type_ == TensorType::kRefNodeOutput) { |
|
|
|
ofs << "Somas EDGE ERROR src=n" << tensor->GetSourceNode()->GetId() |
|
|
|
<< ", srcstm=" << tensor->GetSourceStream()->GetId() << ", dst=nc" |
|
|
|
<< ", dststm=nc" |
|
|
|
<< ", workspace=0, size=" << tensor->GetAlignedSize() |
|
|
|
<< ", workspace=0, size=" << tensor->GetOriginalSize() |
|
|
|
<< ", lifelong=" << static_cast<int>(tensor->lifelong_value_) << ", tid=" << tensor->GetId() |
|
|
|
<< ", start=" << tensor->lifetime_.start_ << ", end=" << tensor->lifetime_.end_ << std::endl; |
|
|
|
} else { |
|
|
|
@@ -1126,24 +1057,15 @@ void Somas::DumpOfflineIR(const string filename) { |
|
|
|
ofs << "Somas EDGE src=n" << tensor->GetSourceNode()->GetId() |
|
|
|
<< ", srcstm=" << tensor->GetSourceStream()->GetId() << ", dst=n" << dest_info.first |
|
|
|
<< ", dststm=" << dest_info.second << ", workspace=" << static_cast<int>(tensor->type_ == kWorkspace) |
|
|
|
<< ", size=" << tensor->GetAlignedSize() << ", lifelong=" << static_cast<int>(tensor->lifelong_value_) |
|
|
|
<< ", size=" << tensor->GetOriginalSize() << ", lifelong=" << static_cast<int>(tensor->lifelong_value_) |
|
|
|
<< ", tid=" << tensor->GetId() << ", start=" << tensor->lifetime_.start_ |
|
|
|
<< ", end=" << tensor->lifetime_.end_ << std::endl; |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
for (vector<size_t> tList : contiguous_tensors_list_) { |
|
|
|
ofs << "Somas CONTIGUOUS "; |
|
|
|
// ignore front and back gaps |
|
|
|
for (size_t i = 1; i < tList.size() - 1; ++i) { |
|
|
|
if (tensors_map_[tList[i]]->IsGap()) { |
|
|
|
ofs << "INPUT"; |
|
|
|
break; |
|
|
|
} |
|
|
|
if (i == tList.size() - 2) ofs << "OUTPUT"; |
|
|
|
} |
|
|
|
ofs << "Somas CONTIGUOUS"; |
|
|
|
for (size_t tid : tList) { |
|
|
|
if (tensors_map_[tid]->IsGap()) continue; |
|
|
|
ofs << " " << tid; |
|
|
|
} |
|
|
|
ofs << std::endl; |
|
|
|
|