From 0f43012a9e3efa0def09d408088ebcbff273ab41 Mon Sep 17 00:00:00 2001 From: TangQunzhang Date: Mon, 11 Jan 2021 11:01:41 +0800 Subject: [PATCH] Continuous memory optimization, code refactoring --- .../build/memory/binary_block_mem_assigner.cc | 4 +- ge/graph/build/memory/block_mem_assigner.cc | 249 ++++-- ge/graph/build/memory/block_mem_assigner.h | 54 +- ge/graph/build/memory/graph_mem_assigner.cc | 798 ++++++------------ ge/graph/build/memory/graph_mem_assigner.h | 20 +- ge/graph/build/memory/graph_mem_assigner.zip | Bin 0 -> 30655 bytes 6 files changed, 450 insertions(+), 675 deletions(-) create mode 100644 ge/graph/build/memory/graph_mem_assigner.zip diff --git a/ge/graph/build/memory/binary_block_mem_assigner.cc b/ge/graph/build/memory/binary_block_mem_assigner.cc index fff589f3..97a0aed6 100644 --- a/ge/graph/build/memory/binary_block_mem_assigner.cc +++ b/ge/graph/build/memory/binary_block_mem_assigner.cc @@ -69,8 +69,8 @@ Status BinaryBlockMemAssigner::GetMemoryRanges(vector &range_ceils) { GELOGW("Vector all_memory_size is empty!"); return SUCCESS; } - if ((all_memory_size.front() == 0) || (log(kLogBase) == 0)) { - GELOGE(FAILED, "dividend is 0!"); + if ((all_memory_size.front() <= 0) || (log(kLogBase) == 0)) { + GELOGE(FAILED, "Memory size:%ld is invalid.", all_memory_size.front()); return FAILED; } // Memory size is 512 aligned, so it is not necessary to take less than 512 diff --git a/ge/graph/build/memory/block_mem_assigner.cc b/ge/graph/build/memory/block_mem_assigner.cc index 76e7efbe..d4b63c82 100755 --- a/ge/graph/build/memory/block_mem_assigner.cc +++ b/ge/graph/build/memory/block_mem_assigner.cc @@ -65,10 +65,7 @@ void AlignMemOffset(size_t &mem_align_size) { } static bool CompareLifeTime(const NodeTypeIndex &left, const NodeTypeIndex &right) { - auto left_node_op_desc = left.node->GetOpDesc(); - auto right_node_op_desc = right.node->GetOpDesc(); - if ((left_node_op_desc != nullptr) && (right_node_op_desc != nullptr) - && 
(left_node_op_desc->GetId() < right_node_op_desc->GetId())) { + if (left.GetLifeBegin() < right.GetLifeBegin()) { return true; } return false; @@ -100,14 +97,14 @@ bool CrossLifeTime(const NodeTypeIndex &left, const NodeTypeIndex &right) { auto left_node_op_desc = left.node->GetOpDesc(); auto right_node_op_desc = right.node->GetOpDesc(); if ((left_node_op_desc != nullptr) && (right_node_op_desc != nullptr)) { - if (left_node_op_desc->GetId() < right_node_op_desc->GetId()) { - if (left.life_time_end >= static_cast(right_node_op_desc->GetId())) { + if (left.GetLifeBegin() < right.GetLifeBegin()) { + if (left.life_time_end >= right.GetLifeBegin()) { return true; } - } else if (left_node_op_desc->GetId() == right_node_op_desc->GetId()) { + } else if (left.GetLifeBegin() == right.GetLifeBegin()) { return true; } else { - if (right.life_time_end >= static_cast(left_node_op_desc->GetId())) { + if (right.life_time_end >= left.GetLifeBegin()) { return true; } } @@ -325,12 +322,7 @@ void MemoryBlock::AddLifeReuseBlock(MemoryBlock *block, DependStreamLife &total_ size_t MemoryBlock::GetLifeBegin() { size_t life_time = 0; if (!node_type_index_list_.empty()) { - if (node_type_index_list_.front().node != nullptr) { - auto node_op_desc = node_type_index_list_.front().node->GetOpDesc(); - if (node_op_desc != nullptr) { - life_time = node_op_desc->GetId(); - } - } + life_time = node_type_index_list_.front().GetLifeBegin(); } return life_time; } @@ -417,7 +409,7 @@ void MemoryBlock::AddDependLifeBegin(DependStreamLife &total_node_depend_stream_ depend_stream_life_[stream_id_] = GetLifeBegin(); } -size_t MemoryBlock::GetLifeEnd() { +size_t MemoryBlock::GetLifeEnd() const { if (!node_type_index_list_.empty()) { return node_type_index_list_.back().life_time_end; } @@ -571,32 +563,29 @@ void BlockMemAssigner::GetOutAndWorkSpaceMem(vector &all_memory_size) { for (auto &out_anchor : n->GetAllOutDataAnchors()) { GeTensorDesc output_desc = node_op_desc->GetOutputDesc(out_anchor->GetIdx()); 
- bool reuse_input = false; - GE_IF_BOOL_EXEC(ge::TensorUtils::GetReuseInput(output_desc, reuse_input) != SUCCESS, - GELOGI("Get reuse_input failed")); - - if (!reuse_input) { - int64_t size = 0; - GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(output_desc, size) != SUCCESS, GELOGI("Get size failed")); - batch_all_memory_size[batch_label].emplace_back(size); - if (batch_total_size.find(batch_label) == batch_total_size.end()) { - batch_total_size[batch_label] = size; - } else { - batch_total_size[batch_label] += size; - } + int64_t size = 0; + GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(output_desc, size) != SUCCESS, GELOGI("Get size failed")); + GE_IF_BOOL_EXEC(size < 0, GELOGE(FAILED, "Node:%s size:%ld is invalid, maybe it is unknown shape node.", + node_op_desc->GetName().c_str(), size); + return;); + batch_all_memory_size[batch_label].emplace_back(size); + if (batch_total_size.find(batch_label) == batch_total_size.end()) { + batch_total_size[batch_label] = size; + } else { + batch_total_size[batch_label] += size; + } - if (!anchor_to_symbol_.empty()) { - auto iter1 = anchor_to_symbol_.find(NodeIndexIO(n, out_anchor->GetIdx(), kOut).ToString()); - if (iter1 == anchor_to_symbol_.end()) { - continue; - } - const std::string &symbol = iter1->second; - auto iter2 = symbol_size_.find(symbol); - if (iter2 == symbol_size_.end()) { - symbol_size_[symbol] = size; - } else if (size > static_cast(iter2->second)) { - iter2->second = size; - } + if (!anchor_to_symbol_.empty()) { + auto iter1 = anchor_to_symbol_.find(NodeIndexIO(n, out_anchor->GetIdx(), kOut).ToString()); + if (iter1 == anchor_to_symbol_.end()) { + continue; + } + const std::string &symbol = iter1->second; + auto iter2 = symbol_size_.find(symbol); + if (iter2 == symbol_size_.end()) { + symbol_size_[symbol] = size; + } else if (size > static_cast(iter2->second)) { + iter2->second = size; } } } @@ -637,35 +626,17 @@ bool IsDirectOutputNode(const NodePtr &node, int idx) { return false; } -void AddReusableBlockCount(const 
MemoryBlock &mem_block, map &reusable_block_counts) { - string key = std::to_string(mem_block.Size()); - key += "_" + std::to_string(mem_block.stream_id_); - key += "_" + std::to_string(mem_block.memory_type_); - auto it = reusable_block_counts.find(key); - if (it != reusable_block_counts.end()) { - it->second++; - } else { - reusable_block_counts[key] = 1; - } -} - -void ReduceReusableBlockCount(const MemoryBlock &mem_block, map &reusable_block_counts) { - string key = std::to_string(mem_block.Size()); - key += "_" + std::to_string(mem_block.stream_id_); - key += "_" + std::to_string(mem_block.memory_type_); - auto it = reusable_block_counts.find(key); - if (it != reusable_block_counts.end()) { - if (it->second > 0) { - it->second--; - } - } -} - -bool CanReuseBySize(const map &reusable_block_counts, const MemoryBlock &reusable_block, - size_t block_size, size_t real_size, bool continuous) { +bool CanReuseBlock(size_t continuous_life_begin, const MemoryBlock &reusable_block, size_t block_size) { bool can_reuse = false; if (reusable_block.Size() == block_size) { - can_reuse = true; + // in some continuous input case, continuous first input node's is not same as topo first node. 
+ if (continuous_life_begin > 0) { + if (continuous_life_begin > reusable_block.GetLifeEnd()) { + can_reuse = true; + } + } else { + can_reuse = true; + } } return can_reuse; } @@ -676,6 +647,13 @@ bool BlockMemAssigner::IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t ou if (n == nullptr || n->GetAllOutDataAnchors().size() <= 0) { return false; } + auto node_desc = n->GetOpDesc(); + GE_IF_BOOL_EXEC(node_desc == nullptr, GELOGE(FAILED, "Node[%s] nodedesc is null.", n->GetName().c_str()); + return false;); + std::vector offsets_for_fusion = {}; + bool has_lx_fusion_attr = + AttrUtils::GetListInt(node_desc, ATTR_NAME_OUTPUT_OFFSET_FOR_BUFFER_FUSION, offsets_for_fusion); + if (static_cast(out_index) < n->GetAllOutDataAnchors().size()) { auto out_anchor = n->GetOutDataAnchor(out_index); GE_IF_BOOL_EXEC(out_anchor == nullptr, @@ -698,16 +676,17 @@ bool BlockMemAssigner::IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t ou return false;); // If GetBool fail, is_input_continuous is false. 
- bool is_input_continuous_no_padding = false; - (void)ge::AttrUtils::GetBool(peer_in_node_desc, ATTR_NAME_NOPADDING_CONTINUOUS_INPUT, - is_input_continuous_no_padding); - if (is_input_continuous_no_padding) { + (void)ge::AttrUtils::GetBool(peer_in_node_desc, ATTR_NAME_NOPADDING_CONTINUOUS_INPUT, is_input_continuous); + if (is_input_continuous) { reset_zero_copy_flag = true; - return false; + has_lx_fusion_attr = true; + } else { + (void)ge::AttrUtils::GetBool(peer_in_node_desc, ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous); } - (void)ge::AttrUtils::GetBool(peer_in_node_desc, ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous); - GE_IF_BOOL_EXEC(is_input_continuous && CheckIsZeroMemNodeType(peer_node->GetType()), + // lx_fusion memory only assign first input, broadcast's input some are variable some are not, reassign later + GE_IF_BOOL_EXEC(is_input_continuous && + (CheckIsZeroMemNodeType(peer_node->GetType()) || (has_lx_fusion_attr && (peer_in_anchor->GetIdx() != 0))), GELOGI("Node[%s] output[%u] no_need_assign_memory.", n->GetName().c_str(), out_index); no_need_assign_memory = true; return false;); @@ -721,6 +700,10 @@ bool BlockMemAssigner::IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t ou // Only set attr one times. 
if (node_continuous_input_blocks_[peer_in_node_desc->GetName()].size() == 0) { (void)ge::AttrUtils::SetBool(peer_in_node_desc, ATTR_NAME_CONTINUOUS_INPUT_ALLOC, true); + // lx fusion case assign max size for first block, so reuse as none continuous + GE_IF_BOOL_EXEC(has_lx_fusion_attr, + is_op_reuse_mem_ = IsContinuousMemoryReuse(n, peer_node, out_index); + return false;); node_continuous_input_counts_[peer_in_node_desc->GetName()] = peer_node->GetAllInDataAnchorsSize(); } peer_input_index = peer_in_anchor->GetIdx(); @@ -733,6 +716,95 @@ bool BlockMemAssigner::IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t ou return false; } +bool IsContinuousInputNodeMaxLife(const NodePtr &n, uint32_t out_index) { + if (n == nullptr) { + return false; + } + + int64_t max_node_life_time = 0; + int64_t continuous_input_node_life_time = 0; + if (static_cast(out_index) < n->GetAllOutDataAnchors().size()) { + auto out_anchor = n->GetOutDataAnchor(out_index); + if(out_anchor == nullptr) { + return false; + } + + // continuous input node's life time should be max + for (auto const &peer_in_anchor : out_anchor->GetPeerInDataAnchors()) { + if ((peer_in_anchor == nullptr) || (peer_in_anchor->GetOwnerNode() == nullptr)){ + return false; + } + auto peer_in_node_desc = peer_in_anchor->GetOwnerNode()->GetOpDesc(); + GE_IF_BOOL_EXEC(peer_in_node_desc == nullptr, + GELOGE(FAILED, "Node[%s] output[%u] peer in node desc is null.", n->GetName().c_str(), out_index); + return false;); + + if(peer_in_node_desc->GetId() > max_node_life_time) { + max_node_life_time = peer_in_node_desc->GetId(); + } + + // If GetBool fail, is_input_continuous is false. 
+ bool is_input_continuous = false; + (void)ge::AttrUtils::GetBool(peer_in_node_desc, ATTR_NAME_NOPADDING_CONTINUOUS_INPUT, is_input_continuous); + if (!is_input_continuous) { + (void)ge::AttrUtils::GetBool(peer_in_node_desc, ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous); + } + if (is_input_continuous) { + continuous_input_node_life_time = peer_in_node_desc->GetId(); + } + } + } + return ((max_node_life_time != 0) && (continuous_input_node_life_time == max_node_life_time)) ; +} + +/// +/// @ingroup GE +/// @brief Check continuous memory reusable +/// @return bool +/// +bool BlockMemAssigner::IsContinuousMemoryReuse(const NodePtr &n, const NodePtr &peer_node, uint32_t out_index) { + // n,peer_node_desc have been checked + auto node_desc = n->GetOpDesc(); + auto peer_node_desc = peer_node->GetOpDesc(); + continuous_life_begin_ = static_cast(node_desc->GetId()); + // lx fusion case check all continuous input node, first input node's life time should be min + for (const auto &in_anchor : peer_node->GetAllInDataAnchors()) { + if ((in_anchor == nullptr) || (in_anchor->GetPeerOutAnchor() == nullptr) || + (in_anchor->GetPeerOutAnchor()->GetOwnerNode() == nullptr) || + (in_anchor->GetPeerOutAnchor()->GetOwnerNode()->GetOpDesc() == nullptr)) { + GELOGE(FAILED, "Node[%s] output[%u] peer input node desc is null.", n->GetName().c_str(), out_index); + return false; + } + auto peer_out_node_desc = in_anchor->GetPeerOutAnchor()->GetOwnerNode()->GetOpDesc(); + /// + /// node2 node1 node3 + /// | / / | + /// node5 node6 + /// first input node's life time is not min + /// when node5's first input node2's life time is not min(node2 > node1), use node1's life time to reuse + /// + if (peer_out_node_desc->GetId() < continuous_life_begin_) { + continuous_life_begin_ = static_cast(peer_out_node_desc->GetId()); + GELOGI( + "Node[%s] life[%ld] output[%u] is not continuous input node[%s] life[%ld]'s min life time," + "min is node[%s] life[%zu]", + n->GetName().c_str(), node_desc->GetId(), 
out_index, peer_node_desc->GetName().c_str(), + peer_node_desc->GetId(), peer_out_node_desc->GetName().c_str(), continuous_life_begin_); + } + // when node3's output node5's life time is not max(node6 > node5), not reuse + if (!IsContinuousInputNodeMaxLife(in_anchor->GetPeerOutAnchor()->GetOwnerNode(), + in_anchor->GetPeerOutAnchor()->GetIdx())) { + GELOGI( + "Node[%s] life[%ld] output[%u]'s continuous input node[%s] life[%ld]'s is not node[%s] output[%d]'s " + "max life node", + n->GetName().c_str(), node_desc->GetId(), out_index, peer_node_desc->GetName().c_str(), + peer_node_desc->GetId(), peer_out_node_desc->GetName().c_str(), in_anchor->GetPeerOutAnchor()->GetIdx()); + return false; + } + } + return true; +} + /// /// @ingroup GE /// @brief Check pre_reuse flag & post_reuse glag for each symbol @@ -1018,8 +1090,9 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, GE_IF_BOOL_EXEC(reusable_block->batch_label_ != batch_label, continue); // A node can reuse blocks of the same stream and preorder streams - if (CanReuseBySize(reusable_block_counts_, *reusable_block, block_size, real_size, continuous)) { - reusable_block->AddNodeTypeIndex({n, mem_type, out_index, false}, real_size, no_align_size); + if (CanReuseBlock(continuous_life_begin_, *reusable_block, block_size)) { + reusable_block->AddNodeTypeIndex({n, mem_type, out_index, false, continuous_life_begin_}, + real_size, no_align_size); if (mem_type == kOutput) { auto iter = anchor_to_symbol_.find(NodeIndexIO(n, out_index, kOut).ToString()); if (iter != anchor_to_symbol_.end()) { @@ -1028,7 +1101,6 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, } reusable_block->continuous_block_ = continuous; reusable_block->ref_count_++; - ReduceReusableBlockCount(*reusable_block, reusable_block_counts_); reusable_blocks_[memory_type][stream_id].erase((++it).base()); return reusable_block; } @@ -1041,8 +1113,7 @@ MemoryBlock 
*BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, // Data and netoutput need zero copy block block->is_zero_copy_ = IsZeroCopyBlock(n, continuous); - - block->Init(real_size, mem_type, n, out_index, no_align_size, node_op_desc->GetStreamId()); + block->AddNodeTypeIndex({n, mem_type, out_index, false, continuous_life_begin_}, real_size, no_align_size); block->stream_id_ = node_op_desc->GetStreamId(); block->ref_count_++; block->continuous_block_ = continuous; @@ -1142,7 +1213,12 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, std::string symbol; if (IsSymbolExist(node_index_io, symbol)) { block = symbol_blocks_[symbol]; - block->AddNodeTypeIndex({n, kOutput, index, true}, size, no_align_size); + GE_IF_BOOL_EXEC(block == nullptr, GELOGE(FAILED, "Node %s ref block is nullptr.", node_op_desc->GetName().c_str()); + return nullptr); + auto block_size = GetBlockSize(size, ranges); + block->SetSize(block_size); + block->SetLifeTimeEnd(life_time_); + block->AddNodeTypeIndex({n, kOutput, index, true, continuous_life_begin_}, size, no_align_size); block->ref_count_++; } else { int64_t max_size = size; @@ -1196,7 +1272,6 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, GE_IF_BOOL_EXEC(ge::TensorUtils::GetReuseInputIndex(*owner_node_op_desc, dst_reuse_input_index) != SUCCESS, GELOGI("Get dst_reuse_input_index failed")); if (dst_reuse_input && (dst_reuse_input_index == static_cast(in_anchor->GetIdx()))) { - block->AddNodeTypeIndex({owner_node, kOutput, i, true}, block->Size(), block->Size()); out_count_reuse_input += 1; reuse_input = true; } @@ -1237,7 +1312,7 @@ bool IsAtomicOutputMemory(const ge::NodePtr &node, uint32_t output_index, bool i if (static_cast(index) == output_index) { if (node->GetOwnerComputeGraph() != nullptr) { string graph_name = node->GetOwnerComputeGraph()->GetName(); - GELOGD("[IMAS]Atomic no assign %s name[%s] output[%ld] streamid[%ld].", graph_name.c_str(), + 
GELOGD("Atomic no assign %s name[%s] output[%ld] streamid[%ld].", graph_name.c_str(), op_desc->GetName().c_str(), index, op_desc->GetStreamId()); } return true; @@ -1275,7 +1350,6 @@ void BlockMemAssigner::ReleaseMemory(MemoryBlock *to_release, vectorsame_stream_) { to_release->SetLifeTimeEnd(life_time_); reusable_memory.emplace_back(to_release); - AddReusableBlockCount(*to_release, reusable_block_counts_); } } } @@ -1375,6 +1449,7 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector } is_op_reuse_mem_ = true; + continuous_life_begin_ = 0; if (op_reuse_env_valid_ == true) { vector::iterator it_name = std::find(op_no_reuse_mem_vec_.begin(), op_no_reuse_mem_vec_.end(), op_desc->GetName()); @@ -1426,7 +1501,7 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector continue; } // atomic can't be reused - bool need_change = is_op_reuse_mem_ && out_node_set_continuous_input && is_atomic; + bool need_change = is_op_reuse_mem_ && is_atomic; if (need_change) { is_op_reuse_mem_ = false; } @@ -1820,10 +1895,10 @@ void SetOffsetSize(const NodeTypeIndex &node_type, const MemoryBlock *block, op_desc->SetWorkspace(workspace_list); } GELOGI("[IMAS]Set %s name[%s] %s[%u] offset to [%ld] streamid[%ld] size[%zu] realsize[%zu] noalignsize[%zu] " - "life time begin[%zu] life time end[%zu] child[%d:%d:%d:%d:%d] isref[%d] batch[%s]", graph_name.c_str(), + "life time begin[%s] life time end[%zu] child[%d:%d:%d:%d:%d] isref[%d] batch[%s]", graph_name.c_str(), op_desc->GetName().c_str(), node_type.GetMemType().c_str(), node_type.index, offset, op_desc->GetStreamId(), - block->Size(), real_size, no_align_size, op_desc->GetId(), end, child_block_level, block->reuse_mem_, - block->continuous_block_, block->is_zero_copy_, block->same_stream_, node_type.ref_input, + block->Size(), real_size, no_align_size, node_type.GetLifeBeginDesc().c_str(), end, child_block_level, + block->reuse_mem_, block->continuous_block_, block->is_zero_copy_, 
block->same_stream_, node_type.ref_input, block->batch_label_.c_str()); } diff --git a/ge/graph/build/memory/block_mem_assigner.h b/ge/graph/build/memory/block_mem_assigner.h index 58bcda75..78584078 100755 --- a/ge/graph/build/memory/block_mem_assigner.h +++ b/ge/graph/build/memory/block_mem_assigner.h @@ -39,14 +39,15 @@ using DependStreamLife = std::map>; enum OpMemoryType { kOutput, kWorkspace }; struct NodeTypeIndex { - NodeTypeIndex(ge::NodePtr node, OpMemoryType mem_type, uint32_t index, bool ref_input = false) - : node(std::move(node)), mem_type(mem_type), index(index), ref_input(ref_input) {} + NodeTypeIndex(ge::NodePtr node, OpMemoryType mem_type, uint32_t index, bool ref_input = false, size_t begin = 0) + : node(std::move(node)), mem_type(mem_type), index(index), ref_input(ref_input), life_time_begin(begin) {} ge::NodePtr node = nullptr; OpMemoryType mem_type = kOutput; uint32_t index = 0; - size_t life_time_end = kMaxLifeTime; bool ref_input = false; + size_t life_time_begin = 0; + size_t life_time_end = kMaxLifeTime; const string GetMemType() const { if (mem_type == kOutput) { return "output"; @@ -55,6 +56,34 @@ struct NodeTypeIndex { } return "unknown"; } + + size_t GetLifeBegin() const { + if ((node == nullptr) || (node->GetOpDesc() == nullptr)) { + return 0; + } + + if ((life_time_begin > 0) && (life_time_begin < static_cast(node->GetOpDesc()->GetId()))) { + return life_time_begin; + } else { + return node->GetOpDesc()->GetId(); + } + } + + std::string GetLifeBeginDesc() const { + if (node == nullptr) { + return ""; + } + auto node_op_desc = node->GetOpDesc(); + if (node_op_desc != nullptr) { + auto life_begin = GetLifeBegin(); + if (life_begin != static_cast(node_op_desc->GetId())) { + return std::to_string(life_begin) + "-" + std::to_string(node_op_desc->GetId()); + } else { + return std::to_string(node_op_desc->GetId()); + } + } + return ""; + } }; class MemoryBlock { @@ -86,16 +115,13 @@ class MemoryBlock { symbol_list_.clear(); } - void 
Init(size_t real_size, OpMemoryType type, const ge::NodePtr &node, uint32_t out_index, size_t no_align_size, - int64_t stream_id) { - real_size_list_.emplace_back(real_size); - no_align_size_list_.emplace_back(no_align_size); - node_type_index_list_.emplace_back(node, type, out_index, false); - if (stream_id != stream_id_) { - same_stream_ = false; + size_t Size() const { return block_size_; } + + void SetSize(size_t size) { + if (size > block_size_) { + block_size_ = size; } } - size_t Size() const { return block_size_; } size_t AlignSize() const; @@ -143,7 +169,7 @@ class MemoryBlock { size_t GetLifeBegin(); - size_t GetLifeEnd(); + size_t GetLifeEnd() const; void AddDependLifeBegin(DependStreamLife &node_depend_stream_life); @@ -406,6 +432,7 @@ class BlockMemAssigner : public MemAssigner { bool IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t out_index, std::string &peer_name, uint32_t &peer_input_index, bool &no_need_assign_memory, bool &reset_zero_copy_flag); + bool IsContinuousMemoryReuse(const NodePtr &n, const NodePtr &peer_node, uint32_t out_index); /// /// @ingroup GE /// @|+++++++++block1++++++++| |+++++++++block1++++++++| @@ -425,8 +452,6 @@ class BlockMemAssigner : public MemAssigner { std::unordered_map>> reusable_blocks_; - std::map reusable_block_counts_; - std::unordered_map>> stream_workspace_blocks_; std::unordered_map> node_out_blocks_; @@ -456,6 +481,7 @@ class BlockMemAssigner : public MemAssigner { std::string max_batch_label_; + size_t continuous_life_begin_ = 0; /// /// @ [stream1][nodeid] /// @[nodeid] [stream2][nodeid] diff --git a/ge/graph/build/memory/graph_mem_assigner.cc b/ge/graph/build/memory/graph_mem_assigner.cc index 98d073d4..2f211001 100755 --- a/ge/graph/build/memory/graph_mem_assigner.cc +++ b/ge/graph/build/memory/graph_mem_assigner.cc @@ -35,10 +35,9 @@ namespace { const int kAllInputAddrIsAtomic = -1; const int kVirtualInputNodeMemoryReuse = 0; const int kVirtualOutputNodeMemoryReuse = 1; -const size_t 
kVirtualInputNodeOutputSize = 1; -const size_t kVirtualOutputNodeInputSize = 1; -const size_t kVirtualNodeDataIndex = 0; -const char *const kMbatchNodeNameFlag = "_ascend_mbatch_batch_"; +// One state per bit cannot be repeated +enum ContinuousType { kNotContinuous = 0, kInput = 1, kInputNoPadding = 2, kOutput = 4, kOutputNoPadding = 8 }; + int64_t GetSymbolOutputOffset(const std::map &anchor_to_symbol, const std::map> &symbol_to_anchors, const ge::NodePtr &node, const uint32_t i) { @@ -136,7 +135,7 @@ ge::Status GraphMemoryAssigner::AssignVarAttr2Nodes() { return ge::SUCCESS; } -ge::Status GraphMemoryAssigner::CalculateTensorRealSizeAndOutSize(const ge::ConstGeTensorDescPtr &output_desc, +ge::Status CalculateTensorRealSizeAndOutSize(const ge::ConstGeTensorDescPtr &output_desc, int64_t dim_index, int64_t &output_mem_size, int64_t &batch_dim_num, int64_t &out_size) { graphStatus graph_status = ge::TensorUtils::GetSize(*output_desc, out_size); @@ -181,68 +180,6 @@ ge::Status GraphMemoryAssigner::CalculateTensorRealSizeAndOutSize(const ge::Cons return SUCCESS; } -Status GraphMemoryAssigner::GetMaxBatchLabel(const map> &mem_reuse_virtual_nodes_map, - int32_t mem_reuse_model, string &max_batch_label) { - for (auto &i_map : mem_reuse_virtual_nodes_map) { - vector virtual_nodes_list = i_map.second; - vector max_shape_dims; - size_t max_batch_dim = 0; - bool max_batch_dim_find = false; - for (size_t i = 0; i < virtual_nodes_list.size(); ++i) { - GE_CHECK_NOTNULL(virtual_nodes_list[i]); - OpDescPtr op_desc = virtual_nodes_list[i]->GetOpDesc(); - GE_CHECK_NOTNULL(op_desc); - - ge::ConstGeTensorDescPtr input_output_desc; - if (mem_reuse_model == kVirtualInputNodeMemoryReuse) { - input_output_desc = op_desc->GetOutputDescPtr(kVirtualNodeDataIndex); - } else if (mem_reuse_model == kVirtualOutputNodeMemoryReuse) { - input_output_desc = op_desc->GetInputDescPtr(kVirtualNodeDataIndex); - } else { - std::string error = "Invalid parameter memory reuse model, which is " + 
FmtToStr(mem_reuse_model); - GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); - return FAILED; - } - GE_CHECK_NOTNULL(input_output_desc); - - if (i == 0) { - // All ops must have ATTR_NAME_BATCH_LABEL, no need to check return value. - (void) ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, max_batch_label); - max_shape_dims = input_output_desc->GetShape().GetDims(); - } else { - vector current_shape_dims = input_output_desc->GetShape().GetDims(); - if (current_shape_dims.size() != max_shape_dims.size()) { - std::string error = "The shape of several nodes between multiple batches does not match."; - GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); - return FAILED; - } - for (size_t j = 0; j < current_shape_dims.size(); ++j) { - if (current_shape_dims[j] == max_shape_dims[j]) { - continue; - } - if (max_batch_dim_find && max_batch_dim != j) { - std::string error = "The shape of several nodes between multiple batches does not match."; - GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); - return FAILED; - } - max_batch_dim_find = true; - max_batch_dim = j; - if (current_shape_dims[j] > max_shape_dims[j]) { - max_shape_dims[j] = current_shape_dims[j]; - // All ops must have ATTR_NAME_BATCH_LABEL, no need to check return value. - (void) ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, max_batch_label); - } - // Only compare the first different dim in shape. - break; - } - } - } - // In every element of virtual_input_nodes_map, the label of the max batch node is the same. 
- break; - } - return SUCCESS; -} - Status GraphMemoryAssigner::ReAssignMemory(bool is_loop_graph, map &mem_type_to_offset) { if (memory_offset_.empty()) { GELOGE(FAILED, "memory_offset_ is empty."); @@ -250,13 +187,6 @@ Status GraphMemoryAssigner::ReAssignMemory(bool is_loop_graph, map &mem_offse return SUCCESS; } +uint32_t GetContinuousMemoryType(const OpDescPtr &op_desc) { + if(op_desc == nullptr) { + return kNotContinuous; + }; + + bool is_continuous = false; + uint32_t continuous_type = kNotContinuous; + // If GetBool fail, is_continuous is false. + (void) ge::AttrUtils::GetBool(op_desc, ATTR_NAME_CONTINUOUS_INPUT, is_continuous); + if (is_continuous) { + continuous_type |= kInput; + } else { + (void) ge::AttrUtils::GetBool(op_desc, ATTR_NAME_NOPADDING_CONTINUOUS_INPUT, is_continuous); + if(is_continuous) { + bool attr_reuse = false; + (void) ge::AttrUtils::GetBool(op_desc, ATTR_NAME_OUTPUT_REUSE_INPUT, attr_reuse); + if (attr_reuse) { + continuous_type |= kInputNoPadding; + } + } + } + + is_continuous = false; + (void) ge::AttrUtils::GetBool(op_desc, ATTR_NAME_CONTINUOUS_OUTPUT, is_continuous); + if (is_continuous) { + continuous_type |= kOutput; + } else { + (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_NOPADDING_CONTINUOUS_OUTPUT, is_continuous); + if (is_continuous) { + bool attr_reuse = false; + (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_OUTPUT_REUSE_INPUT, attr_reuse); + if (attr_reuse) { + continuous_type |= kOutputNoPadding; + } + } + } + return continuous_type; +} + +Status GetMemorySize(const OpDescPtr &op_desc, const ge::ConstGeTensorDescPtr &output_desc, uint32_t continuous_type, + int64_t &tensor_size, int64_t &nopadding_size) { + if ((op_desc == nullptr) || (output_desc == nullptr)) { + GELOGE(FAILED, "Input para is nullptr."); + return FAILED; + } + tensor_size = 0; + nopadding_size = 0; + bool is_nopadding = ((continuous_type & kInputNoPadding) != 0) || ((continuous_type & kOutputNoPadding) != 0); + if (is_nopadding) { + int64_t 
attr_dim_index; + bool get_attr_dim_flag = ge::AttrUtils::GetInt(op_desc, ATTR_NAME_REUSE_INPUT_ON_DIM_INDEX, attr_dim_index); + if (!get_attr_dim_flag) { + GELOGE(FAILED, "Get attr _reuse_input_on_dim_index failed."); + return FAILED; + } + + // Calculate tensor real size of each piece of data and out size of complete data + int64_t batch_dim_num = 1; + if (CalculateTensorRealSizeAndOutSize(output_desc, attr_dim_index, nopadding_size, batch_dim_num, tensor_size) != + SUCCESS) { + GELOGE(FAILED, "CalculateTensorRealSizeAndOutSize failed for node %s.", op_desc->GetName().c_str()); + return FAILED; + } + } else { + if (ge::TensorUtils::GetSize(*output_desc, tensor_size) != ge::SUCCESS) { + GELOGE(FAILED, "GetSize failed."); + return FAILED; + } + } + if ((tensor_size < 0) || (nopadding_size < 0)) { + GELOGE(FAILED, "GetMemorySize for node %s failed.", op_desc->GetName().c_str()); + return FAILED; + } + return SUCCESS; +} + +void AlignMemOffset(int64_t &mem_align_size) { + if (mem_align_size <= 0) { + return; + } + mem_align_size = (mem_align_size + MEM_ALIGN_SIZE - 1) / MEM_ALIGN_SIZE * MEM_ALIGN_SIZE; +} + +bool IsContinuousInputConflict(const ge::NodePtr &node, const OpDescPtr &peer_op_desc) { + bool is_peer_output_continuous = false; + // If GetBool fail, is_peer_output_continuous is false. + (void) ge::AttrUtils::GetBool(peer_op_desc, ATTR_NAME_CONTINUOUS_OUTPUT, is_peer_output_continuous); + + // Get peer node output size, if size == 1(peer node has only one output), continuous input of the node and + // continuous output of the previous node is the same, we can support it. If size != 1, there may be + // conflict between the two, we can not support it. 
+ auto peer_output_size = peer_op_desc->GetOutputsSize(); + GE_IF_BOOL_EXEC(is_peer_output_continuous && (peer_output_size != 1), + std::string error = "Current op" + FmtToStr(node->GetOpDesc()->GetName()) + + " requires continuous input, while the previous op" + FmtToStr(peer_op_desc->GetName()) + + " requires continuous output. There may be conflict between the two." + + "This node is not supported now."; + GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); + return true;); + + bool is_peer_reference = false; + // If GetBool fail, is_peer_reference is false. + (void) AttrUtils::GetBool(peer_op_desc, ATTR_NAME_REFERENCE, is_peer_reference); + GE_IF_BOOL_EXEC(is_peer_reference, + std::string error = "Current op" + FmtToStr(node->GetOpDesc()->GetName()) + + " requires continuous input, while the previous op" + FmtToStr(peer_op_desc->GetName()) + + " requires continuous output. There may be conflict between the two." + + "This node is not supported now."; + GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); + return true;); + return false; +} + Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) { Status ret; for (auto &node : compute_graph_->GetAllNodes()) { - // Get the continuous input type of the node, default is false - bool is_input_continuous = false; - GE_CHECK_NOTNULL(node->GetOpDesc()); - // If GetBool fail, is_input_continuous is false. 
- (void) ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous); + GE_CHECK_NOTNULL(node); + auto continuous_type = GetContinuousMemoryType(node->GetOpDesc()); // Assign continuous input memory - if (is_input_continuous) { + bool is_continuous_input = ((continuous_type & kInput) != 0) || ((continuous_type & kInputNoPadding) != 0); + if (is_continuous_input) { int64_t memory_type = RT_MEMORY_HBM; GE_CHK_STATUS_RET(GetNodeMemoryType(node, memory_type, "input"), "Get node memory type failed."); int64_t mem_clean_start = 0; int64_t mem_clean_size = 0; - ret = AssignContinuousInputMemory(node, mem_clean_start, mem_clean_size, memory_type); + ret = AssignContinuousInputMemory(node, mem_clean_start, mem_clean_size, memory_type, continuous_type); if (ret != ge::SUCCESS) { GELOGE(ret, "Assign continuous input memory failed!"); return ret; @@ -338,7 +379,6 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) { vector input_indexes; // If GetListInt fail, input_indexes is empty. 
(void) ge::AttrUtils::GetListInt(node->GetOpDesc(), ATOMIC_ATTR_INPUT_INDEX, input_indexes); - if (!input_indexes.empty() && input_indexes[0] == kAllInputAddrIsAtomic) { // check whether there is an atomic conflict between the current node and the peer out node if (!CheckInputIsSupportAtomic(node)) { @@ -350,6 +390,7 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) { const auto &in_control_anchor = node->GetInControlAnchor(); GE_CHECK_NOTNULL(in_control_anchor); for (const auto &peer_out_control_anchor : in_control_anchor->GetPeerOutControlAnchors()) { + GE_CHECK_NOTNULL(peer_out_control_anchor); auto peer_out_node = peer_out_control_anchor->GetOwnerNode(); if (peer_out_node->GetType() == ATOMICADDRCLEAN) { ret = SetAtomicCleanAttr(peer_out_node, {mem_clean_start}, {mem_clean_size}); @@ -362,23 +403,12 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) { } } - // Get the reference type of the node, default is false - bool is_ref = false; - // If GetBool fail, is_ref is false. - (void) ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_REFERENCE, is_ref); - - // Get the continuous output type of the node, default is false - bool is_output_continuous = false; - // If GetBool fail, is_output_continuous is false. - (void) ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_CONTINUOUS_OUTPUT, is_output_continuous); - - // If the output is ref type and refers to the ref of an input, the name of the output - // and the input are the same. 
Ge encounters ref type, finds matching relationship according - // to the names of input and output, and allocates the same memory address, eg: HCOMBroadcast - if (!is_ref && is_output_continuous) { // Assign continuous output memory - ret = AssignContinuousOutputMemory(node); + // Assign continuous output memory + bool is_continuous_output = ((continuous_type & kOutput) != 0) || ((continuous_type & kOutputNoPadding) != 0); + if (is_continuous_output) { + ret = AssignContinuousOutputMemory(node, continuous_type); if (ret != ge::SUCCESS) { - GELOGE(ret, "Assign reference memory failed!"); + GELOGE(ret, "Assign continuous output memory failed!"); return ret; } } @@ -391,94 +421,39 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) { } Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node, int64_t &continuous_mem_start, - int64_t &continuous_mem_size, int64_t memory_type) { + int64_t &continuous_mem_size, int64_t memory_type, uint32_t continuous_type) { GELOGI("Current node %s needs continuous input.", node->GetName().c_str()); - bool continuous_input_alloc = false; - (void) ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_CONTINUOUS_INPUT_ALLOC, continuous_input_alloc); auto iter = memory_offset_.find(memory_type); if (iter == memory_offset_.end()) { std::string error = "Memory offset does not have memory type" + FmtToStr(memory_type); GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); return FAILED; } + // The head and tail of hcom continuous input should be added 512 + iter->second.mem_offset_ += MEM_ALIGN_SIZE; continuous_mem_start = iter->second.mem_offset_; + int64_t mem_offset = iter->second.mem_offset_; + int64_t extra_memory_size = 0; + bool is_continuous_input_allocated = false; + (void) ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_CONTINUOUS_INPUT_ALLOC, is_continuous_input_allocated); for (auto &in_data_anchor : node->GetAllInDataAnchors()) { + GE_IF_BOOL_EXEC(in_data_anchor == nullptr, continue); 
auto peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor(); GE_IF_BOOL_EXEC(peer_out_data_anchor == nullptr, continue); - auto peer_op_desc = peer_out_data_anchor->GetOwnerNode()->GetOpDesc(); GE_IF_BOOL_EXEC(peer_op_desc == nullptr, continue); - bool is_peer_output_continuous = false; - // If GetBool fail, is_peer_output_continuous is false. - (void) ge::AttrUtils::GetBool(peer_op_desc, ATTR_NAME_CONTINUOUS_OUTPUT, is_peer_output_continuous); - - // Get peer node output size, if size == 1(peer node has only one output), continuous input of the node and - // continuous output of the previous node is the same, we can support it. If size != 1, there may be - // conflict between the two, we can not support it. - auto peer_output_size = peer_op_desc->GetOutputsSize(); - GE_IF_BOOL_EXEC(is_peer_output_continuous && (peer_output_size != 1), - std::string error = "Current op" + FmtToStr(node->GetOpDesc()->GetName()) + - " requires continuous input, while the previous op" + FmtToStr(peer_op_desc->GetName()) + - " requires continuous output. There may be conflict between the two." + - "This node is not supported now."; - GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); - return PARAM_INVALID;); - - bool is_peer_reference = false; - // If GetBool fail, is_peer_reference is false. - (void) AttrUtils::GetBool(peer_op_desc, ATTR_NAME_REFERENCE, is_peer_reference); - GE_IF_BOOL_EXEC(is_peer_reference, - std::string error = "Current op" + FmtToStr(node->GetOpDesc()->GetName()) + - " requires continuous input, while the previous op" + FmtToStr(peer_op_desc->GetName()) + - " requires continuous output. There may be conflict between the two." 
+ - "This node is not supported now."; - GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); - return PARAM_INVALID;); - - vector output_list = peer_op_desc->GetOutputOffset(); - std::vector offsets_for_fusion = {}; - bool has_offset_attr = - AttrUtils::GetListInt(peer_op_desc, ATTR_NAME_OUTPUT_OFFSET_FOR_BUFFER_FUSION, offsets_for_fusion); - if (peer_out_data_anchor->GetIdx() < static_cast(output_list.size())) { - if (continuous_input_alloc && !has_offset_attr) { - if (in_data_anchor->GetIdx() == 0) { - continuous_mem_start = output_list.at(peer_out_data_anchor->GetIdx()); - } - // can not use else if, incase only one input - if (in_data_anchor->GetIdx() == static_cast(node->GetAllInDataAnchors().size()) - 1) { - int64_t tensor_desc_size = 0; - Status ret = ge::TensorUtils::GetSize(*(peer_op_desc->GetOutputDescPtr(peer_out_data_anchor->GetIdx())), - tensor_desc_size); - GE_IF_BOOL_EXEC(ret != ge::SUCCESS, GELOGE(FAILED, "GetSize failed."); return FAILED;); - - tensor_desc_size = (tensor_desc_size + MEM_ALIGN_SIZE - 1) / MEM_ALIGN_SIZE * MEM_ALIGN_SIZE; - continuous_mem_size = - output_list.at(peer_out_data_anchor->GetIdx()) - continuous_mem_start + tensor_desc_size + MEM_ALIGN_SIZE; - } - GELOGI( - "[IMAS]Check Continuous input : Set %s name[%s] output[%d] offset to [%ld] stream_id[%ld] size[%u] " - "real_size[%u].", - node->GetOwnerComputeGraph()->GetName().c_str(), peer_op_desc->GetName().c_str(), - peer_out_data_anchor->GetIdx(), output_list.at(peer_out_data_anchor->GetIdx()), peer_op_desc->GetStreamId(), - 0, 0); - continue; - } - - output_list.at(peer_out_data_anchor->GetIdx()) = iter->second.mem_offset_; - } else { - std::string error = "index" + FmtToStr(peer_out_data_anchor->GetIdx()) + " is out of range."; - GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); - GELOGE(FAILED, "index : %d is out of range.", peer_out_data_anchor->GetIdx()); - return FAILED; - } - peer_op_desc->SetOutputOffset(output_list); - size_t pre_mem_offset = iter->second.mem_offset_; + 
GE_IF_BOOL_EXEC(IsContinuousInputConflict(node, peer_op_desc), return PARAM_INVALID;); int64_t tensor_desc_size = 0; - if (has_offset_attr) { - if (peer_out_data_anchor->GetIdx() < static_cast(offsets_for_fusion.size())) { - auto offset_for_fusion = offsets_for_fusion[peer_out_data_anchor->GetIdx()]; - iter->second.mem_offset_ += offset_for_fusion; + int64_t nopadding_size = 0; + int64_t real_size = 0; + std::vector offsets_of_fusion = {}; + bool lx_fusion = AttrUtils::GetListInt(peer_op_desc, ATTR_NAME_OUTPUT_OFFSET_FOR_BUFFER_FUSION, offsets_of_fusion); + lx_fusion = lx_fusion && !offsets_of_fusion.empty(); + if (lx_fusion) { + if (peer_out_data_anchor->GetIdx() < static_cast(offsets_of_fusion.size())) { + nopadding_size = offsets_of_fusion[peer_out_data_anchor->GetIdx()]; + tensor_desc_size = nopadding_size; } else { std::string error = "fusion: peer node" + FmtToStr(peer_op_desc->GetName()) + " index" + FmtToStr(peer_out_data_anchor->GetIdx()) + " is out of range."; @@ -486,425 +461,140 @@ Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node, return FAILED; } } else { - Status ret = - TensorUtils::GetSize(*(peer_op_desc->GetOutputDescPtr(peer_out_data_anchor->GetIdx())), tensor_desc_size); - GE_IF_BOOL_EXEC(ret != ge::SUCCESS, GELOGE(FAILED, "GetSize failed."); return FAILED;); - - iter->second.mem_offset_ += tensor_desc_size; - } - - // If set tensor_actual_size, Memory alignment is not required. 
- int32_t is_tensor_actual_size = 0; - ge::AttrUtils::GetInt(peer_op_desc, ATTR_NAME_GET_TENSOR_ACTUAL_SIZE, is_tensor_actual_size); - if (is_tensor_actual_size == 0) { - AlignMemOffset(MEM_ALIGN_SIZE, memory_type); + if (GetMemorySize(node->GetOpDesc(), peer_op_desc->GetOutputDescPtr(peer_out_data_anchor->GetIdx()), + continuous_type, tensor_desc_size, nopadding_size) != ge::SUCCESS) { + return FAILED; + } } - GELOGI( - "[IMAS]Continuous input : Set %s name[%s] output[%d] offset to [%zu] stream_id[%ld] size[%zu] " - "real_size[%ld].", node->GetOwnerComputeGraph()->GetName().c_str(), peer_op_desc->GetName().c_str(), - peer_out_data_anchor->GetIdx(), pre_mem_offset, peer_op_desc->GetStreamId(), - (iter->second.mem_offset_ - pre_mem_offset), tensor_desc_size); - } - iter->second.mem_offset_ += MEM_ALIGN_SIZE; - if (!continuous_input_alloc) { - continuous_mem_size = iter->second.mem_offset_ - continuous_mem_start; - } - return SUCCESS; -} - -Status GraphMemoryAssigner::AssignContinuousOutputMemory(const ge::NodePtr &node) { - GELOGI("Current node %s needs continuous output.", node->GetName().c_str()); - auto out_op_desc = node->GetOpDesc(); - GE_IF_BOOL_EXEC(out_op_desc == nullptr, GELOGE(ge::FAILED, "out_op_desc is null."); return ge::FAILED); - vector output_list = out_op_desc->GetOutputOffset(); - - if ((out_op_desc->GetOutputsSize() > output_list.size()) || (output_list.size() == 0)) { - GELOGE(ge::FAILED, "The size %zu of node output desc is more than output_list's size %zu.", - out_op_desc->GetOutputsSize(), output_list.size()); - return ge::FAILED; - } - - size_t mem_offset = output_list[0]; - for (auto &out_data_anchor : node->GetAllOutDataAnchors()) { - output_list[out_data_anchor->GetIdx()] = mem_offset; - int64_t tensor_desc_size = 0; - if (ge::TensorUtils::GetSize(*(out_op_desc->GetOutputDescPtr(out_data_anchor->GetIdx())), tensor_desc_size) != - ge::SUCCESS) { - GELOGE(FAILED, "GetSize failed."); - return FAILED; - } - mem_offset += tensor_desc_size; - if 
(mem_offset <= 0) { + bool is_nopadding = ((continuous_type & kInputNoPadding) != 0) || lx_fusion; + vector output_list = peer_op_desc->GetOutputOffset(); + if (peer_out_data_anchor->GetIdx() >= static_cast(output_list.size())) { + std::string error = "index" + FmtToStr(peer_out_data_anchor->GetIdx()) + " is out of range."; + GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); return FAILED; } - mem_offset = (mem_offset + MEM_ALIGN_SIZE - 1) / MEM_ALIGN_SIZE * MEM_ALIGN_SIZE; - GELOGI( - "[IMAS]Continuous output : Set %s name[%s] output[%d] offset to [%zu] stream_id[%ld] size[%ld] " - "real_size[%ld].", - node->GetOwnerComputeGraph()->GetName().c_str(), out_op_desc->GetName().c_str(), out_data_anchor->GetIdx(), - output_list[out_data_anchor->GetIdx()], out_op_desc->GetStreamId(), tensor_desc_size, tensor_desc_size); - } - out_op_desc->SetOutputOffset(output_list); - return ge::SUCCESS; -} - -Status GraphMemoryAssigner::ReAssignVirtualInputNodeMemory(NodePtr node, size_t &mem_offset_reuse) { - OpDescPtr op_desc = node->GetOpDesc(); - vector output_list = op_desc->GetOutputOffset(); - if (output_list.empty()) { - GELOGE(FAILED, "Outputoffset is empty node name:%s", node->GetName().c_str()); - return FAILED; - } - output_list.at(0) = mem_offset_reuse; - op_desc->SetOutputOffset(output_list); - GELOGI("Set virtual input node %s output offset to %zu.", op_desc->GetName().c_str(), mem_offset_reuse); - int64_t attr_dim_index; - bool get_attr_dim_flag = ge::AttrUtils::GetInt(op_desc, ATTR_NAME_REUSE_INPUT_ON_DIM_INDEX, attr_dim_index); - if (!get_attr_dim_flag) { - GELOGE(FAILED, "Get attr _reuse_input_on_dim_index failed."); - return FAILED; - } - - size_t extra_memory_size = 0; - for (const auto &in_data_anchor : node->GetAllInDataAnchors()) { - auto peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor(); - GE_CHECK_NOTNULL(peer_out_data_anchor); - auto peer_op_desc = peer_out_data_anchor->GetOwnerNode()->GetOpDesc(); - GE_CHECK_NOTNULL(peer_op_desc); - vector 
output_offsets = peer_op_desc->GetOutputOffset(); - if (peer_out_data_anchor->GetIdx() >= static_cast(output_offsets.size())) { - GELOGE(ge::FAILED, "Index : %d is out of range.", peer_out_data_anchor->GetIdx()); - return ge::FAILED; + // when continuous input has been allocated first input is beginning offset + bool is_allocated_first_input = is_continuous_input_allocated && (in_data_anchor->GetIdx() == 0); + if (is_allocated_first_input) { + mem_offset = output_list.at(peer_out_data_anchor->GetIdx()); + continuous_mem_start = output_list.at(peer_out_data_anchor->GetIdx()); + } else { + // set offset for input + output_list.at(peer_out_data_anchor->GetIdx()) = mem_offset; + peer_op_desc->SetOutputOffset(output_list); } - output_offsets.at(peer_out_data_anchor->GetIdx()) = mem_offset_reuse; - peer_op_desc->SetOutputOffset(output_offsets); - size_t pre_mem_offset = mem_offset_reuse; - // Calculate tensor real size of each piece of data and out size of complete data - ge::ConstGeTensorDescPtr output_desc = peer_op_desc->GetOutputDescPtr(peer_out_data_anchor->GetIdx()); - GE_CHECK_NOTNULL(output_desc); - int64_t output_mem_size; - int64_t batch_dim_num = 1; - int64_t out_size; - if (CalculateTensorRealSizeAndOutSize(output_desc, attr_dim_index, output_mem_size, batch_dim_num, out_size) != - SUCCESS) { - GELOGE(FAILED, "CalculateTensorRealSizeAndOutSize failed for node %s output [%d].", - peer_op_desc->GetName().c_str(), peer_out_data_anchor->GetIdx()); - return FAILED; + int64_t align_size = tensor_desc_size; + if (is_nopadding) { + mem_offset += nopadding_size; + extra_memory_size += (tensor_desc_size - nopadding_size); + real_size = nopadding_size; + } else { + ge::AlignMemOffset(align_size); + mem_offset += align_size; + // The head and tail of hcom continuous input should be added 512 + extra_memory_size = MEM_ALIGN_SIZE; + real_size = tensor_desc_size; } - mem_offset_reuse += output_mem_size; - extra_memory_size = extra_memory_size + out_size - output_mem_size; - 
- GELOGI("[IMAS]Virtual node optimize: set %s name[%s] output[%d] offset to [%zu] stream_id[%ld] size[%ld] " - "real_size[%ld].", - node->GetOwnerComputeGraph()->GetName().c_str(), peer_op_desc->GetName().c_str(), - peer_out_data_anchor->GetIdx(), pre_mem_offset, peer_op_desc->GetStreamId(), out_size, - output_mem_size); + GELOGI("[IMAS]Continuous input : Set %s name[%s] output[%d] offset to [%zu] stream_id[%ld] size[%zu] realsize[%ld]" + " nopadding[%d].", node->GetOwnerComputeGraph()->GetName().c_str(), peer_op_desc->GetName().c_str(), + peer_out_data_anchor->GetIdx(), output_list.at(peer_out_data_anchor->GetIdx()), peer_op_desc->GetStreamId(), + is_continuous_input_allocated ? 0UL : align_size, real_size, is_nopadding); } - mem_offset_reuse += extra_memory_size; - size_t after_mem_offset = mem_offset_reuse; - GELOGI("After reassign virtual input node[name: %s, type: %s] memory, memory offset = %zu.", - op_desc->GetName().c_str(), op_desc->GetType().c_str(), after_mem_offset); - return SUCCESS; -} -Status GraphMemoryAssigner::ReAssignReuseAndNoPaddingContinuousInputMemory() { - map> mem_reuse_virtual_input_nodes_map; - int64_t memory_type = RT_MEMORY_HBM; - for (const auto &n : compute_graph_->GetAllNodes()) { - OpDescPtr op_desc = n->GetOpDesc(); - GE_CHECK_NOTNULL(op_desc); - bool attr_continuous = false; - bool get_continuous_flag = ge::AttrUtils::GetBool(op_desc, ATTR_NAME_NOPADDING_CONTINUOUS_INPUT, attr_continuous); - GE_IF_BOOL_EXEC(!get_continuous_flag, continue); - bool attr_reuse = false; - bool get_reuse_flag = ge::AttrUtils::GetBool(op_desc, ATTR_NAME_OUTPUT_REUSE_INPUT, attr_reuse); - GE_IF_BOOL_EXEC(!get_reuse_flag, continue); - if (attr_reuse && attr_continuous) { - if (op_desc->GetOutputsSize() != kVirtualInputNodeOutputSize) { - // When current virtual node has several outputs, can't directly determine which input is the tensor for reuse. 
- std::string error = "Only one output is supported, current virtual node" + FmtToStr(n->GetName()) + - " has " + FmtToStr(op_desc->GetOutputsSize()) + " outputs."; - GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); - return FAILED; - } - GE_CHK_STATUS_RET(GetNodeMemoryType(n, memory_type, "input"), "Get node memory type failed."); - auto iter = memory_offset_.find(memory_type); - if (iter == memory_offset_.end()) { - std::string error = "Memory offset does not have memory type" + FmtToStr(memory_type); - GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); - return FAILED; - } - GELOGD("Start to reassign memory for virtual input node, memory offset = %zu, memory type = %ld.", - iter->second.mem_offset_, memory_type); - string batch_label_string; - // Not all ops have ATTR_NAME_BATCH_LABEL, no need to check return value, only check out parameter - (void) ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label_string); - if (batch_label_string.empty()) { - size_t node_mem_offset = iter->second.mem_offset_; - // No ATTR_NAME_BATCH_LABEL, no need to reuse memory. - Status status = ReAssignVirtualInputNodeMemory(n, node_mem_offset); - if (status != SUCCESS) { - GELOGE(FAILED, "Reassign memory of virtual input node failed, node name: %s.", n->GetName().c_str()); - return FAILED; - } - - iter->second.mem_offset_ = node_mem_offset; - AlignMemOffset(MEM_ALIGN_SIZE, memory_type); - GELOGD("After reassign memory for virtual input node, align memory = %zu, memory type = %ld.", - iter->second.mem_offset_, memory_type); - } else { - // Has ATTR_NAME_BATCH_LABEL, for dynamic multi-batch node, need to reuse memory. 
- string current_node_full_name = op_desc->GetName(); - size_t pos = current_node_full_name.find(kMbatchNodeNameFlag); - if (pos == string::npos) { - GELOGE(FAILED, "Cannot find key string [%s] of multi-batch in name of virtual input node, node name: %s.", - kMbatchNodeNameFlag, n->GetName().c_str()); - return FAILED; - } - string fixed_name = current_node_full_name.substr(0, pos); - vector parallel_virtual_input_nodes; - if (mem_reuse_virtual_input_nodes_map.count(fixed_name) != 0) { - parallel_virtual_input_nodes = mem_reuse_virtual_input_nodes_map[fixed_name]; - } - parallel_virtual_input_nodes.emplace_back(n); - mem_reuse_virtual_input_nodes_map[fixed_name] = parallel_virtual_input_nodes; - } - } - } - - int32_t mem_reuse_model = 0; - if (ReAssignVirtualNodesMemory(mem_reuse_virtual_input_nodes_map, mem_reuse_model) != SUCCESS) { - GELOGE(FAILED, "Reassign memory of virtual input nodes failed."); - return FAILED; + mem_offset += extra_memory_size; + ge::AlignMemOffset(mem_offset); + continuous_mem_size = mem_offset - continuous_mem_start; + if (is_continuous_input_allocated) { + // not allocate memory here, so no need add 512 in header + iter->second.mem_offset_ -= MEM_ALIGN_SIZE; + } else { + iter->second.mem_offset_ = mem_offset; } return SUCCESS; } -Status GraphMemoryAssigner::ReAssignVirtualOutputNodeMemory(NodePtr node, size_t &mem_offset_reuse) { - OpDescPtr op_desc = node->GetOpDesc(); - - // 1. 
set memory of to be reused input tensor +Status GetFirstInputPeerOutOutputOffset(const ge::NodePtr &node, int64_t &mem_offset) { auto in_data_anchor_list = node->GetAllInDataAnchors(); + if (in_data_anchor_list.empty()) { + GELOGE(FAILED, "Node %s's in data anchor is empty.", node->GetName().c_str()); + return FAILED; + } auto peer_out_data_anchor = in_data_anchor_list.at(0)->GetPeerOutAnchor(); - GE_CHECK_NOTNULL(peer_out_data_anchor); + GE_IF_BOOL_EXEC(peer_out_data_anchor == nullptr, GELOGE(ge::FAILED, "peer_out_data_anchor is null."); + return ge::FAILED); auto peer_op_desc = peer_out_data_anchor->GetOwnerNode()->GetOpDesc(); - GE_CHECK_NOTNULL(peer_op_desc); + GE_IF_BOOL_EXEC(peer_op_desc == nullptr, GELOGE(ge::FAILED, "peer_op_desc is null."); return ge::FAILED); vector in_node_output_offsets = peer_op_desc->GetOutputOffset(); if (peer_out_data_anchor->GetIdx() >= static_cast(in_node_output_offsets.size())) { GELOGE(FAILED, "Index : %d is out of range.", peer_out_data_anchor->GetIdx()); return FAILED; } - in_node_output_offsets.at(peer_out_data_anchor->GetIdx()) = mem_offset_reuse; - peer_op_desc->SetOutputOffset(in_node_output_offsets); - GELOGI("Set virtual output node %s input data offset to %zu.", op_desc->GetName().c_str(), mem_offset_reuse); + mem_offset = in_node_output_offsets.at(peer_out_data_anchor->GetIdx()); + return SUCCESS; +} - // 2. 
set memory of output tensor - vector output_list = op_desc->GetOutputOffset(); - if (output_list.empty()) { - GELOGE(FAILED, "Outputoffset is empty, node name: %s", node->GetName().c_str()); - return FAILED; - } - if (op_desc->GetOutputsSize() > output_list.size()) { - GELOGE(FAILED, "The size %zu of op_desc is more than output_list's size %zu.", op_desc->GetOutputsSize(), - output_list.size()); - return FAILED; - } - int64_t attr_dim_index; - bool get_attr_dim_flag = ge::AttrUtils::GetInt(op_desc, ATTR_NAME_REUSE_INPUT_ON_DIM_INDEX, attr_dim_index); - if (!get_attr_dim_flag) { - GELOGE(FAILED, "Get attr _reuse_input_on_dim_index failed."); - return FAILED; +Status GraphMemoryAssigner::AssignContinuousOutputMemory(const ge::NodePtr &node, uint32_t continuous_type) { + GELOGI("Current node %s needs continuous output.", node->GetName().c_str()); + auto out_op_desc = node->GetOpDesc(); + GE_IF_BOOL_EXEC(out_op_desc == nullptr, GELOGE(ge::FAILED, "out_op_desc is null."); return ge::FAILED); + vector output_list = out_op_desc->GetOutputOffset(); + if ((out_op_desc->GetOutputsSize() > output_list.size()) || (output_list.size() == 0)) { + GELOGE(ge::FAILED, "The size %zu of node output desc is more than output_list's size %zu.", + out_op_desc->GetOutputsSize(), output_list.size()); + return ge::FAILED; } - size_t extra_memory_size = 0; - for (auto &out_data_anchor : node->GetAllOutDataAnchors()) { - output_list[out_data_anchor->GetIdx()] = mem_offset_reuse; - size_t pre_mem_offset = mem_offset_reuse; - - // calculate tensor real size of each piece of data and out size of complete data - ge::ConstGeTensorDescPtr output_desc = op_desc->GetOutputDescPtr(out_data_anchor->GetIdx()); - GE_CHECK_NOTNULL(output_desc); - int64_t output_mem_size; - int64_t batch_dim_num = 1; - int64_t out_size; - if (CalculateTensorRealSizeAndOutSize(output_desc, attr_dim_index, output_mem_size, batch_dim_num, out_size) != - SUCCESS) { - GELOGE(FAILED, "CalculateTensorRealSizeAndOutSize failed for 
node %s output [%d].", - op_desc->GetName().c_str(), out_data_anchor->GetIdx()); - return FAILED; + int64_t mem_offset = 0; + bool is_nopadding = ((continuous_type & kOutputNoPadding) != 0); + if (is_nopadding) { + // out tensor memory must be reused input tensor memory + if (GetFirstInputPeerOutOutputOffset(node, mem_offset) != SUCCESS) { + return ge::FAILED; } + } else { + // Get the reference type of the node, default is false + bool is_ref = false; + // If GetBool fail, is_ref is false. + (void) ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_REFERENCE, is_ref); - mem_offset_reuse += output_mem_size; - extra_memory_size = extra_memory_size + out_size - output_mem_size; - - GELOGI("[IMAS]Virtual node optimize: set %s name[%s] output[%d] offset to [%zu], size[%ld], real_size[%ld].", - node->GetOwnerComputeGraph()->GetName().c_str(), op_desc->GetName().c_str(), out_data_anchor->GetIdx(), - pre_mem_offset, out_size, output_mem_size); - } - op_desc->SetOutputOffset(output_list); - mem_offset_reuse += extra_memory_size; - size_t after_mem_offset = mem_offset_reuse; - GELOGI("After reassign virtual output node[name: %s, type: %s] memory, memory offset = %zu.", - op_desc->GetName().c_str(), op_desc->GetType().c_str(), after_mem_offset); - return SUCCESS; -} - -Status GraphMemoryAssigner::ReAssignReuseAndNoPaddingContinuousOutputMemory() { - map> mem_reuse_virtual_output_nodes_map; - int64_t memory_type = RT_MEMORY_HBM; - for (const auto &n : compute_graph_->GetAllNodes()) { - OpDescPtr op_desc = n->GetOpDesc(); - GE_CHECK_NOTNULL(op_desc); - bool attr_continuous = false; - bool get_continuous_flag = ge::AttrUtils::GetBool(op_desc, ATTR_NAME_NOPADDING_CONTINUOUS_OUTPUT, attr_continuous); - GE_IF_BOOL_EXEC(!get_continuous_flag, continue); - bool attr_reuse = false; - bool get_reuse_flag = ge::AttrUtils::GetBool(op_desc, ATTR_NAME_OUTPUT_REUSE_INPUT, attr_reuse); - GE_IF_BOOL_EXEC(!get_reuse_flag, continue); - - if (attr_reuse && attr_continuous) { - auto 
in_data_anchor_list = n->GetAllInDataAnchors(); - if (in_data_anchor_list.size() != kVirtualOutputNodeInputSize) { - // When current virtual node has several inputs, can't directly determine which input is the tensor for reuse. - std::string error = "Only one input is supported, current virtual node" + FmtToStr(n->GetName()) + - " has " + FmtToStr(in_data_anchor_list.size()) + " inputs."; - GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); - return FAILED; - } - GE_CHK_STATUS_RET(GetNodeMemoryType(n, memory_type, "output"), "Get node memory type failed."); - auto iter = memory_offset_.find(memory_type); - if (iter == memory_offset_.end()) { - std::string error = "Memory offset does not have memory type" + FmtToStr(RT_MEMORY_HBM); - GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); - return FAILED; - } - GELOGD("Start to reassign memory for virtual output node, memory offset = %zu, memory type = %ld.", - iter->second.mem_offset_, memory_type); - string batch_label_string; - // Not all ops have ATTR_NAME_BATCH_LABEL, no need to check return value, only check out parameter - (void) ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label_string); - if (batch_label_string.empty()) { - size_t node_mem_offset = iter->second.mem_offset_; - // No ATTR_NAME_BATCH_LABEL, no need to reuse memory. - Status status = ReAssignVirtualOutputNodeMemory(n, node_mem_offset); - if (status != SUCCESS) { - GELOGE(FAILED, "Reassign memory of virtual output node failed, node name: %s.", n->GetName().c_str()); - return FAILED; - } - iter->second.mem_offset_ = node_mem_offset; - AlignMemOffset(MEM_ALIGN_SIZE, memory_type); - GELOGD("After reassign memory for virtual output node, align memory = %zu, memory type = %ld.", - iter->second.mem_offset_, memory_type); - } else { - // Has ATTR_NAME_BATCH_LABEL, for dynamic multi-batch node, need to reuse memory. 
- string current_node_full_name = op_desc->GetName(); - size_t pos = current_node_full_name.find(kMbatchNodeNameFlag); - if (pos == string::npos) { - std::string error = "Cannot find key string" + FmtToStr(kMbatchNodeNameFlag) + - " of multi-batch in name of virtual output node, the node name is " + FmtToStr(n->GetName()); - GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); - return FAILED; - } - string fixed_name = current_node_full_name.substr(0, pos); - vector parallel_virtual_output_nodes; - if (mem_reuse_virtual_output_nodes_map.count(fixed_name) != 0) { - parallel_virtual_output_nodes = mem_reuse_virtual_output_nodes_map[fixed_name]; - } - parallel_virtual_output_nodes.emplace_back(n); - mem_reuse_virtual_output_nodes_map[fixed_name] = parallel_virtual_output_nodes; - } + // If the output is ref type and refers to the ref of an input, the name of the output + // and the input are the same. Ge encounters ref type, finds matching relationship according + // to the names of input and output, and allocates the same memory address, eg: HCOMBroadcast + if (is_ref) { + GELOGI("Current node %s no needs assign continuous output because reference input by name.", + node->GetName().c_str()); + return SUCCESS; } + mem_offset = output_list[0]; } - int32_t mem_reuse_model = 1; - if (ReAssignVirtualNodesMemory(mem_reuse_virtual_output_nodes_map, mem_reuse_model) != SUCCESS) { - GELOGE(FAILED, "Reassign memory of virtual output nodes failed."); - return FAILED; - } - return SUCCESS; -} - -Status GraphMemoryAssigner::ReAssignVirtualNodesMemory(map> &mem_reuse_nodes_map, - int32_t mem_reuse_model) { - // Find max batch label value - string max_batch_label; - GE_CHK_STATUS_RET(GetMaxBatchLabel(mem_reuse_nodes_map, mem_reuse_model, max_batch_label), - "Get max batch label failed."); - PrintMemoryOffset(); - vector nodes_mem_offset_list; - for (auto &i_map : mem_reuse_nodes_map) { - vector virtual_nodes_list = i_map.second; - int64_t memory_type = RT_MEMORY_HBM; - 
GE_CHK_STATUS_RET(GetNodeListMemoryType(virtual_nodes_list, mem_reuse_model, memory_type), - "Get node list memory type failed."); - auto iter = memory_offset_.find(memory_type); - if (iter == memory_offset_.end()) { - std::string error = "Memory offset does not have memory type" + FmtToStr(RT_MEMORY_HBM); - GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); + for (auto &out_data_anchor : node->GetAllOutDataAnchors()) { + output_list[out_data_anchor->GetIdx()] = mem_offset; + int64_t tensor_desc_size = 0; + int64_t nopadding_size = 0; + if (GetMemorySize(out_op_desc, out_op_desc->GetOutputDescPtr(out_data_anchor->GetIdx()), continuous_type, + tensor_desc_size, nopadding_size) != ge::SUCCESS) { + GELOGE(FAILED, "GetSize failed."); return FAILED; } - size_t max_batch_node_mem_offset = iter->second.mem_offset_; - nodes_mem_offset_list.emplace_back(max_batch_node_mem_offset); - for (auto &i_node : virtual_nodes_list) { - // Op_desc is not nullptr, it has been checked. - OpDescPtr op_desc = i_node->GetOpDesc(); - string batch_label_string; - // All ops must have ATTR_NAME_BATCH_LABEL, no need to check return value. 
- (void) ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label_string); - if (batch_label_string == max_batch_label) { - Status status = SUCCESS; - if (mem_reuse_model == kVirtualInputNodeMemoryReuse) { - status = ReAssignVirtualInputNodeMemory(i_node, max_batch_node_mem_offset); - } else if (mem_reuse_model == kVirtualOutputNodeMemoryReuse) { - status = ReAssignVirtualOutputNodeMemory(i_node, max_batch_node_mem_offset); - } else { - std::string error = "Invalid parameter memory reuse model, which is " + FmtToStr(mem_reuse_model); - GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); - return FAILED; - } - - if (status != SUCCESS) { - GELOGE(FAILED, "Reassign memory of virtual node failed, node name: %s.", i_node->GetName().c_str()); - return FAILED; - } - iter->second.mem_offset_ = max_batch_node_mem_offset; - AlignMemOffset(MEM_ALIGN_SIZE, memory_type); - GELOGD("After reassign memory for virtual node, align memory = %zu, memory type = %ld.", - iter->second.mem_offset_, memory_type); - // Only assign memory of max batch nodes. 
- break; - } - } - } - PrintMemoryOffset(); - size_t memory_reuse_index = 0; - for (auto &i_map : mem_reuse_nodes_map) { - vector virtual_nodes_list = i_map.second; - for (auto &i_node : virtual_nodes_list) { - size_t remaining_batch_node_mem_offset = nodes_mem_offset_list[memory_reuse_index]; - Status status = SUCCESS; - if (mem_reuse_model == kVirtualInputNodeMemoryReuse) { - status = ReAssignVirtualInputNodeMemory(i_node, remaining_batch_node_mem_offset); - } else if (mem_reuse_model == kVirtualOutputNodeMemoryReuse) { - status = ReAssignVirtualOutputNodeMemory(i_node, remaining_batch_node_mem_offset); - } else { - std::string error = "Invalid parameter memory reuse model, which is " + FmtToStr(mem_reuse_model); - GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); - return FAILED; - } - if (status != SUCCESS) { - GELOGE(FAILED, "Reassign memory of virtual node failed, node name: %s.", i_node->GetName().c_str()); - return FAILED; - } + if (is_nopadding) { + mem_offset += nopadding_size; + } else { + mem_offset += tensor_desc_size; + ge::AlignMemOffset(mem_offset); } - memory_reuse_index++; + GELOGI("[IMAS]Continuous output : Set %s name[%s] output[%d] offset to [%ld] stream_id[%ld] size[%ld] " + "realsize[%ld] nopadding[%d].", node->GetOwnerComputeGraph()->GetName().c_str(), + out_op_desc->GetName().c_str(), out_data_anchor->GetIdx(), output_list[out_data_anchor->GetIdx()], + out_op_desc->GetStreamId(), 0UL, is_nopadding ? 
nopadding_size : tensor_desc_size, is_nopadding); } - return SUCCESS; + out_op_desc->SetOutputOffset(output_list); + return ge::SUCCESS; } Status GraphMemoryAssigner::ReAssignAtomicMemory(bool is_loop_graph) { diff --git a/ge/graph/build/memory/graph_mem_assigner.h b/ge/graph/build/memory/graph_mem_assigner.h index def24287..e3fb52bf 100755 --- a/ge/graph/build/memory/graph_mem_assigner.h +++ b/ge/graph/build/memory/graph_mem_assigner.h @@ -119,31 +119,15 @@ class GraphMemoryAssigner { /// ge::Status ReAssignContinuousMemory(bool is_loop_graph); - ge::Status ReAssignReuseAndNoPaddingContinuousInputMemory(); - - ge::Status ReAssignReuseAndNoPaddingContinuousOutputMemory(); - - ge::Status ReAssignVirtualInputNodeMemory(NodePtr node, size_t &mem_offset_reuse); - - ge::Status ReAssignVirtualOutputNodeMemory(NodePtr node, size_t &mem_offset_reuse); - - ge::Status ReAssignVirtualNodesMemory(map> &mem_reuse_nodes_map, int32_t mem_reuse_model); - - ge::Status GetMaxBatchLabel(const map> &mem_reuse_virtual_nodes_map, - int32_t mem_reuse_model, string &max_batch_label); - - ge::Status CalculateTensorRealSizeAndOutSize(const ge::ConstGeTensorDescPtr &output_desc, int64_t dim_index, - int64_t &output_mem_size, int64_t &batch_dim_num, int64_t &out_size); - ge::Status ReAssignAtomicMemory(bool is_loop_graph); ge::Status FilterAtomicNodesForMemoryAssign(map>> &normal_atomic_nodes_map, map> &connecting_output_atomic_nodes); ge::Status AssignContinuousInputMemory(const ge::NodePtr &node, int64_t &continuous_mem_start, - int64_t &continuous_mem_size, int64_t memory_type); + int64_t &continuous_mem_size, int64_t memory_type, uint32_t continuous_type); - ge::Status AssignContinuousOutputMemory(const ge::NodePtr &node); + ge::Status AssignContinuousOutputMemory(const ge::NodePtr &node, uint32_t continuous_type); /// /// @brief check the input of node whether support atomic attr diff --git a/ge/graph/build/memory/graph_mem_assigner.zip b/ge/graph/build/memory/graph_mem_assigner.zip new 
file mode 100644 index 0000000000000000000000000000000000000000..d9ddeee41df302d28f18939caebee2798a3059ec GIT binary patch literal 30655 zcmV(*K;FMlO9KQH000080PKb=Qm&B7%I8A>0GU4l02KfL0Ag%!V{2b+Wo=(!b8~5D zZe?;VV`J@oYjYbnvgr3zKj? z;gEi8qVwA?`T?NvnBhov&e>aaScyf>0F6eY(JwTBtw)c(|4uzp`^D_be0+7iP|u$} zf2!Ur2cPn>I?IRG(_&Iwjq_3$*=(z$#R$q{oulzEpO*PZEvKV=t`^t1>dXd1{fFPQ z)nD>?IWDH^`Q}sAf&%OOYJFn|VSFi;>SpjoO^bzEmbnHpF4g6DlB@jlFrO{dc&diQ z&1^CrOozGpG+ta|LkwmUf&7hu6c>wuE|oG27nS`t<43<^U1i zEaq2R6DnA49d-8)kNbyDG}5-pyXhn^OEu5`b2*;tE?<06gPF!^IJnTbP6nS;F;|1D zd9L3r3c&i)e7qP>uiC0CE*GB$^Bm!f#^qu@zF00ivKbt8-(>+!xWQDdclxT^Uso?X z{cgXFP=4v2z3II>Q@?ahPdmqF-NU}>ovQub@j>^j+dJ03Ua8LU-_+0DXhFxk4w^?%u5HqI7b zW_js@T)&(TZt_pX{KJ-Jw3}kOHOeoRS5QMUeo&TB)~tA4%#*J!mY0_y()R?KE=mskd^y!{a^OA2 zXjuYtmy6N%_RV0n<2@+zh5ulpXR7~XHW<%?N_s9&uTsy5WBoVFX))LHCLd){>+V&A z$H#oQD4^Z%znf}KF119<)vw=wrzu!2)bM&RSC8oXhmIbDV_oJK&Fxt3G}l7+)nss` zcGdbP^E8{of>h~C0j;_IxK zRlw`bjtj))ZGHnFrUl~wqM#a&-br@c%T5p9^$)YRhi{=~@W|C0Pjx%g#^AGVZwU=O zdzyZ~uN$Ef&!0YpGPg=?Z7EA%pV(JO?~lcJq&gEVwR9(XmzR2=TBYXFY@r^aM1xlu zJlTNaN{ug7%YUI>?5d~yR%vCkSk9-QC)F+d6O`SJYCl%G$64p7`}#QRcmIBT2=+Sv{QOmy5Pa zzCv3<^b?A1zRni_@MV5Ap0+mB3x%bE$3geiV!q5B4fx^ZU{dA=ZzMbeIntAok$sCQ z#mRH@bQTle$eL7+!0|#MppNYz>xLvp)m@sw6V*(0JxmEcDjOq%kc2`1=VKYb%aRL%yo@lZ$;+7Lq;%;ke3;$8ST<25bbCL`{ zlsEwtV4<||MyF~Bj81+N2z0IFc&=#U{EHfD|KcK7sB1^9VmkSvV5%ybsvDcs&~H!b zJ`!xQz;7NDf$+1STwLOL2*IoUj9-{JJ(za2(Ky!zHV_?kdCSuRgsPDi-YX$Tc$jp9 z|6Go=Zr8rvnBh7Cw^c|jLrknRO4l-TP9jn*YBvK(>({CgxZX>{T)K=0AfdZJQ!T!< zP;yCdHAO>7W?0p#H%&(EGiwrCVqVU*{sFg9fkIRuRt#NyvAhypgw)z-uSAv$)?<4w z6i}x(z<|oFCRdGxVznEVms-&0YoP@Z{6Ha*Dw-U{;PbqptnIg{XN!l25d< zspTU)2rNu$mhI))9LipjEY&74zKd-z3)3QouLJ#uZgX1wQ9mp68fZQb5iQioM9zkT za`A$2-Setr6Wx&)A4PNZ2g(5teN`JJiGI@vg0k#F2w`TSu3(!FWC)_PD1g2*VZ*A2EZ zj^Np^Ol^JcEcq5d4xXPe#$VcPO)J|+J_JbMsEBi10~A;u<%pt!0Y@s+?D zW{mqKaG7W^ow2i9EK5c_}fqrpW!F+xb|YiHYEz-u*u z_h{gwt7OQHFEf0ZN!uAs0th*0V=&#!AT+R|;16OtnCPJaE1sI(b&ev+K;|w44{^*L z{`#V#?UKZodi)rldV(@p;~c-gkAoU^fPyFzPfHQvR)RcSh?pK*@eQ=v_EU!2>%*-3 
zDtp=M9c71qJ=|{vKrO;;i>IjxaohqX=0p`Ubv-Cm=j`k>JMO$a%wBfR_TOYjotKA4 zZCy%DVLfknoohwHqlK$C?EKM-V!2Sb0+H+4Kuxqj&uv`gS~x5g^LMa3vc0YQ&|l1b zMB7zu3(`qbRWcmR7+K7XOvF3Vz|{Pu=lzH3=_BiUlSlj4$HgM;T0g=G$2d4W5Qf9n zSev@wK0M`zBQ5uRR!wo#n1kV<87j+&(4uY**XHY6t8JzVJ}EBl&Dw4Qyg;QGgk*(8 z3sTUvB3l3eaVrTs;kVz+l+jZx9-iQq?FXYaF%*~4V^mRYz3n=tl)N=UcbEU^{#16^qy<*3tz5FQ^o}^w4|1T4~w2Zm|6a9n;`3WwEaML z*v2)^QSeX^J%;e*{d+yO>ypFO`bvoH6(H9+Hqth|qTZk#=FKMaUa}fL@>c`G$A=Ub zBQm!wxhvZ-1!P0!S=dtW{Lpy~)t3ZYC5?H`(m@^NTi zJDWj>hH9s<⪼&*LQFy-S~f>1VsB5y&&Xu%P)U=`utDNe$rx!zi6-Fr>B4X>Br}W zZ>4vmfebFenbfZ(EU5d3KQR#fWsiXT^alszheZO+AD?+}{`B<6AD)7{18g+mUQaFw2DxK`eNIRoRjZj9kQwBXvKInK||`4 zNTb}kbc4pq#5SQv;9v}?ZBUKaU7M%NlCm~glG1F*r^7G6mbBgjIe;ixhhK)1+yd)P z(L(mt&opM^1v+Ge2TwN`M1loe5AeimZ_QKN$*DC!YLs$11|~Fs!E~~gOwg-BVLG8} zLs{S0wvZm)S`tOw@>+6v^N~>(Br)REYiCSIUimlD`l}T4|El_Lt?D^;Z@Mx2^SQ_wK(ao2 z!UgE3msEgHp06tKqki&_x&rV-{bN-D^^a8r?9cyLRlq@@@;^ZC{9FrVAhiB5p3au` z2QoeKyOuxHW%E?CKD&4S;dCZFc=lgZw32N?F*u$se)>aZl!WHh``U(5$BU&IxK6et<|-sz&a4f6R(cBjy~#?98=UIogp1wbutuGlQ@otn8n_5_2)X z!v2^a8&{W~pniBBGcW~ejc81uP zyEM&ctAy^E9n{Wb(w%njap|o|rH0o+h{O1fbhA94XT@^i8ne}~#S{HLZm_D!09xEI z^9`1x)sYYbV%|^Fd=67ttS{HJXoWy+TV{&b2dSlzmF@6^S=pGCII=}Ik4Sjf*#sZp z&{X{wBCQ=h)fvdFWgHM`KO~Uwny(Z7G))r7`Pf%!0-8XWM3+EqPbh#@lPrP4d#`WO z&@phT2#e8g(-GOQGqQmPw;z;&RZWQDw2WV(b$0$>B=UzKr|A9WdP?^gv5w;b+e@@9 zHv}A88*OO@Bt|bNodBNbbUQiUx}(o%{>Eh75OlnCUwz%|bd#8lqREsFZD5XwHf1~< z0vdylIqdL55dwM$eoyny3vGc-hRX@=q_bjG=!in1p%FbOIFm#cRw#uVCM6oIIV(vgowyo2JDWSG1DUz#xcsuwESeyyBF`1ud3IV#@ z8x$RP#<@ANU~L*9?=e51Gbk7D%6!xX2TtP(X#_10*x_Zy!dnC@5f_tSEy$VZL!Cv-L6r#;-F|j>e312CW!>YGcV`eC zJ?g$X-2dDD(IMwDd)6K$J!z2Y0lUVi;+g>j5e*>KuvnHLzeJLN@W_eX!B++*Tk6U6<8zZ>Wn5|X5dLxpAnSt_wbtLb5gJoH zu&kY3+DHMs-28$+5b5gS3uyDiBn7#t7S@UCqwFRA7)~Z*X?$P0a(3%{$;9cUl@MA)Vuk-%(V5WH#|J@DfMD7K= zV)^r`nBNQ*U<$#XWN|9Oz|+*a#R<2>qXLBek|9y|c4Jq!_&41zH- zs znjTo57Aq8XpBMb#9c-4mp{HV}0>9Jp1>cA)e15Yo;m^G^Uv9&Wq5n2Z1GvnF#d2!u z+TU5CupX9+d|qzzQ|G;W*1n6!EGC)DACL^q)ey_i9$vQ=vI{poROOeOi(;Z%>Eo9w 
z@KlT&;5s2}*B3pW$Yoq!Jn=eT!528RV88KcCfnQLlNRiQS}yX}Fv-EX8`AF#XT1pv zLtK|QsLh!C*3sTJpBxy~yk<`t(_Z#ZuE5;>S*u!`BjZ>q}R=fUC8;n`t!eE3V& z?;gKCI&8_B8-_KVm!M^%c5X0xy)&6mo1}`&LV!rcZY$nW$T}H@n3857-n)Kue=r%r+Omm+2y4p3LyQ8)6dt+tfF4tP zl558+twB)r)~iGfsJ)=SqIyJ#cm*J!K9xaqWSh5k8fk*OHDgjwlmN1$O@QuB)l!2S zoq)f*;8;jKOe!-E!H@{~OVB5Pt8IfapTou!|8Y)h`<8fa?~Z$vyG40PGsu?mp>ZPA zMNS)YkZ&(l+-4Yrir>5Rjv$LjD7_-l9gBUkpH$n3%#J_nz~#_GYjt=yFlLawL6zI0 zDsb7M_iJ$h#A(MH@keG$lw5$QfS=C?WsZBO?AQe7Jv@Fawmfg7Yr;qDN1 zlVTKwJ?*M6po@L9Qj0h)I%ZQJq-Ms}0ww|admcKB02o686N=(sK#JYkwhW`Np@6RR zy1*f93X8o^Pus>lCpTNWkHBoWmIks9m(d`g^>HwP^`o1?7cIrLni|9Dh##iKrzrm7MV!|a>)U5%?Z_9;mNwep>OPvwfOHogs1Ldh{AfU;jB7ZA^^>e`5tB^ui7C5IL z(GV+{JB<@>&kJ7}g+_EvZP@+^h0DlPJf8iEPQp z{O5+c)A7QdP|2fkqPcfL0iL&mnTxd8Q?dT9=I(j1oT-uS06)5zkMm14%7-c>12|ad8Q1JjLC@aRu}^ORwx~UVPN3n8GG_G{!{+ zSy0bFIA-_@_m4~H0YFO=iO$c6jx^6B^O8{DX@OeaXjTZyd2e~%f6jo9Dx$^SvJm3= zVwY=(smHyd=R0Cp>JQ&Y4X~_J5nH-t3gu?sue?SA5LAz<~^X|umuY<6P48oHfQ z7+6C&V;GHwjv`$Tp=~$~+iVwQ0QgIKHFhJ0*?r-5rvt5hFq_EZWIu#}&|%iBP!>1d zu4y6hYQo_rltHOy+pG`?)SHgUlT@J7EiAv+l?yucI<>PDK9Vdo zM;h8173Ez>fd|``^9DS2SG#48W=20>?AyNJDsMDb#42gwmVP}OgHow9b(>qa@?+r> zAi2bRFL>B#J;^hWHGIN2>mfe#RWmUJvHzvL#|#dU$FqlpAeagd6c75S@{BktoI21q zJT@)Pe~#Vaq*EMu#3_da!){OQ(>nEPK^E&0fBH_3H36 zd-bm0?H#uhh#1cr6;?u{Sq6r~N>>f4uPKG6Y==(P6R>xTi1!CmLU5y*=J3iPCM9wH zVELXMhE-$TRuN+1Co%Be;lwBq%-nS0g23N_^y}`lGHwtMBY?J&Ak9b;Kz;fRh*LqH zs_bw-CA}g!Jf;MJ_%=i!3;*}%xGAXhF+@^uk7lTkajBl~PiW^Iz2wgb4jWCwG?k6cfXqI-Ai-|fN&7OYA4BO=)7qvR>dv> zPD~Ujf{%MAor8n!@#}2AcYM}8e%E`~r&P3L(|))pRX`d+DIFZzLn^6E0-Grc5Z%AS zckJe^H#!zgYdmcYYkq$9UO{eWFcQJP?lHKpxt> zio;Xv@bIDJw5q_lKb1IYj>~3=0RW#TA?+~Y#Rt44ll&QHw_hQiNC}(mt7cOYV$5a! 
zr*aB5VQaJ>o%EM^AA#%mt8AZbXv04@$hlA|nWvEw}JgB(hqJbI4y@Wj6yB4e77;kYeG zAt^UyF}o@w+&mg`PU|?Y>&$!O69kKq^ht`Q{(VGK);T)r?aRZQy^c;kD;7q000T=8 zb5<0ZcO;UG8((nd!~mRS?|d>Jb|`aV`LM!k(-WweiF;o(qjyW^g;KDAjy;%q+jJUC zcluOVZ1hQwEathb8rjI?NHd++;H~@-`nPGT5jlP*s>ed14nwa}7!?Zekzw%qfRP*> zj1hq7<{cM}!UYwcb*;mU6-4e1cGvC1vu6}=k!V1x%KtlfhU3e$GZ;rzgCoP~%hpzG zc0+f)LsH7?VhN|GV7e65`^GLEMocUHI5HmUeIi+W!?O&d&I`t^cLypP`yciZ05C2d z=y&Sg&O(i45~I$-q_x56VE&ZwR8w+>RRffRV!qznJ!$L*-rubMDnDjzmBn{|yzYq2 ztwd`TR;wjqv+=cmq>^a05*&k;KOHc&s_oqrLvE;@%2mGChu#_=dNEQ8nsp*D8^C?? zm)u~0liVyNU+qNKym221se39eTKDsZkNZ2e!eCLw_#9+9^ z3b48=Z@KVzfq=voC7p^48MsMh)CTi%(Lg(m-R|dM#i0`QMwd`ZsFJF+QG8xR50ds! zNpA$D#q_SNR;Sax0*p6|o}QF3)y8Va48k|kJVGi+hAhlXXQsS=kGTb$vHXQB_viTU zGy3tPcnKdkGieL||1Ed}rT=KY{N!HN@hGnvGJ{h2PjD9(0{bCeX?$K$y+w%c5$c+} z;AY`>ufl>ou%PR}(=m0OZhuiR!3Cq-NdnZgCCw%bG&t^d%m*!Cy0>IQCPq?H__Z`i zG|9vTT;kZbb$ElIyjuTSzL$A*X_k(A6Ne}KGL?I06J-fr1?fUmD#pv7Rg&3$)I=#< zcEBH#d^Y%ueD)Lb*&ngb3&~Pw%+xw_cbc$`7iU*PrF_s(v^D21O^{}+#l*7j3GKRoUAkGem9Z6n&f=%00txefEHccN&qz7etX zpxuvJhsS4!r}Xvo@D=^Ke|MN@e?9AJIGr)z?#vNtc^G6+41 zIX5!Dq2(LElQLi1wz8SxF4ruVEy5}-breOR7L6z+G3Zav#rCWKB&d=bu`$~rdGcIy z!ojJK<#g;rZ)P{Mug2KvK#bQf9O8JxX*tV>iuT#OfRixbWCZA| zrWcr6CDu12=Q1$LH-(q{)wDm3hfEEb*leD;p3s+wBTc=g;Ht-kedoPk1V1i%#V9BbuW<8ELc>u)ac{Jp{UU`tXYIM;GU_6x z;@AxMM4~Tv04G=*L?II7Yuim+W@+J6r}Z%hI1-uR69VVyqZ)W<>?V*4pn5d+$tYuA zVaMf+_up{!pc${c{|YPIHnpv;@&hcC;PIZ0TvV5vgjmE+JxSHXnK0j)KV*?>sUH;+~_h^lJ3%Q*V#Z z&YdQm^Ip=k4irDGrQc(I8KK4#Oy=7^vT&p0_3WGGEEy>|9-(cA{*yg2ErWAaOfSzq!*&;q0+7iNQG5^TxM}@X`u7#(~ha1^w50 zBEL^MxlMS32IKHEjD{1xMy&W&q&YOk3@g*i+iRZ|5zWNI*GLCX6nK(=YcM!Yu`vgi z%E4dT4GYmh08NJ&_mtf;+7RwXqwe0r%{0y!71@Q;+F&URl-4fZPhq7SzH?J&7f)Gm z4yo7(Os%1&Ja+}&`|fsVz=(qC&uSee%eqE=9Y=1x9a}K<70kT9r=MfULiF0kzE&}1 zxQ)R~{M-5i8hno#^;C2ulHVpCmE}(6e@H$kd8&lEc#?G0!@U!tJe>_D_TqVgFO5jXZy0sw`GqL4XOR%XG*MT452C<>nxIthAi2n<6ix-$rBs`=j8 z3V!N|GXrCWqD6pj$5Fg-DPXBYD*;a>N;F`qRIM7Wii@k=6pU}fR|NQXWULtTW}H=5 zPqG$EtgS86v)O!H%*S*_lM~*EoTTZt*R~xA%cruerxwJZ`C}ATyMyveM_})v0b@M> 
zSAVV4Uu1rxB;)>Cwtu@^K(Z39!(vxcOzv=DVbt{C?6bq;e(%)Ep|&@CckGcbK~3Ik zO`m%1@Sl5jO2EHIuiT_)knu&xM&Ydu=W8ePmQx8hj@Jw_RUXk+au*#-Od~++x6zr@ zNhXy5BZKEnvdZl6SIw?(w&}Qr9Nk3zXxud~fwVUF(srmK=hCderkW9+Z|bcl&|U1V z$!?#o-szLtHkz9UHB#HQ5d%5hiN5^F1U(13q_z z?_;-SE4y<=-V91$^p@l2lg{bk@mY4xWAsExPmHJ{A)qPqFrcs@l_Zn^*`z(-zbcR<`71lhDy?DsM=E_8=o2JV?dSb&UgIQ zr#sTTlLy_(gJ{KblL_w~ie={{^RQ!JO%}`IJOS}i2+gQ`FuW^kW~jCxU@&=15hR?E z=L+sPO_nOVXCaKI-W#FPDoVgobqq0~APh%ab}n#}uCoyUxht*9$8@}oEtlofk9b4; z`m=S54?=TAcsbe2@30pM{8@?R&8f_MR@-$`YOU2uO;NM=b58;SDW58QZp+wn<6;`D zN&a*4|HIMqSD1{?N>1CdAK}vY-fSb-@$F$<0jxJWE|3J8kr3a2diqHqZ*~N!M+Gm8 z?MC?f3J!n5dv``snVk!1^WMx8!IK7h=zf&;a=cyTcZ()&cxRzJb{OYT4I1t?toNI9 za~!cp)G^+w-W&+6RN%qFURcIcdpbtZxmxI#5A#L*w@>!mn5rL}(Tt!U_3>66eu|IBfxOvY}9gVC1flC2{ zyXkac`*=o=KYC`}I1+@v`i(VMGlV{J)X0;Z^=FEs&TvM<{K+u`wEtC(Na9m|bH8yE{fT=l{h+g7g8ksW9 zKPig+O^e0#y!f;sg-}ac)iq+Z9g6xy9A<^l$OzRLj)1G4^TowKwQ;P>HDN|PXu$w3 zDNcEDeqx@F6r3uf;8Yp2T(1-ll}6hgIiuj{0pH=A%IQe`_}0^U#Z=|yEOR8!^$rwV z6(2Td?Ua1pjSS^g;^TJ_8+Q{IjiiH_9X80$^V44>Aho)~yfz$w=JCtJdEa_CN6TSO z+jYuMgP9s!>L%N!njWZy(O4PZGoz_AD9h#CYO+y2(XP=*X%Y=4lP`8eOpQv!OezVB zgT}tJ3w~1R%AKwir_A0>Ik``@3;Ru6O|jMS-ack+^UYjNkX^E5fQvUkRxryfNOPy2 zJxy7KCte6~t5T^4Xz%(L93l+<`oi1%XUgrx*AdVRw-G<8{`hgQmO}y3U`Z!^8GvRl zc@ZM7>jVH&fm5HvZYDb@RkuQj=$8}~s0f&j_L^5W%cgB3-`dJUxcU+aGdSLe8TYl~ z?t({t-{boW!WFL6MFtkLDk8z3R6HS-Yb(vh)$s+#qG4Ep3TPQf2cd!3;j4kL_;@Oh zAaax<23_-yPrC0|g{Q*__2aJv0tl&yEJYjWG!IbH@V%Q3vx@}PS#u7oOExo_*-yw6 zd2?Qwl%4Ghl|(I*$G#(LnN$F$Q$Qe?6?5(bF8r78jh#pE4g|_A55P={Ig!j)i1(4ur(Q;c6^l zFLs;?H6g}E(a2c3H1}m2^1Gc_dWJl0C*ilFcr>mBRT>G=;9|TK@k^|kX$f*Y98T0? 
zP<|l$9>t8h&IjP-gZ;nIa*ztM{fCl8p7Ui5U%6S2%EDC%^+l1(?rUoG@NVz4jTPQN z{B3Qnn)QP88o>YBU`{;TO;aQ_|JT|^3Ck!kJ;IpV{Vk1^HznabU^BvVU2T_Fj2#F$ zaWVHJe|3ea>N~{}zXlT^Vgl&Ov|y1D8co=rXy>mbzMp^Zae>iTPK6pK@{`m9jTsIW zR}x&D%qn?M>W*ESdofB=y1||4QhBD<#a1f7SoK-K*=NS`HIiQd4wg6@-{36>_N=EY zD3^?__y!$oldRW|u_TMK392Sz=j#yWVU5wobe5oVAQ>vyVz+`jlV^o^CQ(}NKHgQ& zc3dH!W0TjAIhhkGY_;pv|8rH7LZLrV&sYZGZ)_|TQo$+J<(z!9E?A(!C`(a=F%(S< zA<~!%IE(CgX;yXnWv%$@x%1JIDm(&Iqrw3mnisVy9L0mMGRnC zPTIB&wo$va0SkND5y~XE3`h93EIJ)K`|+b#F=DK!digB1?IPX^$=AWkE@5H2IfJ)7 z^8SoOao~7N;;Y@Bk=Q*6SDj}aNU!DnlbFn&Y{PYUt(JV8w@45Y{P>VC-cGjRvu2ef z#*(|cs-mj$6Yv}F5rI!~pGXZPF>7}u9J4w@%#EQix(pagufc>9N|yR zLWYUp$RaL-MWTD_zTSJ*^JM-RE?@62FA#gs8mY}BOmi3|cz%byBhdyX<4T0Hv+<^l z+nOz$r};#StDJ?B7rS0Sf-JsmoB8a$-xGJXk_c=PMuiU&5|S%3Vu@y}0}h{rrV^C6lnSpjTR z0xBizi1&VG?yUh)(PM9YGcISdeIe^wzBy zC|lm*VJHJ!3(eONn~4)9iPNh$_F$@QK_s`o%@=02bUP%`#!hn@pi}`7-V02(W$} zBw;1T$L^Q!3R)yL{O|?UbqmaFFx|aR)`Ci7a}O%HV4pdH*(bdJ`Kkt9b^2$g`^U#8 zr-ujKeK-)g|GTy2&XcY>3cg}|aQ{s((Zs44jGToEaR^*Xmj;gt+&^Af@1nFnt-rst4DUe z$Yw=pL*W$#;UrTk<{BZ%@k9HEwmJUy)Ae|gtClGY3&|4PR6n#-_HsX{TRwFC5N3l# zv2Brf;_#N@YK_4pCc96^?QIB!4fGRGgNm`F7@Q;UCT~A%EBO$mpMB&@9R#);7x(RN z@piT>ugMCH;Q4m394AAC$2O+CVsHXtd2EPz+nwnMXBr0X52qgkPRyqtV@`zazxo-6 z-5%Q2Z+x!p{E_8P1OkssPVeIH#G{aVp&z)w62Eo(c|RapZwH_EuLtwvrE}%N2r!&H z-#gK+eRg{Iu73#WA$V@67Ad**NCs0az12_y84UICmk?ixU^7L9b;L1-#C&B;+lp!7 z_W{N??n7_()ja?KsgBMg6i&82j{1255g7FRl@T^id%dnUr@5 zteVU|vEeu!$im|3BOgl?U|0ZI6>iPFF-a|at9hd^Y5gH{vN03#;OHM;08BA(=qY7m z7xJK(e=Xe;E=dASLhyA&HGD`W=w-Dsi2% zomJ^vIjd!P$sm}PXrAD$(uRnYdTYF3Kg$rosW1(9S_rf>jLhk>hPfS zuipy~@_Gy&QA!e%fmpHvAo#@w&)R|&&QxNYDO9*2xxrt4^1_J~2W)Z4=BH{U*KE4| zjDJE*TL9iLZFeSGn!`>=o?+mc;n`=)Ih+_o`QnhuZ-T-2fD3yY{l~O-2p-rxI1+VX zRzUrMAxT2ZV>a;a2o78jlh zw{T7+>2PLIQX_ZkWNYHYJD{->mNe|??V+Sjc^^W=jDfV`?=&=m9 zkl7BoDH!mJb#I(6NVo85xLm~YF1|=-Z>?U>SAJ#~XYxTwcP0UkP*hH93)uRU(c&$~ zD0IPAi+Y2LL0ZRD?SVD6EvqumWlh2sqr0M0a_}%(3$6C^g%cu_Eyh8Mwxe!4qG#iX zdlKh%lALbf!-<>jxqmKV1>KX`BDRMv17t;p$4)TKD~CD#LJxRqrJLls7~)kUylQ(b 
z_RDR^Xv-8P_@u+HF8^jYfb~<%y3smgB>5W9(*CekENVBla%2jJ?2oG@7cQ%8B#RY& zc`MguZtQ9mC+zp);jGeoD@WI1?3!t$Qc`>@_|ig)Pc#XSc8WHt8i~Ao&zmVd*YG00 z1V7(oN3A%fTa6tDt1w8Pod==i_dZwIPWS<=dXma07M2Q2*v74td4C>C8FNz~7PXDr zcqxzy)k(bd&v3-=xkr4r$(EnGm4jdhnzSQRJvVXTg#C6)X>{SuDkD7|y}MNw^TEe_ zuB~>s5FTSg%w{Fp^wuT@!yTBe&Bp;4ro)K=5uEWTM+M9;aH@s3?Q9irqYlkU&tad< zi;o(>$iLl%qe*&3EhmfmYhLIZ&3+eIst+PLXX!PmJW7egFw#OBL#^HkEW}a5$w9Z@ zd3kh5k+lOhuHY)u3RlGs;vhPv$G8Ag?c?mCj{`Tg3@1fNSG5$ge9Cz%Z0Ce-sELkJ zFb%?#>QPX}M5?dyg>I-7L<$YUQI5U`!im7x2ls-q!OOH4l@1>#;HDk^b|N;JkywM) z(06fRMNP*c<=thJ7l-mqd$&*|+C4!M(M1^J@M26XzfMiMzRNOEiCx9w^utsLGb$g% zF-acP1ze|gdtCaklGNh<@8@1SH`>b6wEMg{)K2!?_L30`=lDWvs(^Z0jD&~rD2S)P z!2hDL>H}8j_G!vM{kEtQ#IS5TqB~y}_TrHUn~$>NY^T|XvWO{11!aWU5T-iNESaLM z&i?Y2qB|II{-t;NbN{5Xf5_+VSK<_Ob>(i!$S62-w~UWW{R9maMIijO?GU-gxcicYO#RIqDo3MuwPOkoAETmDNR6a1mkiNCj9D- zUSHf`=e1Qh8o}Xsp*z}+`Jg&_v}&G!`CivoX=J3yo4!FB(?<1hGr69EbDpU0k#3dW zt!12*ZSATQn2pFCuX7HXA7sGMAx%N6-pB@^h!PS~%KexrexbN#dhq0Vo~D*Jvjoy_ zYdv;)3x|tf3Dm)&INF4tdXTkm@6?VQ@@d_urpRdgP z7vqw!T0z9)h~w#(k@qM001bLyrzekFM-H@>UE4Xf)wcqzJP2GGo~bjwrMW2>zB&y# z42rZx4x+cJGxZSVV3lt?XyQqlmD^0TbnZ3NfZO%g#1G*9m~}z2CwnF$5uO}F4z4Y@ zgPQBr__O*nm``;L58J?$t67922UT-1jxSE!^5qwkMc<+sYV8Nag^1w7%2bI>`O_TA z3PDlYG+xu=w~}^rOe=&1z{dJFBBaZUiUSm>nc-!xlC@oNe@J-RSmYPpO?ae*ayzAA zm}beV8BCk2u2kU7bzpv`3Z9$D9-BWKaJvJ3gKY>#V^fBlM8FPbm4jJ*Xm?F8bHz9& z-Nt4aOrhCww=uJW^Uxjf#}rs0-EAy4k<%~sO7+4!_lO?#es4Bi-jphlfrURTzrO?* zIhxhR!nDp3`${pO&=5=@;o15q4Gh7s2-@b`sfw13!tn`?ZZ=SiQ1H)BwXb)45d%4z z3IK2?C*Xhvux@R*R%uN%9btSA_P%`o9hnd`{b1MVU`_&;)=a&5JjpM$x$QlgkFTzc zuEXVDT|^8%*tuAw`MzTV<;k81%EN7@2*-aKf=V9BC^mpLh*s0>5Ud6+zR06HX-N}X zjxTxV8n|k5f#K$1UY0iAEJ_j>S>CmgxE|tMKNgDh!G_$Khq%`!ugm zq$cBbw7p%ii>a7ju~=Lwb_6ikdF4TJs%Vdo;nGD`Laakf%N)CDtG1w6F1mY1IBNb- zs5G&iT$%}R)P<#$OIW!y$&8Vq46{hSda}o7!qZQ=F(AFzYPTA=vm7KmjMKRH&Gyd- zsz%z*K{m4y{($fqNp7b$;T2J^m@it<7`TkQ_!Xz)MPiGZ<%o-aW?1!@MBs%n=bNq- z!<5=}W>#2<$wb#5lq5;EAAq>VP*QllF^{z;@OTXT1E64dxtF_2#$S9ujn2KjO@NFE 
z&zi8RrPZhA-jj{$?4~LYAexJo)6Gr90#k39SeSp6LId%axSBZqYUvgcNSEGO*+7B_&_OF~K{P%-CY+h~^$dlN2@X~mEl3CDCGg*7C` zQ>446HT5%IVxVb?K8SYPD9akwuE}!Mb6`}c#q}63QbPyY!h&NroHFuv{Zy`rgk2uf z>d52AV}J2{hIh$Q!iuPHJ~oFb11_eJXGqb*(1en2q9I&q3%SOk(>3I!d{jhEiA1F+ zFAe@kYW)-DIBu@fUu!7#Ptgw2YK9zVzsK{9nTmG*&1c<_;ja{bkc_KWF_h6GdG&3PJB zb9Dpi^HDz2nmgl=6{d@XF=i)EG(aO#_*500w39Xl^@QY&ocR38ZcyhiGTtA z-ei`_7?uEgIoENib)F@gxER=N_{3rqjlo5ai_fNp^5>dM~2*BlFD&k2#z2k@j>sLEQV%ptFs)l;^xYW z9YJ!)&EPX#jVF^5+O&#al59hL$%>TACw70s`Hu0drWVoCR78uV%auKNC zo{We-j0-h}-=F=qvdHr%`m?&&kET$K)d9VahRQl+)<+T%l)4a&cDD!?A0I`eLFR~$ z^YF1xpSDClKgJauU*T1ecg&yF!%F`DCJ8Fduh=nVkjne<)GaH&XV~GIHAr>)aF7-&E>@R7%L}C%@Gy1Q?&+o&*=;It6eaprk^+F;GxA z)tIi(LPC?If`XFl^~%F5**r)ezxU*a6CS!;m3ENz>m&5^rB2#p;oO0n&AkTPT&mXN zSjNn4Ih*p&!^u*+O0I`DR(E1jW;p#;RakcvQtRgU#!6V>-IU3HbM=i7PX zkJ=XSd`4}xy5Ir?B|-P2al7vvaomnq?KJlc!b?zjp513E5O z30Xob-ck<5CFbU?X<5wd!r|hp_Q~|MQ;VpeRX?;@3YIY9kTi3#oS2EM;nM4DCOYM= z#O5c#?KmcL1)W@;ZIk?CKH(VxIpgGcL+*swabXbKj2v`k>)ND;)HV#8>R?_5sSahu z!ObBWtB-JAd6taHO|aUTrH=61yk)X{I`WWlJ4kZAE$P4abQ%+EA#6;OK<)5m@sJd} zu^w_}Rcdd)*LyGNPgNZyCqUYOF~%z$hBS{cEa&HP%etP8{-Ha?4Mja+b)8diCcxIN z<4lr?ZQFJ-aWb)OTNB&1ZQD*JwrwY0%rC}2`}|d>&c#067puCedtG()>WlY%R;N3K zrUEM$%T(|p;y!#E%Jvhk6~xR_enh!;%c-ePJu7x81KHL`B?6B*{?7dTS#Gw;!Skk@ zg!%SY7J_%%^HTXHewa6{e|v;9Z$yAA`6twi>d&>6Xk6DsbHXcMM@P{5S5@WvF z))}~5W?u+7gkghuSSwqQJgK5p`g{ntpY|O?Ux*$8gOV1^g!=v4*CflkQ(7@^Eb3_w z2P3x)prX(Y)-h0LMXs@JD}}Gv*+(>#nUi^N>s);I#Dxo5@eh5Z(8CfJn{x?Y8rNsg zNt%CSNUYG6hDu{qVNud*Asw*EzYU2D=%#q3FbR)xAY~MKI-)DC>xj|P+7bmB=4C!s z?J=VYa76p&|1=~Gx1h;nK)@R9G#{+CI8!q0xI<;Uq8e%O@4%NT8a;8z0sCR9cGH2U z+if{tUb>@Q7zaWTqYzvoY>pzqy!Z$;T+h~8`Jkr+?koK11h6uQAsvEi*|Dr3^hZDa*e}cm!eH9I@Axl z+dRs7ZfB2XGsd#+aJ2Ma8(8-3DAP}jvU*+E2McHjG7ONPT1~?FDnDI#c zod@2}G|-Vg!?xg4v=T@?l8{}g)X~9Q2|hALxjj}2)x%6XM$l(I3d|9iAdI|dX_VB8gR?1#kP2-y<##}xV3LJ^|WLfZswpX-b9+Hn~+ewH|Igs|#u$znaFbbk}OAA4Wz4M6p+Wqs~!rj_-~ z^!t|OfsrzpmV+;zJ$Kc`@j^qxAU)Q8Uz;9x4MlPKale!-!ZWspPh7gi#_G8~`MK(9 zFms`M#ccA^(>QV(w(< 
zVDVqbAojT2|HvSqCxT49lCgY>mohUuQC?STaKo-BhIqX7*%{l$5W?D4n=ycvZ6yNU zi*H{IRc|!?ZQJ#X9`l9)1EY#@qXvy?&h<^f&&NZ=14>6GeQ(;Ri>wm&yN_Y#JJ%GS zaX81^I6Je4@sG|i+I5pE+SoK>i#VUyC`Ey)`v#5a5fAEA&JF;5W5S07|5>E|LnF-z z>z&a>2jEu_JkY|}8Pm`^t8R$xox<#)L_@U$LP_PIGX03;n`Z)3T5)Q~LO@q0(uQMV zIL*i-2%MTp`^Qlo?AV7>#`sglWkdXKt-EvrJ^TUZe68r z?F0M%C5Q4WLcf>0uy55V@uX#q#sQZMdb?Ie^$;p~5 z_?6S+ALXoFJEwiBuZe!U@gFQXv#n+8_fX%QI`mNya1w0+l$#!!Gn4ylS>xQP*Z&wI zsu{8mj1TvVt>=O>jOweKtFH>z39hz-p~x&--d%K2j_~+KIRfV77sy$EiWiSf1m2eT z;xh-lJ##{>EfW4|E*!G;(VO@-6gXeAW!3I5Jn6&n9gOCI`78d4Ej`IL6qpEoOYHBh zW12;Te(B?;mBnad!#9FoOo>x)Eaub#K#HUgT>+EGRp?eFMIo_#f&X)C(7gc=^@hwv?3cQuzQv?FpGpq zN~h7|re&3hqYN~bs@F=Mv$sgm4 zek}U#N#C?~ZUHJz2;ZmjgC*`sjL^HKY0m5ki&eB8ki2ZOR@^)58D>#wFVI&jwFdf6 z!>H5~_D4$er-QKl{t8u}Y=OCw6|eU^yX&BZPbIgn8-m+uq{!Y_5hK-F2qi*2`l$!w zGGFtmh-!&Y->^n$Pr3b>7+sw5w9ik?P@kT;0DST@6({2DBw zjLauC7*<%B4tdFrOMSR6Ie7Q6Aba)s=bxcO%+I}=oObBsnyOMXPwKtp7g~%9P*V9p ztaWs&eE~W!K2K=m$H=m#ruxBeE^1G~=WPchw2zII-^Fv~@P8OS{LQB))fN3$x(n{j zpZ>v}BDLJQn8@l4)PagOh^}0+Wa{r~j7(M+I@tJy;1bVXsgx&4_-kt^1 zEhh#0LRJFB4j9baXMluTc&#T=i*};2z?GEt5hRmS3xcgKh~MSX&R?oK4{9Y=av+B7 zZ`QDGi0u*hz8Sjj1tD7++n6k;FH>a*pAVP~Y$wUtC-^!Fv*`Q7a_~64=eJRb1?hF7 z#ih8zUF0d z3sMK>5+t$`UUndv69s~-`2tnjw&f&G&7u-&def18Qt*8msf^3@$cFWVhM#{f9tpyU zNL}8nSP}+T8+#wwPl|$zmEH}iJV?prEq3DAvuHJDpmikf==#aIx;a>IhO_s`xzFHrBUb+)Xq5gW%X z*b}aS+#@J6_cmZMn^`d>(=nIUJtEB&JLRbcIYm|Fm*Nq}<@1X+5A#{}Ix^#5mzjBw z8h@Mr9wYMXBu(0IcVZ9JH!UyWqe(1=Lf$4665OeX>01PZ^B6Fw^77`>_=gHQlE8q^ z)fa+G)IMf4?Yy@Q216RY-2MT*4aO05+#$_-5Ds{yAlx#E(&@rL_gzQ*DWT+a+oj~t zzKK#>P^@9vMcPci{;BY_)u7Ba88L$mAuBPcs74RpjsRY!%g*Y8@r8s{lMHU>Kd2}( z0>-}qf2Y!&%fPH<0KJJOPakLV%NuXN!AL{+MT#Q;v}DPw`{}q8Yumq!L!wVMy2+Gc z!iM5ACF*n=9`1^o7_tC5QeN~nErx@G(-n9RA2%~2H+w{a;6)ksc)YzAqp6Uc{&a?; zjtmZ#DHYL(u#mH?cl_b9knQGa&QvUSMES$ff1ApV5dAb_ZP2k9U$fuK`S z;-aT?`)6aK?_6b$eE#I^#YO-;eaoDd^XkKh>kTwjOB$GeKv?Ec<|$CS)~zL($F4XT zimwo>nWb+Y3&!lyMm7)U8cO!M%b|-J>iQGXZGH5nd2%U;m#fH}yKV8`Oa3Y)(Xc9(7{KNOM%J&;lU?K_V?m#sblkx?X-}7WTm{v}9gptS%p|=>C^Z<= 
z*7prz#5`Wf%O$A>42#m+Z@D{!p+qWqEA{F1f+yXE9ySaC?hfiUgwBP-` zFr{u_?M3&=Wlvnn80xmalI{U}7~i6J)K4EYNjELNB7n|)&TeN3v2Qm&vAuB1ntboAt;>q&UC)U{LT z<})2-l^5reLp!fb7Tfvl+7x=e|GlAm3sGs9e-3G4?Py316${KOWhTQSh z)tj7Cyk}c)gC%}|DSRQUrW7$7}#a=4{*J7i&BKYli7|9 zD102GPpYs)=z7q>^03rG{0mtIz@S{u=XX+MC>Lr%m~Zu-Bj~yrf)ec!7hpOV3u?H) zuH9HOyt`5OIJ+-*O}Hz@lOC>@ebGdJRWup?;K^dNsNMPjRVfzIz}C$36ZeY_TR0!K zQ%uc_h*kB0$baubCbY(}O;=}nR_iuv^gd!}SAUoCi(yn}%sEq~x@GidCHHQUnW*(J zO{WrmSw!}Ni0Gr1mDa_vWA9(%7F8m0vB}D+MtN8leJf<$Hf*AOF;P&jT;yB} zkqiIpP@RDz-J+fRdJ5({f{;ky4kpI(N8Nk8;7eDf3Q`oWgtQZ~SZs;FP?k5_Zu|~p z&eU$SADG%l*LU>#v!A|i3ZBg&7h4n{&g>x0G@PG(Pz};%)PNvJm8y3}DKAQi1zy^K z-QrW&0D|wB%b0DV)Qy)B9GIl(B#0}FZ&ev6HUx1?=7u?j%bq`z368?wI>fC#b;M$4 zyW}qzMrx87v&JK>%9iD~zA%sc+@x1M^huQfn?Qlu@<&uWLw|g{pS!iSaNsFCBAhoo ztf^>K!d!w9S^{|u$Y+!JnI-hSjJ5Ppn9;y&oFL;9nxR|5DofA)`am}qi?$YV{NR}s zQTb@pI*(+{S}g77`w#Gll_52Yvi~|WtH%zY*`C~j&#cpmRyHIR9VT0{Vn2du_vfZjZCnvHi3>N|*nWRHZq~%`0MNMM zG(2T09w(NjHnH%w}WNp@9GVIKgsprgHdT&JAV5~UHKn94i z;D`&vLq#hn$5tHfb8Z#?hPkFNpjU%Co7?Xf8Ao$)SGRk(sBKU1f*P;mT{B_}ue@A* zIRZObW&oFtJZUfg%e~qe%GtV%+^N2Rr>Zb;1>t%6sQYaYejbMBbG*-M+=1qI#J4Dh zZ@|Qj$dKy?5yj@ox2Q={4`zQqPgz0Y#P=;@W&{eI%C1^;;8on5#-S_K<^KtH%}3cK z`~eawWb(G`FUq|yB0SMgqb7ocyo`H_7;3Ys%FIAmin8%&NLPU47LY5Z$a{v zT6%}%Q`*KcDuo0lU$y)Dhv9LpWk%llFI%z#5fEn)V&gn13GTIc5S)hpP;ncSb^hu) zaFgfT=@gU#^|&B8ksL@zD~=cb%hqd^qVRe|#&PzMd21tcM(eg&E2P7Uq9VcWD`9%7aDlKf^AYrI ztsPpLG_K;Ud7|l$=(04SM%Hp{8l7( zNuV+X#AqsVJPE6WqjY6V60IX$z6y>wGOpD24VIgnttRv2gCihnIs1sbkbPN(X-OmS zz)M|*tJlbXpwm~K1VB>~qjGsbF*{OReQXx+0}JO*z^BIclzI|#dxjE^WWigXpKpMP zl&TTmHk@&xT;yDR^qRCV@dRtumQA`E|9#yg%OWLg5jI08BDSjrER37h7BMn@3pZ(j z0*ipx=x7=A_FKQ@ak>``J#4i$Cq*bWs;oR!IwE`|QTPGaeu*P2`f3Dw<=h#$4%1fi zolSK<4a^V`*76^|Ri1k*>E12!2eE22t!3vw z6 zEaT^$y5&Oeqdec0+rWI~0RIC-o&kHH9VSrZVvy@LqPiDux7o!x)-1%wM+EC~&1OPN zaSWa(5A7@~6D{`z>8bh$0jgtQ_(cWN%TQzXit7UaO!|AI0wY55%pbv|s|}_z4=*%< zY~mhqy7Gd>%BH0~?D?1?F8Guv!2`w-&DuYoFrNVP_OUa4-^Y#2a!ZwSJ9KSY)c?X3 
zPZC6YiMwV}5AZ1_VF-7aZs85zWV&dD^+ql;)ROo%mxaTf|?PKQD~Xc>}10&0AWQ(z2L7d|O7zTJ2R#X$=Q^CIDj^r=>EMU5^?4FAE58zp4d z|6TNaaJQ>YSdyJYDjzoda&RXq5V{WlB@*2|y16`h2-SrH0l4>D5E3f*!UciKO)TnZ zq%a}Kfb(j7egSqIdBwyD_CDT?&6hkQp3Lt)1{;bSkF#raduxjbWgj8F9A6ps;B9aW z4L4)*#Rl+W)8lz*D0b`ZNoq#Ag?qC0PHJBl%3<+^{Hgf2ZfOXOG$7mwxbzwcuG6|z z21t*a1f#A&>ZWR@=iy}6OPwuV@7V#!&63H8XW^44SZLZH1My(Iv2t*=V{krZT_A?D zBcwf)XpwJ-Ylod%C_dU=?QO(3Dbe%I&x6w(UcDKA#&0g;M`P2)c`JC1=o#~h1cmg% zAomG|`s}Z^%gl>$nlg&#p012Kkm@@-BY$%T(#syP!ymn*2h`xD#)|ac#k%t$(C^v> z{jxVu<>bSZsMCbI0(mU9vAP8leg#HlIcQnP=HS%d@45ksK zU_#9GE(wq|W@;y;Djw2uT9PC)>P_T7^P#U`$Lz=P!OR)`|4DO&pO1FY;1XiQA5h%K zqw|plFAPCp*imvQh0ylxZ6i07ET>(o)hr~s`MG?aE3?gw6--jM(dOxP$;fYXV0M}2 zE62Pt;#jy4My*tl_b2VSApLfuD7xqhqxU4xy`_hF|CPWaCx3D$zUfm_ry~OADJ2mU z1l|fh!4VP^#6*K1ffX`(uBZdb5{gLxJE#zl@$hs0fwE@-vBUbaOi)ZgtOHl6U^Ff6 z>`?S1N9Y!2EocI!1td7WzxsSDdl1Cegzok6KEC&)Xi~A~ty`4r#tre{q9|M80WVE` z60RiG*fGrRr0|&sffvipqV`t5)kLJvzNo)jS-&VW3$f-c`COql0-O! zTNI{R355`Ui1v3=!q!zn)m{`KlkMW)ONT=l+~#V_g&F0Uk~-!UmdJ3%)REi{+f~A3 zOD_&NmMWWRLu+bP`svYJzCnQ)*n7Hr;{V=$HOV zFyuYZrIlne^c1V!#mxEINj^)}hF3DFw(9Js4y^@slXJ7n@1g+XV7RnWvO2G(JNt)w z)Lv?#adi&&)u8brLm*i2d~5`An#1p;pq@{$!~Ajalb@ElPFQ;>7^)UG17U_b)^7up zXyT9iV%07`#Zr&9U#0lqF!CDG)k)!Hr(z7V=rsWMCnp4s256A7&vrsVRJBlhbj1AgK{i3 z7aLGWBS~mVz5ON{WA4YxHq;bGX72wH*F1!qG<&nF+%1c4fbxcE;!e9Un%4wVc5IgD z%FUe9dF>}*>^>rr0kU7k=y6bm98dQdfUHKXyiN#l4w|azh~y1R`m!j)V46@;?##VT zp>h)6A;1#v;-K6TgbEDt`p6x)`2JZbKC&<~wct&6l3(wbNtSSZ+yLzXz_{rTO;r)A z_MNd@MUoh3`hL3U$CAAv16gRtRgP$;syOrJTpwAKKm-=^4PZDDST=lxg{ON%cVJN7 zo10BQ@G4^GDR~Y{w=$5|fC?R#eRZ1mM~BaKD9>*t-I+yhChp@Q4j#4&V_yX|_uTTR zn_nVqoqAm48TSo64X~>>lg2p$n;D}^6Oa*nGa5sXtUtSXOo##HL_79bIt9B*C`YIy zkIKXmR9lgCQ22N_`C94bd+X}H<>m5c>zLGmQr55frlpR?T_Ki#BGd zBNXcbg5DbKYlEse6Qv!?l=AZ5|8Jf0MjhJmiTos!2dbTP!%Zn1GV&o|Q<-oqiqUxB zp!3#XE(C~q%UNt+NP0O^X7**NXpvHzLBz_6y@f??MR}0;AV(;OH)z$tOv}DqbF7;xlDhJMPpKZdHsjNdDM@)%8Xx>0IqaxPd>>-4}o@Wsw zeU)R_!_H~3f95wa7Fv88BtAFlMzUm;4$EsvO&iMAbBxfJdDuQrY2|ZCH#2GPL?WS| 
zhSkdTHp!hKIPvs)^>~@q|GopNGw|}(j2mW2{Y~}8w-&U4RQT|d`H;~N=~l820=y=_?6QJFBQ=K>=RPbE%enuw=w zT^L60Xf!3QLg<0I^9o0LH91&QeSAqgmv40o0xy(8=?HLGnzZ>{y?OmTcclJtUCGlZ z7Kp6%VNr^>WOcdm5`W6pG>uvMF{45y_en>|S0jKPC8qb>YTE_FSGBD1hsZp9wT#am z!qAGHI3+4Q?Vln6a|9);(ZQBYaxHm(;k5=7EMk~{ox%hr17C_1h`tumtM$Tr4teBx z@FEo-hN*tLc|k=r0!yysyy|S3g4_gp1f|Vv#o7`@QkMa63bw*E#9_P!HOrLQ!sACv zpl$?JR_w%Z$D6@sIGQ43mG9kmjAG=*Z&Rp1E%z5u)IN5QcxK51yjS@rg10A){UVAi zG!$KuW?0V*1~&0HExGZUW5D11+AP&>C+3H>kJn z`c&CW#i`X8!`LIj*NP7s%vrhz2zPKdfZTbwhoFRO3SBoGoy;PIh`hzczt*)?LOfHc z0>UD)jyGn<17|Ns-QxXJQcf)lg|TtGOpr^i1q0I7R+3T}4&dx1P9J(pACyRq z=Moeo`;Lh!-rE02eHO`B!?|eHJ^u)U^FT2UD6xXUYJv2Gg3EDAqUeQEVkdvVY8s^^5p1pih=W;v=P>0q|+hJ*dE60LTu>845O^} zSa4ADfK3$wN^u(_nWZ3g#nb7;9}&@&vwEwY4Hs9=T_v2&hgKxTpdK}ST}wL+Hd77@ zUT!@(xX%vuSFR1<%PTb!;my|mAH1ylZ!@j|ZiPSVD9Z!sT3^5P>WL7GT#^l7Zlav; zoSNswv-yp;v0L*CbfD~<`*aj8my5%dmJRuOxBHAvhg73goPbn8<%unc}#!1HAA(3M&Cm5xcV~7*4#I)gjtJ}?$M2i_*__AxHR>RZ@!QE6+ zV}kTgPdpt`Qx}liyGl?O`dLU`+g^W{=us{d5?&zhTa{u{G7-dwHK&tN;+Kcz9==`* zriN~vxH=x>R9*F&wppAO%@@6FBCKP(UEl;k(=|i!p@4^}L`p1#g_I^<1nUv7; zjb%a^hfRT;CC1Jx!79g^8)FhLFcAdNn5WLNnoI`0W}j>d#Mn4xYm=g)MG{YXGYYVj z$V$^vWNwY6V3)MW9|<9UtY>srv{vFThcy|9%*&v{X;V_KDnM%wfR1pj>HkG-n*whb z;o0H9qY(J!HV?_5+RD~1-ncheKGc-Q%&!F4bZ;!NpO!KnM6kTUS9|v-Am1|Q$Sl%2 z|1gk-r36uY(5rP|A`tLG6F9j!JtzB-Sud- zu!uU_%yDT2k&d1eS*aJmv7^VJ4VPp01lJzm)OB#C?0yXe8OT0t(;^Gx5QCgifi}a3wW-4T#WE zGfpb{t2-Ng;*SQKD%2C}YftbvCs~J8kwpAK@6BfVk5Kmw&^GJGFGD}8 zA2uMZdM%fs(XETfukaO+~TD!}+E?%f7b9IB2ekea3lpt2MpS-Mn_YalD)(?0Ks@-{cuwZm%vm&vJckE_q2R z*Fo~CR7vwd+O7vuAY*yGWiqGv;l0ODU-D|;D^5wGYwP42#(1FYfIdNMQ;P2zB1ye0 zeGt~NE>rk$#aN~kR`FW1o%AgqJ*y=RtpCv-uAZ1QmDuimq$V7wvh)dR$F1)~{#}_$ zO>Kw*zGl1Ta@#P9s_IZanBwt}VeBbu$QdI)-G%1Ao<)>(_&eS>rMqoW&SP1}!^7!<=)*1TzSqm2Bd=6ysT5sY2 z4B--?;YL&>A%-HLi~5=8AAI3ujo=&7c@G@qijM(CEa+uEWDBK8p&O=Oe7|9N@NqBa zb2jtdR01USGJC9-w~CVr(u~1}{RNI$lmQxWGPN90jn-W?=A!+St`rP(W5d-bWEWvo z!(7_uZh7wPh`?j*yJ4qBhZ(C3%yhK$melX==wt^&jcIM6Tx$k_9XDzhs{_9mE!bzQ 
z<{GanLV|xXso8H>mzIQ4bBP`iT^zXOwrHF7yksO+H9`%zm!zh~UY#gd!4!mJ<$>f9 z)`6lmD`BT0ZI|_#Gk^iZ$BN#e?hf0;v)ofK^7=JijPF`b0m?dBP^qU|E8}9p)rPD& zIB0?Q5k(!`%%*AUK0jwV=;Jr=IA9hA#&q8l0I%AnqWn<7OnF<@>(Fusa@>|4;3vtE#MB;qTxH5=8*F5#$iFd0o+nJf) z{pX2W$mJsa1Sl$Z z_6QNyOiIz77ajfI;O8H#Zk}hPx#GifX4a>c-+nKb3(l^a|U zk0eieHUeXwtOGoGIM8~M65wT-$=}g+jXqv1{o8%P9izG~`_jysW`n4PVX}98+w%0n z-n0*dZVNtEzUCS?(2cffZ!STP3}U^=kKE~E8Ga0Vs+L;4dvGkrDr2NA=7bKlHkGI_uI)Sg zFbQ9&a3bx*^5S5@iVmGExE7Hdvep*L&A8L)(<(hjCIVeM&zOlE(0#2#aDwZDVG*6y zyMc|oQ5eI)s$2>**shSQ@c7qll=4GG22m1qK-1+RnhQ*H@blgr-MS(?<1s~7Ih@cR zDvCrc`#_QUzTh@$h(YNFcBU#i#m+b)j}$c5NHyH6ujQ8^|HEHJ{3`VsxDC z*^%-dy}uB$Oi>G~hL~3yW!OVQ-!e87&RKX)Ps^BK!E6&_evn`62b3%T|T8gn~Fe2%gCp{c*KXVbvRox=y-Fssj& zk;q$4@L>dX>-@|c!1P4o`{bjH|J{tM0sfni39oY|uxn}2?;Q0jPO8O73gl^^N@tHk zf$7?CF=I3_(!}{8GT>#K0eLXOXeKd@qkO|kNe$(B6+JmzVQ|fan?YAo_H2 zRd;`OcUQI3&sbWq9{S9hv57T(ku~kclucUaXYl^mJ7Ti{lBT`2wp*I4m$_Mlr(agJ z>8+;T^k@2_3mbZ!^ta6R*j;1>w{kTNo!|6thfF;XJM_$V6|d<4>ZDzx5=RZQynD?B zz^)N^-hkl?qgaYyvP8ph8mr+ut*g(_^&ZU6#rF=>+m0-0&^)gWbkCWZo`7ci{ftuT z3V`J|jyrP}P50>3?oQ%+MeG=={A0f=i)>~g!Xu1+^$GAahICN+);CVHJ5ZpW0GSDR z(Rl4N1T~Qg9Qn-_Q>{i(0J$cs4ojaUaQ^^1*E5M^=P}#(n`HdkG~IlXOqA2xBh8DP z;Z9`zm`-B^MYrb*ibypqkk1b#F^^6)Ho&^dj05_sHs)WeV7>*ecUv0o+dr zAND;aI>89@Iy%BJ{L!g4pxsQ*J^LqCrYzR)vux3l6g?6Kontque-b3yIkfrd;iEh`*xFvRjAf?s4u3iZkd{XOXY-voLD3O)_rn>aKMEEscHtkZI0}k zz7hlF+X6j`xPKb>xvu1Wde?2g{*B7ZT_o)JNIs~(4bCfnls@R;m3IxS>8}o|L}Aod z;(fgdU0XBV*y^Sof(EICYP?#;ILBL-@O9Bos>-!bA73^yL%;A0@8^MVXWq85gZfDi z?AHeL^PKCo9El-6seLB`^bXeL z|5cUrI40cZ5;*wafL`h6M6e$4O8L)Be|Yq|&fhaNet}#7Cm_9bau}gW_Jy0?PW`@-;rLHQFJpr8rtac>E%e`|C`Xq5*_`0fc zlhm8@OtzQ@Hn^8+suX#rrVM8BkyyrX`&=ak zf4G~n4sAEJ(EZ==q+hlHuXbQSFON*(Kt&1G5l`J3cNHAm?Z*&qjFq88W@q?Gh-~i$ zXk0ZB#+Fhq+RZ8C58Fx>dIp)tGNuLV7bdC-PNXeR%2Q1R^fIj_8h`PO-Se+x%T^1I zut?S?Wd`JiMz7w#3hK`I-0Uj}_3p-zg6p46dUtNJ=f&5>zuNiGQU>e8M&=mhvCyX7 z7SGdA-%Q}e@9lC^NOW6C)&s{!15;_gvyvSQrtdZ_qOeORRdPhSdwU%*_9It>q!?_Y zT6FDA7fnBrEJmxa5nfc!xJtY5br-`uI3Gjf6VCK#H&Dyqggg#eLJWwghlnv!<`-Tz 
zw96O$3IAP1s}A(v=H@I5t-G?c$)RZqA;Fa?54o1OHiixopA0~Ag_To7A(oRLf7>w63Cj?r5~dH*SS zDr*O$VIgV9V?mbP*6F&`IDYA$dv}cXH=XAYW*^`BQmg}X^;8>W%?|;mr?cOWc}^l< z2%MiP59=c}AGqe;laREpkh>uuN9qGd&C>m6#KBFXYPwJdykEb|aq!2-HjBv!wZt|G z&1;NpM&n7&f}TBvEB{m@HP;MSV3Rb#XBTENl6lzU#11y&zL_tLb5 zdE<1uS6mU_x?@-rXc4QL~=LK6@ff%;NF3sKF+DgJiyFIHVG3>eyu>cLsd9lkC_ zu)*y@{3^P4jNr)G?3?lX*zW^XkOl=q1Nr|q$^F+S7X%c9=s)4#_^&N<|KAk<%}w*4 gga4ZXM-&9)zj|vFq#>dHvkmOu4E+~L-Tt%sAO1`ZQUCw| literal 0 HcmV?d00001