|
|
@@ -35,10 +35,9 @@ namespace { |
|
|
const int kAllInputAddrIsAtomic = -1; |
|
|
const int kAllInputAddrIsAtomic = -1; |
|
|
const int kVirtualInputNodeMemoryReuse = 0; |
|
|
const int kVirtualInputNodeMemoryReuse = 0; |
|
|
const int kVirtualOutputNodeMemoryReuse = 1; |
|
|
const int kVirtualOutputNodeMemoryReuse = 1; |
|
|
const size_t kVirtualInputNodeOutputSize = 1; |
|
|
|
|
|
const size_t kVirtualOutputNodeInputSize = 1; |
|
|
|
|
|
const size_t kVirtualNodeDataIndex = 0; |
|
|
|
|
|
const char *const kMbatchNodeNameFlag = "_ascend_mbatch_batch_"; |
|
|
|
|
|
|
|
|
// One state per bit cannot be repeated |
|
|
|
|
|
enum ContinuousType { kNotContinuous = 0, kInput = 1, kInputNoPadding = 2, kOutput = 4, kOutputNoPadding = 8 }; |
|
|
|
|
|
|
|
|
int64_t GetSymbolOutputOffset(const std::map<std::string, std::string> &anchor_to_symbol, |
|
|
int64_t GetSymbolOutputOffset(const std::map<std::string, std::string> &anchor_to_symbol, |
|
|
const std::map<std::string, std::list<ge::NodeIndexIO>> &symbol_to_anchors, |
|
|
const std::map<std::string, std::list<ge::NodeIndexIO>> &symbol_to_anchors, |
|
|
const ge::NodePtr &node, const uint32_t i) { |
|
|
const ge::NodePtr &node, const uint32_t i) { |
|
|
@@ -136,7 +135,7 @@ ge::Status GraphMemoryAssigner::AssignVarAttr2Nodes() { |
|
|
return ge::SUCCESS; |
|
|
return ge::SUCCESS; |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
ge::Status GraphMemoryAssigner::CalculateTensorRealSizeAndOutSize(const ge::ConstGeTensorDescPtr &output_desc, |
|
|
|
|
|
|
|
|
ge::Status CalculateTensorRealSizeAndOutSize(const ge::ConstGeTensorDescPtr &output_desc, |
|
|
int64_t dim_index, int64_t &output_mem_size, |
|
|
int64_t dim_index, int64_t &output_mem_size, |
|
|
int64_t &batch_dim_num, int64_t &out_size) { |
|
|
int64_t &batch_dim_num, int64_t &out_size) { |
|
|
graphStatus graph_status = ge::TensorUtils::GetSize(*output_desc, out_size); |
|
|
graphStatus graph_status = ge::TensorUtils::GetSize(*output_desc, out_size); |
|
|
@@ -181,68 +180,6 @@ ge::Status GraphMemoryAssigner::CalculateTensorRealSizeAndOutSize(const ge::Cons |
|
|
return SUCCESS; |
|
|
return SUCCESS; |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
// Finds, among multiple batch versions of the same virtual node, the batch label
// (ATTR_NAME_BATCH_LABEL) of the node whose shape has the largest batch dimension.
//
// @param mem_reuse_virtual_nodes_map  groups of virtual nodes; each value holds the
//        same logical node for every batch. Every group shares the same max-batch
//        label, so only the first group needs to be inspected (hence the final break).
// @param mem_reuse_model  kVirtualInputNodeMemoryReuse (compare output 0) or
//        kVirtualOutputNodeMemoryReuse (compare input 0); any other value fails.
// @param max_batch_label  out: batch label of the max-shape node.
// @return SUCCESS, or FAILED on an invalid reuse model / inconsistent shapes.
Status GraphMemoryAssigner::GetMaxBatchLabel(const map<string, vector<NodePtr>> &mem_reuse_virtual_nodes_map,
                                             int32_t mem_reuse_model, string &max_batch_label) {
  for (const auto &i_map : mem_reuse_virtual_nodes_map) {
    // Const reference instead of the previous by-value copy of the node list.
    const vector<NodePtr> &virtual_nodes_list = i_map.second;
    vector<int64_t> max_shape_dims;
    size_t max_batch_dim = 0;
    bool max_batch_dim_find = false;
    for (size_t i = 0; i < virtual_nodes_list.size(); ++i) {
      GE_CHECK_NOTNULL(virtual_nodes_list[i]);
      OpDescPtr op_desc = virtual_nodes_list[i]->GetOpDesc();
      GE_CHECK_NOTNULL(op_desc);

      // Pick the tensor whose shape identifies the batch: output 0 for virtual
      // input nodes, input 0 for virtual output nodes.
      ge::ConstGeTensorDescPtr input_output_desc;
      if (mem_reuse_model == kVirtualInputNodeMemoryReuse) {
        input_output_desc = op_desc->GetOutputDescPtr(kVirtualNodeDataIndex);
      } else if (mem_reuse_model == kVirtualOutputNodeMemoryReuse) {
        input_output_desc = op_desc->GetInputDescPtr(kVirtualNodeDataIndex);
      } else {
        std::string error = "Invalid parameter memory reuse model, which is " + FmtToStr(mem_reuse_model);
        GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
        return FAILED;
      }
      GE_CHECK_NOTNULL(input_output_desc);

      if (i == 0) {
        // All ops must have ATTR_NAME_BATCH_LABEL, no need to check return value.
        (void) ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, max_batch_label);
        max_shape_dims = input_output_desc->GetShape().GetDims();
      } else {
        vector<int64_t> current_shape_dims = input_output_desc->GetShape().GetDims();
        if (current_shape_dims.size() != max_shape_dims.size()) {
          std::string error = "The shape of several nodes between multiple batches does not match.";
          GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
          return FAILED;
        }
        for (size_t j = 0; j < current_shape_dims.size(); ++j) {
          if (current_shape_dims[j] == max_shape_dims[j]) {
            continue;
          }
          // Batches may differ in exactly one dimension; a second differing
          // dimension means the shapes are inconsistent.
          if (max_batch_dim_find && max_batch_dim != j) {
            std::string error = "The shape of several nodes between multiple batches does not match.";
            GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
            return FAILED;
          }
          max_batch_dim_find = true;
          max_batch_dim = j;
          if (current_shape_dims[j] > max_shape_dims[j]) {
            max_shape_dims[j] = current_shape_dims[j];
            // All ops must have ATTR_NAME_BATCH_LABEL, no need to check return value.
            (void) ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, max_batch_label);
          }
          // Only compare the first different dim in shape.
          break;
        }
      }
    }
    // In every element of virtual_input_nodes_map, the label of the max batch node is the same.
    break;
  }
  return SUCCESS;
}
|
|
|
|
|
|
|
|
|
|
|
Status GraphMemoryAssigner::ReAssignMemory(bool is_loop_graph, map<int64_t, size_t> &mem_type_to_offset) { |
|
|
Status GraphMemoryAssigner::ReAssignMemory(bool is_loop_graph, map<int64_t, size_t> &mem_type_to_offset) { |
|
|
if (memory_offset_.empty()) { |
|
|
if (memory_offset_.empty()) { |
|
|
GELOGE(FAILED, "memory_offset_ is empty."); |
|
|
GELOGE(FAILED, "memory_offset_ is empty."); |
|
|
@@ -250,13 +187,6 @@ Status GraphMemoryAssigner::ReAssignMemory(bool is_loop_graph, map<int64_t, size |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
GE_CHK_STATUS_RET(ReAssignContinuousMemory(is_loop_graph), "ReAssignContinuousMemory Failed!"); |
|
|
GE_CHK_STATUS_RET(ReAssignContinuousMemory(is_loop_graph), "ReAssignContinuousMemory Failed!"); |
|
|
|
|
|
|
|
|
GE_CHK_STATUS_RET(ReAssignReuseAndNoPaddingContinuousInputMemory(), |
|
|
|
|
|
"ReAssignReuseAndNoPaddingContinuousInputMemory Failed!"); |
|
|
|
|
|
|
|
|
|
|
|
GE_CHK_STATUS_RET(ReAssignReuseAndNoPaddingContinuousOutputMemory(), |
|
|
|
|
|
"ReAssignReuseAndNoPaddingContinuousOutputMemory Failed!"); |
|
|
|
|
|
|
|
|
|
|
|
GE_CHK_STATUS_RET(ReAssignAtomicMemory(is_loop_graph), "ReAssignAtomicMemory Failed!"); |
|
|
GE_CHK_STATUS_RET(ReAssignAtomicMemory(is_loop_graph), "ReAssignAtomicMemory Failed!"); |
|
|
|
|
|
|
|
|
size_t total_mem_offset = 0; |
|
|
size_t total_mem_offset = 0; |
|
|
@@ -313,22 +243,133 @@ Status GraphMemoryAssigner::AssignZeroCopyMemory(map<int64_t, size_t> &mem_offse |
|
|
return SUCCESS; |
|
|
return SUCCESS; |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
uint32_t GetContinuousMemoryType(const OpDescPtr &op_desc) { |
|
|
|
|
|
if(op_desc == nullptr) { |
|
|
|
|
|
return kNotContinuous; |
|
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
|
|
bool is_continuous = false; |
|
|
|
|
|
uint32_t continuous_type = kNotContinuous; |
|
|
|
|
|
// If GetBool fail, is_continuous is false. |
|
|
|
|
|
(void) ge::AttrUtils::GetBool(op_desc, ATTR_NAME_CONTINUOUS_INPUT, is_continuous); |
|
|
|
|
|
if (is_continuous) { |
|
|
|
|
|
continuous_type |= kInput; |
|
|
|
|
|
} else { |
|
|
|
|
|
(void) ge::AttrUtils::GetBool(op_desc, ATTR_NAME_NOPADDING_CONTINUOUS_INPUT, is_continuous); |
|
|
|
|
|
if(is_continuous) { |
|
|
|
|
|
bool attr_reuse = false; |
|
|
|
|
|
(void) ge::AttrUtils::GetBool(op_desc, ATTR_NAME_OUTPUT_REUSE_INPUT, attr_reuse); |
|
|
|
|
|
if (attr_reuse) { |
|
|
|
|
|
continuous_type |= kInputNoPadding; |
|
|
|
|
|
} |
|
|
|
|
|
} |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
is_continuous = false; |
|
|
|
|
|
(void) ge::AttrUtils::GetBool(op_desc, ATTR_NAME_CONTINUOUS_OUTPUT, is_continuous); |
|
|
|
|
|
if (is_continuous) { |
|
|
|
|
|
continuous_type |= kOutput; |
|
|
|
|
|
} else { |
|
|
|
|
|
(void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_NOPADDING_CONTINUOUS_OUTPUT, is_continuous); |
|
|
|
|
|
if (is_continuous) { |
|
|
|
|
|
bool attr_reuse = false; |
|
|
|
|
|
(void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_OUTPUT_REUSE_INPUT, attr_reuse); |
|
|
|
|
|
if (attr_reuse) { |
|
|
|
|
|
continuous_type |= kOutputNoPadding; |
|
|
|
|
|
} |
|
|
|
|
|
} |
|
|
|
|
|
} |
|
|
|
|
|
return continuous_type; |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
// Computes the memory footprint of one output tensor of op_desc.
// For no-padding continuous memory (kInputNoPadding/kOutputNoPadding set in
// continuous_type) the real per-piece size is returned in nopadding_size and
// the complete-data size in tensor_size; otherwise tensor_size is the plain
// tensor size and nopadding_size stays 0. Fails on null inputs, a missing
// ATTR_NAME_REUSE_INPUT_ON_DIM_INDEX attribute, or negative computed sizes.
Status GetMemorySize(const OpDescPtr &op_desc, const ge::ConstGeTensorDescPtr &output_desc, uint32_t continuous_type,
                     int64_t &tensor_size, int64_t &nopadding_size) {
  if ((op_desc == nullptr) || (output_desc == nullptr)) {
    GELOGE(FAILED, "Input para is nullptr.");
    return FAILED;
  }
  tensor_size = 0;
  nopadding_size = 0;

  const bool is_nopadding = ((continuous_type & kInputNoPadding) != 0) || ((continuous_type & kOutputNoPadding) != 0);
  if (!is_nopadding) {
    // Plain continuous memory: the tensor's declared size is enough.
    if (ge::TensorUtils::GetSize(*output_desc, tensor_size) != ge::SUCCESS) {
      GELOGE(FAILED, "GetSize failed.");
      return FAILED;
    }
  } else {
    int64_t attr_dim_index;
    if (!ge::AttrUtils::GetInt(op_desc, ATTR_NAME_REUSE_INPUT_ON_DIM_INDEX, attr_dim_index)) {
      GELOGE(FAILED, "Get attr _reuse_input_on_dim_index failed.");
      return FAILED;
    }
    // Calculate tensor real size of each piece of data and out size of complete data
    int64_t batch_dim_num = 1;
    if (CalculateTensorRealSizeAndOutSize(output_desc, attr_dim_index, nopadding_size, batch_dim_num, tensor_size) !=
        SUCCESS) {
      GELOGE(FAILED, "CalculateTensorRealSizeAndOutSize failed for node %s.", op_desc->GetName().c_str());
      return FAILED;
    }
  }

  if ((tensor_size < 0) || (nopadding_size < 0)) {
    GELOGE(FAILED, "GetMemorySize for node %s failed.", op_desc->GetName().c_str());
    return FAILED;
  }
  return SUCCESS;
}
|
|
|
|
|
|
|
|
|
|
|
// Rounds mem_align_size up in place to the next multiple of MEM_ALIGN_SIZE.
// Non-positive values are left untouched.
void AlignMemOffset(int64_t &mem_align_size) {
  if (mem_align_size > 0) {
    mem_align_size = ((mem_align_size + MEM_ALIGN_SIZE - 1) / MEM_ALIGN_SIZE) * MEM_ALIGN_SIZE;
  }
}
|
|
|
|
|
|
|
|
|
|
|
// Checks whether assigning continuous input memory for `node` conflicts with
// the peer op producing one of its inputs. Returns true (after logging an
// error) when the peer either requires continuous output across multiple
// outputs, or is a reference op; returns false when the pair is supported.
// Fixed: the reference-op branch previously emitted a copy-pasted message
// claiming the peer "requires continuous output", which is wrong for that
// branch; it now reports the reference conflict.
bool IsContinuousInputConflict(const ge::NodePtr &node, const OpDescPtr &peer_op_desc) {
  bool is_peer_output_continuous = false;
  // If GetBool fail, is_peer_output_continuous is false.
  (void) ge::AttrUtils::GetBool(peer_op_desc, ATTR_NAME_CONTINUOUS_OUTPUT, is_peer_output_continuous);

  // Get peer node output size, if size == 1(peer node has only one output), continuous input of the node and
  // continuous output of the previous node is the same, we can support it. If size != 1, there may be
  // conflict between the two, we can not support it.
  auto peer_output_size = peer_op_desc->GetOutputsSize();
  GE_IF_BOOL_EXEC(is_peer_output_continuous && (peer_output_size != 1),
                  std::string error = "Current op" + FmtToStr(node->GetOpDesc()->GetName()) +
                      " requires continuous input, while the previous op" + FmtToStr(peer_op_desc->GetName()) +
                      " requires continuous output. There may be conflict between the two." +
                      "This node is not supported now.";
                  GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
                  return true;);

  bool is_peer_reference = false;
  // If GetBool fail, is_peer_reference is false.
  (void) AttrUtils::GetBool(peer_op_desc, ATTR_NAME_REFERENCE, is_peer_reference);
  GE_IF_BOOL_EXEC(is_peer_reference,
                  std::string error = "Current op" + FmtToStr(node->GetOpDesc()->GetName()) +
                      " requires continuous input, while the previous op" + FmtToStr(peer_op_desc->GetName()) +
                      " is a reference op. There may be conflict between the two." +
                      "This node is not supported now.";
                  GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
                  return true;);
  return false;
}
|
|
|
|
|
|
|
|
Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) { |
|
|
Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) { |
|
|
Status ret; |
|
|
Status ret; |
|
|
for (auto &node : compute_graph_->GetAllNodes()) { |
|
|
for (auto &node : compute_graph_->GetAllNodes()) { |
|
|
// Get the continuous input type of the node, default is false |
|
|
|
|
|
bool is_input_continuous = false; |
|
|
|
|
|
GE_CHECK_NOTNULL(node->GetOpDesc()); |
|
|
|
|
|
// If GetBool fail, is_input_continuous is false. |
|
|
|
|
|
(void) ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous); |
|
|
|
|
|
|
|
|
GE_CHECK_NOTNULL(node); |
|
|
|
|
|
auto continuous_type = GetContinuousMemoryType(node->GetOpDesc()); |
|
|
|
|
|
|
|
|
// Assign continuous input memory |
|
|
// Assign continuous input memory |
|
|
if (is_input_continuous) { |
|
|
|
|
|
|
|
|
bool is_continuous_input = ((continuous_type & kInput) != 0) || ((continuous_type & kInputNoPadding) != 0); |
|
|
|
|
|
if (is_continuous_input) { |
|
|
int64_t memory_type = RT_MEMORY_HBM; |
|
|
int64_t memory_type = RT_MEMORY_HBM; |
|
|
GE_CHK_STATUS_RET(GetNodeMemoryType(node, memory_type, "input"), "Get node memory type failed."); |
|
|
GE_CHK_STATUS_RET(GetNodeMemoryType(node, memory_type, "input"), "Get node memory type failed."); |
|
|
int64_t mem_clean_start = 0; |
|
|
int64_t mem_clean_start = 0; |
|
|
int64_t mem_clean_size = 0; |
|
|
int64_t mem_clean_size = 0; |
|
|
ret = AssignContinuousInputMemory(node, mem_clean_start, mem_clean_size, memory_type); |
|
|
|
|
|
|
|
|
ret = AssignContinuousInputMemory(node, mem_clean_start, mem_clean_size, memory_type, continuous_type); |
|
|
if (ret != ge::SUCCESS) { |
|
|
if (ret != ge::SUCCESS) { |
|
|
GELOGE(ret, "Assign continuous input memory failed!"); |
|
|
GELOGE(ret, "Assign continuous input memory failed!"); |
|
|
return ret; |
|
|
return ret; |
|
|
@@ -338,7 +379,6 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) { |
|
|
vector<int32_t> input_indexes; |
|
|
vector<int32_t> input_indexes; |
|
|
// If GetListInt fail, input_indexes is empty. |
|
|
// If GetListInt fail, input_indexes is empty. |
|
|
(void) ge::AttrUtils::GetListInt(node->GetOpDesc(), ATOMIC_ATTR_INPUT_INDEX, input_indexes); |
|
|
(void) ge::AttrUtils::GetListInt(node->GetOpDesc(), ATOMIC_ATTR_INPUT_INDEX, input_indexes); |
|
|
|
|
|
|
|
|
if (!input_indexes.empty() && input_indexes[0] == kAllInputAddrIsAtomic) { |
|
|
if (!input_indexes.empty() && input_indexes[0] == kAllInputAddrIsAtomic) { |
|
|
// check whether there is an atomic conflict between the current node and the peer out node |
|
|
// check whether there is an atomic conflict between the current node and the peer out node |
|
|
if (!CheckInputIsSupportAtomic(node)) { |
|
|
if (!CheckInputIsSupportAtomic(node)) { |
|
|
@@ -350,6 +390,7 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) { |
|
|
const auto &in_control_anchor = node->GetInControlAnchor(); |
|
|
const auto &in_control_anchor = node->GetInControlAnchor(); |
|
|
GE_CHECK_NOTNULL(in_control_anchor); |
|
|
GE_CHECK_NOTNULL(in_control_anchor); |
|
|
for (const auto &peer_out_control_anchor : in_control_anchor->GetPeerOutControlAnchors()) { |
|
|
for (const auto &peer_out_control_anchor : in_control_anchor->GetPeerOutControlAnchors()) { |
|
|
|
|
|
GE_CHECK_NOTNULL(peer_out_control_anchor); |
|
|
auto peer_out_node = peer_out_control_anchor->GetOwnerNode(); |
|
|
auto peer_out_node = peer_out_control_anchor->GetOwnerNode(); |
|
|
if (peer_out_node->GetType() == ATOMICADDRCLEAN) { |
|
|
if (peer_out_node->GetType() == ATOMICADDRCLEAN) { |
|
|
ret = SetAtomicCleanAttr(peer_out_node, {mem_clean_start}, {mem_clean_size}); |
|
|
ret = SetAtomicCleanAttr(peer_out_node, {mem_clean_start}, {mem_clean_size}); |
|
|
@@ -362,23 +403,12 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) { |
|
|
} |
|
|
} |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
// Get the reference type of the node, default is false |
|
|
|
|
|
bool is_ref = false; |
|
|
|
|
|
// If GetBool fail, is_ref is false. |
|
|
|
|
|
(void) ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_REFERENCE, is_ref); |
|
|
|
|
|
|
|
|
|
|
|
// Get the continuous output type of the node, default is false |
|
|
|
|
|
bool is_output_continuous = false; |
|
|
|
|
|
// If GetBool fail, is_output_continuous is false. |
|
|
|
|
|
(void) ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_CONTINUOUS_OUTPUT, is_output_continuous); |
|
|
|
|
|
|
|
|
|
|
|
// If the output is ref type and refers to the ref of an input, the name of the output |
|
|
|
|
|
// and the input are the same. Ge encounters ref type, finds matching relationship according |
|
|
|
|
|
// to the names of input and output, and allocates the same memory address, eg: HCOMBroadcast |
|
|
|
|
|
if (!is_ref && is_output_continuous) { // Assign continuous output memory |
|
|
|
|
|
ret = AssignContinuousOutputMemory(node); |
|
|
|
|
|
|
|
|
// Assign continuous output memory |
|
|
|
|
|
bool is_continuous_output = ((continuous_type & kOutput) != 0) || ((continuous_type & kOutputNoPadding) != 0); |
|
|
|
|
|
if (is_continuous_output) { |
|
|
|
|
|
ret = AssignContinuousOutputMemory(node, continuous_type); |
|
|
if (ret != ge::SUCCESS) { |
|
|
if (ret != ge::SUCCESS) { |
|
|
GELOGE(ret, "Assign reference memory failed!"); |
|
|
|
|
|
|
|
|
GELOGE(ret, "Assign continuous output memory failed!"); |
|
|
return ret; |
|
|
return ret; |
|
|
} |
|
|
} |
|
|
} |
|
|
} |
|
|
@@ -391,94 +421,39 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) { |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node, int64_t &continuous_mem_start, |
|
|
Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node, int64_t &continuous_mem_start, |
|
|
int64_t &continuous_mem_size, int64_t memory_type) { |
|
|
|
|
|
|
|
|
int64_t &continuous_mem_size, int64_t memory_type, uint32_t continuous_type) { |
|
|
GELOGI("Current node %s needs continuous input.", node->GetName().c_str()); |
|
|
GELOGI("Current node %s needs continuous input.", node->GetName().c_str()); |
|
|
bool continuous_input_alloc = false; |
|
|
|
|
|
(void) ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_CONTINUOUS_INPUT_ALLOC, continuous_input_alloc); |
|
|
|
|
|
auto iter = memory_offset_.find(memory_type); |
|
|
auto iter = memory_offset_.find(memory_type); |
|
|
if (iter == memory_offset_.end()) { |
|
|
if (iter == memory_offset_.end()) { |
|
|
std::string error = "Memory offset does not have memory type" + FmtToStr(memory_type); |
|
|
std::string error = "Memory offset does not have memory type" + FmtToStr(memory_type); |
|
|
GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); |
|
|
GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); |
|
|
return FAILED; |
|
|
return FAILED; |
|
|
} |
|
|
} |
|
|
|
|
|
// The head and tail of hcom continuous input should be added 512 |
|
|
|
|
|
iter->second.mem_offset_ += MEM_ALIGN_SIZE; |
|
|
continuous_mem_start = iter->second.mem_offset_; |
|
|
continuous_mem_start = iter->second.mem_offset_; |
|
|
|
|
|
int64_t mem_offset = iter->second.mem_offset_; |
|
|
|
|
|
int64_t extra_memory_size = 0; |
|
|
|
|
|
bool is_continuous_input_allocated = false; |
|
|
|
|
|
(void) ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_CONTINUOUS_INPUT_ALLOC, is_continuous_input_allocated); |
|
|
for (auto &in_data_anchor : node->GetAllInDataAnchors()) { |
|
|
for (auto &in_data_anchor : node->GetAllInDataAnchors()) { |
|
|
|
|
|
GE_IF_BOOL_EXEC(in_data_anchor == nullptr, continue); |
|
|
auto peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor(); |
|
|
auto peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor(); |
|
|
GE_IF_BOOL_EXEC(peer_out_data_anchor == nullptr, continue); |
|
|
GE_IF_BOOL_EXEC(peer_out_data_anchor == nullptr, continue); |
|
|
|
|
|
|
|
|
auto peer_op_desc = peer_out_data_anchor->GetOwnerNode()->GetOpDesc(); |
|
|
auto peer_op_desc = peer_out_data_anchor->GetOwnerNode()->GetOpDesc(); |
|
|
GE_IF_BOOL_EXEC(peer_op_desc == nullptr, continue); |
|
|
GE_IF_BOOL_EXEC(peer_op_desc == nullptr, continue); |
|
|
bool is_peer_output_continuous = false; |
|
|
|
|
|
// If GetBool fail, is_peer_output_continuous is false. |
|
|
|
|
|
(void) ge::AttrUtils::GetBool(peer_op_desc, ATTR_NAME_CONTINUOUS_OUTPUT, is_peer_output_continuous); |
|
|
|
|
|
|
|
|
|
|
|
// Get peer node output size, if size == 1(peer node has only one output), continuous input of the node and |
|
|
|
|
|
// continuous output of the previous node is the same, we can support it. If size != 1, there may be |
|
|
|
|
|
// conflict between the two, we can not support it. |
|
|
|
|
|
auto peer_output_size = peer_op_desc->GetOutputsSize(); |
|
|
|
|
|
GE_IF_BOOL_EXEC(is_peer_output_continuous && (peer_output_size != 1), |
|
|
|
|
|
std::string error = "Current op" + FmtToStr(node->GetOpDesc()->GetName()) + |
|
|
|
|
|
" requires continuous input, while the previous op" + FmtToStr(peer_op_desc->GetName()) + |
|
|
|
|
|
" requires continuous output. There may be conflict between the two." + |
|
|
|
|
|
"This node is not supported now."; |
|
|
|
|
|
GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); |
|
|
|
|
|
return PARAM_INVALID;); |
|
|
|
|
|
|
|
|
|
|
|
bool is_peer_reference = false; |
|
|
|
|
|
// If GetBool fail, is_peer_reference is false. |
|
|
|
|
|
(void) AttrUtils::GetBool(peer_op_desc, ATTR_NAME_REFERENCE, is_peer_reference); |
|
|
|
|
|
GE_IF_BOOL_EXEC(is_peer_reference, |
|
|
|
|
|
std::string error = "Current op" + FmtToStr(node->GetOpDesc()->GetName()) + |
|
|
|
|
|
" requires continuous input, while the previous op" + FmtToStr(peer_op_desc->GetName()) + |
|
|
|
|
|
" requires continuous output. There may be conflict between the two." + |
|
|
|
|
|
"This node is not supported now."; |
|
|
|
|
|
GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); |
|
|
|
|
|
return PARAM_INVALID;); |
|
|
|
|
|
|
|
|
|
|
|
vector<int64_t> output_list = peer_op_desc->GetOutputOffset(); |
|
|
|
|
|
std::vector<int64_t> offsets_for_fusion = {}; |
|
|
|
|
|
bool has_offset_attr = |
|
|
|
|
|
AttrUtils::GetListInt(peer_op_desc, ATTR_NAME_OUTPUT_OFFSET_FOR_BUFFER_FUSION, offsets_for_fusion); |
|
|
|
|
|
if (peer_out_data_anchor->GetIdx() < static_cast<int>(output_list.size())) { |
|
|
|
|
|
if (continuous_input_alloc && !has_offset_attr) { |
|
|
|
|
|
if (in_data_anchor->GetIdx() == 0) { |
|
|
|
|
|
continuous_mem_start = output_list.at(peer_out_data_anchor->GetIdx()); |
|
|
|
|
|
} |
|
|
|
|
|
// can not use else if, incase only one input |
|
|
|
|
|
if (in_data_anchor->GetIdx() == static_cast<int>(node->GetAllInDataAnchors().size()) - 1) { |
|
|
|
|
|
int64_t tensor_desc_size = 0; |
|
|
|
|
|
Status ret = ge::TensorUtils::GetSize(*(peer_op_desc->GetOutputDescPtr(peer_out_data_anchor->GetIdx())), |
|
|
|
|
|
tensor_desc_size); |
|
|
|
|
|
GE_IF_BOOL_EXEC(ret != ge::SUCCESS, GELOGE(FAILED, "GetSize failed."); return FAILED;); |
|
|
|
|
|
|
|
|
|
|
|
tensor_desc_size = (tensor_desc_size + MEM_ALIGN_SIZE - 1) / MEM_ALIGN_SIZE * MEM_ALIGN_SIZE; |
|
|
|
|
|
continuous_mem_size = |
|
|
|
|
|
output_list.at(peer_out_data_anchor->GetIdx()) - continuous_mem_start + tensor_desc_size + MEM_ALIGN_SIZE; |
|
|
|
|
|
} |
|
|
|
|
|
GELOGI( |
|
|
|
|
|
"[IMAS]Check Continuous input : Set %s name[%s] output[%d] offset to [%ld] stream_id[%ld] size[%u] " |
|
|
|
|
|
"real_size[%u].", |
|
|
|
|
|
node->GetOwnerComputeGraph()->GetName().c_str(), peer_op_desc->GetName().c_str(), |
|
|
|
|
|
peer_out_data_anchor->GetIdx(), output_list.at(peer_out_data_anchor->GetIdx()), peer_op_desc->GetStreamId(), |
|
|
|
|
|
0, 0); |
|
|
|
|
|
continue; |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
output_list.at(peer_out_data_anchor->GetIdx()) = iter->second.mem_offset_; |
|
|
|
|
|
} else { |
|
|
|
|
|
std::string error = "index" + FmtToStr(peer_out_data_anchor->GetIdx()) + " is out of range."; |
|
|
|
|
|
GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); |
|
|
|
|
|
GELOGE(FAILED, "index : %d is out of range.", peer_out_data_anchor->GetIdx()); |
|
|
|
|
|
return FAILED; |
|
|
|
|
|
} |
|
|
|
|
|
peer_op_desc->SetOutputOffset(output_list); |
|
|
|
|
|
size_t pre_mem_offset = iter->second.mem_offset_; |
|
|
|
|
|
|
|
|
GE_IF_BOOL_EXEC(IsContinuousInputConflict(node, peer_op_desc), return PARAM_INVALID;); |
|
|
|
|
|
|
|
|
int64_t tensor_desc_size = 0; |
|
|
int64_t tensor_desc_size = 0; |
|
|
if (has_offset_attr) { |
|
|
|
|
|
if (peer_out_data_anchor->GetIdx() < static_cast<int>(offsets_for_fusion.size())) { |
|
|
|
|
|
auto offset_for_fusion = offsets_for_fusion[peer_out_data_anchor->GetIdx()]; |
|
|
|
|
|
iter->second.mem_offset_ += offset_for_fusion; |
|
|
|
|
|
|
|
|
int64_t nopadding_size = 0; |
|
|
|
|
|
int64_t real_size = 0; |
|
|
|
|
|
std::vector<int64_t> offsets_of_fusion = {}; |
|
|
|
|
|
bool lx_fusion = AttrUtils::GetListInt(peer_op_desc, ATTR_NAME_OUTPUT_OFFSET_FOR_BUFFER_FUSION, offsets_of_fusion); |
|
|
|
|
|
lx_fusion = lx_fusion && !offsets_of_fusion.empty(); |
|
|
|
|
|
if (lx_fusion) { |
|
|
|
|
|
if (peer_out_data_anchor->GetIdx() < static_cast<int>(offsets_of_fusion.size())) { |
|
|
|
|
|
nopadding_size = offsets_of_fusion[peer_out_data_anchor->GetIdx()]; |
|
|
|
|
|
tensor_desc_size = nopadding_size; |
|
|
} else { |
|
|
} else { |
|
|
std::string error = "fusion: peer node" + FmtToStr(peer_op_desc->GetName()) + |
|
|
std::string error = "fusion: peer node" + FmtToStr(peer_op_desc->GetName()) + |
|
|
" index" + FmtToStr(peer_out_data_anchor->GetIdx()) + " is out of range."; |
|
|
" index" + FmtToStr(peer_out_data_anchor->GetIdx()) + " is out of range."; |
|
|
@@ -486,425 +461,140 @@ Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node, |
|
|
return FAILED; |
|
|
return FAILED; |
|
|
} |
|
|
} |
|
|
} else { |
|
|
} else { |
|
|
Status ret = |
|
|
|
|
|
TensorUtils::GetSize(*(peer_op_desc->GetOutputDescPtr(peer_out_data_anchor->GetIdx())), tensor_desc_size); |
|
|
|
|
|
GE_IF_BOOL_EXEC(ret != ge::SUCCESS, GELOGE(FAILED, "GetSize failed."); return FAILED;); |
|
|
|
|
|
|
|
|
|
|
|
iter->second.mem_offset_ += tensor_desc_size; |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
// If set tensor_actual_size, Memory alignment is not required. |
|
|
|
|
|
int32_t is_tensor_actual_size = 0; |
|
|
|
|
|
ge::AttrUtils::GetInt(peer_op_desc, ATTR_NAME_GET_TENSOR_ACTUAL_SIZE, is_tensor_actual_size); |
|
|
|
|
|
if (is_tensor_actual_size == 0) { |
|
|
|
|
|
AlignMemOffset(MEM_ALIGN_SIZE, memory_type); |
|
|
|
|
|
|
|
|
if (GetMemorySize(node->GetOpDesc(), peer_op_desc->GetOutputDescPtr(peer_out_data_anchor->GetIdx()), |
|
|
|
|
|
continuous_type, tensor_desc_size, nopadding_size) != ge::SUCCESS) { |
|
|
|
|
|
return FAILED; |
|
|
|
|
|
} |
|
|
} |
|
|
} |
|
|
GELOGI( |
|
|
|
|
|
"[IMAS]Continuous input : Set %s name[%s] output[%d] offset to [%zu] stream_id[%ld] size[%zu] " |
|
|
|
|
|
"real_size[%ld].", node->GetOwnerComputeGraph()->GetName().c_str(), peer_op_desc->GetName().c_str(), |
|
|
|
|
|
peer_out_data_anchor->GetIdx(), pre_mem_offset, peer_op_desc->GetStreamId(), |
|
|
|
|
|
(iter->second.mem_offset_ - pre_mem_offset), tensor_desc_size); |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
iter->second.mem_offset_ += MEM_ALIGN_SIZE; |
|
|
|
|
|
if (!continuous_input_alloc) { |
|
|
|
|
|
continuous_mem_size = iter->second.mem_offset_ - continuous_mem_start; |
|
|
|
|
|
} |
|
|
|
|
|
return SUCCESS; |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
Status GraphMemoryAssigner::AssignContinuousOutputMemory(const ge::NodePtr &node) { |
|
|
|
|
|
GELOGI("Current node %s needs continuous output.", node->GetName().c_str()); |
|
|
|
|
|
auto out_op_desc = node->GetOpDesc(); |
|
|
|
|
|
GE_IF_BOOL_EXEC(out_op_desc == nullptr, GELOGE(ge::FAILED, "out_op_desc is null."); return ge::FAILED); |
|
|
|
|
|
vector<int64_t> output_list = out_op_desc->GetOutputOffset(); |
|
|
|
|
|
|
|
|
|
|
|
if ((out_op_desc->GetOutputsSize() > output_list.size()) || (output_list.size() == 0)) { |
|
|
|
|
|
GELOGE(ge::FAILED, "The size %zu of node output desc is more than output_list's size %zu.", |
|
|
|
|
|
out_op_desc->GetOutputsSize(), output_list.size()); |
|
|
|
|
|
return ge::FAILED; |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
size_t mem_offset = output_list[0]; |
|
|
|
|
|
for (auto &out_data_anchor : node->GetAllOutDataAnchors()) { |
|
|
|
|
|
output_list[out_data_anchor->GetIdx()] = mem_offset; |
|
|
|
|
|
int64_t tensor_desc_size = 0; |
|
|
|
|
|
if (ge::TensorUtils::GetSize(*(out_op_desc->GetOutputDescPtr(out_data_anchor->GetIdx())), tensor_desc_size) != |
|
|
|
|
|
ge::SUCCESS) { |
|
|
|
|
|
GELOGE(FAILED, "GetSize failed."); |
|
|
|
|
|
return FAILED; |
|
|
|
|
|
} |
|
|
|
|
|
mem_offset += tensor_desc_size; |
|
|
|
|
|
if (mem_offset <= 0) { |
|
|
|
|
|
|
|
|
bool is_nopadding = ((continuous_type & kInputNoPadding) != 0) || lx_fusion; |
|
|
|
|
|
vector<int64_t> output_list = peer_op_desc->GetOutputOffset(); |
|
|
|
|
|
if (peer_out_data_anchor->GetIdx() >= static_cast<int>(output_list.size())) { |
|
|
|
|
|
std::string error = "index" + FmtToStr(peer_out_data_anchor->GetIdx()) + " is out of range."; |
|
|
|
|
|
GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); |
|
|
return FAILED; |
|
|
return FAILED; |
|
|
} |
|
|
} |
|
|
mem_offset = (mem_offset + MEM_ALIGN_SIZE - 1) / MEM_ALIGN_SIZE * MEM_ALIGN_SIZE; |
|
|
|
|
|
GELOGI( |
|
|
|
|
|
"[IMAS]Continuous output : Set %s name[%s] output[%d] offset to [%zu] stream_id[%ld] size[%ld] " |
|
|
|
|
|
"real_size[%ld].", |
|
|
|
|
|
node->GetOwnerComputeGraph()->GetName().c_str(), out_op_desc->GetName().c_str(), out_data_anchor->GetIdx(), |
|
|
|
|
|
output_list[out_data_anchor->GetIdx()], out_op_desc->GetStreamId(), tensor_desc_size, tensor_desc_size); |
|
|
|
|
|
} |
|
|
|
|
|
out_op_desc->SetOutputOffset(output_list); |
|
|
|
|
|
return ge::SUCCESS; |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
Status GraphMemoryAssigner::ReAssignVirtualInputNodeMemory(NodePtr node, size_t &mem_offset_reuse) {
  // Reassign memory for a "virtual input" node: its single output tensor reuses
  // the memory of all of its input tensors, which are laid out continuously
  // starting at mem_offset_reuse. On success, mem_offset_reuse is advanced past
  // the region consumed by this node (data pieces plus extra/padding memory).
  OpDescPtr op_desc = node->GetOpDesc();
  vector<int64_t> output_list = op_desc->GetOutputOffset();
  if (output_list.empty()) {
    GELOGE(FAILED, "Outputoffset is empty node name:%s", node->GetName().c_str());
    return FAILED;
  }
  // The caller guarantees the virtual input node has exactly one output; its
  // offset is the start of the continuous reuse region.
  output_list.at(0) = mem_offset_reuse;
  op_desc->SetOutputOffset(output_list);
  GELOGI("Set virtual input node %s output offset to %zu.", op_desc->GetName().c_str(), mem_offset_reuse);

  // Dimension index along which the output reuses the inputs; the real size of
  // each input piece is computed relative to this dimension.
  int64_t attr_dim_index;
  bool get_attr_dim_flag = ge::AttrUtils::GetInt(op_desc, ATTR_NAME_REUSE_INPUT_ON_DIM_INDEX, attr_dim_index);
  if (!get_attr_dim_flag) {
    GELOGE(FAILED, "Get attr _reuse_input_on_dim_index failed.");
    return FAILED;
  }

  size_t extra_memory_size = 0;
  for (const auto &in_data_anchor : node->GetAllInDataAnchors()) {
    auto peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor();
    GE_CHECK_NOTNULL(peer_out_data_anchor);
    auto peer_op_desc = peer_out_data_anchor->GetOwnerNode()->GetOpDesc();
    GE_CHECK_NOTNULL(peer_op_desc);
    vector<int64_t> output_offsets = peer_op_desc->GetOutputOffset();
    if (peer_out_data_anchor->GetIdx() >= static_cast<int>(output_offsets.size())) {
      GELOGE(ge::FAILED, "Index : %d is out of range.", peer_out_data_anchor->GetIdx());
      return ge::FAILED;
    }
    // Each input's producer writes directly into the continuous reuse region.
    output_offsets.at(peer_out_data_anchor->GetIdx()) = mem_offset_reuse;
    peer_op_desc->SetOutputOffset(output_offsets);
    size_t pre_mem_offset = mem_offset_reuse;

    // Calculate tensor real size of each piece of data and out size of complete data
    ge::ConstGeTensorDescPtr output_desc = peer_op_desc->GetOutputDescPtr(peer_out_data_anchor->GetIdx());
    GE_CHECK_NOTNULL(output_desc);
    int64_t output_mem_size;
    int64_t batch_dim_num = 1;
    int64_t out_size;
    if (CalculateTensorRealSizeAndOutSize(output_desc, attr_dim_index, output_mem_size, batch_dim_num, out_size) !=
        SUCCESS) {
      GELOGE(FAILED, "CalculateTensorRealSizeAndOutSize failed for node %s output [%d].",
             peer_op_desc->GetName().c_str(), peer_out_data_anchor->GetIdx());
      return FAILED;
    }

    mem_offset_reuse += output_mem_size;
    // Gap between the complete tensor size and the piece actually occupied on
    // the reuse dimension; accumulated and added once after the loop.
    extra_memory_size = extra_memory_size + out_size - output_mem_size;

    GELOGI("[IMAS]Virtual node optimize: set %s name[%s] output[%d] offset to [%zu] stream_id[%ld] size[%ld] "
           "real_size[%ld].",
           node->GetOwnerComputeGraph()->GetName().c_str(), peer_op_desc->GetName().c_str(),
           peer_out_data_anchor->GetIdx(), pre_mem_offset, peer_op_desc->GetStreamId(), out_size,
           output_mem_size);
  }
  mem_offset_reuse += extra_memory_size;
  size_t after_mem_offset = mem_offset_reuse;
  GELOGI("After reassign virtual input node[name: %s, type: %s] memory, memory offset = %zu.",
         op_desc->GetName().c_str(), op_desc->GetType().c_str(), after_mem_offset);
  return SUCCESS;
}
|
|
|
|
|
|
|
|
|
|
|
Status GraphMemoryAssigner::ReAssignReuseAndNoPaddingContinuousInputMemory() {
  // Collect all virtual input nodes (no-padding continuous input whose single
  // output reuses the inputs) and reassign their memory. Nodes without a batch
  // label are assigned immediately; multi-batch nodes sharing the same name
  // prefix are grouped so that all batch branches can reuse one memory block.
  map<string, vector<NodePtr>> mem_reuse_virtual_input_nodes_map;
  int64_t memory_type = RT_MEMORY_HBM;
  for (const auto &n : compute_graph_->GetAllNodes()) {
    OpDescPtr op_desc = n->GetOpDesc();
    GE_CHECK_NOTNULL(op_desc);
    bool attr_continuous = false;
    bool get_continuous_flag = ge::AttrUtils::GetBool(op_desc, ATTR_NAME_NOPADDING_CONTINUOUS_INPUT, attr_continuous);
    GE_IF_BOOL_EXEC(!get_continuous_flag, continue);
    bool attr_reuse = false;
    bool get_reuse_flag = ge::AttrUtils::GetBool(op_desc, ATTR_NAME_OUTPUT_REUSE_INPUT, attr_reuse);
    GE_IF_BOOL_EXEC(!get_reuse_flag, continue);
    if (attr_reuse && attr_continuous) {
      if (op_desc->GetOutputsSize() != kVirtualInputNodeOutputSize) {
        // When current virtual node has several outputs, can't directly determine which input is the tensor for reuse.
        std::string error = "Only one output is supported, current virtual node" + FmtToStr(n->GetName()) +
            " has " + FmtToStr(op_desc->GetOutputsSize()) + " outputs.";
        GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
        return FAILED;
      }
      GE_CHK_STATUS_RET(GetNodeMemoryType(n, memory_type, "input"), "Get node memory type failed.");
      auto iter = memory_offset_.find(memory_type);
      if (iter == memory_offset_.end()) {
        std::string error = "Memory offset does not have memory type" + FmtToStr(memory_type);
        GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
        return FAILED;
      }
      GELOGD("Start to reassign memory for virtual input node, memory offset = %zu, memory type = %ld.",
             iter->second.mem_offset_, memory_type);
      string batch_label_string;
      // Not all ops have ATTR_NAME_BATCH_LABEL, no need to check return value, only check out parameter
      (void) ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label_string);
      if (batch_label_string.empty()) {
        size_t node_mem_offset = iter->second.mem_offset_;
        // No ATTR_NAME_BATCH_LABEL, no need to reuse memory.
        Status status = ReAssignVirtualInputNodeMemory(n, node_mem_offset);
        if (status != SUCCESS) {
          GELOGE(FAILED, "Reassign memory of virtual input node failed, node name: %s.", n->GetName().c_str());
          return FAILED;
        }
        iter->second.mem_offset_ = node_mem_offset;
        AlignMemOffset(MEM_ALIGN_SIZE, memory_type);
        GELOGD("After reassign memory for virtual input node, align memory = %zu, memory type = %ld.",
               iter->second.mem_offset_, memory_type);
      } else {
        // Has ATTR_NAME_BATCH_LABEL, for dynamic multi-batch node, need to reuse memory.
        string current_node_full_name = op_desc->GetName();
        size_t pos = current_node_full_name.find(kMbatchNodeNameFlag);
        if (pos == string::npos) {
          GELOGE(FAILED, "Cannot find key string [%s] of multi-batch in name of virtual input node, node name: %s.",
                 kMbatchNodeNameFlag, n->GetName().c_str());
          return FAILED;
        }
        // Nodes from different batch branches share the name prefix before the
        // multi-batch flag; group them under that fixed name for later reuse.
        string fixed_name = current_node_full_name.substr(0, pos);
        vector<NodePtr> parallel_virtual_input_nodes;
        if (mem_reuse_virtual_input_nodes_map.count(fixed_name) != 0) {
          parallel_virtual_input_nodes = mem_reuse_virtual_input_nodes_map[fixed_name];
        }
        parallel_virtual_input_nodes.emplace_back(n);
        mem_reuse_virtual_input_nodes_map[fixed_name] = parallel_virtual_input_nodes;
      }
    }
  }

  // kVirtualInputNodeMemoryReuse model: grouped multi-batch input nodes share memory.
  int32_t mem_reuse_model = 0;
  if (ReAssignVirtualNodesMemory(mem_reuse_virtual_input_nodes_map, mem_reuse_model) != SUCCESS) {
    GELOGE(FAILED, "Reassign memory of virtual input nodes failed.");
    return FAILED;
  }
  return SUCCESS;
}
|
|
|
|
|
|
|
|
// Return in mem_offset the already-assigned output offset of the producer of
// the node's first input. Used when an output tensor must directly reuse the
// memory of the (single) input tensor.
Status GetFirstInputPeerOutOutputOffset(const ge::NodePtr &node, int64_t &mem_offset) {
  auto in_data_anchor_list = node->GetAllInDataAnchors();
  if (in_data_anchor_list.empty()) {
    GELOGE(FAILED, "Node %s's in data anchor is empty.", node->GetName().c_str());
    return FAILED;
  }
  auto peer_out_data_anchor = in_data_anchor_list.at(0)->GetPeerOutAnchor();
  GE_IF_BOOL_EXEC(peer_out_data_anchor == nullptr, GELOGE(ge::FAILED, "peer_out_data_anchor is null.");
                  return ge::FAILED);
  auto peer_op_desc = peer_out_data_anchor->GetOwnerNode()->GetOpDesc();
  GE_IF_BOOL_EXEC(peer_op_desc == nullptr, GELOGE(ge::FAILED, "peer_op_desc is null."); return ge::FAILED);
  vector<int64_t> in_node_output_offsets = peer_op_desc->GetOutputOffset();
  if (peer_out_data_anchor->GetIdx() >= static_cast<int>(in_node_output_offsets.size())) {
    GELOGE(FAILED, "Index : %d is out of range.", peer_out_data_anchor->GetIdx());
    return FAILED;
  }
  mem_offset = in_node_output_offsets.at(peer_out_data_anchor->GetIdx());
  return SUCCESS;
}

Status GraphMemoryAssigner::ReAssignVirtualOutputNodeMemory(NodePtr node, size_t &mem_offset_reuse) {
  // Reassign memory for a "virtual output" node: the memory of its single input
  // tensor is reused by all of its output tensors, laid out continuously
  // starting at mem_offset_reuse. On success, mem_offset_reuse is advanced past
  // the region consumed by this node.
  OpDescPtr op_desc = node->GetOpDesc();

  // 1. set memory of to be reused input tensor
  // The caller guarantees this node has exactly one input data anchor.
  auto in_data_anchor_list = node->GetAllInDataAnchors();
  auto peer_out_data_anchor = in_data_anchor_list.at(0)->GetPeerOutAnchor();
  GE_CHECK_NOTNULL(peer_out_data_anchor);
  auto peer_op_desc = peer_out_data_anchor->GetOwnerNode()->GetOpDesc();
  GE_CHECK_NOTNULL(peer_op_desc);
  vector<int64_t> in_node_output_offsets = peer_op_desc->GetOutputOffset();
  if (peer_out_data_anchor->GetIdx() >= static_cast<int>(in_node_output_offsets.size())) {
    GELOGE(FAILED, "Index : %d is out of range.", peer_out_data_anchor->GetIdx());
    return FAILED;
  }
  in_node_output_offsets.at(peer_out_data_anchor->GetIdx()) = mem_offset_reuse;
  peer_op_desc->SetOutputOffset(in_node_output_offsets);
  GELOGI("Set virtual output node %s input data offset to %zu.", op_desc->GetName().c_str(), mem_offset_reuse);

  // 2. set memory of output tensor
  vector<int64_t> output_list = op_desc->GetOutputOffset();
  if (output_list.empty()) {
    GELOGE(FAILED, "Outputoffset is empty, node name: %s", node->GetName().c_str());
    return FAILED;
  }
  if (op_desc->GetOutputsSize() > output_list.size()) {
    GELOGE(FAILED, "The size %zu of op_desc is more than output_list's size %zu.", op_desc->GetOutputsSize(),
           output_list.size());
    return FAILED;
  }
  // Dimension index along which the outputs reuse the input tensor.
  int64_t attr_dim_index;
  bool get_attr_dim_flag = ge::AttrUtils::GetInt(op_desc, ATTR_NAME_REUSE_INPUT_ON_DIM_INDEX, attr_dim_index);
  if (!get_attr_dim_flag) {
    GELOGE(FAILED, "Get attr _reuse_input_on_dim_index failed.");
    return FAILED;
  }

  size_t extra_memory_size = 0;
  for (auto &out_data_anchor : node->GetAllOutDataAnchors()) {
    output_list[out_data_anchor->GetIdx()] = mem_offset_reuse;
    size_t pre_mem_offset = mem_offset_reuse;

    // calculate tensor real size of each piece of data and out size of complete data
    ge::ConstGeTensorDescPtr output_desc = op_desc->GetOutputDescPtr(out_data_anchor->GetIdx());
    GE_CHECK_NOTNULL(output_desc);
    int64_t output_mem_size;
    int64_t batch_dim_num = 1;
    int64_t out_size;
    if (CalculateTensorRealSizeAndOutSize(output_desc, attr_dim_index, output_mem_size, batch_dim_num, out_size) !=
        SUCCESS) {
      GELOGE(FAILED, "CalculateTensorRealSizeAndOutSize failed for node %s output [%d].",
             op_desc->GetName().c_str(), out_data_anchor->GetIdx());
      return FAILED;
    }

    mem_offset_reuse += output_mem_size;
    // Gap between the complete tensor size and the piece actually occupied on
    // the reuse dimension; accumulated and added once after the loop.
    extra_memory_size = extra_memory_size + out_size - output_mem_size;

    GELOGI("[IMAS]Virtual node optimize: set %s name[%s] output[%d] offset to [%zu], size[%ld], real_size[%ld].",
           node->GetOwnerComputeGraph()->GetName().c_str(), op_desc->GetName().c_str(), out_data_anchor->GetIdx(),
           pre_mem_offset, out_size, output_mem_size);
  }
  op_desc->SetOutputOffset(output_list);
  mem_offset_reuse += extra_memory_size;
  size_t after_mem_offset = mem_offset_reuse;
  GELOGI("After reassign virtual output node[name: %s, type: %s] memory, memory offset = %zu.",
         op_desc->GetName().c_str(), op_desc->GetType().c_str(), after_mem_offset);
  return SUCCESS;
}
|
|
|
|
|
|
|
|
|
|
|
Status GraphMemoryAssigner::ReAssignReuseAndNoPaddingContinuousOutputMemory() {
  // Collect all virtual output nodes (no-padding continuous output whose
  // outputs reuse the single input) and reassign their memory. Nodes without a
  // batch label are assigned immediately; multi-batch nodes sharing the same
  // name prefix are grouped so that all batch branches can reuse one memory block.
  map<string, vector<NodePtr>> mem_reuse_virtual_output_nodes_map;
  int64_t memory_type = RT_MEMORY_HBM;
  for (const auto &n : compute_graph_->GetAllNodes()) {
    OpDescPtr op_desc = n->GetOpDesc();
    GE_CHECK_NOTNULL(op_desc);
    bool attr_continuous = false;
    bool get_continuous_flag = ge::AttrUtils::GetBool(op_desc, ATTR_NAME_NOPADDING_CONTINUOUS_OUTPUT, attr_continuous);
    GE_IF_BOOL_EXEC(!get_continuous_flag, continue);
    bool attr_reuse = false;
    bool get_reuse_flag = ge::AttrUtils::GetBool(op_desc, ATTR_NAME_OUTPUT_REUSE_INPUT, attr_reuse);
    GE_IF_BOOL_EXEC(!get_reuse_flag, continue);

    if (attr_reuse && attr_continuous) {
      auto in_data_anchor_list = n->GetAllInDataAnchors();
      if (in_data_anchor_list.size() != kVirtualOutputNodeInputSize) {
        // When current virtual node has several inputs, can't directly determine which input is the tensor for reuse.
        std::string error = "Only one input is supported, current virtual node" + FmtToStr(n->GetName()) +
            " has " + FmtToStr(in_data_anchor_list.size()) + " inputs.";
        GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
        return FAILED;
      }
      GE_CHK_STATUS_RET(GetNodeMemoryType(n, memory_type, "output"), "Get node memory type failed.");
      auto iter = memory_offset_.find(memory_type);
      if (iter == memory_offset_.end()) {
        std::string error = "Memory offset does not have memory type" + FmtToStr(RT_MEMORY_HBM);
        GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
        return FAILED;
      }
      GELOGD("Start to reassign memory for virtual output node, memory offset = %zu, memory type = %ld.",
             iter->second.mem_offset_, memory_type);
      string batch_label_string;
      // Not all ops have ATTR_NAME_BATCH_LABEL, no need to check return value, only check out parameter
      (void) ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label_string);
      if (batch_label_string.empty()) {
        size_t node_mem_offset = iter->second.mem_offset_;
        // No ATTR_NAME_BATCH_LABEL, no need to reuse memory.
        Status status = ReAssignVirtualOutputNodeMemory(n, node_mem_offset);
        if (status != SUCCESS) {
          GELOGE(FAILED, "Reassign memory of virtual output node failed, node name: %s.", n->GetName().c_str());
          return FAILED;
        }
        iter->second.mem_offset_ = node_mem_offset;
        AlignMemOffset(MEM_ALIGN_SIZE, memory_type);
        GELOGD("After reassign memory for virtual output node, align memory = %zu, memory type = %ld.",
               iter->second.mem_offset_, memory_type);
      } else {
        // Has ATTR_NAME_BATCH_LABEL, for dynamic multi-batch node, need to reuse memory.
        string current_node_full_name = op_desc->GetName();
        size_t pos = current_node_full_name.find(kMbatchNodeNameFlag);
        if (pos == string::npos) {
          std::string error = "Cannot find key string" + FmtToStr(kMbatchNodeNameFlag) +
              " of multi-batch in name of virtual output node, the node name is " + FmtToStr(n->GetName());
          GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
          return FAILED;
        }
        // Nodes from different batch branches share the name prefix before the
        // multi-batch flag; group them under that fixed name for later reuse.
        string fixed_name = current_node_full_name.substr(0, pos);
        vector<NodePtr> parallel_virtual_output_nodes;
        if (mem_reuse_virtual_output_nodes_map.count(fixed_name) != 0) {
          parallel_virtual_output_nodes = mem_reuse_virtual_output_nodes_map[fixed_name];
        }
        parallel_virtual_output_nodes.emplace_back(n);
        mem_reuse_virtual_output_nodes_map[fixed_name] = parallel_virtual_output_nodes;
      }
    }
  }

  // kVirtualOutputNodeMemoryReuse model: grouped multi-batch output nodes share memory.
  int32_t mem_reuse_model = 1;
  if (ReAssignVirtualNodesMemory(mem_reuse_virtual_output_nodes_map, mem_reuse_model) != SUCCESS) {
    GELOGE(FAILED, "Reassign memory of virtual output nodes failed.");
    return FAILED;
  }
  return SUCCESS;
}
|
|
|
|
|
|
|
|
|
|
|
Status GraphMemoryAssigner::ReAssignVirtualNodesMemory(map<string, vector<NodePtr>> &mem_reuse_nodes_map,
                                                       int32_t mem_reuse_model) {
  // Reassign memory for grouped multi-batch virtual nodes so that all batch
  // branches in a group reuse the same memory block. mem_reuse_model selects
  // input-reuse (kVirtualInputNodeMemoryReuse) or output-reuse
  // (kVirtualOutputNodeMemoryReuse) assignment.
  // Find max batch label value
  string max_batch_label;
  GE_CHK_STATUS_RET(GetMaxBatchLabel(mem_reuse_nodes_map, mem_reuse_model, max_batch_label),
                    "Get max batch label failed.");
  PrintMemoryOffset();
  // First pass: assign real memory only for the max-batch node of each group;
  // remember each group's start offset for the second pass.
  vector<size_t> nodes_mem_offset_list;
  for (auto &i_map : mem_reuse_nodes_map) {
    vector<NodePtr> virtual_nodes_list = i_map.second;
    int64_t memory_type = RT_MEMORY_HBM;
    GE_CHK_STATUS_RET(GetNodeListMemoryType(virtual_nodes_list, mem_reuse_model, memory_type),
                      "Get node list memory type failed.");
    auto iter = memory_offset_.find(memory_type);
    if (iter == memory_offset_.end()) {
      std::string error = "Memory offset does not have memory type" + FmtToStr(RT_MEMORY_HBM);
      GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
      return FAILED;
    }
    size_t max_batch_node_mem_offset = iter->second.mem_offset_;
    nodes_mem_offset_list.emplace_back(max_batch_node_mem_offset);
    for (auto &i_node : virtual_nodes_list) {
      // Op_desc is not nullptr, it has been checked.
      OpDescPtr op_desc = i_node->GetOpDesc();
      string batch_label_string;
      // All ops must have ATTR_NAME_BATCH_LABEL, no need to check return value.
      (void) ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label_string);
      if (batch_label_string == max_batch_label) {
        Status status = SUCCESS;
        if (mem_reuse_model == kVirtualInputNodeMemoryReuse) {
          status = ReAssignVirtualInputNodeMemory(i_node, max_batch_node_mem_offset);
        } else if (mem_reuse_model == kVirtualOutputNodeMemoryReuse) {
          status = ReAssignVirtualOutputNodeMemory(i_node, max_batch_node_mem_offset);
        } else {
          std::string error = "Invalid parameter memory reuse model, which is " + FmtToStr(mem_reuse_model);
          GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
          return FAILED;
        }

        if (status != SUCCESS) {
          GELOGE(FAILED, "Reassign memory of virtual node failed, node name: %s.", i_node->GetName().c_str());
          return FAILED;
        }
        iter->second.mem_offset_ = max_batch_node_mem_offset;
        AlignMemOffset(MEM_ALIGN_SIZE, memory_type);
        GELOGD("After reassign memory for virtual node, align memory = %zu, memory type = %ld.",
               iter->second.mem_offset_, memory_type);
        // Only assign memory of max batch nodes.
        break;
      }
    }
  }
  PrintMemoryOffset();
  // Second pass: every node in a group (including smaller batches) is assigned
  // starting at the group's recorded offset, so all batches reuse one block.
  size_t memory_reuse_index = 0;
  for (auto &i_map : mem_reuse_nodes_map) {
    vector<NodePtr> virtual_nodes_list = i_map.second;
    for (auto &i_node : virtual_nodes_list) {
      size_t remaining_batch_node_mem_offset = nodes_mem_offset_list[memory_reuse_index];
      Status status = SUCCESS;
      if (mem_reuse_model == kVirtualInputNodeMemoryReuse) {
        status = ReAssignVirtualInputNodeMemory(i_node, remaining_batch_node_mem_offset);
      } else if (mem_reuse_model == kVirtualOutputNodeMemoryReuse) {
        status = ReAssignVirtualOutputNodeMemory(i_node, remaining_batch_node_mem_offset);
      } else {
        std::string error = "Invalid parameter memory reuse model, which is " + FmtToStr(mem_reuse_model);
        GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
        return FAILED;
      }

      if (status != SUCCESS) {
        GELOGE(FAILED, "Reassign memory of virtual node failed, node name: %s.", i_node->GetName().c_str());
        return FAILED;
      }
    }
    memory_reuse_index++;
  }
  return SUCCESS;
}
|
|
|
|
|
|
|
|
Status GraphMemoryAssigner::ReAssignAtomicMemory(bool is_loop_graph) { |
|
|
Status GraphMemoryAssigner::ReAssignAtomicMemory(bool is_loop_graph) { |
|
|
|