You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

graph_mem_assigner.cc 71 kB

5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560
  1. /**
  2. * Copyright 2019-2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "graph/build/memory/graph_mem_assigner.h"
  17. #include <cstring>
  18. #include <set>
  19. #include "common/math/math_util.h"
  20. #include "common/util/error_manager/error_manager.h"
  21. #include "framework/common/debug/ge_log.h"
  22. #include "graph/build/memory/hybrid_mem_assigner.h"
  23. #include "graph/build/memory/var_mem_assign_util.h"
  24. #include "graph/build/memory/block_mem_assigner.h"
  25. #include "graph/common/omg_util.h"
  26. #include "graph/debug/ge_attr_define.h"
  27. #include "graph/ge_attr_value.h"
  28. #include "graph/manager/graph_var_manager.h"
  29. #include "graph/utils/tensor_utils.h"
  30. #include "graph/utils/type_utils.h"
namespace {
// Output index used when reading the single output of a Data node.
const int kDataOutputIndex = 0;
// Sentinel value in ATOMIC_ATTR_INPUT_INDEX meaning every input address is atomic.
const int kAllInputAddrIsAtomic = -1;
// Memory-reuse models for virtual (no-padding continuous) nodes:
// reuse driven by the node's input side vs. by its output side.
const int kVirtualInputNodeMemoryReuse = 0;
const int kVirtualOutputNodeMemoryReuse = 1;
// A virtual input node must have exactly one output; a virtual output node exactly one input.
const size_t kVirtualInputNodeOutputSize = 1;
const size_t kVirtualOutputNodeInputSize = 1;
// Index of the data tensor inspected on a virtual node.
const size_t kVirtualNodeDataIndex = 0;
// Substring marking node names generated for dynamic multi-batch graphs.
const char *const kMbatchNodeNameFlag = "_ascend_mbatch_batch_";
}  // namespace
  41. namespace ge {
  42. Status VariableMemoryAssigner::Assign() {
  43. Status result = ge::VarMemAssignUtil::AssignConstantOpMemory(compute_graph_);
  44. if (result != ge::SUCCESS) {
  45. return result;
  46. }
  47. result = ge::VarMemAssignUtil::AssignVarMemory(compute_graph_);
  48. if (result != ge::SUCCESS) {
  49. return result;
  50. }
  51. return ge::SUCCESS;
  52. }
  53. Status VariableMemoryAssigner::AssignVarAttr2Nodes() {
  54. Status result = ge::VarMemAssignUtil::AssignVarAttr2Nodes(compute_graph_);
  55. if (result != ge::SUCCESS) {
  56. return result;
  57. }
  58. return ge::SUCCESS;
  59. }
  60. Status GraphMemoryAssigner::AssignMemory() {
  61. ge::HybridMemAssignerPtr mem_assigner(new (std::nothrow) HybridMemAssigner(compute_graph_));
  62. if (mem_assigner->Assign() != ge::SUCCESS) {
  63. GELOGE(ge::FAILED, "Memory assigner failed");
  64. return ge::FAILED;
  65. }
  66. MemoryOffset memory_offset(RT_MEMORY_HBM, mem_assigner->GetMemOffset());
  67. memory_offset_.push_back(memory_offset);
  68. auto session_id = compute_graph_->GetSessionID();
  69. int64_t var_size_before_assign = ge::VarManager::Instance(session_id)->GetVarMemSize(RT_MEMORY_HBM);
  70. auto variable_assigner =
  71. std::unique_ptr<ge::VariableMemoryAssigner>(new (std::nothrow) ge::VariableMemoryAssigner(compute_graph_));
  72. if (variable_assigner == nullptr) {
  73. GELOGE(ge::FAILED, "Alloc VariableMemoryAssigner failed.");
  74. return ge::FAILED;
  75. }
  76. if (variable_assigner->Assign() != ge::SUCCESS) {
  77. return ge::FAILED;
  78. }
  79. int64_t var_size_assign = ge::VarManager::Instance(session_id)->GetVarMemSize(RT_MEMORY_HBM) - var_size_before_assign;
  80. GELOGI("GraphMemoryAssigner::AssignMemory variable size = %ld", var_size_assign);
  81. mem_assigner_ = std::move(mem_assigner);
  82. return ge::SUCCESS;
  83. }
  84. ge::Status GraphMemoryAssigner::AssignVarAttr2Nodes() {
  85. auto variable_assigner =
  86. std::unique_ptr<ge::VariableMemoryAssigner>(new (std::nothrow) ge::VariableMemoryAssigner(compute_graph_));
  87. if (variable_assigner == nullptr) {
  88. GELOGE(ge::FAILED, "Alloc VariableMemoryAssigner failed.");
  89. return ge::FAILED;
  90. }
  91. if (variable_assigner->AssignVarAttr2Nodes() != ge::SUCCESS) {
  92. return ge::FAILED;
  93. }
  94. return ge::SUCCESS;
  95. }
  96. ge::Status GraphMemoryAssigner::CalculateTensorRealSizeAndOutSize(const ge::ConstGeTensorDescPtr &output_desc,
  97. int64_t dim_index, int64_t &output_mem_size,
  98. int64_t &batch_dim_num, int64_t &out_size) {
  99. graphStatus graph_status = ge::TensorUtils::GetSize(*output_desc, out_size);
  100. if (graph_status != GRAPH_SUCCESS) {
  101. GELOGE(FAILED, "Opdesc GetSize failed!");
  102. return FAILED;
  103. }
  104. GeShape output_shape = output_desc->GetShape();
  105. std::vector<int64_t> output_dims = output_shape.GetDims();
  106. if (dim_index >= static_cast<int64_t>(output_dims.size())) {
  107. GELOGE(FAILED, "Invaild value(%ld) of attr _reuse_input_on_dim_index, which is out of data range [0, %zu).",
  108. dim_index, output_dims.size());
  109. return FAILED;
  110. }
  111. for (int64_t index = 0; index < dim_index; index++) {
  112. FMK_INT64_MULCHECK(batch_dim_num, output_dims[index]);
  113. batch_dim_num *= output_dims[index];
  114. output_dims[index] = 1;
  115. }
  116. output_shape = GeShape(output_dims);
  117. Format out_format = output_desc->GetFormat();
  118. DataType data_type = output_desc->GetDataType();
  119. graph_status = ge::TensorUtils::CalcTensorMemSize(output_shape, out_format, data_type, output_mem_size);
  120. if (graph_status != GRAPH_SUCCESS) {
  121. GELOGE(graph_status, "Opdesc CalcTensorMemSize failed!");
  122. return FAILED;
  123. }
  124. if (output_mem_size < 0) {
  125. GELOGE(FAILED, "After calculating tensor memory size, output_mem_size = %ld, out of data range [0, %ld]",
  126. output_mem_size, INT64_MAX);
  127. return FAILED;
  128. }
  129. return SUCCESS;
  130. }
// Finds the batch label of the largest-shaped node among parallel multi-batch
// virtual nodes. Only the first map entry is inspected (see the final break):
// within every entry the max-batch node carries the same label.
// mem_reuse_model selects which tensor to compare: the single output for the
// input-reuse model, the single input for the output-reuse model.
Status GraphMemoryAssigner::GetMaxBatchLabel(const map<string, vector<NodePtr>> &mem_reuse_virtual_nodes_map,
                                             int32_t mem_reuse_model, string &max_batch_label) {
  for (auto &i_map : mem_reuse_virtual_nodes_map) {
    vector<NodePtr> virtual_nodes_list = i_map.second;
    vector<int64_t> max_shape_dims;   // running element-wise max shape
    size_t max_batch_dim = 0;         // index of the (single) dim allowed to differ
    bool max_batch_dim_find = false;  // becomes true once a differing dim is seen
    for (size_t i = 0; i < virtual_nodes_list.size(); ++i) {
      GE_CHECK_NOTNULL(virtual_nodes_list[i]);
      OpDescPtr op_desc = virtual_nodes_list[i]->GetOpDesc();
      GE_CHECK_NOTNULL(op_desc);
      ge::ConstGeTensorDescPtr input_output_desc;
      if (mem_reuse_model == kVirtualInputNodeMemoryReuse) {
        input_output_desc = op_desc->GetOutputDescPtr(kVirtualNodeDataIndex);
      } else if (mem_reuse_model == kVirtualOutputNodeMemoryReuse) {
        input_output_desc = op_desc->GetInputDescPtr(kVirtualNodeDataIndex);
      } else {
        GELOGE(FAILED, "Invalid parameter memory reuse model, which is: %d.", mem_reuse_model);
        return FAILED;
      }
      GE_CHECK_NOTNULL(input_output_desc);
      if (i == 0) {
        // First node seeds both the label and the reference shape.
        // All ops must have ATTR_NAME_BATCH_LABEL, no need to check return value.
        (void)ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, max_batch_label);
        max_shape_dims = input_output_desc->GetShape().GetDims();
      } else {
        vector<int64_t> current_shape_dims = input_output_desc->GetShape().GetDims();
        if (current_shape_dims.size() != max_shape_dims.size()) {
          GELOGE(FAILED, "The shape size of several nodes between multiple batches does not match.");
          return FAILED;
        }
        for (size_t j = 0; j < current_shape_dims.size(); ++j) {
          if (current_shape_dims[j] == max_shape_dims[j]) {
            continue;
          }
          // Shapes may differ in at most ONE dimension (the batch dim);
          // a second differing dim is an error.
          if (max_batch_dim_find && max_batch_dim != j) {
            GELOGE(FAILED, "The shape of several nodes between multiple batches does not match.");
            return FAILED;
          }
          max_batch_dim_find = true;
          max_batch_dim = j;
          if (current_shape_dims[j] > max_shape_dims[j]) {
            max_shape_dims[j] = current_shape_dims[j];
            // This node has the biggest batch so far; take its label.
            // All ops must have ATTR_NAME_BATCH_LABEL, no need to check return value.
            (void)ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, max_batch_label);
          }
          // Only compare the first different dim in shape.
          break;
        }
      }
    }
    // In every element of virtual_input_nodes_map, the label of the max batch node is the same.
    break;
  }
  return SUCCESS;
}
// Runs the post-pass reassignment stages (continuous, no-padding reuse,
// atomic) in order, then validates the resulting feature-map offset against
// the session's configured memory ceiling.
// mem_offset (out): final offset of memory pool 0 after all stages.
Status GraphMemoryAssigner::ReAssignMemory(bool is_loop_graph, size_t &mem_offset) {
  if (memory_offset_.empty()) {
    GELOGE(FAILED, "memory_offset_ is empty.");
    return ge::FAILED;
  }
  // Each GE_CHK_STATUS_RET returns immediately on stage failure.
  GE_CHK_STATUS_RET(ReAssignContinuousMemory(is_loop_graph), "ReAssignContinuousMemory Failed!");
  GE_CHK_STATUS_RET(ReAssignReuseAndNoPaddingContinuousInputMemory(),
                    "ReAssignReuseAndNoPaddingContinuousInputMemory Failed!");
  GE_CHK_STATUS_RET(ReAssignReuseAndNoPaddingContinuousOutputMemory(),
                    "ReAssignReuseAndNoPaddingContinuousOutputMemory Failed!");
  GE_CHK_STATUS_RET(ReAssignAtomicMemory(is_loop_graph), "ReAssignAtomicMemory Failed!");
  mem_offset = memory_offset_[0].mem_offset_;
  auto session_id = compute_graph_->GetSessionID();
  // Fail (and report E19022 to ATC) if the graph needs more memory than the manager can malloc.
  if (mem_offset > VarManager::Instance(session_id)->GetGraphMemoryMaxSize()) {
    GELOGE(ge::FAILED, "Current memoffset %zu is greater than memory manager malloc max size %zu", mem_offset,
           VarManager::Instance(session_id)->GetGraphMemoryMaxSize());
    ErrorManager::GetInstance().ATCReportErrMessage(
        "E19022", {"size", "item", "maxsize"},
        {std::to_string(mem_offset), "featuremap",
         std::to_string(VarManager::Instance(session_id)->GetGraphMemoryMaxSize())});
    return ge::FAILED;
  }
  return SUCCESS;
}
// Places all zero-copy memory blocks at the end of the current memory pool.
// mem_offset (in/out): advanced past the zero-copy region.
// zero_mem_copy_size (out): total bytes consumed by zero-copy blocks.
Status GraphMemoryAssigner::AssignZeroCopyMemory(size_t &mem_offset, size_t &zero_mem_copy_size) {
  BlockMemAssignerPtr priority_assigner = std::move(mem_assigner_->GetPriorityAssinger());
  GE_IF_BOOL_EXEC(priority_assigner == nullptr, GELOGE(FAILED, "Get priority_assigner failed."); return ge::FAILED;);
  size_t mem_offset_tmp = mem_offset;  // remember the start to compute the region size
  // set offset for zero copy block
  for (auto &memory_block : priority_assigner->GetMemoryBlocks()) {
    // Skip blocks that were merged away or are not zero-copy.
    if (memory_block == nullptr || memory_block->deleted_block_ || !memory_block->is_zero_copy_) {
      continue;
    }
    memory_block->Resize();
    memory_block->SetHeadOffset(mem_offset);
    mem_offset += memory_block->Size();
    memory_block->SetTailOffset(mem_offset - 1);  // tail is inclusive
  }
  GELOGI("mem_offset_ include zero_copy_memory is %zu.", mem_offset);
  // set offset for zero copy nodes
  priority_assigner->SetOpMemOffset(true);
  zero_mem_copy_size = mem_offset - mem_offset_tmp;
  // Keep the shared pool-0 offset in sync with the advanced local offset.
  memory_offset_[0].mem_offset_ = mem_offset;
  GELOGI("max_mem_offset:%zu, mem_offset:%zu, zero_mem_copy_size:%zu.", mem_offset, mem_offset_tmp, zero_mem_copy_size);
  return SUCCESS;
}
// Walks every node and reassigns memory for nodes that demand continuous
// input and/or continuous output placement (e.g. hcom ops). Also attaches
// atomic-clean attributes when all input addresses of a continuous-input
// node are atomic.
Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) {
  GELOGI("Begin to reassign continuous memory");
  Status ret;
  for (auto &node : compute_graph_->GetAllNodes()) {
    // Get the continuous input type of the node, default is false
    bool is_input_continuous = false;
    GE_CHECK_NOTNULL(node->GetOpDesc());
    // If GetBool fail, is_input_continuous is false.
    (void)ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous);
    // Assign continuous input memory
    if (is_input_continuous) {
      int64_t mem_clean_start = 0;
      int64_t mem_clean_size = 0;
      ret = AssignContinuousInputMemory(node, mem_clean_start, mem_clean_size);
      if (ret != ge::SUCCESS) {
        GELOGE(ret, "Assign continuous input memory failed!");
        return ret;
      }
      // Clean up atomic address, eg, hcom node
      vector<int32_t> input_indexes;
      // If GetListInt fail, input_indexes is empty.
      (void)ge::AttrUtils::GetListInt(node->GetOpDesc(), ATOMIC_ATTR_INPUT_INDEX, input_indexes);
      // kAllInputAddrIsAtomic (-1) in slot 0 means every input address is atomic.
      if (!input_indexes.empty() && input_indexes[0] == kAllInputAddrIsAtomic) {
        // check whether there is an atomic conflict between the current node and the peer out node
        if (!CheckInputIsSupportAtomic(node)) {
          GELOGE(ge::FAILED,
                 "There is an atomic conflict between the current node and the peer out node, not supported!");
          return ge::FAILED;
        } else if (is_loop_graph) {
          // Loop graphs route the clean range through the loop-specific attr path.
          GE_CHK_STATUS_RET(SetLoopGraphAtomicAttr(node, mem_clean_start));
        } else {
          GE_CHK_STATUS_RET(SetAtomicCleanAttr(nullptr, {mem_clean_start}, {mem_clean_size}),
                            "SetAtomicCleanAttr failed.");
        }
      }
    }
    // Get the reference type of the node, default is false
    bool is_ref = false;
    // If GetBool fail, is_ref is false.
    (void)ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_REFERENCE, is_ref);
    // Get the continuous output type of the node, default is false
    bool is_output_continuous = false;
    // If GetBool fail, is_output_continuous is false.
    (void)ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_CONTINUOUS_OUTPUT, is_output_continuous);
    // If the output is ref type and refers to the ref of an input, the name of the output
    // and the input are the same. Ge encounters ref type, finds matching relationship according
    // to the names of input and output, and allocates the same memory address, eg: HCOMBroadcast
    if (!is_ref && is_output_continuous) {  // Assign continuous output memory
      ret = AssignContinuousOutputMemory(node);
      if (ret != ge::SUCCESS) {
        GELOGE(ret, "Assign reference memory failed!");
        return ret;
      }
    }
  }
  GELOGI("After reassign continuous memory, memoffset = %zu.", memory_offset_[0].mem_offset_);
  return ge::SUCCESS;
}
// Lays out the outputs of all upstream peers of `node` contiguously so the
// node sees one continuous input region.
// continuous_mem_start (out): start offset of the continuous region.
// continuous_mem_size (out): size of the region (includes a trailing
//   MEM_ALIGN_SIZE guard when this function does the allocation itself).
// When ATTR_NAME_CONTINUOUS_INPUT_ALLOC is set, the peers' offsets are assumed
// to be pre-assigned and are only read back, not moved.
Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node, int64_t &continuous_mem_start,
                                                        int64_t &continuous_mem_size) {
  GELOGI("Current node %s needs continuous input.", node->GetName().c_str());
  continuous_mem_start = memory_offset_[0].mem_offset_;
  bool continuous_input_alloc = false;
  (void)ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_CONTINUOUS_INPUT_ALLOC, continuous_input_alloc);
  for (auto &in_data_anchor : node->GetAllInDataAnchors()) {
    auto peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor();
    GE_IF_BOOL_EXEC(peer_out_data_anchor == nullptr, continue);
    auto peer_op_desc = peer_out_data_anchor->GetOwnerNode()->GetOpDesc();
    GE_IF_BOOL_EXEC(peer_op_desc == nullptr, continue);
    bool is_peer_output_continuous = false;
    // If GetBool fail, is_peer_output_continuous is false.
    (void)ge::AttrUtils::GetBool(peer_op_desc, ATTR_NAME_CONTINUOUS_OUTPUT, is_peer_output_continuous);
    // Get peer node output size, if size == 1(peer node has only one output), continuous input of the node and
    // continuous output of the previous node is the same, we can support it. If size != 1, there may be
    // conflict between the two, we can not support it.
    auto peer_output_size = peer_op_desc->GetOutputsSize();
    GE_IF_BOOL_EXEC(is_peer_output_continuous && (peer_output_size != 1),
                    GELOGE(PARAM_INVALID,
                           "Current node %s requires continuous input, while the previous node %s requires "
                           "continuous output. There may be conflict between the two. This node is not supported now.",
                           node->GetOpDesc()->GetName().c_str(), peer_op_desc->GetName().c_str());
                    return PARAM_INVALID;);
    bool is_peer_reference = false;
    // If GetBool fail, is_peer_reference is false.
    (void)AttrUtils::GetBool(peer_op_desc, ATTR_NAME_REFERENCE, is_peer_reference);
    // A reference-type peer shares memory with its own input; relocating its
    // output would break that aliasing, so reject the combination.
    GE_IF_BOOL_EXEC(is_peer_reference,
                    GELOGE(PARAM_INVALID,
                           "Current node %s requires continuous input, while the previous node %s requires "
                           "reference. There may be conflict between the two. This node is not supported now.",
                           node->GetOpDesc()->GetName().c_str(), peer_op_desc->GetName().c_str());
                    return PARAM_INVALID;);
    vector<int64_t> output_list = peer_op_desc->GetOutputOffset();
    std::vector<int64_t> offsets_for_fusion = {};
    bool has_offset_attr =
        AttrUtils::GetListInt(peer_op_desc, ATTR_NAME_OUTPUT_OFFSET_FOR_BUFFER_FUSION, offsets_for_fusion);
    if (peer_out_data_anchor->GetIdx() < static_cast<int>(output_list.size())) {
      if (continuous_input_alloc && !has_offset_attr) {
        // Pre-allocated mode: derive region start from the first input's
        // existing offset and region size from the last input's offset + size.
        if (in_data_anchor->GetIdx() == 0) {
          continuous_mem_start = output_list.at(peer_out_data_anchor->GetIdx());
        }
        // can not use else if, incase only one input
        if (in_data_anchor->GetIdx() == static_cast<int>(node->GetAllInDataAnchors().size()) - 1) {
          int64_t tensor_desc_size = 0;
          Status ret = ge::TensorUtils::GetSize(*(peer_op_desc->GetOutputDescPtr(peer_out_data_anchor->GetIdx())),
                                                tensor_desc_size);
          GE_IF_BOOL_EXEC(ret != ge::SUCCESS, GELOGE(FAILED, "GetSize failed."); return FAILED;);
          // Round the last tensor up to MEM_ALIGN_SIZE, plus one alignment unit of guard space.
          tensor_desc_size = (tensor_desc_size + MEM_ALIGN_SIZE - 1) / MEM_ALIGN_SIZE * MEM_ALIGN_SIZE;
          continuous_mem_size =
              output_list.at(peer_out_data_anchor->GetIdx()) - continuous_mem_start + tensor_desc_size + MEM_ALIGN_SIZE;
        }
        GELOGI(
            "[IMAS]Check Continuous input : Set %s name[%s] output[%d] offset to [%zu] stream_id[%ld] size[%zu] "
            "real_size[%u].",
            node->GetOwnerComputeGraph()->GetName().c_str(), peer_op_desc->GetName().c_str(),
            peer_out_data_anchor->GetIdx(), output_list.at(peer_out_data_anchor->GetIdx()), peer_op_desc->GetStreamId(),
            0, 0);
        continue;
      }
      // Allocating mode: move this peer output to the current pool offset.
      output_list.at(peer_out_data_anchor->GetIdx()) = memory_offset_[0].mem_offset_;
    } else {
      GELOGE(FAILED, "index : %d is out of range.", peer_out_data_anchor->GetIdx());
      return FAILED;
    }
    peer_op_desc->SetOutputOffset(output_list);
    size_t pre_mem_offset = memory_offset_[0].mem_offset_;
    int64_t tensor_desc_size = 0;
    if (has_offset_attr) {
      // Buffer-fusion peers advance by their pre-computed fusion offset
      // instead of the tensor's descriptor size.
      if (peer_out_data_anchor->GetIdx() < static_cast<int>(offsets_for_fusion.size())) {
        auto offset_for_fusion = offsets_for_fusion[peer_out_data_anchor->GetIdx()];
        memory_offset_[0].mem_offset_ += offset_for_fusion;
      } else {
        GELOGE(FAILED, "fusion: peer node %s index : %d is out of range.", peer_op_desc->GetName().c_str(),
               peer_out_data_anchor->GetIdx());
        return FAILED;
      }
    } else {
      Status ret =
          TensorUtils::GetSize(*(peer_op_desc->GetOutputDescPtr(peer_out_data_anchor->GetIdx())), tensor_desc_size);
      GE_IF_BOOL_EXEC(ret != ge::SUCCESS, GELOGE(FAILED, "GetSize failed."); return FAILED;);
      memory_offset_[0].mem_offset_ += tensor_desc_size;
    }
    // If set tensor_actual_size, Memory alignment is not required.
    int32_t is_tensor_actual_size = 0;
    ge::AttrUtils::GetInt(peer_op_desc, ATTR_NAME_GET_TENSOR_ACTUAL_SIZE, is_tensor_actual_size);
    if (is_tensor_actual_size == 0) {
      AlignMemOffset(MEM_ALIGN_SIZE);
    }
    GELOGI(
        "[IMAS]Continuous input : Set %s name[%s] output[%d] offset to [%zu] stream_id[%ld] size[%zu] "
        "real_size[%ld].",
        node->GetOwnerComputeGraph()->GetName().c_str(), peer_op_desc->GetName().c_str(), peer_out_data_anchor->GetIdx(),
        pre_mem_offset, peer_op_desc->GetStreamId(), (memory_offset_[0].mem_offset_ - pre_mem_offset), tensor_desc_size);
  }
  // Trailing guard space after the whole continuous region.
  memory_offset_[0].mem_offset_ += MEM_ALIGN_SIZE;
  if (!continuous_input_alloc) {
    continuous_mem_size = memory_offset_[0].mem_offset_ - continuous_mem_start;
  }
  return SUCCESS;
}
// Packs all outputs of `node` back-to-back starting at the node's existing
// first output offset, aligning each tensor up to MEM_ALIGN_SIZE. Does not
// advance the global memory pool — it re-layouts within already-assigned space.
Status GraphMemoryAssigner::AssignContinuousOutputMemory(const ge::NodePtr &node) {
  GELOGI("Current node %s needs continuous output.", node->GetName().c_str());
  auto out_op_desc = node->GetOpDesc();
  GE_IF_BOOL_EXEC(out_op_desc == nullptr, GELOGE(ge::FAILED, "out_op_desc is null."); return ge::FAILED);
  vector<int64_t> output_list = out_op_desc->GetOutputOffset();
  if ((out_op_desc->GetOutputsSize() > output_list.size()) || (output_list.size() == 0)) {
    GELOGE(ge::FAILED, "The size %zu of node output desc is more than output_list's size %zu.",
           out_op_desc->GetOutputsSize(), output_list.size());
    return ge::FAILED;
  }
  // Start from the offset the first output already has.
  size_t mem_offset = output_list[0];
  for (auto &out_data_anchor : node->GetAllOutDataAnchors()) {
    output_list[out_data_anchor->GetIdx()] = mem_offset;
    int64_t tensor_desc_size = 0;
    if (ge::TensorUtils::GetSize(*(out_op_desc->GetOutputDescPtr(out_data_anchor->GetIdx())), tensor_desc_size) !=
        ge::SUCCESS) {
      GELOGE(FAILED, "GetSize failed.");
      return FAILED;
    }
    mem_offset += tensor_desc_size;
    // NOTE(review): mem_offset is size_t, so "<= 0" only catches exactly 0
    // (e.g. offset 0 with a zero-size tensor); it cannot detect overflow as a
    // signed check would — confirm intended semantics before changing.
    if (mem_offset <= 0) {
      return FAILED;
    }
    // Round up to the next MEM_ALIGN_SIZE boundary for the following output.
    mem_offset = (mem_offset + MEM_ALIGN_SIZE - 1) / MEM_ALIGN_SIZE * MEM_ALIGN_SIZE;
    GELOGI(
        "[IMAS]Continuous output : Set %s name[%s] output[%d] offset to [%zu] stream_id[%ld] size[%ld] "
        "real_size[%ld].",
        node->GetOwnerComputeGraph()->GetName().c_str(), out_op_desc->GetName().c_str(), out_data_anchor->GetIdx(),
        output_list[out_data_anchor->GetIdx()], out_op_desc->GetStreamId(), tensor_desc_size, tensor_desc_size);
  }
  out_op_desc->SetOutputOffset(output_list);
  return ge::SUCCESS;
}
  425. Status GraphMemoryAssigner::ReAssignVirtualInputNodeMemory(NodePtr node, size_t &mem_offset_reuse) {
  426. OpDescPtr op_desc = node->GetOpDesc();
  427. vector<int64_t> output_list = op_desc->GetOutputOffset();
  428. if (output_list.empty()) {
  429. GELOGE(FAILED, "Outputoffset is empty node name:%s", node->GetName().c_str());
  430. return FAILED;
  431. }
  432. output_list.at(0) = mem_offset_reuse;
  433. op_desc->SetOutputOffset(output_list);
  434. GELOGI("Set virtual input node %s output offset to %zu.", op_desc->GetName().c_str(), mem_offset_reuse);
  435. int64_t attr_dim_index;
  436. bool get_attr_dim_flag = ge::AttrUtils::GetInt(op_desc, ATTR_NAME_REUSE_INPUT_ON_DIM_INDEX, attr_dim_index);
  437. if (!get_attr_dim_flag) {
  438. GELOGE(FAILED, "Get attr _reuse_input_on_dim_index failed.");
  439. return FAILED;
  440. }
  441. size_t extra_memory_size = 0;
  442. for (const auto &in_data_anchor : node->GetAllInDataAnchors()) {
  443. auto peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor();
  444. GE_CHECK_NOTNULL(peer_out_data_anchor);
  445. auto peer_op_desc = peer_out_data_anchor->GetOwnerNode()->GetOpDesc();
  446. GE_CHECK_NOTNULL(peer_op_desc);
  447. vector<int64_t> output_offsets = peer_op_desc->GetOutputOffset();
  448. if (peer_out_data_anchor->GetIdx() >= static_cast<int>(output_offsets.size())) {
  449. GELOGE(ge::FAILED, "Index : %d is out of range.", peer_out_data_anchor->GetIdx());
  450. return ge::FAILED;
  451. }
  452. output_offsets.at(peer_out_data_anchor->GetIdx()) = mem_offset_reuse;
  453. peer_op_desc->SetOutputOffset(output_offsets);
  454. size_t pre_mem_offset = mem_offset_reuse;
  455. // Calculate tensor real size of each piece of data and out size of complete data
  456. ge::ConstGeTensorDescPtr output_desc = peer_op_desc->GetOutputDescPtr(peer_out_data_anchor->GetIdx());
  457. GE_CHECK_NOTNULL(output_desc);
  458. int64_t output_mem_size;
  459. int64_t batch_dim_num = 1;
  460. int64_t out_size;
  461. if (CalculateTensorRealSizeAndOutSize(output_desc, attr_dim_index, output_mem_size, batch_dim_num, out_size) !=
  462. SUCCESS) {
  463. GELOGE(FAILED, "CalculateTensorRealSizeAndOutSize failed for node %s output [%d].",
  464. peer_op_desc->GetName().c_str(), peer_out_data_anchor->GetIdx());
  465. return FAILED;
  466. }
  467. mem_offset_reuse += output_mem_size;
  468. extra_memory_size = extra_memory_size + out_size - output_mem_size;
  469. GELOGI(
  470. "[IMAS]Virtual node optimize: set %s name[%s] output[%d] offset to [%zu] stream_id[%ld] size[%ld] "
  471. "real_size[%ld].",
  472. node->GetOwnerComputeGraph()->GetName().c_str(), peer_op_desc->GetName().c_str(), peer_out_data_anchor->GetIdx(),
  473. pre_mem_offset, peer_op_desc->GetStreamId(), out_size, output_mem_size);
  474. }
  475. mem_offset_reuse += extra_memory_size;
  476. size_t after_mem_offset = mem_offset_reuse;
  477. GELOGI("After reassign virtual input node[name: %s, type: %s] memory, memory offset = %zu.",
  478. op_desc->GetName().c_str(), op_desc->GetType().c_str(), after_mem_offset);
  479. return SUCCESS;
  480. }
  481. Status GraphMemoryAssigner::ReAssignReuseAndNoPaddingContinuousInputMemory() {
  482. map<string, vector<NodePtr>> mem_reuse_virtual_input_nodes_map;
  483. for (const auto &n : compute_graph_->GetAllNodes()) {
  484. OpDescPtr op_desc = n->GetOpDesc();
  485. GE_CHECK_NOTNULL(op_desc);
  486. bool attr_continuous = false;
  487. bool get_continuous_flag = ge::AttrUtils::GetBool(op_desc, ATTR_NAME_NOPADDING_CONTINUOUS_INPUT, attr_continuous);
  488. GE_IF_BOOL_EXEC(!get_continuous_flag, continue);
  489. bool attr_reuse = false;
  490. bool get_reuse_flag = ge::AttrUtils::GetBool(op_desc, ATTR_NAME_OUTPUT_REUSE_INPUT, attr_reuse);
  491. GE_IF_BOOL_EXEC(!get_reuse_flag, continue);
  492. if (attr_reuse && attr_continuous) {
  493. if (op_desc->GetOutputsSize() != kVirtualInputNodeOutputSize) {
  494. // When current virtual node has several outputs, can't directly determine which input is the tensor for reuse.
  495. GELOGE(FAILED, "Only one output is supported, current virtual node %s has %zu inputs.", n->GetName().c_str(),
  496. op_desc->GetOutputsSize());
  497. return FAILED;
  498. }
  499. GELOGD("Start to reassign memory for virtual input node, memory offset = %zu.", memory_offset_[0].mem_offset_);
  500. string batch_label_string;
  501. // Not all ops have ATTR_NAME_BATCH_LABEL, no need to check return value, only check out parameter
  502. (void)ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label_string);
  503. if (batch_label_string.empty()) {
  504. size_t node_mem_offset = memory_offset_[0].mem_offset_;
  505. // No ATTR_NAME_BATCH_LABEL, no need to reuse memory.
  506. Status status = ReAssignVirtualInputNodeMemory(n, node_mem_offset);
  507. if (status != SUCCESS) {
  508. GELOGE(FAILED, "Reassign memory of virtual input node failed, node name: %s.", n->GetName().c_str());
  509. return FAILED;
  510. }
  511. memory_offset_[0].mem_offset_ = node_mem_offset;
  512. AlignMemOffset(MEM_ALIGN_SIZE);
  513. GELOGD("After reassign memory for virtual input node, align memory = %zu.", memory_offset_[0].mem_offset_);
  514. } else {
  515. // Has ATTR_NAME_BATCH_LABEL, for dynamic multi-batch node, need to reuse memory.
  516. string current_node_full_name = op_desc->GetName();
  517. size_t pos = current_node_full_name.find(kMbatchNodeNameFlag);
  518. if (pos == string::npos) {
  519. GELOGE(FAILED, "Cannot find key string [%s] of multi-batch in name of virtual input node, node name: %s.",
  520. kMbatchNodeNameFlag, n->GetName().c_str());
  521. return FAILED;
  522. }
  523. string fixed_name = current_node_full_name.substr(0, pos);
  524. vector<NodePtr> parallel_virtual_input_nodes;
  525. if (mem_reuse_virtual_input_nodes_map.count(fixed_name) != 0) {
  526. parallel_virtual_input_nodes = mem_reuse_virtual_input_nodes_map[fixed_name];
  527. }
  528. parallel_virtual_input_nodes.emplace_back(n);
  529. mem_reuse_virtual_input_nodes_map[fixed_name] = parallel_virtual_input_nodes;
  530. }
  531. }
  532. }
  533. int32_t mem_reuse_model = 0;
  534. if (ReAssignVirtualNodesMemory(mem_reuse_virtual_input_nodes_map, mem_reuse_model) != SUCCESS) {
  535. GELOGE(FAILED, "Reassign memory of virtual input nodes failed.");
  536. return FAILED;
  537. }
  538. return SUCCESS;
  539. }
// Lays out memory for one "virtual output" node: its single input tensor is reused as
// the backing store of all its outputs, packed contiguously without padding.
// mem_offset_reuse is the start of the reused block on entry; on return it has been
// advanced past all real output sizes plus the accumulated extra
// (out_size - real_size) padding, so the caller can commit it as the new offset.
Status GraphMemoryAssigner::ReAssignVirtualOutputNodeMemory(NodePtr node, size_t &mem_offset_reuse) {
  OpDescPtr op_desc = node->GetOpDesc();
  // 1. set memory of to be reused input tensor
  auto in_data_anchor_list = node->GetAllInDataAnchors();
  auto peer_out_data_anchor = in_data_anchor_list.at(0)->GetPeerOutAnchor();
  GE_CHECK_NOTNULL(peer_out_data_anchor);
  auto peer_op_desc = peer_out_data_anchor->GetOwnerNode()->GetOpDesc();
  GE_CHECK_NOTNULL(peer_op_desc);
  vector<int64_t> in_node_output_offsets = peer_op_desc->GetOutputOffset();
  if (peer_out_data_anchor->GetIdx() >= static_cast<int>(in_node_output_offsets.size())) {
    GELOGE(FAILED, "Index : %d is out of range.", peer_out_data_anchor->GetIdx());
    return FAILED;
  }
  // The producer's output (this node's reused input) starts at the reuse offset.
  in_node_output_offsets.at(peer_out_data_anchor->GetIdx()) = mem_offset_reuse;
  peer_op_desc->SetOutputOffset(in_node_output_offsets);
  GELOGI("Set virtual output node %s input data offset to %zu.", op_desc->GetName().c_str(), mem_offset_reuse);
  // 2. set memory of output tensor
  vector<int64_t> output_list = op_desc->GetOutputOffset();
  if (output_list.empty()) {
    GELOGE(FAILED, "Outputoffset is empty, node name: %s", node->GetName().c_str());
    return FAILED;
  }
  if (op_desc->GetOutputsSize() > output_list.size()) {
    GELOGE(FAILED, "The size %zu of op_desc is more than output_list's size %zu.", op_desc->GetOutputsSize(),
           output_list.size());
    return FAILED;
  }
  // Dim index along which each output is carved out of the reused input tensor.
  int64_t attr_dim_index;
  bool get_attr_dim_flag = ge::AttrUtils::GetInt(op_desc, ATTR_NAME_REUSE_INPUT_ON_DIM_INDEX, attr_dim_index);
  if (!get_attr_dim_flag) {
    GELOGE(FAILED, "Get attr _reuse_input_on_dim_index failed.");
    return FAILED;
  }
  size_t extra_memory_size = 0;
  for (auto &out_data_anchor : node->GetAllOutDataAnchors()) {
    output_list[out_data_anchor->GetIdx()] = mem_offset_reuse;
    size_t pre_mem_offset = mem_offset_reuse;
    // calculate tensor real size of each piece of data and out size of complete data
    ge::ConstGeTensorDescPtr output_desc = op_desc->GetOutputDescPtr(out_data_anchor->GetIdx());
    GE_CHECK_NOTNULL(output_desc);
    int64_t output_mem_size;
    int64_t batch_dim_num = 1;
    int64_t out_size;
    if (CalculateTensorRealSizeAndOutSize(output_desc, attr_dim_index, output_mem_size, batch_dim_num, out_size) !=
        SUCCESS) {
      GELOGE(FAILED, "CalculateTensorRealSizeAndOutSize failed for node %s output [%d].", op_desc->GetName().c_str(),
             out_data_anchor->GetIdx());
      return FAILED;
    }
    // Advance by the real per-piece size only; the remainder (out_size - real size)
    // is accumulated and added once after the loop, so pieces stay densely packed.
    mem_offset_reuse += output_mem_size;
    extra_memory_size = extra_memory_size + out_size - output_mem_size;
    GELOGI("[IMAS]Virtual node optimize: set %s name[%s] output[%d] offset to [%zu], size[%ld], real_size[%ld].",
           node->GetOwnerComputeGraph()->GetName().c_str(), op_desc->GetName().c_str(), out_data_anchor->GetIdx(),
           pre_mem_offset, out_size, output_mem_size);
  }
  op_desc->SetOutputOffset(output_list);
  mem_offset_reuse += extra_memory_size;
  size_t after_mem_offset = mem_offset_reuse;
  GELOGI("After reassign virtual output node[name: %s, type: %s] memory, memory offset = %zu.",
         op_desc->GetName().c_str(), op_desc->GetType().c_str(), after_mem_offset);
  return SUCCESS;
}
  602. Status GraphMemoryAssigner::ReAssignReuseAndNoPaddingContinuousOutputMemory() {
  603. map<string, vector<NodePtr>> mem_reuse_virtual_output_nodes_map;
  604. for (const auto &n : compute_graph_->GetAllNodes()) {
  605. OpDescPtr op_desc = n->GetOpDesc();
  606. GE_CHECK_NOTNULL(op_desc);
  607. bool attr_continuous = false;
  608. bool get_continuous_flag = ge::AttrUtils::GetBool(op_desc, ATTR_NAME_NOPADDING_CONTINUOUS_OUTPUT, attr_continuous);
  609. GE_IF_BOOL_EXEC(!get_continuous_flag, continue);
  610. bool attr_reuse = false;
  611. bool get_reuse_flag = ge::AttrUtils::GetBool(op_desc, ATTR_NAME_OUTPUT_REUSE_INPUT, attr_reuse);
  612. GE_IF_BOOL_EXEC(!get_reuse_flag, continue);
  613. if (attr_reuse && attr_continuous) {
  614. auto in_data_anchor_list = n->GetAllInDataAnchors();
  615. if (in_data_anchor_list.size() != kVirtualOutputNodeInputSize) {
  616. // When current virtual node has several inputs, can't directly determine which input is the tensor for reuse.
  617. GELOGE(FAILED, "Only one input is supported, current virtual node %s has %zu inputs.", n->GetName().c_str(),
  618. in_data_anchor_list.size());
  619. return FAILED;
  620. }
  621. GELOGD("Start to reassign memory for virtual output node, memory offset = %zu.", memory_offset_[0].mem_offset_);
  622. string batch_label_string;
  623. // Not all ops have ATTR_NAME_BATCH_LABEL, no need to check return value, only check out parameter
  624. (void)ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label_string);
  625. if (batch_label_string.empty()) {
  626. size_t node_mem_offset = memory_offset_[0].mem_offset_;
  627. // No ATTR_NAME_BATCH_LABEL, no need to reuse memory.
  628. Status status = ReAssignVirtualOutputNodeMemory(n, node_mem_offset);
  629. if (status != SUCCESS) {
  630. GELOGE(FAILED, "Reassign memory of virtual output node failed, node name: %s.", n->GetName().c_str());
  631. return FAILED;
  632. }
  633. memory_offset_[0].mem_offset_ = node_mem_offset;
  634. AlignMemOffset(MEM_ALIGN_SIZE);
  635. GELOGD("After reassign memory for virtual output node, align memory = %zu.", memory_offset_[0].mem_offset_);
  636. } else {
  637. // Has ATTR_NAME_BATCH_LABEL, for dynamic multi-batch node, need to reuse memory.
  638. string current_node_full_name = op_desc->GetName();
  639. size_t pos = current_node_full_name.find(kMbatchNodeNameFlag);
  640. if (pos == string::npos) {
  641. GELOGE(FAILED, "Cannot find key string [%s] of multi-batch in name of virtual output node, node name: %s.",
  642. kMbatchNodeNameFlag, n->GetName().c_str());
  643. return FAILED;
  644. }
  645. string fixed_name = current_node_full_name.substr(0, pos);
  646. vector<NodePtr> parallel_virtual_output_nodes;
  647. if (mem_reuse_virtual_output_nodes_map.count(fixed_name) != 0) {
  648. parallel_virtual_output_nodes = mem_reuse_virtual_output_nodes_map[fixed_name];
  649. }
  650. parallel_virtual_output_nodes.emplace_back(n);
  651. mem_reuse_virtual_output_nodes_map[fixed_name] = parallel_virtual_output_nodes;
  652. }
  653. }
  654. }
  655. int32_t mem_reuse_model = 1;
  656. if (ReAssignVirtualNodesMemory(mem_reuse_virtual_output_nodes_map, mem_reuse_model) != SUCCESS) {
  657. GELOGE(FAILED, "Reassign memory of virtual output nodes failed.");
  658. return FAILED;
  659. }
  660. return SUCCESS;
  661. }
// Two-phase memory assignment for grouped multi-batch virtual nodes.
// Phase 1: for each group (map key = shared name prefix), record the group's start
// offset, then assign fresh memory only for the node carrying the max batch label.
// Phase 2: re-run assignment for every node of every group at the recorded start
// offset, so all batch branches reuse the max-batch block.
// mem_reuse_model selects input-style (kVirtualInputNodeMemoryReuse) or output-style
// (kVirtualOutputNodeMemoryReuse) reassignment.
// NOTE: both loops iterate the same std::map (ordered), so nodes_mem_offset_list[i]
// lines up with the i-th group in the second loop.
Status GraphMemoryAssigner::ReAssignVirtualNodesMemory(map<string, vector<NodePtr>> &mem_reuse_nodes_map,
                                                       int32_t mem_reuse_model) {
  // Find max batch label value
  string max_batch_label;
  if (GetMaxBatchLabel(mem_reuse_nodes_map, mem_reuse_model, max_batch_label) != SUCCESS) {
    GELOGE(FAILED, "Get max batch label failed.");
    return FAILED;
  }
  GELOGI("The batch label of max batch virtual nodes is %s.", max_batch_label.c_str());
  // Assign memory of max batch nodes that have the same batch label.
  GELOGD("Start to reassign memory for max batch virtual nodes, memory offset = %zu.", memory_offset_[0].mem_offset_);
  vector<size_t> nodes_mem_offset_list;
  for (auto &i_map : mem_reuse_nodes_map) {
    // Record the group's start offset BEFORE assigning, so phase 2 can replay it.
    size_t max_batch_node_mem_offset = memory_offset_[0].mem_offset_;
    nodes_mem_offset_list.emplace_back(max_batch_node_mem_offset);
    vector<NodePtr> virtual_nodes_list = i_map.second;
    for (auto &i_node : virtual_nodes_list) {
      // Op_desc is not nullptr, it has been checked.
      OpDescPtr op_desc = i_node->GetOpDesc();
      string batch_label_string;
      // All ops must have ATTR_NAME_BATCH_LABEL, no need to check return value.
      (void)ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label_string);
      if (batch_label_string == max_batch_label) {
        Status status = SUCCESS;
        if (mem_reuse_model == kVirtualInputNodeMemoryReuse) {
          status = ReAssignVirtualInputNodeMemory(i_node, max_batch_node_mem_offset);
        } else if (mem_reuse_model == kVirtualOutputNodeMemoryReuse) {
          status = ReAssignVirtualOutputNodeMemory(i_node, max_batch_node_mem_offset);
        } else {
          GELOGE(FAILED, "Invalid parameter memory reuse model, which is: %d.", mem_reuse_model);
          return FAILED;
        }
        if (status != SUCCESS) {
          GELOGE(FAILED, "Reassign memory of virtual node failed, node name: %s.", i_node->GetName().c_str());
          return FAILED;
        }
        // Commit the advanced offset globally; only the max-batch node grows memory.
        memory_offset_[0].mem_offset_ = max_batch_node_mem_offset;
        AlignMemOffset(MEM_ALIGN_SIZE);
        GELOGD("After reassign memory for virtual node, align memory = %zu.", memory_offset_[0].mem_offset_);
        // Only assign memory of max batch nodes.
        break;
      }
    }
  }
  // Assign memory of remaining nodes that have the same fixed_name.
  GELOGD("Start to reassign memory for remaining batch virtual nodes, memory offset = %zu.",
         memory_offset_[0].mem_offset_);
  size_t memory_reuse_index = 0;
  for (auto &i_map : mem_reuse_nodes_map) {
    vector<NodePtr> virtual_nodes_list = i_map.second;
    for (auto &i_node : virtual_nodes_list) {
      // Every node in the group (including the max-batch one, which is re-assigned at
      // the same offsets) starts at the group's recorded offset; the global offset is
      // NOT advanced here — this phase only overlays the reused block.
      size_t remaining_batch_node_mem_offset = nodes_mem_offset_list[memory_reuse_index];
      Status status = SUCCESS;
      if (mem_reuse_model == kVirtualInputNodeMemoryReuse) {
        status = ReAssignVirtualInputNodeMemory(i_node, remaining_batch_node_mem_offset);
      } else if (mem_reuse_model == kVirtualOutputNodeMemoryReuse) {
        status = ReAssignVirtualOutputNodeMemory(i_node, remaining_batch_node_mem_offset);
      } else {
        GELOGE(FAILED, "Invalid parameter memory reuse model, which is: %d.", mem_reuse_model);
        return FAILED;
      }
      if (status != SUCCESS) {
        GELOGE(FAILED, "Reassign memory of virtual node failed, node name: %s.", i_node->GetName().c_str());
        return FAILED;
      }
    }
    memory_reuse_index++;
  }
  return SUCCESS;
}
// Reassigns memory for all atomic nodes in the graph.
// Atomic nodes need their output/workspace memory zeroed by an atomic-addr-clean op
// before execution. Nodes attributed as connecting to NetOutput are deferred and
// handled by AssignConnectNetOutputAtomicMemory with per-node clean attrs.
// is_loop_graph: when true each atomic node gets its own clean attr (set inside the
// loop); when false one shared clean op covers the whole [atomic_mem_start, end) span.
Status GraphMemoryAssigner::ReAssignAtomicMemory(bool is_loop_graph) {
  GE_CHECK_NOTNULL(compute_graph_);
  // Atomic op memory start addr
  int64_t atomic_mem_start = static_cast<int64_t>(memory_offset_[0].mem_offset_);
  GELOGI("Begin to reAssign atomic memory, atomic initial address mem_offset = %zu!", memory_offset_[0].mem_offset_);
  vector<NodePtr> connect_netoutput_nodes;
  for (auto &node : compute_graph_->GetAllNodes()) {
    auto node_op_desc = node->GetOpDesc();
    if (node_op_desc == nullptr) {
      continue;
    }
    bool is_atomic = false;
    // If GetBool fail, is_atomic is false.
    (void)ge::AttrUtils::GetBool(node_op_desc, ATOMIC_ATTR_IS_ATOMIC_NODE, is_atomic);
    if (!is_atomic) {
      continue;
    }
    bool is_ref = false;
    // If GetBool fail, is_ref is false.
    (void)ge::AttrUtils::GetBool(node_op_desc, ATTR_NAME_REFERENCE, is_ref);
    // Atomic + reference is contradictory: a ref output aliases its input and must
    // not be cleared by atomic-clean.
    if (is_ref) {
      GELOGE(ge::PARAM_INVALID, "The node %s cannot have both atomic and ref attribute.",
             node_op_desc->GetName().c_str());
      return ge::PARAM_INVALID;
    }
    vector<int> is_connect_netoutput;
    // If GetBool fail, attr is_connect_netoutput is an empty vector.
    (void)ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_NODE_CONNECT_OUTPUT, is_connect_netoutput);
    if (!is_connect_netoutput.empty()) {
      // Deferred: handled below by AssignConnectNetOutputAtomicMemory.
      connect_netoutput_nodes.emplace_back(node);
      continue;
    }
    // Atomic op memory start addr of loop graph
    int64_t loop_graph_atomic_mem_start = static_cast<int64_t>(memory_offset_[0].mem_offset_);
    vector<int64_t> mem_offset_end;
    if (AssignAtomicOutputAndWorkspaceMemory(node, mem_offset_end) != SUCCESS) {
      GELOGE(FAILED, "Assign atomic output and workspace memory failed, node is %s.", node->GetName().c_str());
      return FAILED;
    }
    /// In networks with loop op, atomic op uses atomic_addr_clean op independently,
    /// so we need to set the attr separately.
    if (is_loop_graph) {
      GE_CHK_STATUS_RET(SetLoopGraphAtomicAttr(node, loop_graph_atomic_mem_start));
    }
  }
  // In networks without loop op, the same atomic addr clean op is used for atomic op
  if (!is_loop_graph) {
    // Set the address attr of atomic clean operator
    int64_t atomic_mem_size = memory_offset_[0].mem_offset_ - atomic_mem_start;
    if (atomic_mem_size != 0) {
      GE_CHK_STATUS_RET(SetAtomicCleanAttr(nullptr, {atomic_mem_start}, {atomic_mem_size}),
                        "SetAtomicCleanAttr failed.");
    }
  }
  if (AssignConnectNetOutputAtomicMemory(connect_netoutput_nodes) != SUCCESS) {
    GELOGE(FAILED, "Failed to assign memory of nodes that connect to netoutput.");
    return FAILED;
  }
  return SUCCESS;
}
  792. Status GraphMemoryAssigner::AssignAtomicOutputAndWorkspaceMemory(const ge::NodePtr &node,
  793. vector<int64_t> &mem_offset_end) {
  794. auto node_op_desc = node->GetOpDesc();
  795. // Assign atomic node output memory
  796. Status ret = AssignAtomicOutputMemory(node, mem_offset_end);
  797. if (ret != SUCCESS) {
  798. GELOGE(ret, "Failed to assign atomic output memory, node is %s.", node_op_desc->GetName().c_str());
  799. return ret;
  800. }
  801. // Check and assign atomic node workspace memory
  802. map<string, map<int64_t, int64_t>> atomic_workspace_info;
  803. atomic_workspace_info = node_op_desc->TryGetExtAttr(EXT_ATTR_ATOMIC_WORKSPACE_INFO, atomic_workspace_info);
  804. if (!atomic_workspace_info.empty()) {
  805. bool is_fusion_node = false;
  806. // If GetBool fail, is_fusion_node is false.
  807. (void)ge::AttrUtils::GetBool(node_op_desc, ATOMIC_ATTR_IS_FUSION_NODE, is_fusion_node);
  808. if (is_fusion_node) {
  809. // Assign fusion atomic node workspace memory
  810. ret = AssignFusionAtomicWorkspaceMemory(node_op_desc, atomic_workspace_info, mem_offset_end);
  811. } else {
  812. // Assign single ordinary atomic node workspace memory, not include fusion node
  813. ret = AssignOrdinaryAtomicWorkspaceMemory(node_op_desc, atomic_workspace_info, mem_offset_end);
  814. }
  815. if (ret != SUCCESS) {
  816. GELOGE(ret, "Assign atomic workspace memory failed, node is %s.", node_op_desc->GetName().c_str());
  817. return ret;
  818. }
  819. } else {
  820. GELOGW("Current atomic node %s does not have attr ATOMIC_WORKSPACE_INFO.", node->GetName().c_str());
  821. }
  822. return SUCCESS;
  823. }
  824. Status GraphMemoryAssigner::AssignConnectNetOutputAtomicMemory(vector<NodePtr> &connect_netoutput_nodes) {
  825. for (auto &node : connect_netoutput_nodes) {
  826. GE_CHECK_NOTNULL(node);
  827. if (node->GetOpDesc() == nullptr) {
  828. GELOGW("Current node %s op desc is nullptr, memory assignment is skipped.", node->GetName().c_str());
  829. continue;
  830. }
  831. // Atomic memory start addr
  832. int64_t original_atomic_mem_start = static_cast<int64_t>(memory_offset_[0].mem_offset_);
  833. GELOGD("Start to assign memory of atomic node, node name: %s, node type: %s, mem_offset: %ld.",
  834. node->GetName().c_str(), node->GetOpDesc()->GetType().c_str(), original_atomic_mem_start);
  835. vector<int64_t> mem_offset_end;
  836. if (AssignAtomicOutputAndWorkspaceMemory(node, mem_offset_end) != SUCCESS) {
  837. GELOGE(FAILED, "Assign atomic output and workspace memory failed, node is %s.", node->GetName().c_str());
  838. return FAILED;
  839. }
  840. // All atomic nodes use atomic_addr_clean op independently, so we need to set the attr separately.
  841. if (SetIndependentAtomicAttr(node, original_atomic_mem_start, mem_offset_end) != SUCCESS) {
  842. GELOGE(FAILED, "Failed to set atomic attr separately.");
  843. return FAILED;
  844. }
  845. }
  846. return SUCCESS;
  847. }
// Propagates offsets for reference nodes: an output whose name matches an input name
// shares (references) the memory of that input's producer output, so its offset is
// copied from the peer producer instead of being assigned fresh.
Status GraphMemoryAssigner::AssignReferenceMemory() {
  for (auto &node : compute_graph_->GetDirectNode()) {
    // Get the reference type of the node, default is false
    bool is_ref = false;
    // If GetBool fail, is_ref is false.
    (void)ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_REFERENCE, is_ref);
    if (!is_ref) {
      continue;
    }
    GELOGI("Current node %s needs to support the reference relationship between output and input.",
           node->GetName().c_str());
    auto out_op_desc = node->GetOpDesc();
    GE_IF_BOOL_EXEC(out_op_desc == nullptr, GELOGE(ge::FAILED, "out_op_desc is null."); return ge::FAILED);
    vector<int64_t> output_list = out_op_desc->GetOutputOffset();
    if (out_op_desc->GetOutputsSize() > output_list.size()) {
      GELOGE(ge::FAILED, "The size %zu of node output desc is more than output_list's size %zu.",
             out_op_desc->GetOutputsSize(), output_list.size());
      return ge::FAILED;
    }
    // Build name -> input index so each output can find its same-named input.
    map<string, int> input_name_index;
    for (const auto &input_name : out_op_desc->GetAllInputNames()) {
      int index = out_op_desc->GetInputIndexByName(input_name);
      input_name_index.emplace(input_name, index);
    }
    for (auto &out_data_anchor : node->GetAllOutDataAnchors()) {
      string out_data_anchor_name = out_op_desc->GetOutputNameByIndex(out_data_anchor->GetIdx());
      auto iter = input_name_index.find(out_data_anchor_name);
      if (iter != input_name_index.end()) {
        // Matching input found: alias this output to the peer producer's offset.
        int index = iter->second;
        GELOGI("Reference memory: input anchor index = %d, input anchor name = %s, output anchor name = %s.", index,
               iter->first.c_str(), out_data_anchor_name.c_str());
        GE_CHECK_NOTNULL(node->GetInDataAnchor(index));
        auto peer_out_anchor = node->GetInDataAnchor(index)->GetPeerOutAnchor();
        GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue);
        int peer_out_anchor_index = peer_out_anchor->GetIdx();
        auto peer_out_node = peer_out_anchor->GetOwnerNode();
        auto peer_out_op_desc = peer_out_node->GetOpDesc();
        GE_CHECK_NOTNULL(peer_out_op_desc);
        // NOTE(review): peer_out_anchor_index is not bounds-checked against the
        // peer's GetOutputOffset() size here — presumably guaranteed upstream; verify.
        output_list[out_data_anchor->GetIdx()] = peer_out_op_desc->GetOutputOffset()[peer_out_anchor_index];
        GELOGI("Reference output : Set %s name[%s] output[%d] offset to [%ld] stream_id[%ld]",
               node->GetOwnerComputeGraph()->GetName().c_str(), peer_out_op_desc->GetName().c_str(),
               out_data_anchor->GetIdx(), output_list[out_data_anchor->GetIdx()], peer_out_op_desc->GetStreamId());
      } else {
        // No same-named input: keep the offset already assigned to this output.
        GELOGI("Reference output : origin %s name[%s] output[%d] offset is [%ld] stream_id[%ld]",
               node->GetOwnerComputeGraph()->GetName().c_str(), out_op_desc->GetName().c_str(),
               out_data_anchor->GetIdx(), output_list[out_data_anchor->GetIdx()], out_op_desc->GetStreamId());
      }
    }
    out_op_desc->SetOutputOffset(output_list);
  }
  return ge::SUCCESS;
}
  900. bool GraphMemoryAssigner::CheckInputIsSupportAtomic(const ge::NodePtr &node) {
  901. for (auto &in_data_anchor : node->GetAllInDataAnchors()) {
  902. auto peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor();
  903. if (peer_out_data_anchor == nullptr) {
  904. continue;
  905. }
  906. auto peer_op_desc = peer_out_data_anchor->GetOwnerNode()->GetOpDesc();
  907. if (peer_op_desc == nullptr) {
  908. continue;
  909. }
  910. if ((peer_op_desc->GetType() == CONSTANTOP) || (peer_op_desc->GetType() == AIPP_DATA_TYPE) ||
  911. (peer_op_desc->GetType() == VARIABLE)) {
  912. GELOGE(ge::FAILED,
  913. "The current node is %s, and the peer out node is %s. Currently, this scenario is not supported",
  914. node->GetName().c_str(), peer_op_desc->GetName().c_str());
  915. return false;
  916. }
  917. }
  918. return true;
  919. }
// Assigns memory for every output listed in the node's ATOMIC_ATTR_OUTPUT_INDEX,
// advancing the global offset and appending each region's aligned end offset to
// mem_offset_end. Outputs whose atomic address was already assigned (as the input of
// a downstream atomic-input node) are skipped.
Status GraphMemoryAssigner::AssignAtomicOutputMemory(const ge::NodePtr &node, vector<int64_t> &mem_offset_end) {
  auto op_desc = node->GetOpDesc();
  GE_IF_BOOL_EXEC(op_desc == nullptr, GELOGE(ge::FAILED, "op_desc is null."); return ge::FAILED);
  mem_offset_end.clear();
  GELOGD("Begin to assign atomic output memory, node = %s.", op_desc->GetName().c_str());
  vector<int64_t> atomic_output_index;
  // If GetListInt fail, atomic_output_index is empty.
  (void)ge::AttrUtils::GetListInt(op_desc, ATOMIC_ATTR_OUTPUT_INDEX, atomic_output_index);
  // Check atomic output
  vector<int64_t> output_list = op_desc->GetOutputOffset();
  if (atomic_output_index.size() > output_list.size()) {
    GELOGE(ge::FAILED, "The size of atomic_output_index is more than the size of output_list");
    return ge::FAILED;
  }
  auto output_list_size = static_cast<int64_t>(output_list.size());
  for (auto &output_index : atomic_output_index) {
    if (output_index >= output_list_size) {
      GELOGE(ge::PARAM_INVALID, "The output index %ld is more than the size %ld of output_list.", output_index,
             output_list_size);
      return ge::PARAM_INVALID;
    }
    // If the input of the cascade op needs to clear the atomic addr, there is no need to clear it separately here
    bool is_assigned_mem = false;
    if (GetMemoryAssignmentStatus(node, output_index, is_assigned_mem) != SUCCESS) {
      GELOGE(ge::FAILED, "Failed to get memory assignment of node %s.", node->GetName().c_str());
      return ge::FAILED;
    }
    // If you have already assigned an atomic address, skip it, and you don't need to reassign it.
    if (is_assigned_mem) {
      GELOGI(
          "Node %s atomic output : we have assigned atomic memory as the input of next node in "
          "ReAssignContinuousMemory function.",
          op_desc->GetName().c_str());
      continue;
    }
    auto output_desc = op_desc->GetAllOutputsDescPtr().at(output_index);
    int64_t size = 0;
    // Best-effort: a failed size query leaves size at 0 and the offset unchanged.
    if (ge::TensorUtils::GetSize(*output_desc, size) != SUCCESS) {
      GELOGI("Get size failed");
    }
    output_list[output_index] = memory_offset_[0].mem_offset_;
    GELOGI("[IMAS]Atomic output : Set %s name[%s] output[%ld] offset to [%zu] stream_id[%ld] size[%ld] real_size[%ld].",
           compute_graph_->GetName().c_str(), op_desc->GetName().c_str(), output_index, memory_offset_[0].mem_offset_,
           op_desc->GetStreamId(), size, size);
    // Advance and align, then record this region's end for atomic-clean bookkeeping.
    memory_offset_[0].mem_offset_ += size;
    AlignMemOffset(MEM_ALIGN_SIZE);
    mem_offset_end.emplace_back(memory_offset_[0].mem_offset_);
  }
  op_desc->SetOutputOffset(output_list);
  return ge::SUCCESS;
}
  971. Status GraphMemoryAssigner::GetMemoryAssignmentStatus(const ge::NodePtr &node, int64_t output_index,
  972. bool &is_mem_assigned) {
  973. if (static_cast<size_t>(output_index) >= node->GetAllOutDataAnchors().size()) {
  974. GELOGE(ge::PARAM_INVALID, "Output index %ld is more than the size of node's AllOutDataAnchors.", output_index);
  975. return ge::PARAM_INVALID;
  976. }
  977. auto out_data_anchor = node->GetAllOutDataAnchors().at(output_index);
  978. GE_CHECK_NOTNULL(out_data_anchor);
  979. auto input_anchors = out_data_anchor->GetPeerInDataAnchors();
  980. for (auto &input_anchor : input_anchors) {
  981. auto output_node = input_anchor->GetOwnerNode();
  982. /// Get input atomic attr of peer output op, if atomic_input_index[0] = -1, indicates that the atomic address
  983. /// has been assigned
  984. vector<int64_t> atomic_input_index;
  985. (void)ge::AttrUtils::GetListInt(output_node->GetOpDesc(), ATOMIC_ATTR_INPUT_INDEX, atomic_input_index);
  986. if (!atomic_input_index.empty() && (atomic_input_index[0] == kAllInputAddrIsAtomic)) {
  987. is_mem_assigned = true;
  988. break;
  989. }
  990. }
  991. return SUCCESS;
  992. }
// Assigns atomic workspace memory for a single ordinary (non-fusion) node.
// workspace_info maps the node's own name to {workspace index -> size}; each listed
// workspace slot is pointed at the current global offset, which is then advanced.
// Region end offsets are appended to mem_offset_end for atomic-clean bookkeeping.
Status GraphMemoryAssigner::AssignOrdinaryAtomicWorkspaceMemory(const ge::OpDescPtr &op_desc,
                                                                map<string, map<int64_t, int64_t>> &workspace_info,
                                                                vector<int64_t> &mem_offset_end) {
  GELOGI("Begin to reassign normal atomic memory, node = %s.", op_desc->GetName().c_str());
  vector<int64_t> workspace_vector = op_desc->GetWorkspace();
  for (auto iter = workspace_info.begin(); iter != workspace_info.end(); ++iter) {
    // For an ordinary node the info map must be keyed by this node's own name.
    if (op_desc->GetName() != iter->first) {
      GELOGE(ge::PARAM_INVALID, "The node name %s and the node name %s in workspace info are inconsistent.",
             op_desc->GetName().c_str(), iter->first.c_str());
      return ge::PARAM_INVALID;
    }
    if (iter->second.empty()) {
      continue;
    }
    for (auto &info_iter : iter->second) {
      auto workspace_index = static_cast<uint64_t>(info_iter.first);
      auto workspace_size = info_iter.second;
      if (workspace_index >= workspace_vector.size()) {
        GELOGE(ge::PARAM_INVALID, "The workspace index %lu is more than the size %zu of workspace vector.",
               workspace_index, workspace_vector.size());
        return ge::PARAM_INVALID;
      }
      // Point this workspace slot at the current offset, then advance by its size.
      workspace_vector[workspace_index] = memory_offset_[0].mem_offset_;
      GELOGI(
          "[IMAS]Atomic ordinary workspace : Set %s name[%s] workspace[%lu] offset to [%zu] stream_id[%ld] "
          "size[%ld] real_size[%ld].",
          compute_graph_->GetName().c_str(), op_desc->GetName().c_str(), workspace_index, memory_offset_[0].mem_offset_,
          op_desc->GetStreamId(), workspace_size, workspace_size);
      memory_offset_[0].mem_offset_ += workspace_size;
      mem_offset_end.emplace_back(memory_offset_[0].mem_offset_);
    }
  }
  op_desc->SetWorkspace(workspace_vector);
  return SUCCESS;
}
// Assigns atomic workspace memory for a fusion node.
// workspace_info maps each fused sub-node's name to {workspace index -> size}; the
// resulting {sub-node -> {index -> offset}} layout is stored back on the op as the
// EXT_ATTR_ATOMIC_WORKSPACE_OFFSET extended attribute (not in the workspace vector,
// unlike the ordinary path). Region ends are appended to mem_offset_end.
Status GraphMemoryAssigner::AssignFusionAtomicWorkspaceMemory(const ge::OpDescPtr &op_desc,
                                                              map<string, map<int64_t, int64_t>> &workspace_info,
                                                              vector<int64_t> &mem_offset_end) {
  GELOGI("Begin to reassign fusion atomic memory, node = %s.", op_desc->GetName().c_str());
  map<string, map<int64_t, int64_t>> sub_node_workspace_offset;
  for (auto &iter : workspace_info) {
    if (iter.second.empty()) {
      continue;
    }
    map<int64_t, int64_t> index_offset;
    for (auto &info_iter : iter.second) {
      auto workspace_index = static_cast<uint64_t>(info_iter.first);
      auto workspace_size = info_iter.second;
      // Remember the slot's offset before advancing the global cursor.
      size_t workspace_offset = memory_offset_[0].mem_offset_;
      GELOGI(
          "[IMAS]Atomic fusion workspace : Set %s name[%s] workspace[%lu] offset to [%zu] stream_id[%ld] size[%ld] "
          "real_size[%ld].",
          compute_graph_->GetName().c_str(), op_desc->GetName().c_str(), workspace_index, memory_offset_[0].mem_offset_,
          op_desc->GetStreamId(), workspace_size, workspace_size);
      memory_offset_[0].mem_offset_ += workspace_size;
      mem_offset_end.emplace_back(memory_offset_[0].mem_offset_);
      index_offset.insert(std::make_pair(workspace_index, workspace_offset));
    }
    sub_node_workspace_offset.insert(std::make_pair(iter.first, index_offset));
  }
  if (!(op_desc->SetExtAttr(EXT_ATTR_ATOMIC_WORKSPACE_OFFSET, sub_node_workspace_offset))) {
    GELOGE(FAILED, "Set EXT_ATTR_ATOMIC_WORKSPACE_OFFSET failed, op name:%s.", op_desc->GetName().c_str());
    return FAILED;
  }
  return SUCCESS;
}
  1059. Status GraphMemoryAssigner::CheckOffset() {
  1060. for (const ge::NodePtr &node : compute_graph_->GetAllNodes()) {
  1061. GE_CHECK_NOTNULL(node->GetOpDesc());
  1062. vector<int64_t> input_list = node->GetOpDesc()->GetInputOffset();
  1063. for (auto input : input_list) {
  1064. if (input == ge::kInvalidOffset) {
  1065. GELOGE(FAILED, "Invalid offset in node: %s input: %ld.", node->GetName().c_str(), ge::kInvalidOffset);
  1066. return FAILED;
  1067. }
  1068. }
  1069. vector<int64_t> output_list = node->GetOpDesc()->GetOutputOffset();
  1070. for (auto output : output_list) {
  1071. if (output == ge::kInvalidOffset) {
  1072. GELOGE(FAILED, "Invalid offset in node: %s output: %ld.", node->GetName().c_str(), ge::kInvalidOffset);
  1073. return FAILED;
  1074. }
  1075. }
  1076. vector<int64_t> workspace_list = node->GetOpDesc()->GetWorkspace();
  1077. for (auto workspace : workspace_list) {
  1078. if (workspace == ge::kInvalidOffset) {
  1079. GELOGE(FAILED, "Invalid offset in node: %s workspace: %ld.", node->GetName().c_str(), ge::kInvalidOffset);
  1080. return FAILED;
  1081. }
  1082. }
  1083. }
  1084. return SUCCESS;
  1085. }
  1086. ge::Status GraphMemoryAssigner::SetInputOffset() {
  1087. if (memory_offset_.empty()) {
  1088. GELOGE(FAILED, "memory_offset_ is empty.");
  1089. return FAILED;
  1090. }
  1091. GEEVENT("[IMAS]AfterAssignMemory : %s memoffset[%zu]", compute_graph_->GetName().c_str(),
  1092. memory_offset_[0].mem_offset_);
  1093. for (const ge::NodePtr &node : compute_graph_->GetAllNodes()) {
  1094. if (UpdateOpInputOffset(node) != ge::SUCCESS) {
  1095. GELOGE(ge::FAILED, "Update op input offset failed");
  1096. return ge::FAILED;
  1097. }
  1098. }
  1099. return ge::SUCCESS;
  1100. }
  1101. NodePtr GraphMemoryAssigner::GetKnownInputNode(const NodePtr &node) const {
  1102. if (!node->GetOpDesc()->HasAttr(ATTR_NAME_PARENT_NODE_INDEX)) {
  1103. return node;
  1104. }
  1105. if (NodeUtils::IsDynamicShape(node)) {
  1106. return node;
  1107. }
  1108. return NodeUtils::GetParentInput(node);
  1109. }
  1110. ge::Status GraphMemoryAssigner::UpdateConstArgsOffset(const NodePtr &node, vector<int64_t> &input_list) const {
  1111. uint32_t parent_index = 0;
  1112. if (!AttrUtils::GetInt(node->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, parent_index)) {
  1113. return SUCCESS;
  1114. }
  1115. // Subgraph Data Node, check for constant input.
  1116. std::string op_type;
  1117. const auto &in_node = NodeUtils::GetParentInput(node);
  1118. if (NodeUtils::GetConstOpType(in_node, op_type)) {
  1119. input_list = in_node->GetOpDesc()->GetOutputOffset();
  1120. node->GetOpDesc()->SetOutputOffset(input_list); // Set Data output same as const output.
  1121. return SUCCESS; // Constant input.
  1122. }
  1123. // Memory allocated for dynamic shape subgraph Data.
  1124. if (NodeUtils::IsDynamicShape(node)) {
  1125. return SUCCESS;
  1126. }
  1127. const auto &owner = node->GetOwnerComputeGraph();
  1128. const auto &parent_desc = owner->GetParentNode()->GetOpDesc();
  1129. const auto parent_inputs = parent_desc->GetInputOffset();
  1130. if (parent_inputs.size() <= parent_index) {
  1131. GELOGE(FAILED, "Get Parent input offset failed, node: %s, input size: %zu, parent index: %u",
  1132. node->GetName().c_str(), parent_inputs.size(), parent_index);
  1133. return FAILED;
  1134. }
  1135. input_list = {parent_inputs[parent_index]};
  1136. node->GetOpDesc()->SetOutputOffset(input_list); // Set Data output same as parent input.
  1137. return SUCCESS;
  1138. }
  1139. ge::Status GraphMemoryAssigner::UpdateOpInputOffset(const NodePtr &node, vector<int64_t> &input_list) const {
  1140. vector<int64_t> origin_input_list;
  1141. vector<int64_t> memory_type;
  1142. auto tmp_op_desc = node->GetOpDesc();
  1143. origin_input_list = tmp_op_desc->GetInputOffset();
  1144. bool has_mem_type_attr = ge::AttrUtils::GetListInt(tmp_op_desc, ATTR_NAME_INPUT_MEM_TYPE_LIST, memory_type);
  1145. for (const auto &anchor : node->GetAllInDataAnchors()) {
  1146. vector<int64_t> output_list;
  1147. auto peer_out_anchor = anchor->GetPeerOutAnchor();
  1148. if (peer_out_anchor == nullptr) {
  1149. continue;
  1150. }
  1151. // If the current node not broadcast, the OutputOffset of the previous node is used to update the input_list
  1152. auto last_peer_out_node = peer_out_anchor->GetOwnerNode();
  1153. auto last_peer_out_op_desc = last_peer_out_node->GetOpDesc();
  1154. GE_CHECK_NOTNULL(last_peer_out_op_desc);
  1155. output_list = last_peer_out_op_desc->GetOutputOffset();
  1156. if (output_list.size() > static_cast<size_t>(peer_out_anchor->GetIdx())) {
  1157. auto input_index = anchor->GetIdx();
  1158. if (has_mem_type_attr) {
  1159. auto input_size = tmp_op_desc->GetInputsSize();
  1160. auto ori_input_offset_list_size = origin_input_list.size();
  1161. auto mem_type_size = memory_type.size();
  1162. if ((input_size != mem_type_size) || (input_size != ori_input_offset_list_size)) {
  1163. GELOGE(ge::FAILED,
  1164. "fusion: node[%s] input_size[%zu] diff from memory_type_size[%zu]"
  1165. " from ori_input_offset_list_size[%lu]",
  1166. tmp_op_desc->GetName().c_str(), input_size, mem_type_size, ori_input_offset_list_size);
  1167. return ge::FAILED;
  1168. }
  1169. // not hbm keep orignal inputoffest
  1170. // hbm inputoffset = original inputoffset + outputoffset
  1171. input_list.emplace_back(memory_type[input_index] == RT_MEMORY_L1
  1172. ? origin_input_list[input_index]
  1173. : origin_input_list[input_index] + output_list.at(peer_out_anchor->GetIdx()));
  1174. GELOGI("fuison: node[%s] input[%d] is set from node[%s] out index[%d] offset[%ld]",
  1175. tmp_op_desc->GetName().c_str(), input_index,
  1176. peer_out_anchor->GetOwnerNode()->GetOpDesc()->GetName().c_str(), peer_out_anchor->GetIdx(),
  1177. input_list.back());
  1178. } else {
  1179. int64_t output_offset = output_list.at(peer_out_anchor->GetIdx());
  1180. const auto &in_node = GetKnownInputNode(peer_out_anchor->GetOwnerNode());
  1181. if (in_node->GetType() == CONSTANT) {
  1182. GeTensorDesc tensor_desc = tmp_op_desc->GetInputDesc(input_index);
  1183. GE_CHK_STATUS(TensorUtils::GetDataOffset(tensor_desc, output_offset));
  1184. }
  1185. GELOGI("node[%s] input[%d] is set from node[%s] out index[%d] offset[%ld]", tmp_op_desc->GetName().c_str(),
  1186. input_index, peer_out_anchor->GetOwnerNode()->GetOpDesc()->GetName().c_str(), peer_out_anchor->GetIdx(),
  1187. output_offset);
  1188. input_list.emplace_back(output_offset);
  1189. }
  1190. }
  1191. }
  1192. return ge::SUCCESS;
  1193. }
  1194. ge::Status GraphMemoryAssigner::UpdateOpInputOffset(const NodePtr &node) const {
  1195. GE_CHECK_NOTNULL(node->GetOpDesc());
  1196. vector<int64_t> input_list;
  1197. if (node->GetType() == HCOMBROADCAST || node->GetType() == HVDCALLBACKBROADCAST) {
  1198. for (const auto &anchor : node->GetAllInDataAnchors()) {
  1199. vector<int64_t> output_list;
  1200. auto peer_out_anchor = anchor->GetPeerOutAnchor();
  1201. if (peer_out_anchor == nullptr) {
  1202. continue;
  1203. }
  1204. auto last_peer_out_node = peer_out_anchor->GetOwnerNode();
  1205. // If the current node is broadcast and the preceding node is variable, because InputOffset has been set
  1206. // in function:AssignVarAttr2Nodes, then the InputOffset of the broadcast node is taken to update the input_list.
  1207. // Otherwise, the OutputOffset of the previous node is used to update the input_list.
  1208. if (last_peer_out_node->GetType() != VARIABLE) {
  1209. auto last_peer_out_op_desc = last_peer_out_node->GetOpDesc();
  1210. GE_CHECK_NOTNULL(last_peer_out_op_desc);
  1211. output_list = last_peer_out_op_desc->GetOutputOffset();
  1212. if (output_list.size() > static_cast<size_t>(peer_out_anchor->GetIdx())) {
  1213. input_list.emplace_back(output_list.at(peer_out_anchor->GetIdx()));
  1214. }
  1215. } else {
  1216. vector<int64_t> cur_node_input_list;
  1217. auto cur_node_op_desc = node->GetOpDesc();
  1218. GE_CHECK_NOTNULL(cur_node_op_desc);
  1219. cur_node_input_list = cur_node_op_desc->GetInputOffset();
  1220. if (cur_node_input_list.size() > static_cast<size_t>(anchor->GetIdx())) {
  1221. input_list.emplace_back(cur_node_input_list.at(anchor->GetIdx()));
  1222. }
  1223. }
  1224. }
  1225. } else if (node->GetType() == DATA_TYPE) {
  1226. if (UpdateConstArgsOffset(node, input_list) != SUCCESS) {
  1227. GELOGE(FAILED, "Update data: %s args offset failed.", node->GetName().c_str());
  1228. return FAILED;
  1229. }
  1230. } else {
  1231. if (UpdateOpInputOffset(node, input_list) != SUCCESS) {
  1232. GELOGE(FAILED, "Update node: %s input offset failed.", node->GetName().c_str());
  1233. return FAILED;
  1234. }
  1235. }
  1236. node->GetOpDesc()->SetInputOffset(input_list);
  1237. return SUCCESS;
  1238. }
  1239. Status GraphMemoryAssigner::SetIndependentAtomicAttr(const ge::NodePtr &node, int64_t atomic_mem_start,
  1240. const vector<int64_t> &mem_offset_end) {
  1241. GELOGD("Start to set independent atomic attr, atomic_addr_clean memory offset start is %ld", atomic_mem_start);
  1242. // Parsing offset and size vectors
  1243. vector<int64_t> memory_offset_start;
  1244. vector<int64_t> memory_offset_size;
  1245. memory_offset_start.emplace_back(atomic_mem_start);
  1246. for (size_t i = 0; i < mem_offset_end.size(); ++i) {
  1247. memory_offset_start.emplace_back(mem_offset_end[i]);
  1248. // Number 1 means element index
  1249. auto size = memory_offset_start[i + 1] - memory_offset_start[i];
  1250. memory_offset_size.emplace_back(size);
  1251. }
  1252. memory_offset_start.pop_back();
  1253. const auto &in_control_anchor = node->GetInControlAnchor();
  1254. if (!memory_offset_size.empty() && in_control_anchor != nullptr) {
  1255. for (auto &peer_out_control_anchor : in_control_anchor->GetPeerOutControlAnchors()) {
  1256. if (peer_out_control_anchor == nullptr) {
  1257. continue;
  1258. }
  1259. auto peer_out_node = peer_out_control_anchor->GetOwnerNode();
  1260. auto peer_out_node_desc = peer_out_node->GetOpDesc();
  1261. if (peer_out_node_desc == nullptr) {
  1262. continue;
  1263. }
  1264. GELOGD("Current node memory_offset vector size is %zu, node name %s, node type is %s.", memory_offset_size.size(),
  1265. peer_out_node_desc->GetName().c_str(), peer_out_node_desc->GetType().c_str());
  1266. if (peer_out_node_desc->GetType() == ATOMICADDRCLEAN) {
  1267. if (SetAtomicCleanAttr(peer_out_node, memory_offset_start, memory_offset_size) != SUCCESS) {
  1268. GELOGE(FAILED, "Set atomic clean attr failed.");
  1269. return FAILED;
  1270. }
  1271. }
  1272. }
  1273. }
  1274. return SUCCESS;
  1275. }
  1276. Status GraphMemoryAssigner::SetLoopGraphAtomicAttr(const ge::NodePtr &node, int64_t atomic_mem_start) {
  1277. // set the address attr of atomic clean operator for loop graph
  1278. int64_t atomic_mem_size = memory_offset_[0].mem_offset_ - atomic_mem_start;
  1279. GELOGI("SetLoopGraphAtomicAttr beign, atomic_addr_clean start size is %ld, mem_size is %ld, mem_offset is %zu.",
  1280. atomic_mem_start, atomic_mem_size, memory_offset_[0].mem_offset_);
  1281. const auto &in_control_anchor = node->GetInControlAnchor();
  1282. if (atomic_mem_size != 0 && in_control_anchor != nullptr) {
  1283. for (auto &peer_out_control_anchor : in_control_anchor->GetPeerOutControlAnchors()) {
  1284. if (peer_out_control_anchor == nullptr) {
  1285. continue;
  1286. }
  1287. auto peer_out_node = peer_out_control_anchor->GetOwnerNode();
  1288. auto peer_out_node_desc = peer_out_node->GetOpDesc();
  1289. if (peer_out_node_desc == nullptr) {
  1290. continue;
  1291. }
  1292. GELOGD("SetLoopGraphAtomicAttr, node is %s, op type is %s.", peer_out_node_desc->GetName().c_str(),
  1293. peer_out_node_desc->GetType().c_str());
  1294. if (peer_out_node_desc->GetType() == ATOMICADDRCLEAN) {
  1295. GE_CHK_STATUS_EXEC(SetAtomicCleanAttr(peer_out_node, {atomic_mem_start}, {atomic_mem_size}),
  1296. GELOGE(FAILED, "SetAtomicCleanAttr failed.");
  1297. return FAILED);
  1298. }
  1299. }
  1300. }
  1301. return SUCCESS;
  1302. }
  1303. ge::Status GraphMemoryAssigner::IsIndependentAtomicClean(const ge::NodePtr &node,
  1304. bool &is_independent_atomic_clean_node) {
  1305. GE_CHECK_NOTNULL(node);
  1306. const auto &out_control_anchor = node->GetOutControlAnchor();
  1307. GE_CHECK_NOTNULL(out_control_anchor);
  1308. for (const auto &peer_in_control_anchor : out_control_anchor->GetPeerInControlAnchors()) {
  1309. if (peer_in_control_anchor != nullptr) {
  1310. auto peer_in_node = peer_in_control_anchor->GetOwnerNode();
  1311. auto peer_in_node_desc = peer_in_node->GetOpDesc();
  1312. if (peer_in_node_desc != nullptr) {
  1313. bool is_atomic_node = false;
  1314. // If GetBool fail, is_atomic_node is false.
  1315. (void)ge::AttrUtils::GetBool(peer_in_node_desc, ATOMIC_ATTR_IS_ATOMIC_NODE, is_atomic_node);
  1316. if (is_atomic_node) {
  1317. vector<int> is_connect_netoutput;
  1318. // If GetBool fail, attr is_connect_netoutput is an empty vector.
  1319. (void)ge::AttrUtils::GetListInt(peer_in_node_desc, ATTR_NAME_NODE_CONNECT_OUTPUT, is_connect_netoutput);
  1320. if (!is_connect_netoutput.empty()) {
  1321. GELOGD("Peer in node %s is independent atomic clean node", peer_in_node->GetName().c_str());
  1322. is_independent_atomic_clean_node = true;
  1323. break;
  1324. }
  1325. }
  1326. }
  1327. }
  1328. }
  1329. return SUCCESS;
  1330. }
// Attach atomic-clean memory sections to AtomicAddrClean node(s).
// If n is non-null, only the graph node with the same name is updated; if n is
// null, every ATOMICADDRCLEAN node that is NOT independent (see
// IsIndependentAtomicClean) is updated instead. For each selected node the
// start/size lists are appended to its workspace/workspace-bytes vectors and
// to the ATTR_NAME_AUTOMIC_ADD_START / ATTR_NAME_AUTOMIC_ADD_MEM_SIZE list attrs.
ge::Status GraphMemoryAssigner::SetAtomicCleanAttr(const NodePtr &n, const vector<int64_t> &atomic_mem_start,
                                                   const vector<int64_t> &atomic_mem_size) {
  for (ge::NodePtr &node : compute_graph_->GetAllNodes()) {
    auto node_op_desc = node->GetOpDesc();
    GE_IF_BOOL_EXEC(node_op_desc == nullptr, continue);
    // Explicit target: match by name.
    bool is_valid_atomic_clean_node = (n != nullptr) && (node->GetName() == n->GetName());
    if (((n == nullptr) && (node_op_desc->GetType() == ATOMICADDRCLEAN))) {
      // No explicit target: accept every non-independent atomic-clean node
      // (independent ones are handled through SetIndependentAtomicAttr).
      bool is_independent_atomic_clean = false;
      if (IsIndependentAtomicClean(node, is_independent_atomic_clean) != SUCCESS) {
        GELOGE(FAILED, "Failed to determine the connection relationship of atomic addr clean node.");
        return PARAM_INVALID;
      }
      is_valid_atomic_clean_node = is_valid_atomic_clean_node || (!is_independent_atomic_clean);
    }
    if (is_valid_atomic_clean_node) {
      GELOGD("Node %s, set atomic clean attr start.", node->GetName().c_str());
      // Append the new sections to the node's workspace offsets and sizes.
      vector<int64_t> workspace_vector = node_op_desc->GetWorkspace();
      vector<int64_t> workspace_byte_vector = node_op_desc->GetWorkspaceBytes();
      workspace_vector.insert(workspace_vector.end(), atomic_mem_start.begin(), atomic_mem_start.end());
      workspace_byte_vector.insert(workspace_byte_vector.end(), atomic_mem_size.begin(), atomic_mem_size.end());
      node_op_desc->SetWorkspace(workspace_vector);
      node_op_desc->SetWorkspaceBytes(workspace_byte_vector);
      // Read-modify-write the start-offset list attr (append, never overwrite).
      std::vector<int64_t> mem_start_vector;
      // If GetListInt fail, mem_start_vector is empty.
      (void)ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_START, mem_start_vector);
      mem_start_vector.insert(mem_start_vector.end(), atomic_mem_start.begin(), atomic_mem_start.end());
      GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_START, mem_start_vector),
                       GELOGE(FAILED, "SetListInt failed.");
                       return FAILED);
      // Same append pattern for the size list attr.
      std::vector<int64_t> mem_size_vector;
      // If GetListInt fail, mem_size_vector is empty.
      (void)ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_MEM_SIZE, mem_size_vector);
      mem_size_vector.insert(mem_size_vector.end(), atomic_mem_size.begin(), atomic_mem_size.end());
      GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_MEM_SIZE, mem_size_vector),
                       GELOGE(FAILED, "SetListInt failed.");
                       return FAILED);
      // Render the sections as space-separated strings for the IMAS log line.
      std::stringstream ss;
      for (auto iter : atomic_mem_start) {
        ss << iter << " ";
      }
      string atomic_mem_start_str = ss.str();
      ss.clear();
      ss.str("");
      for (auto iter : atomic_mem_size) {
        ss << iter << " ";
      }
      string atomic_mem_size_str = ss.str();
      GELOGI("[IMAS]SetAtomicCleanAttr : Set graph[%s] atomic_node[%s] output offset [%s] size[%s] streamid[%ld]",
             node->GetOwnerComputeGraph()->GetName().c_str(), node_op_desc->GetName().c_str(),
             atomic_mem_start_str.c_str(), atomic_mem_size_str.c_str(), node->GetOpDesc()->GetStreamId());
    }
  }
  return SUCCESS;
}
  1385. void GraphMemoryAssigner::AlignMemOffset(const int64_t &mem_align_size) {
  1386. if (mem_align_size <= 0) {
  1387. return;
  1388. }
  1389. memory_offset_[0].mem_offset_ =
  1390. (memory_offset_[0].mem_offset_ + mem_align_size - 1) / mem_align_size * mem_align_size;
  1391. }
  1392. } // namespace ge

图引擎模块(GE)是MindSpore的一个子模块,其代码由C++实现,位于前端模块ME和底层硬件之间,起到承接作用。图引擎模块以ME下发的图作为输入,然后进行一系列的深度图优化操作,最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点,做了特定的优化工作,以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时,GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成,详细的架构图如下所示