You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

somas.cc 70 kB

5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839
  1. /**
  2. * Copyright 2020-2021 Huawei Technologies Co., Ltd
  3. * Licensed under the Apache License, Version 2.0 (the "License");
  4. * you may not use this file except in compliance with the License.
  5. * You may obtain a copy of the License at
  6. * http://www.apache.org/licenses/LICENSE-2.0
  7. * Unless required by applicable law or agreed to in writing, software
  8. * distributed under the License is distributed on an "AS IS" BASIS,
  9. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. * See the License for the specific language governing permissions and
  11. * limitations under the License.
  12. */
  13. #include "backend/optimizer/somas/somas.h"
  14. #include <algorithm>
  15. #include <cstdio>
  16. #include <fstream>
  17. #include <iterator>
  18. #include <memory>
  19. #include <numeric>
  20. #include <set>
  21. #include <sstream>
  22. #include "backend/optimizer/somas/somas_node.h"
  23. #include "backend/optimizer/somas/somas_solver_pre.h"
  24. #include "backend/optimizer/somas/somas_stream.h"
  25. #include "backend/optimizer/somas/somas_tensor.h"
  26. #ifdef ENABLE_D
  27. #include "runtime/device/ascend/ascend_stream_assign.h"
  28. #endif
  29. #include "backend/optimizer/common/helper.h"
  30. #include "utils/ms_context.h"
  31. #include "debug/common.h"
  32. #ifdef ENABLE_DUMP_IR
  33. #include "debug/rdr/running_data_recorder.h"
  34. #endif
  35. #include "common/thread_pool.h"
  36. #include "profiler/device/common/memory_profiling.h"
  37. using mindspore::profiler::MemoryProfiling;
  38. using mindspore::profiler::NodeMemory;
  39. using mindspore::profiler::TensorMemory;
namespace mindspore {
namespace somas {
// Gap constant in bytes used by the somas model (consumed elsewhere in this file).
constexpr auto kGapSize = 512;
// Keys of the somas cache json file (see SaveSomasResult / LoadSomasResult / VerifySomasResult).
constexpr auto kGraphId = "graph_id";
constexpr auto kHashId = "hash_id";
constexpr auto kMemOffset = "mem_offset";
constexpr auto kNodeSize = "node_size";
constexpr auto kTensorSize = "tensor_size";
constexpr auto kContiguousSize = "contiguous_size";
constexpr auto kRefNodeSize = "ref_node_size";
constexpr auto kStreamSize = "stream_size";
constexpr auto kStreamGroupSize = "stream_group_size";
constexpr auto kTensors = "tensors";
// Per-tensor keys inside the cache file's "tensors" array.
constexpr auto kTensorId = "tensor_id";
constexpr auto kSize = "size";
constexpr auto kOriSize = "ori_size";
constexpr auto kLifelongValue = "lifelong_value";
constexpr auto kLifeStart = "life_start";
constexpr auto kLifeEnd = "life_end";
constexpr auto kOffset = "offset";
// Human-readable names for TensorType values, used when dumping somas info.
std::map<TensorType, std::string> tensor_type_name_map = {{kCommon, "Common"},
                                                          {kOutputOnly, "OutputOnly"},
                                                          {kWorkspace, "Workspace"},
                                                          {kGetNextOutput, "GetNextOutput"},
                                                          {kSummaryInput, "SummaryInput"},
                                                          {kRefNodeInput, "RefNodeInput"},
                                                          {kRefNodeOutput, "RefNodeOutput"},
                                                          {kUnknown, "Unknown"}};
// Human-readable names for LifeLongType values, used when dumping somas info.
std::map<LifeLongType, std::string> life_long_name_map = {{kLifeLongNone, "LifeLongNone"},
                                                          {kLifeLongGraphAll, "LifeLongGraphAll"},
                                                          {kLifeLongGraphStart, "LifeLongGraphStart"},
                                                          {kLifeLongGraphEnd, "LifeLongGraphEnd"}};
// Entry point of somas planning for |graph|: builds the somas model, then
// either restores a cached assignment (when the model hash matches a saved
// json) or solves for offsets, saves the result, and reports statistics.
// Returns true on success; throws on initialization/assignment failure.
bool Somas::Allocate(const session::KernelGraph *graph) {
  auto ret = InitSomasTensors(graph);
  if (!ret) {
    MS_LOG(EXCEPTION) << "Somas Initialize Failed.";
  }
  // Nothing to plan when the graph produced no somas tensors.
  if (tensors_list_.empty()) {
    MS_LOG(INFO) << "No Tensor for Somas";
    return true;
  }
  ret = CalcSomasModelHash(graph);
  if (ret) {
    std::string filename =
      save_graphs_path_ + "/somas_meta/" + "somas_graph" + std::to_string(graph->graph_id()) + "_" + hash_id_ + ".json";
    ret = LoadSomasResult(graph, filename);
    if (ret) {
      // Cache hit: tensor offsets were restored from the json, skip solving.
      MS_LOG(INFO) << "Load Somas Cache file " << filename << " Successfully.";
      GenGraphStatisticInfo();
      return ret;
    } else {
      // Cache miss/mismatch: reset any offsets a partial load may have set.
      for (auto &tensor : tensors_list_) {
        tensor->offset_ = 0;
      }
    }
  } else {
    MS_LOG(ERROR) << "Calculate somas's model hash id failed.";
  }
  // Computing Conflict pairs
  MS_LOG(INFO) << "Start Computing Conflict Pairs";
  ComputeConflictPairs();
  MS_LOG(INFO) << "End Computing Conflict Pairs";
  ret = Assign(graph);
  if (!ret) {
    MS_LOG(EXCEPTION) << "Somas Assign Failed.";
  }
  SaveSomasResult(graph);
  GenGraphStatisticInfo();
  return ret;
}
// Computes hash_id_ from the textual somas model (SomasInfo) and persists the
// model text to somas_meta/somas_graph<id>_<hash>.info so the cache key can be
// inspected. Returns true when the hash is set and the info file exists or was
// written; false on any path/IO problem.
bool Somas::CalcSomasModelHash(const session::KernelGraph *graph) {
  auto model_str = SomasInfo(true);
  hash_id_ = std::to_string(std::hash<std::string>()(model_str));
  MS_LOG(INFO) << "Graph " << graph->graph_id() << "'s SOMAS Model hash id is " << hash_id_;
  std::string filename =
    save_graphs_path_ + "/somas_meta/" + "somas_graph" + std::to_string(graph->graph_id()) + "_" + hash_id_ + ".info";
  if (filename.size() > PATH_MAX) {
    MS_LOG(WARNING) << "File path " << filename << " is too long.";
    return false;
  }
  auto real_path = Common::GetRealPath(filename);
  if (!real_path.has_value()) {
    MS_LOG(WARNING) << "Get real path failed. path=" << filename;
    return false;
  }
  // An existing .info file means the same model was already saved by a
  // previous run; nothing more to do.
  std::ifstream ifs(real_path.value());
  if (ifs) {
    MS_LOG(INFO) << "Graph " << graph->graph_id() << "'s SOMAS Model file " << real_path.value() << " is exist.";
    ifs.close();
    return true;
  }
  // Restrict the file to owner access before writing it.
  ChangeFileMode(real_path.value(), S_IRWXU);
  std::ofstream ofs(real_path.value());
  if (!ofs.is_open()) {
    MS_LOG(WARNING) << "Open file '" << real_path.value() << "' failed!";
    return false;
  }
  ofs << model_str << std::endl;
  ofs.close();
  return true;
}
// Serializes the solved somas plan to somas_meta/somas_graph<id>_<hash>.json:
// graph id, model hash, total mem offset, the sizes of all somas structures
// (used later for verification) and, per tensor, the fields checked plus the
// assigned offset. Returns false on any path/IO problem.
bool Somas::SaveSomasResult(const session::KernelGraph *graph) {
  nlohmann::json somas_json;
  somas_json[kGraphId] = graph->graph_id();
  somas_json[kHashId] = hash_id_;
  somas_json[kMemOffset] = mem_offset_;
  // Structure sizes are stored so VerifySomasResult can detect model drift.
  somas_json[kNodeSize] = nodes_list_.size();
  somas_json[kTensorSize] = tensors_list_.size();
  somas_json[kContiguousSize] = contiguous_tensors_list_.size();
  somas_json[kRefNodeSize] = ref_node_constraints_.size();
  somas_json[kStreamSize] = streams_list_.size();
  somas_json[kStreamGroupSize] = streams_groups_.size();
  std::vector<nlohmann::json> tensors_json;
  for (auto &tensor : tensors_list_) {
    nlohmann::json tensor_json;
    tensor_json[kTensorId] = tensor->GetId();
    tensor_json[kSize] = tensor->GetAlignedSize();
    tensor_json[kOriSize] = tensor->GetOriginalSize();
    tensor_json[kLifelongValue] = tensor->lifelong_value_;
    tensor_json[kLifeStart] = tensor->lifetime_.start_;
    tensor_json[kLifeEnd] = tensor->lifetime_.end_;
    tensor_json[kOffset] = tensor->GetOffset();
    tensors_json.emplace_back(tensor_json);
  }
  somas_json[kTensors] = tensors_json;
  std::string filename =
    save_graphs_path_ + "/somas_meta/" + "somas_graph" + std::to_string(graph->graph_id()) + "_" + hash_id_ + ".json";
  if (filename.size() > PATH_MAX) {
    MS_LOG(WARNING) << "File path " << filename << " is too long.";
    return false;
  }
  auto real_path = Common::GetRealPath(filename);
  if (!real_path.has_value()) {
    MS_LOG(WARNING) << "Get real path failed. path=" << filename;
    return false;
  }
  // Restrict the cache file to owner access before (re)writing it.
  ChangeFileMode(real_path.value(), S_IRWXU);
  std::ofstream ofs(real_path.value());
  if (!ofs.is_open()) {
    MS_LOG(WARNING) << "Open file '" << real_path.value() << "' failed!";
    return false;
  }
  ofs << somas_json.dump() << std::endl;
  ofs.close();
  return true;
}
  186. bool Somas::LoadSomasResult(const session::KernelGraph *graph, const string filename) {
  187. if (filename.length() <= strlen(".json")) {
  188. MS_LOG(WARNING) << "please check somas cache file path.";
  189. return false;
  190. }
  191. std::ifstream somas_json_fs(filename);
  192. if (!somas_json_fs.is_open()) {
  193. MS_LOG(INFO) << "Open json file: " << filename << " error, Somas Cache Missed.";
  194. return false;
  195. }
  196. nlohmann::json somas_json;
  197. try {
  198. somas_json_fs >> somas_json;
  199. somas_json_fs.close();
  200. } catch (std::exception &e) {
  201. MS_LOG(WARNING) << "Parse json file error: " << filename << ", sleep 500ms and retry again.";
  202. somas_json_fs.close();
  203. std::this_thread::sleep_for(std::chrono::milliseconds(500));
  204. std::ifstream retry_tmp(filename);
  205. if (!retry_tmp.is_open()) {
  206. MS_LOG(INFO) << "Open json file: " << filename << " error, please check kernel_meta.";
  207. return false;
  208. }
  209. retry_tmp >> somas_json;
  210. retry_tmp.close();
  211. }
  212. auto ret = VerifySomasResult(graph, somas_json);
  213. if (!ret) {
  214. MS_LOG(WARNING) << "Verify Somas Result Failed.";
  215. return false;
  216. }
  217. auto mem_offset = somas_json[kMemOffset];
  218. mem_offset_ = mem_offset;
  219. ret = UpdateTensorsOffset(somas_json[kTensors]);
  220. return ret;
  221. }
// Checks a loaded cache json against the current graph and somas model:
// graph id, model hash, and the size of every somas structure must all match.
// Returns false (with a warning naming the field) on the first mismatch.
bool Somas::VerifySomasResult(const session::KernelGraph *graph, const nlohmann::json &somas_json) const {
  auto graph_id = somas_json[kGraphId];
  auto hash_id = somas_json[kHashId];
  auto node_size = somas_json[kNodeSize];
  auto tensor_size = somas_json[kTensorSize];
  auto contiguous_size = somas_json[kContiguousSize];
  auto ref_node_size = somas_json[kRefNodeSize];
  auto stream_size = somas_json[kStreamSize];
  auto stream_group_size = somas_json[kStreamGroupSize];
  if (graph_id != graph->graph_id()) {
    MS_LOG(WARNING) << "Mismatch graph id " << graph_id << " vs " << graph->graph_id();
    return false;
  }
  if (hash_id != hash_id_) {
    MS_LOG(WARNING) << "Mismatch hash id " << hash_id << " vs " << hash_id_;
    return false;
  }
  if (node_size != nodes_list_.size()) {
    MS_LOG(WARNING) << "Mismatch node size " << node_size << " vs " << nodes_list_.size();
    return false;
  }
  if (tensor_size != tensors_list_.size()) {
    MS_LOG(WARNING) << "Mismatch tensor size " << tensor_size << " vs " << tensors_list_.size();
    return false;
  }
  if (contiguous_size != contiguous_tensors_list_.size()) {
    MS_LOG(WARNING) << "Mismatch contiguous size " << contiguous_size << " vs " << contiguous_tensors_list_.size();
    return false;
  }
  if (ref_node_size != ref_node_constraints_.size()) {
    MS_LOG(WARNING) << "Mismatch ref node size " << ref_node_size << " vs " << ref_node_constraints_.size();
    return false;
  }
  if (stream_size != streams_list_.size()) {
    MS_LOG(WARNING) << "Mismatch stream size " << stream_size << " vs " << streams_list_.size();
    return false;
  }
  if (stream_group_size != streams_groups_.size()) {
    MS_LOG(WARNING) << "Mismatch stream group size " << stream_group_size << " vs " << streams_groups_.size();
    return false;
  }
  return true;
}
  265. bool Somas::UpdateTensorsOffset(const std::vector<nlohmann::json> &tensors_json) {
  266. bool ret = true;
  267. for (auto &tensor_json : tensors_json) {
  268. auto tensor_id = tensor_json[kTensorId];
  269. auto size = tensor_json[kSize];
  270. auto ori_size = tensor_json[kOriSize];
  271. auto lifelong_value = tensor_json[kLifelongValue];
  272. auto life_start = tensor_json[kLifeStart];
  273. auto life_end = tensor_json[kLifeEnd];
  274. auto offset = tensor_json[kOffset];
  275. auto iter = tensors_map_.find(tensor_id);
  276. if (iter != tensors_map_.end()) {
  277. if (size != iter->second->aligned_size_) {
  278. MS_LOG(WARNING) << "Mismatch size of tensor " << tensor_id << " " << size << " vs "
  279. << iter->second->aligned_size_;
  280. ret = false;
  281. break;
  282. }
  283. if (ori_size != iter->second->GetOriginalSize()) {
  284. MS_LOG(WARNING) << "Mismatch original size of tensor " << tensor_id << " " << ori_size << " vs "
  285. << iter->second->GetOriginalSize();
  286. ret = false;
  287. break;
  288. }
  289. if (lifelong_value != iter->second->lifelong_value_) {
  290. MS_LOG(WARNING) << "Mismatch lifelong value of tensor " << tensor_id << " " << lifelong_value << " vs "
  291. << iter->second->lifelong_value_;
  292. ret = false;
  293. break;
  294. }
  295. if (life_start != iter->second->lifetime_.start_) {
  296. MS_LOG(WARNING) << "Mismatch life start of tensor " << tensor_id << " " << life_start << " vs "
  297. << iter->second->lifetime_.start_;
  298. ret = false;
  299. break;
  300. }
  301. if (life_end != iter->second->lifetime_.end_) {
  302. MS_LOG(WARNING) << "Mismatch life start of tensor " << tensor_id << " " << life_end << " vs "
  303. << iter->second->lifetime_.end_;
  304. ret = false;
  305. break;
  306. }
  307. // verify pass, update memory offset
  308. iter->second->offset_ = offset;
  309. } else {
  310. MS_LOG(WARNING) << "Can't find tensor " << tensor_id;
  311. ret = false;
  312. break;
  313. }
  314. }
  315. return ret;
  316. }
// Builds the full somas model for |graph|: basic streams/nodes/tensors, then
// the special-case passes (independent node outputs, summary inputs, ref
// nodes, non-task splits, un-reusable nodes, contiguous lists, GetNext
// outputs). Optionally records the pre-processed model to RDR and dumps .ir
// files when graph saving is enabled. Always returns true.
bool Somas::InitSomasTensors(const session::KernelGraph *graph) {
  MS_EXCEPTION_IF_NULL(graph);
  InitBasicInfo(graph);
  IndependentNodeOutputProcess(graph);
  SummaryInputProcess(graph);
  RefNodeProcess(graph);
  NonTaskSplitProcess(graph);
  UnReuseNodeProcess(graph);
  GenContiguousList(graph);
  GetNextOutputProcess(graph);
  if (tensors_list_.empty()) {
    MS_LOG(INFO) << "No Tensor from graph " << graph->graph_id();
    return true;
  }
  MS_LOG(INFO) << "Created " << streams_list_.size() << " streams (" << streams_groups_.size() << " groups), "
               << nodes_list_.size() << " nodes, " << tensors_list_.size() << " tensors, and "
               << contiguous_tensors_list_.size() << " contiguous lists";
#ifdef ENABLE_DUMP_IR
  SubModuleId module = SubModuleId::SM_OPTIMIZER;
  std::string name = "somas_pre_processed_info." + std::to_string(graph->graph_id());
  mindspore::RDR::RecordString(module, name, SomasInfo());
  name = "somas_offline_log." + std::to_string(graph->graph_id());
  mindspore::RDR::RecordString(module, name, Offline());
#endif
  if (save_graphs_) {
    std::string file_path =
      save_graphs_path_ + "/" + "somas_pre_processed_info_" + std::to_string(graph->graph_id()) + ".ir";
    DumpSomasInfoIR(file_path);
    std::string offline_file_path =
      save_graphs_path_ + "/" + "somas_offline_log_" + std::to_string(graph->graph_id()) + ".ir";
    DumpOfflineIR(offline_file_path);
  }
  return true;
}
// Creates one SomasStream per distinct stream id and one SomasNode per kernel
// in execution order, filling streams_list_, nodes_list_ and nodes_map_
// (keyed by the kernel's raw pointer).
void Somas::InitSomasStreamAndNode(const session::KernelGraph *graph) {
  MS_EXCEPTION_IF_NULL(graph);
  std::vector<CNodePtr> kernel_cnodes;
  streams_list_ = {};
  nodes_list_ = {};
  size_t node_index = 0;
  // Multi-call subgraphs use the memory-reuse order instead of the plain
  // execution order.
  if (graph->subgraph_multi_call()) {
    kernel_cnodes = graph->mem_reuse_exec_order();
  } else {
    kernel_cnodes = graph->execution_order();
  }
  for (size_t i = 0; i < kernel_cnodes.size(); i++) {
    auto kernel = kernel_cnodes[i];
    SomasStreamPtr stream;
    auto stream_id = AnfAlgo::GetStreamId(kernel);
    // Reuse the stream object if this stream id was already seen.
    auto it = find_if(streams_list_.begin(), streams_list_.end(),
                      [stream_id](const SomasStreamPtr &s) { return s->GetId() == stream_id; });
    if (it == streams_list_.end()) {
      stream = std::make_shared<SomasStream>(stream_id);
      streams_list_.push_back(stream);
    } else {
      stream = *it;
    }
    // Node
    NodeType type = kCommonNode;
    if (AnfAlgo::IsCommunicationOp(kernel)) {
      type = kCommunicationNode;
    }
    auto node = std::make_shared<SomasNode>(node_index, type, stream);
    MS_EXCEPTION_IF_NULL(node);
    node->scope_full_name_ = kernel->fullname_with_scope();
    nodes_list_.push_back(node);
    stream->nodes_.push_back(node);
    // nodes_map_ keeps a vector per kernel: a kernel appearing more than once
    // in the order accumulates several somas nodes here.
    auto key = kernel.get();
    auto &nodes = nodes_map_[key];
    nodes.push_back(node);
    node_index++;
  }
}
// Creates a SomasTensor for every output slot and every workspace slot of each
// kernel (in execution order), assigning sequential tensor ids. The initial
// lifetime spans the producing node, extended to the last repeated node when
// the kernel maps to several somas nodes.
void Somas::InitSomasOutputAndWorkspaceTensors(const session::KernelGraph *graph) {
  MS_EXCEPTION_IF_NULL(graph);
  tensors_list_ = {};
  size_t tensor_index = 0;
  auto kernel_cnodes = graph->execution_order();
  for (const auto &kernel : kernel_cnodes) {
    auto nodes = nodes_map_[kernel.get()];
    auto node = nodes[0];
    MS_EXCEPTION_IF_NULL(node);
    auto stream = node->GetStream();
    MS_EXCEPTION_IF_NULL(stream);
    // Output Tensor
    auto kernel_mod = AnfAlgo::GetKernelMod(kernel);
    MS_EXCEPTION_IF_NULL(kernel_mod);
    auto output_sizes = kernel_mod->GetOutputSizeList();
    auto index = 0;
    for (const auto &size : output_sizes) {
      auto output_tensor_index = tensor_index;
      tensor_index++;
      // Outputs start as kOutputOnly with no lifelong flag (kLifeLongNone);
      // InitCommonNodeInputs promotes them to kCommon when consumed.
      auto tensor = std::make_shared<SomasTensor>(output_tensor_index, node, stream, size, kLifeLongNone);
      tensor->lifetime_.start_ = node->GetId();
      tensor->lifetime_.end_ = (nodes.size() > 1) ? nodes.back()->GetId() : node->GetId();
      tensor->type_ = kOutputOnly;
      // Outputs that already own a device address get aligned size 0 —
      // presumably so somas does not plan memory for them; TODO confirm.
      if (AnfAlgo::OutputAddrExist(kernel, index)) {
        tensor->aligned_size_ = 0;
      }
      tensors_list_.push_back(tensor);
      tensors_map_[output_tensor_index] = tensor;
      stream->tensors_.push_back(tensor);
      // Register the tensor on every somas node created for this kernel.
      std::for_each(nodes.begin(), nodes.end(), [tensor](auto &node) {
        node->tensors_.insert(tensor);
        node->output_tensors_.push_back(tensor);
      });
      index++;
    }
    // WorkSpace Tensor
    auto workspace_sizes = kernel_mod->GetWorkspaceSizeList();
    index = 0;
    for (const auto &size : workspace_sizes) {
      auto workspace_tensor_index = tensor_index;
      tensor_index++;
      SomasTensorPtr tensor = std::make_shared<SomasTensor>(workspace_tensor_index, node, stream, size, kLifeLongNone);
      tensor->type_ = kWorkspace;
      tensor->lifetime_.start_ = node->GetId();
      tensor->lifetime_.end_ = (nodes.size() > 1) ? nodes.back()->GetId() : node->GetId();
      // Same as outputs: pre-allocated workspaces get aligned size 0.
      if (AnfAlgo::WorkspaceAddrExist(kernel, index)) {
        tensor->aligned_size_ = 0;
      }
      tensors_list_.push_back(tensor);
      tensors_map_[workspace_tensor_index] = tensor;
      stream->tensors_.push_back(tensor);
      std::for_each(nodes.begin(), nodes.end(), [tensor](auto &node) {
        node->tensors_.insert(tensor);
        node->workspace_tensors_.push_back(tensor);
      });
      index++;
    }
  }
}
  450. void Somas::InitSomasInputTensors(const session::KernelGraph *graph) {
  451. MS_EXCEPTION_IF_NULL(graph);
  452. bool is_all_nop_node = opt::IsAllNopNode(graph);
  453. static const auto enable_fusion_clear = (common::GetEnv("ENV_FUSION_CLEAR") == "1");
  454. auto kernel_cnodes = graph->execution_order();
  455. for (const auto &kernel : kernel_cnodes) {
  456. if (AnfAlgo::GetCNodeName(kernel) != kAtomicAddrCleanOpName) {
  457. InitCommonNodeInputs(is_all_nop_node, kernel);
  458. } else {
  459. InitAtomicCleanInputs(enable_fusion_clear, kernel);
  460. }
  461. }
  462. }
  463. void Somas::InitCommonNodeInputs(bool is_all_nop_node, const CNodePtr &kernel) {
  464. auto nodes = nodes_map_[kernel.get()];
  465. auto node = nodes[0];
  466. MS_EXCEPTION_IF_NULL(node);
  467. auto stream = node->GetStream();
  468. MS_EXCEPTION_IF_NULL(stream);
  469. // Input Tensor
  470. auto input_tensor_num = AnfAlgo::GetInputTensorNum(kernel);
  471. size_t real_input_index = 0;
  472. for (size_t i = 0; i < input_tensor_num; i++) {
  473. auto input_node = kernel->input(i + 1);
  474. session::KernelWithIndex prenode_index;
  475. if (is_all_nop_node) {
  476. prenode_index = AnfAlgo::VisitKernelWithReturnType(input_node, 0, false);
  477. } else {
  478. prenode_index = AnfAlgo::VisitKernelWithReturnType(input_node, 0, true);
  479. }
  480. if (AnfAlgo::CheckPrimitiveType(prenode_index.first, prim::kPrimMakeTuple)) {
  481. MS_LOG(EXCEPTION) << "Input node [" << input_node->DebugString() << "]'s input " << i << " is MakeTuple";
  482. }
  483. if (!AnfAlgo::IsRealCNodeKernel(prenode_index.first)) {
  484. auto op_name = AnfAlgo::GetCNodeName(kernel);
  485. TypeId input_origin_type = AnfAlgo::GetPrevNodeOutputInferDataType(kernel, i);
  486. if ((op_name == kDynamicRNNOpName || op_name == kDynamicGRUV2OpName) && input_origin_type == kMetaTypeNone) {
  487. continue;
  488. }
  489. auto parameter = GetSomasParameters(prenode_index.first, prenode_index.second);
  490. node->input_parameters_map_[real_input_index] = parameter;
  491. real_input_index++;
  492. MS_LOG(DEBUG) << "Input [" << prenode_index.first->fullname_with_scope() << "] is not a real cnode kernel.";
  493. continue;
  494. }
  495. auto iter = nodes_map_.find(prenode_index.first.get());
  496. if (iter == nodes_map_.end()) {
  497. MS_LOG(EXCEPTION) << "Kernel[" << kernel->fullname_with_scope() << "]'s input " << i << " ["
  498. << prenode_index.first->fullname_with_scope() << "] is not init.";
  499. }
  500. auto pre_somas_node = iter->second.at(0);
  501. if (prenode_index.second > pre_somas_node->output_tensors_.size()) {
  502. MS_LOG(EXCEPTION) << "Output index " << prenode_index.second << " exceed input node ["
  503. << prenode_index.first->fullname_with_scope() << "]'s outputs size "
  504. << pre_somas_node->output_tensors_.size();
  505. }
  506. auto input_somas_tensor = pre_somas_node->output_tensors_[prenode_index.second];
  507. MS_EXCEPTION_IF_NULL(input_somas_tensor);
  508. std::for_each(nodes.begin(), nodes.end(),
  509. [input_somas_tensor](auto &node) { node->input_tensors_.push_back(input_somas_tensor); });
  510. real_input_index++;
  511. if (input_somas_tensor->type_ == kOutputOnly) {
  512. input_somas_tensor->type_ = kCommon;
  513. }
  514. input_somas_tensor->destinationStreams_.insert(stream);
  515. for (auto &repeat_node : nodes) {
  516. input_somas_tensor->destinations_.insert(repeat_node);
  517. if (input_somas_tensor->lifetime_.end_ < repeat_node->GetId()) {
  518. input_somas_tensor->lifetime_.end_ = repeat_node->GetId();
  519. }
  520. }
  521. if (node != pre_somas_node) {
  522. node->ancestor_nodes_.insert(pre_somas_node);
  523. }
  524. auto input_tensor_stream = input_somas_tensor->GetSourceStream();
  525. if (input_tensor_stream != stream) {
  526. stream->ancestor_streams_.insert(input_tensor_stream);
  527. input_somas_tensor->between_streams_ = true;
  528. }
  529. }
  530. }
  531. void Somas::InitAtomicCleanInputs(bool enable_fusion_clear, const CNodePtr &kernel) {
  532. auto node = nodes_map_[kernel.get()].at(0);
  533. MS_EXCEPTION_IF_NULL(node);
  534. auto stream = node->GetStream();
  535. MS_EXCEPTION_IF_NULL(stream);
  536. auto input_tensor_num = AnfAlgo::GetInputTensorNum(kernel);
  537. for (size_t i = 0; i < input_tensor_num; i++) {
  538. MS_EXCEPTION_IF_NULL(kernel->inputs()[i + 1]);
  539. auto pre_node = kernel->input(i + 1)->cast<CNodePtr>();
  540. auto iter = nodes_map_.find(pre_node.get());
  541. if (iter == nodes_map_.end()) {
  542. MS_LOG(EXCEPTION) << "Kernel[" << kernel->fullname_with_scope() << "]'s input ["
  543. << pre_node->fullname_with_scope() << "] is not init.";
  544. }
  545. auto pre_somas_node = iter->second.at(0);
  546. // set clean output tensors
  547. if (AnfAlgo::HasNodeAttr(kAttrAtomicOutputIndexs, pre_node)) {
  548. auto clean_output_indexs = AnfAlgo::GetNodeAttr<std::vector<size_t>>(pre_node, kAttrAtomicOutputIndexs);
  549. for (auto index : clean_output_indexs) {
  550. if (index > pre_somas_node->output_tensors_.size()) {
  551. MS_LOG(EXCEPTION) << "Output index " << index << " exceed input node [" << pre_node->fullname_with_scope()
  552. << "]'s outputs size " << pre_somas_node->output_tensors_.size();
  553. }
  554. auto input_somas_tensor = pre_somas_node->output_tensors_[index];
  555. MS_EXCEPTION_IF_NULL(input_somas_tensor);
  556. node->input_tensors_.push_back(input_somas_tensor);
  557. if (enable_fusion_clear) {
  558. input_somas_tensor->lifelong_value_ = kLifeLongGraphAll;
  559. MS_LOG(INFO) << "Set " << node->scope_full_name_ << "'s Input node " << pre_somas_node->scope_full_name_
  560. << " 's output" << index << " to lifelong";
  561. }
  562. }
  563. }
  564. // set clean workspace tensors
  565. if (AnfAlgo::HasNodeAttr(kAttrAtomicWorkspaceIndexs, pre_node)) {
  566. auto clean_workspace_indexs = AnfAlgo::GetNodeAttr<std::vector<size_t>>(pre_node, kAttrAtomicWorkspaceIndexs);
  567. for (const auto &index : clean_workspace_indexs) {
  568. if (index > pre_somas_node->output_tensors_.size()) {
  569. MS_LOG(EXCEPTION) << "Workspace index " << index << " exceed input node [" << pre_node->fullname_with_scope()
  570. << "]'s Workspace size " << pre_somas_node->workspace_tensors_.size();
  571. }
  572. auto input_somas_tensor = pre_somas_node->workspace_tensors_[index];
  573. MS_EXCEPTION_IF_NULL(input_somas_tensor);
  574. node->input_tensors_.push_back(input_somas_tensor);
  575. if (enable_fusion_clear) {
  576. input_somas_tensor->lifelong_value_ = kLifeLongGraphAll;
  577. MS_LOG(INFO) << "Set " << node->scope_full_name_ << "'s Input node " << pre_somas_node->scope_full_name_
  578. << " 's workspace" << index << " to lifelong";
  579. }
  580. }
  581. }
  582. }
  583. }
  584. SomasParameterPtr Somas::CreateSomasParameters(AnfNodePtr node, size_t index) {
  585. auto id = parameters_list_.size();
  586. auto device_addr = AnfAlgo::GetOutputAddr(node, index);
  587. if (device_addr == nullptr) {
  588. MS_LOG(EXCEPTION) << "Node " << node->fullname_with_scope() << " has no device address before Somas.";
  589. }
  590. auto param = std::make_shared<SomasParameter>(id, node->fullname_with_scope(), index, device_addr->GetPtr(),
  591. device_addr->GetSize());
  592. parameters_list_.push_back(param);
  593. return param;
  594. }
  595. SomasParameterPtr Somas::GetSomasParameters(AnfNodePtr node, size_t index) {
  596. auto key = node.get();
  597. auto iter = parameters_map_.find(key);
  598. if (iter != parameters_map_.end()) {
  599. auto it = std::find_if(iter->second.begin(), iter->second.end(),
  600. [index](SomasParameterPtr param) -> bool { return index == param->output_index_; });
  601. if (it != iter->second.end()) {
  602. return *it;
  603. } else {
  604. auto new_param = CreateSomasParameters(node, index);
  605. iter->second.push_back(new_param);
  606. return new_param;
  607. }
  608. } else {
  609. auto new_param = CreateSomasParameters(node, index);
  610. parameters_map_[key].push_back(new_param);
  611. return new_param;
  612. }
  613. }
// Build all per-graph SOMAS bookkeeping (streams, nodes, tensors) and dump the
// initial state for debugging when graph saving is enabled.
void Somas::InitBasicInfo(const session::KernelGraph *graph) {
  MS_EXCEPTION_IF_NULL(graph);
#ifdef ENABLE_D
  // On Ascend, stream groups come from the stream assigner; they are used
  // later to add cross-stream execution-order edges.
  streams_groups_ = device::ascend::AscendStreamAssign::GetInstance().get_stream_group();
#endif
  // Order matters: nodes/streams first, then their tensors, then input wiring.
  InitSomasStreamAndNode(graph);
  InitSomasOutputAndWorkspaceTensors(graph);
  InitSomasInputTensors(graph);
  auto context_ptr = MsContext::GetInstance();
  MS_EXCEPTION_IF_NULL(context_ptr);
#ifdef ENABLE_DUMP_IR
  // Record the pre-assignment state in the RDR (running data recorder).
  SubModuleId module = SubModuleId::SM_OPTIMIZER;
  std::string name = "somas_initial_info." + std::to_string(graph->graph_id());
  mindspore::RDR::RecordString(module, name, SomasInfo());
#endif
  save_graphs_ = context_ptr->get_param<bool>(MS_CTX_SAVE_GRAPHS_FLAG);
  save_graphs_path_ = context_ptr->get_param<std::string>(MS_CTX_SAVE_GRAPHS_PATH);
  if (save_graphs_path_.empty()) {
    save_graphs_path_ = ".";
  }
  if (save_graphs_) {
    std::string file_path = save_graphs_path_ + "/" + "somas_initial_info_" + std::to_string(graph->graph_id()) + ".ir";
    DumpSomasInfoIR(file_path);
  }
}
  639. void Somas::GetNextOutputProcess(const session::KernelGraph *graph) {
  640. MS_EXCEPTION_IF_NULL(graph);
  641. auto kernel_cnodes = graph->execution_order();
  642. size_t total_size = 0;
  643. for (const auto &kernel : kernel_cnodes) {
  644. if (AnfAlgo::GetCNodeName(kernel) != kGetNextOpName) {
  645. continue;
  646. }
  647. auto iter = nodes_map_.find(kernel.get());
  648. if (iter != nodes_map_.end()) {
  649. auto &node = iter->second.at(0);
  650. auto getnext_output_tensors = node->output_tensors_;
  651. for (auto &tensor : getnext_output_tensors) {
  652. total_size += tensor->GetAlignedSize();
  653. tensor->lifelong_value_ = kLifeLongGraphAll;
  654. tensor->type_ = kGetNextOutput;
  655. }
  656. }
  657. }
  658. MS_LOG(INFO) << "Special Tensor total size: GetNext Output " << total_size;
  659. }
  660. void Somas::IndependentNodeOutputProcess(const session::KernelGraph *graph) {
  661. MS_EXCEPTION_IF_NULL(graph);
  662. auto kernel_cnodes = graph->execution_order();
  663. size_t total_size = 0;
  664. for (const auto &kernel : kernel_cnodes) {
  665. bool independent = AnfAlgo::IsIndependentNode(kernel);
  666. if (!independent) {
  667. continue;
  668. }
  669. auto iter = nodes_map_.find(kernel.get());
  670. if (iter != nodes_map_.end()) {
  671. auto &node = iter->second.at(0);
  672. auto semi_reuse_output_tensors = node->output_tensors_;
  673. for (auto &tensor : semi_reuse_output_tensors) {
  674. total_size += tensor->GetAlignedSize();
  675. tensor->lifelong_value_ = kLifeLongGraphAll;
  676. }
  677. }
  678. }
  679. MS_LOG(INFO) << "Special Tensor total size: Independent Node output " << total_size;
  680. }
  681. void Somas::SummaryInputProcess(const session::KernelGraph *graph) {
  682. MS_EXCEPTION_IF_NULL(graph);
  683. bool summary_exist = graph->summary_node_exist();
  684. if (!summary_exist) {
  685. return;
  686. }
  687. auto summary_nodes = graph->summary_nodes();
  688. if (summary_nodes.empty()) {
  689. return;
  690. }
  691. size_t total_summary_size = 0;
  692. for (auto &node_item : summary_nodes) {
  693. auto node = node_item.second.first;
  694. size_t index = IntToSize(node_item.second.second);
  695. auto iter = nodes_map_.find(node.get());
  696. if (iter != nodes_map_.end()) {
  697. auto input_node = iter->second.at(0);
  698. if (index < input_node->output_tensors_.size()) {
  699. auto tensor = input_node->output_tensors_[index];
  700. tensor->lifelong_value_ = kLifeLongGraphAll;
  701. tensor->type_ = kSummaryInput;
  702. total_summary_size += tensor->GetAlignedSize();
  703. MS_LOG(INFO) << "Set summary node input tensor's lifelong, node: " << node->fullname_with_scope()
  704. << " index: " << index;
  705. } else {
  706. MS_LOG(WARNING) << "Index exceed size, node " << node->fullname_with_scope() << " index: " << index
  707. << " size: " << input_node->output_tensors_.size();
  708. }
  709. } else {
  710. MS_LOG(WARNING) << "Can't find summary input node " << node->fullname_with_scope() << " index: " << index;
  711. }
  712. }
  713. MS_LOG(INFO) << "Special Tensor total size: SummaryNodes: " << total_summary_size;
  714. }
// Collect ref-node relations: a "ref" output aliases the memory of another
// node's output (its origin). Both tensors are tagged (kRefNodeOutput /
// kRefNodeInput) and their ids are paired in ref_node_constraints_ so the
// solver later forces them to the same offset.
void Somas::RefNodeProcess(const session::KernelGraph *graph) {
  MS_EXCEPTION_IF_NULL(graph);
  auto kernel_cnodes = graph->execution_order();
  size_t total_output_size = 0;
  size_t total_input_size = 0;
  for (const auto &kernel : kernel_cnodes) {
    auto kernel_mod = AnfAlgo::GetKernelMod(kernel);
    if (kernel_mod == nullptr) {
      MS_LOG(WARNING) << "Kernel mode is NULL Of " << kernel->fullname_with_scope();
      continue;
    }
    auto output_sizes = kernel_mod->GetOutputSizeList();
    // Walk every output index of this kernel, pairing it with its size.
    size_t output_index = 0;
    for (const auto &size : output_sizes) {
      auto out_index = output_index;
      output_index++;
      session::AnfWithOutIndex out_pair(kernel, out_index);
      if (graph->IsInRefOutputMap(out_pair)) {
        // This output refs another node's output (origin_pair).
        auto origin_pair = graph->GetRefCorrespondOutput(out_pair);
        MS_EXCEPTION_IF_NULL(origin_pair.first);
        auto &node = nodes_map_[kernel.get()].at(0);
        auto output_tensor = node->output_tensors_[out_index];
        MS_EXCEPTION_IF_NULL(output_tensor);
        output_tensor->type_ = kRefNodeOutput;
        total_output_size += size;
        if (AnfAlgo::IsRealCNodeKernel(origin_pair.first)) {
          // Only real kernels own a SOMAS node (and thus a tensor to pair
          // with); parameters/values are skipped.
          auto ori_node = origin_pair.first->cast<CNodePtr>();
          auto ori_index = origin_pair.second;
          auto &repeat_node = nodes_map_[ori_node.get()].at(0);
          auto input_tensor = repeat_node->output_tensors_[ori_index];
          MS_EXCEPTION_IF_NULL(input_tensor);
          input_tensor->type_ = kRefNodeInput;
          total_input_size += input_tensor->aligned_size_;
          // Record the (input id, output id) pair as one ref-node constraint.
          std::vector<size_t> refnode_input_output;
          refnode_input_output.push_back(input_tensor->GetId());
          refnode_input_output.push_back(output_tensor->GetId());
          ref_node_constraints_.push_back(refnode_input_output);
          MS_LOG(INFO) << "RefNode: input " << input_tensor->GetId() << " output " << output_tensor->GetId();
        }
      }
    }
  }
  MS_LOG(INFO) << "Special Tensor total size: RefNode: input " << total_input_size << " output " << total_output_size;
}
  759. void Somas::NonTaskSplitProcess(const session::KernelGraph *graph) {
  760. MS_EXCEPTION_IF_NULL(graph);
  761. auto kernel_cnodes = graph->execution_order();
  762. for (const auto &kernel : kernel_cnodes) {
  763. auto op_name = AnfAlgo::GetCNodeName(kernel);
  764. if ((op_name == kSplitOpName || op_name == kSplitVOpName) && AnfAlgo::HasNodeAttr(kAttrNonTask, kernel)) {
  765. std::vector<size_t> refnode_input_output;
  766. auto node = nodes_map_[kernel.get()].at(0);
  767. if (node->input_tensors_.size() == 0) {
  768. MS_LOG(EXCEPTION) << op_name << " has no input tensor, can not do split non_task process.";
  769. }
  770. auto input_tensor = node->input_tensors_[0];
  771. input_tensor->type_ = kRefNodeInput;
  772. refnode_input_output.push_back(input_tensor->GetId());
  773. for (auto &output_tensor : node->output_tensors_) {
  774. output_tensor->type_ = kRefNodeOutput;
  775. refnode_input_output.push_back(output_tensor->GetId());
  776. }
  777. ref_node_constraints_.push_back(refnode_input_output);
  778. }
  779. }
  780. }
  781. void Somas::UnReuseNodeProcess(const session::KernelGraph *graph) {
  782. MS_EXCEPTION_IF_NULL(graph);
  783. vector<string> full_name_list = {};
  784. if (full_name_list.size() == 0) {
  785. return;
  786. }
  787. auto kernel_cnodes = graph->execution_order();
  788. for (const auto &kernel : kernel_cnodes) {
  789. auto full_name = kernel->fullname_with_scope();
  790. auto iter = std::find(full_name_list.begin(), full_name_list.end(), full_name);
  791. if (iter != full_name_list.end()) {
  792. MS_LOG(INFO) << "Set UnReuse Node in somas, Node:" << full_name;
  793. auto key = kernel.get();
  794. auto somas_node = nodes_map_[key].at(0);
  795. // input
  796. auto inputs = somas_node->input_tensors_;
  797. for (auto &input : inputs) {
  798. input->lifelong_value_ = kLifeLongGraphAll;
  799. }
  800. // output
  801. auto outputs = somas_node->output_tensors_;
  802. MS_LOG(INFO) << "Output size of " << kernel->fullname_with_scope() << " is " << outputs.size();
  803. for (auto &output : outputs) {
  804. output->lifelong_value_ = kLifeLongGraphAll;
  805. }
  806. // workspace
  807. auto workspaces = somas_node->workspace_tensors_;
  808. for (auto &workspace : workspaces) {
  809. workspace->lifelong_value_ = kLifeLongGraphAll;
  810. }
  811. }
  812. }
  813. }
// Build contiguous-memory groups for communication kernels: all inputs of a
// communication node must sit back-to-back in memory, and likewise all
// outputs. Each group is recorded (as a list of tensor ids) in
// contiguous_tensors_list_.
void Somas::GenContiguousList(const session::KernelGraph *graph) {
  MS_EXCEPTION_IF_NULL(graph);
  for (const auto &node : nodes_list_) {
    MS_EXCEPTION_IF_NULL(node);
    if (node->GetType() != kCommunicationNode) {
      continue;
    }
    // Contiguous input
    // The contiguous_ flag on the first tensor guards against processing the
    // same group twice (it is set below once the group is recorded).
    if ((!node->input_tensors_.empty()) && (!node->input_tensors_[0]->contiguous_)) {
      // Pad the first and last tensors so the contiguous region has a gap at
      // each end; zero-sized tensors are left untouched.
      if (node->input_tensors_[0]->aligned_size_) {
        node->input_tensors_[0]->aligned_size_ += kGapSize;
      }
      if (node->input_tensors_[node->input_tensors_.size() - 1]->aligned_size_) {
        node->input_tensors_[node->input_tensors_.size() - 1]->aligned_size_ += kGapSize;
      }
      std::vector<size_t> inputs;
      for (const auto &input_tensor : node->input_tensors_) {
        comm_input_total_size_ += input_tensor->aligned_size_;
        input_tensor->contiguous_ = true;
        inputs.push_back(input_tensor->GetId());
      }
      contiguous_tensors_list_.push_back(inputs);
    }
    // Contiguous output — same scheme as the input group above.
    if ((!node->output_tensors_.empty()) && (!node->output_tensors_[0]->contiguous_)) {
      if (node->output_tensors_[0]->aligned_size_) {
        node->output_tensors_[0]->aligned_size_ += kGapSize;
      }
      if (node->output_tensors_[node->output_tensors_.size() - 1]->aligned_size_) {
        node->output_tensors_[node->output_tensors_.size() - 1]->aligned_size_ += kGapSize;
      }
      std::vector<size_t> outputs;
      for (const auto &output_tensor : node->output_tensors_) {
        comm_output_total_size_ += output_tensor->aligned_size_;
        output_tensor->contiguous_ = true;
        outputs.push_back(output_tensor->GetId());
      }
      contiguous_tensors_list_.push_back(outputs);
    }
  }
}
// Compute, pairwise, which tensors may share memory. First builds a per-node
// ancestor bitset (execution-order reachability), then fills reuse_matrix_;
// when there are many tensors the work is split across the common thread pool.
void Somas::ComputeConflictPairs() {
  if (tensors_list_.empty()) {
    MS_LOG(INFO) << "No Tensor for Conflict computing";
    return;
  }
  MS_LOG(INFO) << "Start Conflict Computing (Bitset Model)";
  auto start_conflict = std::chrono::system_clock::now();
  std::sort(nodes_list_.begin(), nodes_list_.end(), NodeSort);
  UpdateTensorDestinations();
  MS_LOG(INFO) << "Start Bitset";
  // One bitset per node id; the largest id (list is sorted) bounds the count.
  std::vector<DynamicBitSet> nodes_dependency;
  size_t count = nodes_list_.back()->GetId() + 1;
  for (size_t i = 0; i < count; i++) {
    nodes_dependency.emplace_back(count);
  }
  MS_LOG(INFO) << "Start Path Computing";
  // Loop to compute ancestor paths via bitset for time dependence
  for (const auto &node : nodes_list_) {
    for (const auto &ancestor : node->ancestor_nodes_) {
      nodes_dependency[node->GetId()].SetBitTrue(ancestor->GetId());
      // Transitive closure: also inherit the ancestor's own ancestor set.
      Union(&nodes_dependency[node->GetId()], &nodes_dependency[ancestor->GetId()]);
    }
  }
  MS_LOG(INFO) << "End Path Computing";
  MS_LOG(INFO) << "Start Tensor Relation Computing";
  count = tensors_list_.back()->GetId() + 1;
  for (size_t i = 0; i < count; i++) {
    reuse_matrix_.emplace_back(count);
  }
  if (tensors_list_.size() < kParallelComputeSizeThreshold) {
    ComputeMultiTensorConflicts(tensors_list_, tensors_list_, nodes_dependency, &reuse_matrix_);
  } else {
    MS_LOG(INFO) << "Tensor Num " << tensors_list_.size() << " is larger than " << kParallelComputeSizeThreshold;
    MS_LOG(INFO) << "Enter Multi-Thread Mode...";
    size_t process_num = common::ThreadPool::GetInstance().GetSyncRunThreadNum();
    MS_LOG(INFO) << "Threads Num is " << process_num;
    // Partition tensors_list_ into contiguous jobs of roughly equal size.
    size_t start_index = 0;
    size_t total_size = tensors_list_.size();
    size_t job_size = total_size / process_num;
    if (job_size == 0) {
      job_size = total_size;
    }
    std::vector<common::Task> tasks;
    while (start_index < total_size) {
      size_t end_index = (start_index + job_size) > total_size ? total_size : start_index + job_size;
      auto jobs = std::vector<SomasTensorPtr>(tensors_list_.begin() + start_index, tensors_list_.begin() + end_index);
      // `jobs` is captured by value so each task owns its slice; the node
      // dependency bitsets are only read during this phase.
      auto task = [this, jobs, &nodes_dependency]() {
        this->ComputeMultiTensorConflicts(jobs, tensors_list_, nodes_dependency, &reuse_matrix_);
        return common::SUCCESS;
      };
      tasks.emplace_back(task);
      start_index += job_size;
    }
    common::ThreadPool::GetInstance().SyncRun(tasks);
  }
  MS_LOG(INFO) << "End Tensor Relation Computing";
  auto end_conflict = std::chrono::system_clock::now();
  MS_LOG(INFO) << "End Conflict Computing (Bitset Model)(time taken "
               << std::chrono::duration_cast<std::chrono::milliseconds>(end_conflict - start_conflict).count() << "ms)";
}
// Add the execution-order edges that conflict computing relies on:
//  1. within each stream, each node depends on its predecessor in id order;
//  2. across grouped streams, the first node of a stream depends on the last
//     node of the previous stream in the group;
//  3. a tensor with no consumers gets its own producer as destination, so the
//     max-destination computation below never sees an empty set;
//  4. finally each tensor caches its max destination id per stream.
void Somas::UpdateTensorDestinations() {
  // Loop to add edges within each stream (node order within stream)
  for (const auto &stream : streams_list_) {
    auto &nodes = stream->nodes_;
    std::sort(nodes.begin(), nodes.end(), NodeSort);
    for (size_t i = 1; i < nodes.size(); i++) {
      const auto &previous_node = nodes[i - 1];
      const auto &current_node = nodes[i];
      current_node->ancestor_nodes_.insert(previous_node);
    }
  }
  // Loop to add edges from end to beginning of next group
  for (const auto &group : streams_groups_) {
    for (size_t i = 1; i < group.size(); i++) {
      int64_t previous_stream = group[i - 1];
      int64_t current_stream = group[i];
      auto it =
        std::find_if(streams_list_.begin(), streams_list_.end(),
                     [previous_stream](const SomasStreamPtr &stream) { return stream->GetId() == previous_stream; });
      if (it == streams_list_.end()) {
        continue;  // group references a stream this graph does not have
      }
      auto &last_node_in_prev_stream = (*it)->nodes_.back();
      it = std::find_if(streams_list_.begin(), streams_list_.end(),
                        [current_stream](const SomasStreamPtr &stream) { return stream->GetId() == current_stream; });
      if (it == streams_list_.end()) {
        continue;
      }
      auto &first_node_in_cur_stream = (*it)->nodes_.front();
      first_node_in_cur_stream->ancestor_nodes_.insert(last_node_in_prev_stream);
    }
  }
  // Loop to avoid tensors with empty destinations (add itself)
  for (const auto &tensor : tensors_list_) {
    if (tensor->destinations_.size() == 0) {
      tensor->destinations_.insert(tensor->GetSourceNode());
    }
  }
  // Loop to compute max destinations in each stream
  for (const auto &tensor : tensors_list_) {
    tensor->ComputeMaxDestinationId();
  }
}
  958. void Somas::ComputeMultiTensorConflicts(const std::vector<SomasTensorPtr> &calc_tensors_list,
  959. const std::vector<SomasTensorPtr> &all_tensors_list,
  960. const vector<DynamicBitSet> &nodes_dependency,
  961. std::vector<DynamicBitSet> *tensor_relation) const {
  962. auto start = std::chrono::system_clock::now();
  963. MS_LOG(INFO) << "Start Computing Conflicts Pairs, tensors list size is " << calc_tensors_list.size();
  964. for (size_t i = 0; i < calc_tensors_list.size(); i++) {
  965. auto calc_tensor = calc_tensors_list[i];
  966. if (calc_tensor->IsLifelong() || calc_tensor->IsRefOverlap() || calc_tensor->GetAlignedSize() == 0) {
  967. continue;
  968. }
  969. ComputeOneTensorConflicts(calc_tensor, all_tensors_list, nodes_dependency, tensor_relation);
  970. }
  971. auto end = std::chrono::system_clock::now();
  972. MS_LOG(INFO) << "End Computing Conflicts Pairs (time taken "
  973. << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << "ms)";
  974. }
// Decide which tensors of all_tensors_list may share memory with calc_tensor.
// Reuse is allowed only when every consumer of calc_tensor is a strict
// ancestor of target_tensor's producing node (so calc_tensor is dead by the
// time target_tensor is written). A successful pair is recorded symmetrically
// in *tensor_relation.
void Somas::ComputeOneTensorConflicts(const std::shared_ptr<SomasTensor> &calc_tensor,
                                      const std::vector<SomasTensorPtr> &all_tensors_list,
                                      const vector<DynamicBitSet> &nodes_dependency,
                                      std::vector<DynamicBitSet> *tensor_relation) const {
  for (size_t j = 0; j < all_tensors_list.size(); j++) {
    auto target_tensor = all_tensors_list[j];
    // Skip self and tensors excluded from reuse entirely.
    if (calc_tensor == target_tensor || target_tensor->IsLifelong() || target_tensor->IsRefOverlap() ||
        target_tensor->GetAlignedSize() == 0) {
      continue;
    }
    size_t calc_src_node = calc_tensor->GetSourceNode()->GetId();
    size_t target_src_node = target_tensor->GetSourceNode()->GetId();
    // Outputs of the same node are live simultaneously; never reuse.
    if (calc_src_node == target_src_node) {
      continue;
    }
    // Pair already decided (this function runs once per tensor as calc_tensor,
    // and possibly from multiple threads over disjoint calc slices).
    if ((*tensor_relation)[calc_tensor->GetId()].IsBitTrue(target_tensor->GetId()) ||
        (*tensor_relation)[target_tensor->GetId()].IsBitTrue(calc_tensor->GetId())) {
      continue;
    }
    bool reuse = true;
    // check calc_tensor's all consumers is target_tensor's source node's dependency or not
    for (const auto &dst_map : calc_tensor->max_destinations_) {
      const auto &dst_node = dst_map.second;
      if (nodes_dependency[target_src_node].IsBitTrue(dst_node->GetId()) == false) {
        // calc_tensor's consumer is not in target_tensor's source node's dependency, not sure this consumer is done or
        // not when target_tensor produced
        reuse = false;
        break;
      } else if (target_src_node == dst_node->GetId()) {
        // calc_tensor is target_tensor's source node's input, can't reuse
        reuse = false;
        break;
      } else {
        // calc_tensor's consumer is in target_tensor's source node's dependency, this consumer is done when
        // target_tensor produced
        reuse = true;
      }
    }
    if (reuse) {
      // calc_tensor and target_tensor have dependencies so they can reuse each other
      (*tensor_relation)[calc_tensor->GetId()].SetBitTrue(target_tensor->GetId());
      (*tensor_relation)[target_tensor->GetId()].SetBitTrue(calc_tensor->GetId());
    }
  }
}
  1020. bool Somas::NodeSort(SomasNodePtr node1, SomasNodePtr node2) { return node1->GetId() < node2->GetId(); }
// Drive the actual memory assignment: preprocess ref-node and contiguous
// constraints, hand the tensors to the SOMAS solver, then write the solved
// offsets back and fix up ref/contiguous tensors. Returns true on success
// (including the trivial empty cases); throws if the solver fails.
bool Somas::Assign(const session::KernelGraph *graph) {
  if (tensors_list_.empty()) {
    MS_LOG(INFO) << "No Tensor for Assigner";
    return true;
  }
  // Ref Node Preprocessing
  UpdateRefTensorsConflict();
  std::map<size_t, size_t> contiguous_list_with_ref_index_map = GetContiguousListContainRefTensor();
  vector<vector<size_t>> contiguous_tensors_list_removed = contiguous_tensors_list_;
  std::set<vector<size_t>> contiguous_tensors_list_to_remove;
  // The "second" list of each ref pair mirrors the first; drop it from what
  // the solver sees (its offsets are copied from the first list afterwards).
  for (auto ref_list_pair : contiguous_list_with_ref_index_map) {
    contiguous_tensors_list_to_remove.insert(contiguous_tensors_list_[ref_list_pair.second]);
  }
  // remove the contiguous list which all tensors' align size is 0
  for (auto contiguous_list : contiguous_tensors_list_) {
    bool all_outputs = true;
    for (auto tensor_id : contiguous_list) {
      auto tensor = tensors_list_[tensor_id];
      if (tensor->aligned_size_ != 0) {
        all_outputs = false;
        break;
      }
    }
    if (all_outputs) {
      contiguous_tensors_list_to_remove.insert(contiguous_list);
    }
  }
  for (auto contiguous_list : contiguous_tensors_list_to_remove) {
    auto iterator =
      std::find(contiguous_tensors_list_removed.begin(), contiguous_tensors_list_removed.end(), contiguous_list);
    if (iterator != contiguous_tensors_list_removed.end()) {
      contiguous_tensors_list_removed.erase(iterator);
    } else {
      MS_LOG(WARNING) << "Could not find contiguous list to remove for ref";
    }
  }
  MS_LOG(INFO) << "End Solving Preprocessing for Ref Node";
  UpdateRefOverlapTensorsConflicts();
#ifdef SOMAS_DEBUG
  // Compute number of constraints for each tensor
  auto tensors_num = tensors_list_.size();
  for (auto tensor1 : tensors_list_) {
    auto ones_num = reuse_matrix_[tensor1->GetId()].CountOnesNum();
    tensor1->num_constraints_ = tensors_num - ones_num;
  }
#endif
  // Prepare solver info: one solver descriptor per tensor that provides one
  // (zero-sized ref followers return nullptr and are skipped).
  MS_LOG(INFO) << "Start Loop to create solver info";
  for (auto tensor : tensors_list_) {
    if (tensor->GetSolverTensorDesc() != nullptr) {
      SomasSolverTensorDescPtr pSolverTensor = tensor->GetSolverTensorDesc();
      solver_tensor_desc_map_.insert(std::pair<size_t, SomasSolverTensorDescPtr>(pSolverTensor->index_, pSolverTensor));
    }
  }
  MS_LOG(INFO) << "End Loop to create solver info";
  MS_LOG(INFO) << "Start Solving";
  if (solver_tensor_desc_map_.empty()) {
    MS_LOG(INFO) << "solver_tensor_desc_list is empty.";
    return true;
  }
  somas_solver_ = std::make_shared<SomasSolverPre>();
  auto status =
    somas_solver_->Solving(graph, &solver_tensor_desc_map_, &reuse_matrix_, contiguous_tensors_list_removed, false);
  MS_LOG(INFO) << "End Solving";
  if (status != SUCCESS) {
    GenGraphStatisticInfo();
    MS_LOG(EXCEPTION) << "SOMAS Solving Failed.";
  }
  // Update solver_tensor_desc offset to tensors list
  for (const auto &tensor : tensors_list_) {
    tensor->SetOffset();
  }
  // Postprocessing order matters: ref tensors first, then contiguous fix-ups.
  UpdateRefTensorsOffset();
  UpdateContiguousTensorsOffset(contiguous_list_with_ref_index_map);
  // Set mem_offset_ value by solver result
  mem_offset_ = static_cast<size_t>(somas_solver_->GetMaxOffset());
  return true;
}
// For every ref pair whose two tensors both live in contiguous lists, find
// which list holds the input-side tensor and which holds the output-side one,
// and map first-list-index -> second-list-index. Inconsistent layouts (ref
// partners at different positions, a list paired with two different lists,
// pairs missing for some positions) are reported as warnings only.
std::map<size_t, size_t> Somas::GetContiguousListContainRefTensor() {
  // key: contiguous list index with ref node input; value: contiguous list index with ref node output
  std::map<size_t, size_t> contiguous_list_with_ref_index_map;
  std::map<size_t, size_t> ref_tensors_in_contiguous_map = GetRefTensorsInContiguousList();
  // first-list -> second-list -> set of in-list positions already seen.
  std::map<size_t, std::map<size_t, std::set<size_t>>> contiguous_ref_list_error_check_map;
  for (auto ref_pair : ref_tensors_in_contiguous_map) {
    size_t ref_first = ref_pair.first;
    size_t ref_second = ref_pair.second;
    bool found_first = false;
    bool found_second = false;
    size_t index_first = 0;
    size_t index_second = 0;
    size_t index_in_list_first = 0;
    size_t index_in_list_second = 0;
    // Locate each tensor id in the contiguous lists; stop once both are found.
    for (size_t index = 0; index < contiguous_tensors_list_.size() && (!found_first || !found_second); index++) {
      if (!found_first) {
        auto iterator_first =
          std::find(contiguous_tensors_list_[index].begin(), contiguous_tensors_list_[index].end(), ref_first);
        if (iterator_first != contiguous_tensors_list_[index].end()) {
          index_first = index;
          index_in_list_first = iterator_first - contiguous_tensors_list_[index].begin();
          found_first = true;
        }
      }
      if (!found_second) {
        auto iterator_second =
          std::find(contiguous_tensors_list_[index].begin(), contiguous_tensors_list_[index].end(), ref_second);
        if (iterator_second != contiguous_tensors_list_[index].end()) {
          index_second = index;
          index_in_list_second = iterator_second - contiguous_tensors_list_[index].begin();
          found_second = true;
        }
      }
    }
    if (!found_first) {
      MS_LOG(WARNING) << "Contiguous ref tensor " << ref_first << " not found in any contiguous list";
    }
    if (!found_second) {
      MS_LOG(WARNING) << "Contiguous ref tensor " << ref_second << " not found in any contiguous list";
    }
    // Accept the mapping only if index_first is new or maps to the same
    // second list as before; otherwise the graph pairs one list with two.
    if (contiguous_list_with_ref_index_map.find(index_first) == contiguous_list_with_ref_index_map.end() ||
        contiguous_list_with_ref_index_map[index_first] == index_second) {
      contiguous_list_with_ref_index_map[index_first] = index_second;
      // Checking for error cases
      if (index_in_list_first != index_in_list_second) {
        MS_LOG(WARNING) << "Inconsistency in contiguous ref: tensor " << ref_first << " in position "
                        << index_in_list_first << " of contiguous list " << index_first << " and tensor " << ref_second
                        << " in position " << index_in_list_second << " of contiguous list " << index_second;
      }
      contiguous_ref_list_error_check_map[index_first][index_second].insert(index_in_list_first);
    } else {
      MS_LOG(WARNING) << "Contiguous list " << index_first << " associated (ref node) with two other contiguous lists: "
                      << contiguous_list_with_ref_index_map[index_first] << " and " << index_second;
    }
  }
  // Sanity pass: every position of a paired second list should have been
  // covered by some ref pair; warn about gaps and size mismatches.
  for (auto check_list_pair : contiguous_ref_list_error_check_map) {
    auto first_list = check_list_pair.first;
    auto index_set_map = check_list_pair.second;
    for (auto index_set : index_set_map) {
      auto second_list = index_set.first;
      if (contiguous_tensors_list_[first_list].size() != contiguous_tensors_list_[second_list].size()) {
        MS_LOG(WARNING) << "Contiguous lists " << first_list << " and " << second_list
                        << " considered in ref do not have the same size";
      }
      for (size_t x = 0; x < contiguous_tensors_list_[second_list].size(); x++) {
        if (contiguous_ref_list_error_check_map[first_list][second_list].count(x) == 0) {
          MS_LOG(WARNING) << "Contiguous lists " << first_list << " and " << second_list
                          << " considered in ref: ref pair at in-lists index " << x << " has not been considered";
        }
      }
    }
  }
  return contiguous_list_with_ref_index_map;
}
  1173. std::map<size_t, size_t> Somas::GetRefTensorsInContiguousList() {
  1174. // key: refnode input value: refnode output
  1175. std::map<size_t, size_t> ref_tensors_in_contiguous_map;
  1176. for (auto ref_node_list : ref_node_constraints_) {
  1177. // Count contiguous tensors in ref list
  1178. size_t contiguous_in_ref_list = std::count_if(ref_node_list.begin(), ref_node_list.end(),
  1179. [this](size_t tid) { return tensors_map_[tid]->contiguous_; });
  1180. // Keep info about contiguous and check for errors
  1181. if (ref_node_list.size() > 2 && contiguous_in_ref_list > 0) {
  1182. MS_LOG(WARNING) << "Ref node of size greater than two with at least one contiguous tensor in";
  1183. }
  1184. if (ref_node_list.size() == 2 && contiguous_in_ref_list == 1) {
  1185. MS_LOG(WARNING) << "Ref node of size two with only one contiguous tensor" << ref_node_list[0] << ":"
  1186. << tensors_map_[ref_node_list[0]]->contiguous_ << ", " << ref_node_list[1] << ":"
  1187. << tensors_map_[ref_node_list[1]]->contiguous_;
  1188. }
  1189. if (ref_node_list.size() == 2 && contiguous_in_ref_list == 2) {
  1190. ref_tensors_in_contiguous_map[ref_node_list[0]] = ref_node_list[1];
  1191. }
  1192. }
  1193. return ref_tensors_in_contiguous_map;
  1194. }
  1195. void Somas::UpdateContiguousTensorsOffset(const std::map<size_t, size_t> &contiguous_ref_list_map) {
  1196. // Handle contiguous ref node
  1197. for (auto ref_list_pair : contiguous_ref_list_map) {
  1198. size_t index_first = ref_list_pair.first;
  1199. size_t index_second = ref_list_pair.second;
  1200. for (size_t x = 0; x < contiguous_tensors_list_[index_second].size(); x++) {
  1201. tensors_map_[contiguous_tensors_list_[index_second][x]]->offset_ =
  1202. tensors_map_[contiguous_tensors_list_[index_first][x]]->offset_;
  1203. }
  1204. }
  1205. // Contiguous gaps postprocessing
  1206. for (auto list : contiguous_tensors_list_) {
  1207. tensors_map_[list[0]]->offset_ += kGapSize;
  1208. }
  1209. }
  1210. void Somas::UpdateRefTensorsOffset() {
  1211. // Ref Node Postprocessing
  1212. MS_LOG(INFO) << "\nStart Solving Postprocessing for Ref Node";
  1213. // Set offset for rest of ref node list (ignored by solver due to ref node preprocessing)
  1214. for (auto ref_node_list : ref_node_constraints_) {
  1215. for (size_t i = 1; i < ref_node_list.size(); ++i) {
  1216. tensors_map_[ref_node_list[i]]->offset_ = tensors_map_[ref_node_list[0]]->offset_;
  1217. }
  1218. }
  1219. }
  1220. void Somas::UpdateRefOverlapTensorsConflicts() {
  1221. // Ref Overlap Preprocessing
  1222. MS_LOG(INFO) << "Start Solving Preprocessing for Ref Overlap";
  1223. // In ConflictComputing(), by use of ref_overlap_ flag, each tensor in a ref_overlap_list has all entries 1 in
  1224. // cannot_reuse_ array Here, we allow reuse only among tensors in same list
  1225. for (auto ref_overlap_list : ref_overlap_constraints_) {
  1226. for (size_t tid_1 : ref_overlap_list) {
  1227. for (size_t tid_2 : ref_overlap_list) {
  1228. reuse_matrix_[tid_1].SetBitTrue(tid_2);
  1229. reuse_matrix_[tid_2].SetBitTrue(tid_1);
  1230. }
  1231. }
  1232. }
  1233. MS_LOG(INFO) << "End Solving Preprocessing for Ref Overlap";
  1234. }
  1235. void Somas::UpdateRefTensorsConflict() {
  1236. // Keep all constraints for first tensor in list
  1237. for (auto ref_node_list : ref_node_constraints_) {
  1238. size_t tid_0 = ref_node_list[0];
  1239. for (SomasTensorPtr tensor : tensors_list_) {
  1240. if (reuse_matrix_[tid_0].IsBitTrue(tensor->GetId()) == false) {
  1241. continue;
  1242. }
  1243. for (size_t tid : ref_node_list) {
  1244. if (reuse_matrix_[tid].IsBitTrue(tensor->GetId()) == false) {
  1245. reuse_matrix_[tid_0].SetBitFalse(tensor->GetId());
  1246. reuse_matrix_[tensor->GetId()].SetBitFalse(tid_0);
  1247. break;
  1248. }
  1249. }
  1250. }
  1251. // Set rest to size 0, so that solver ignores them (if not contiguous)
  1252. for (size_t i = 1; i < ref_node_list.size(); ++i) {
  1253. if (!tensors_map_[ref_node_list[i]]->contiguous_) {
  1254. tensors_map_[ref_node_list[i]]->aligned_size_ = 0;
  1255. }
  1256. }
  1257. }
  1258. }
  1259. std::string Somas::GetSplitName(const std::string &scope_name) const {
  1260. auto indx = scope_name.rfind('/');
  1261. if (indx == std::string::npos) {
  1262. return scope_name;
  1263. } else {
  1264. if (indx < scope_name.size() - 1) {
  1265. auto split_name = scope_name.substr(indx + 1);
  1266. return split_name;
  1267. }
  1268. return scope_name;
  1269. }
  1270. }
  1271. std::string Somas::SomasInfo(bool calc_hash) {
  1272. std::ostringstream oss;
  1273. if (!calc_hash) {
  1274. DumpParameters(oss);
  1275. }
  1276. DumpTensors(oss);
  1277. DumpNodes(oss);
  1278. oss << "\n\nAll Stream Groups:\n\n";
  1279. for (const auto &stream_group : streams_groups_) {
  1280. for (const auto &stream : stream_group) {
  1281. oss << "stm" << stream << " ";
  1282. }
  1283. oss << "\n";
  1284. }
  1285. if (!ref_node_constraints_.empty()) {
  1286. oss << "\n\nAll Ref Node Info:\n\n";
  1287. for (const auto &ref_in_out : ref_node_constraints_) {
  1288. oss << "refnode input-output:";
  1289. for (const auto &item : ref_in_out) {
  1290. oss << "%" << item << "T ";
  1291. }
  1292. oss << "\n";
  1293. }
  1294. }
  1295. return oss.str();
  1296. }
// Append one line per somas node to oss:
//   $<id> <name> <type> inputs[...] outputs[...] workspace[...] streamID[@<id>]
// where %<id>P marks a parameter and %<id>T marks a somas tensor.
void Somas::DumpNodes(std::ostringstream &oss) const {
  oss << "\n\nAll Nodes:\n\n";
  for (const auto &node : nodes_list_) {
    auto scope_name = node->scope_full_name_;
    std::string split_name = GetSplitName(scope_name);
    oss << "$" << node->GetId() << "\t" << split_name << "\t" << static_cast<int>(node->GetType()) << "\t";
    // Inputs come from two sources: graph parameters (input_parameters_map_,
    // keyed by input position) and somas tensors (input_tensors_, consumed in
    // order for the positions not present in the map).
    auto input_num = node->input_tensors_.size() + node->input_parameters_map_.size();
    oss << "inputs[";
    size_t tensor_index = 0;
    for (size_t input_index = 0; input_index < input_num; input_index++) {
      auto iter = node->input_parameters_map_.find(input_index);
      if (iter != node->input_parameters_map_.end()) {
        // This input slot is a parameter.
        oss << "%" << iter->second->id_ << "P"
            << ", ";
      } else {
        // This input slot is the next unconsumed somas tensor.
        oss << "%" << node->input_tensors_[tensor_index]->GetId() << "T"
            << ", ";
        tensor_index++;
      }
    }
    oss << "]";
    oss << "\toutputs[";
    for (const auto &out : node->output_tensors_) {
      oss << "%" << out->GetId() << "T"
          << ", ";
    }
    oss << "]";
    oss << "\tworkspace[";
    for (const auto &wk : node->workspace_tensors_) {
      oss << "%" << wk->GetId() << "T"
          << ", ";
    }
    oss << "]";
    oss << "\tstreamID["
        << "@" << node->GetStream()->GetId() << "]\n";
  }
}
// Append a tab-separated table of all somas tensors to oss: id, aligned and
// original sizes, assigned offset, absolute address (offset + mem_base_addr_),
// type, lifelong flag, lifetime span, and the short name of the producing node.
void Somas::DumpTensors(std::ostringstream &oss) const {
  oss << "\n\nAll Tensors:\n\n";
  oss << "index:"
      << "\tsize:"
      << "\treal_size:"
      << "\toffset:"
      << "\taddr:"
      << "\ttype:"
      << "\tlifelong:"
      << "\tlife_start:"
      << "\tlife_end:"
      << "\tsource node name:\n";
  for (const auto &tensor : tensors_list_) {
    // NOTE(review): assumes every tensor has a non-null source node here
    // (SomasMemory below guards GetSourceNode() against nullptr; this loop does not).
    auto scope_name = tensor->GetSourceNode()->scope_full_name_;
    std::string split_name = GetSplitName(scope_name);
    oss << "%" << tensor->GetId() << "T"
        << "\t"
        << "#" << tensor->GetAlignedSize() << "S"
        << "\t"
        << "#" << tensor->GetOriginalSize() << "S"
        << "\t"
        << "&" << tensor->GetOffset() << ""
        << "\t"
        // Absolute address = base address of the somas pool + tensor offset.
        << "&" << static_cast<void *>(tensor->GetOffset() + mem_base_addr_) << "\t"
        << tensor_type_name_map[tensor->type_] << "\t" << tensor->IsLifelong() << "\t" << tensor->lifetime_.start_
        << "\t" << tensor->lifetime_.end_ << "\t" << split_name << "\n";
  }
}
  1362. void Somas::DumpParameters(std::ostringstream &oss) const {
  1363. oss << "All Parameters:\n\n";
  1364. oss << "index:"
  1365. << "\tsize:"
  1366. << "\tstart_addr:"
  1367. << "\tsource node name:"
  1368. << "\tnode out index:\n";
  1369. for (const auto &param : parameters_list_) {
  1370. oss << "%" << param->id_ << "P"
  1371. << "\t"
  1372. << "#" << param->size_ << "S"
  1373. << "\t"
  1374. << "&" << param->addr_ << "\t" << param->source_node_name_ << "\t" << param->output_index_ << "\n";
  1375. }
  1376. }
  1377. void Somas::DumpSomasInfoIR(const string filename) {
  1378. if (filename.size() > PATH_MAX) {
  1379. MS_LOG(ERROR) << "File path " << filename << " is too long.";
  1380. return;
  1381. }
  1382. auto real_path = Common::GetRealPath(filename);
  1383. if (!real_path.has_value()) {
  1384. MS_LOG(ERROR) << "Get real path failed. path=" << filename;
  1385. return;
  1386. }
  1387. ChangeFileMode(real_path.value(), S_IRWXU);
  1388. std::ofstream ofs(real_path.value());
  1389. if (!ofs.is_open()) {
  1390. MS_LOG(ERROR) << "Open dump file '" << real_path.value() << "' failed!";
  1391. return;
  1392. }
  1393. ofs << SomasInfo();
  1394. ofs.close();
  1395. }
  1396. std::string Somas::Offline() {
  1397. std::ostringstream oss;
  1398. for (auto tensor : tensors_list_) {
  1399. if (tensor->IsOutputOnly() || tensor->type_ == TensorType::kRefNodeOutput) {
  1400. oss << "Somas EDGE ERROR src=n" << tensor->GetSourceNode()->GetId()
  1401. << ", srcstm=" << tensor->GetSourceStream()->GetId() << ", dst=nc"
  1402. << ", dststm=nc"
  1403. << ", workspace=0, size=" << tensor->GetOriginalSize()
  1404. << ", lifelong=" << static_cast<int>(tensor->lifelong_value_) << ", tid=" << tensor->GetId()
  1405. << ", start=" << tensor->lifetime_.start_ << ", end=" << tensor->lifetime_.end_ << std::endl;
  1406. } else {
  1407. std::map<size_t, size_t> dest_infos;
  1408. for (SomasNodePtr dest_node : tensor->destinations_) {
  1409. dest_infos.insert(std::make_pair(dest_node->GetId(), dest_node->GetStream()->GetId()));
  1410. }
  1411. for (auto dest_info : dest_infos) {
  1412. oss << "Somas EDGE src=n" << tensor->GetSourceNode()->GetId()
  1413. << ", srcstm=" << tensor->GetSourceStream()->GetId() << ", dst=n" << dest_info.first
  1414. << ", dststm=" << dest_info.second << ", workspace=" << static_cast<int>(tensor->type_ == kWorkspace)
  1415. << ", size=" << tensor->GetOriginalSize() << ", lifelong=" << static_cast<int>(tensor->lifelong_value_)
  1416. << ", tid=" << tensor->GetId() << ", start=" << tensor->lifetime_.start_
  1417. << ", end=" << tensor->lifetime_.end_ << std::endl;
  1418. }
  1419. }
  1420. }
  1421. for (vector<size_t> tList : contiguous_tensors_list_) {
  1422. oss << "Somas CONTIGUOUS";
  1423. for (size_t tid : tList) {
  1424. oss << " " << tid;
  1425. }
  1426. oss << std::endl;
  1427. }
  1428. for (const auto &group : streams_groups_) {
  1429. oss << "Somas GROUP";
  1430. for (int64_t sid : group) {
  1431. oss << " " << sid;
  1432. }
  1433. oss << std::endl;
  1434. }
  1435. return oss.str();
  1436. }
  1437. void Somas::DumpOfflineIR(const string filename) {
  1438. MS_LOG(INFO) << "Printing somas-log-from-graph log: " << filename;
  1439. if (filename.size() > PATH_MAX) {
  1440. MS_LOG(ERROR) << "File path " << filename << " is too long.";
  1441. return;
  1442. }
  1443. auto real_path = Common::GetRealPath(filename);
  1444. if (!real_path.has_value()) {
  1445. MS_LOG(ERROR) << "Get real path failed. path=" << filename;
  1446. return;
  1447. }
  1448. ChangeFileMode(real_path.value(), S_IRWXU);
  1449. std::ofstream ofs(real_path.value());
  1450. if (!ofs.is_open()) {
  1451. MS_LOG(ERROR) << "Open dump file '" << real_path.value() << "' failed!";
  1452. return;
  1453. }
  1454. ofs << Offline();
  1455. ofs.close();
  1456. }
  1457. std::string Somas::SomasMemory() {
  1458. std::ostringstream oss;
  1459. std::map<size_t, size_t> mem_map;
  1460. for (auto tensor : tensors_list_) {
  1461. mem_map[tensor->GetOffset()] = 0;
  1462. }
  1463. size_t num = 0;
  1464. for (auto iter = mem_map.begin(); iter != mem_map.end(); ++iter, ++num) {
  1465. iter->second = num;
  1466. }
  1467. std::map<size_t, std::map<size_t, SomasTensorPtr>> mem_list;
  1468. for (const auto &tensor : tensors_list_) {
  1469. size_t key = tensor->offset_;
  1470. auto iter = mem_list.find(key);
  1471. if (iter == mem_list.end()) {
  1472. std::map<size_t, SomasTensorPtr> id_tensor_map;
  1473. id_tensor_map[tensor->GetId()] = tensor;
  1474. mem_list[key] = id_tensor_map;
  1475. } else {
  1476. iter->second[tensor->GetId()] = tensor;
  1477. }
  1478. }
  1479. oss << "mem_id:"
  1480. << "\tstart_offset:"
  1481. << "\tend_offset:"
  1482. << "\ttensor_id:"
  1483. << "\torigin_size:"
  1484. << "\talign_size:"
  1485. << "\tstart_addr:"
  1486. << "\tend_addr:"
  1487. << "\ttype:"
  1488. << "\tsrc_node:"
  1489. << "\tsrc_stm_id:"
  1490. << "lifetime_start\t"
  1491. << "lifetime_end\n";
  1492. for (const auto &mem : mem_list) {
  1493. auto id_tensor_map = mem.second;
  1494. for (const auto &id_tensor : id_tensor_map) {
  1495. auto tensor = id_tensor.second;
  1496. std::string scope_name;
  1497. size_t src_stm_id = 0xffff;
  1498. if (tensor->GetSourceNode() != nullptr) {
  1499. scope_name = tensor->GetSourceNode()->scope_full_name_;
  1500. src_stm_id = tensor->GetSourceNode()->GetStream()->GetId();
  1501. } else {
  1502. scope_name = "Somas Tensor";
  1503. }
  1504. std::string split_name = GetSplitName(scope_name);
  1505. oss << "#" << mem_map[tensor->GetOffset()] << "\t" << tensor->GetOffset() << "\t"
  1506. << tensor->GetOffset() + tensor->GetAlignedSize() << "\t%" << tensor->GetId() << "T\t"
  1507. << tensor->GetOriginalSize() << "\t" << tensor->GetAlignedSize() << "\t&"
  1508. << static_cast<void *>(tensor->GetOffset() + mem_base_addr_) << "\t&"
  1509. << static_cast<void *>(tensor->GetOffset() + mem_base_addr_ + tensor->GetAlignedSize()) << "\t"
  1510. << tensor_type_name_map[tensor->type_] << "\t" << split_name << "\tstm" << src_stm_id << "\t"
  1511. << tensor->lifetime_.start_ << "\t" << tensor->lifetime_.end_ << "\n";
  1512. }
  1513. }
  1514. return oss.str();
  1515. }
  1516. void Somas::DumpSomasMemoryIR(const string filename) {
  1517. if (filename.size() > PATH_MAX) {
  1518. MS_LOG(ERROR) << "File path " << filename << " is too long.";
  1519. return;
  1520. }
  1521. auto real_path = Common::GetRealPath(filename);
  1522. if (!real_path.has_value()) {
  1523. MS_LOG(ERROR) << "Get real path failed. path=" << filename;
  1524. return;
  1525. }
  1526. ChangeFileMode(real_path.value(), S_IRWXU);
  1527. std::ofstream ofs(real_path.value());
  1528. if (!ofs.is_open()) {
  1529. MS_LOG(ERROR) << "Open dump file '" << real_path.value() << "' failed!";
  1530. return;
  1531. }
  1532. ofs << SomasMemory();
  1533. ofs.close();
  1534. }
  1535. size_t Somas::CalcLowerBound() const {
  1536. size_t max_node_id = std::accumulate(tensors_list_.begin(), tensors_list_.end(), 0, [](size_t max_id, auto tensor) {
  1537. return std::max(max_id, tensor->lifetime_.end_);
  1538. });
  1539. std::map<size_t, size_t> lifetime_lb;
  1540. for (size_t time = 0; time <= max_node_id; time++) {
  1541. lifetime_lb[time] = 0;
  1542. }
  1543. size_t lower, upper;
  1544. for (auto tensor : tensors_list_) {
  1545. if (tensor->lifelong_value_ == kLifeLongGraphAll) {
  1546. lower = 0;
  1547. upper = max_node_id;
  1548. } else {
  1549. lower = tensor->lifetime_.start_;
  1550. upper = tensor->lifetime_.end_;
  1551. }
  1552. for (size_t time = lower; time <= upper; time++) {
  1553. lifetime_lb[time] += tensor->GetAlignedSize();
  1554. }
  1555. }
  1556. size_t max_lifetime = 0;
  1557. for (size_t time = 0; time <= max_node_id; time++) {
  1558. if (max_lifetime < lifetime_lb[time]) {
  1559. max_lifetime = lifetime_lb[time];
  1560. }
  1561. }
  1562. return max_lifetime;
  1563. }
// Compute and log the graph memory statistics: the theoretical lower bound,
// the no-reuse upper bound (sum of all aligned sizes), and per-category totals
// (workspace, lifelong-all / -start / -end).
void Somas::GenGraphStatisticInfo() {
  lower_bound_ = CalcLowerBound();
  for (const auto &tensor : tensors_list_) {
    // Upper bound = every tensor gets its own memory, i.e. no reuse at all.
    upper_bound_ += tensor->aligned_size_;
    if (tensor->type_ == kWorkspace) {
      workspace_total_size_ += tensor->aligned_size_;
    }
    // The three lifelong categories are mutually exclusive per tensor.
    if (tensor->lifelong_value_ == kLifeLongGraphAll) {
      lifelong_all_total_size_ += tensor->aligned_size_;
    } else if (tensor->lifelong_value_ == kLifeLongGraphStart) {
      lifelong_start_total_size_ += tensor->aligned_size_;
    } else if (tensor->lifelong_value_ == kLifeLongGraphEnd) {
      lifelong_end_total_size_ += tensor->aligned_size_;
    }
  }
  const double giga = 1024. * 1024. * 1024.;  // bytes per GB, for log readability
  MS_LOG(INFO) << "Lower Bound: " << lower_bound_ << " (" << lower_bound_ / giga
               << " GB), Upper Bound: " << upper_bound_ << " (" << upper_bound_ / giga << " GB)";
  MS_LOG(INFO) << "\nTotal Dynamic Size (Upper Bound):\t" << upper_bound_ << "\n"
               << "Theoretical Optimal Size (Lower Bound):\t" << lower_bound_ << "\n"
               << "Total Workspace Size:\t" << workspace_total_size_ << "\n"
               << "Total Communication Input Tensor Size:\t" << comm_input_total_size_ << "\n"
               << "Total Communication Output Tensor Size:\t" << comm_output_total_size_ << "\n"
               << "Total LifeLong All Tensor Size:\t" << lifelong_all_total_size_ << "\n"
               << "Total LifeLong Start Tensor Size:\t" << lifelong_start_total_size_ << "\n"
               << "Total LifeLong End Tensor Size:\t" << lifelong_end_total_size_ << "\n"
               << "Reused Size(Allocate Size):\t" << GetTotalMemSize() << "\n\n\n";
}
  1592. uint8_t *Somas::GetNodeOutputPtr(const AnfNodePtr &node, size_t index) const {
  1593. auto key = node.get();
  1594. auto iter = nodes_map_.find(key);
  1595. uint8_t *ptr = nullptr;
  1596. if (iter != nodes_map_.end()) {
  1597. auto &node = iter->second.at(0);
  1598. if (index >= node->output_tensors_.size()) {
  1599. MS_LOG(EXCEPTION) << "index:[" << index << "] is larger than it's workspace size:["
  1600. << node->output_tensors_.size() << "]";
  1601. }
  1602. auto output_tensor = node->output_tensors_[index];
  1603. ptr = mem_base_addr_ + output_tensor->offset_;
  1604. } else {
  1605. MS_LOG(EXCEPTION) << "node [" << AnfAlgo::GetCNodeName(node) << "] don't exist in nodes_map";
  1606. }
  1607. return ptr;
  1608. }
  1609. uint8_t *Somas::GetNodeWorkSpacePtr(const AnfNodePtr &node, size_t index) const {
  1610. auto key = node.get();
  1611. auto iter = nodes_map_.find(key);
  1612. uint8_t *ptr = nullptr;
  1613. if (iter != nodes_map_.end()) {
  1614. auto &node = iter->second.at(0);
  1615. if (index >= node->workspace_tensors_.size()) {
  1616. MS_LOG(EXCEPTION) << "index:[" << index << "] is larger than it's workspace size:["
  1617. << node->workspace_tensors_.size() << "]";
  1618. }
  1619. auto workspace_tensor = node->workspace_tensors_[index];
  1620. ptr = mem_base_addr_ + workspace_tensor->offset_;
  1621. }
  1622. return ptr;
  1623. }
// Export the somas tensor and node layout of `graph_id` to the memory
// profiler. Compiled to a no-op unless ENABLE_D (Ascend/Davinci builds) is set.
void Somas::ConvertToProfilingNode(uint32_t graph_id) {
#ifdef ENABLE_D
  // Reuse the graph's profiling node if it already exists; otherwise create one.
  auto graph_node = MemoryProfiling::GetInstance().GetGraphMemoryNode(graph_id);
  if (graph_node == nullptr) {
    graph_node = MemoryProfiling::GetInstance().AddGraphMemoryNode(graph_id);
    MS_LOG(INFO) << "Add graph memory node for dynamic memory profiling, graph id is " << graph_id;
  }
  // One TensorMemory record per somas tensor: id, aligned size, type name,
  // lifetime span, and lifelong category.
  for (const auto &tensor : tensors_list_) {
    TensorMemory tensor_memory;
    tensor_memory.SetTensorId(tensor->GetId());
    tensor_memory.SetAlignedSize(tensor->GetAlignedSize());
    tensor_memory.SetType(tensor_type_name_map[tensor->type_]);
    tensor_memory.SetLifeStart(tensor->lifetime_.start_);
    tensor_memory.SetLifeEnd(tensor->lifetime_.end_);
    tensor_memory.SetLifeLong(life_long_name_map[tensor->lifelong_value_]);
    graph_node->AddTensorMemory(tensor_memory);
  }
  // One NodeMemory record per somas node: short name, id, and the ids of its
  // input / output / workspace tensors.
  for (const auto &node : nodes_list_) {
    NodeMemory node_memory;
    std::string name = GetSplitName(node->scope_full_name_);
    node_memory.SetNodeName(name);
    node_memory.SetNodeId(node->GetId());
    for (const auto &tensor : node->input_tensors_) {
      node_memory.AddInputTensorId(tensor->GetId());
    }
    for (const auto &tensor : node->output_tensors_) {
      node_memory.AddOutputTensorId(tensor->GetId());
    }
    for (const auto &tensor : node->workspace_tensors_) {
      node_memory.AddWorkSpaceTensorId(tensor->GetId());
    }
    graph_node->AddNodeMemory(node_memory);
  }
#endif
}
  1659. } // namespace somas
  1660. } // namespace mindspore