You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

common_utils.cc 23 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638
  1. /**
  2. * Copyright 2019 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "kernel/common_utils.h"
  17. #include <unordered_map>
  18. #include <map>
  19. #include <iostream>
  20. #include <utility>
  21. #include <fstream>
  22. #include "nlohmann/json.hpp"
  23. #include "session/anf_runtime_algorithm.h"
  24. #include "common/utils.h"
  25. namespace mindspore {
  26. namespace kernel {
// Forward map: dtype name string (as it appears in op registration info) -> TypeId.
// Note the asymmetry: "float" is treated as an alias of float32 here, while the
// reverse map below renders kNumberTypeFloat as "float" — presumably intentional,
// TODO confirm.
const std::unordered_map<std::string, TypeId> type_id_maps = {
  {"float", TypeId::kNumberTypeFloat32},   {"float16", TypeId::kNumberTypeFloat16},
  {"float32", TypeId::kNumberTypeFloat32}, {"float64", TypeId::kNumberTypeFloat64},
  {"int", TypeId::kNumberTypeInt},         {"int8", TypeId::kNumberTypeInt8},
  {"int16", TypeId::kNumberTypeInt16},     {"int32", TypeId::kNumberTypeInt32},
  {"int64", TypeId::kNumberTypeInt64},     {"uint", TypeId::kNumberTypeUInt},
  {"uint8", TypeId::kNumberTypeUInt8},     {"uint16", TypeId::kNumberTypeUInt16},
  {"uint32", TypeId::kNumberTypeUInt32},   {"uint64", TypeId::kNumberTypeUInt64},
  {"bool", TypeId::kNumberTypeBool},
};
// Reverse map: TypeId -> dtype name string. Used by TypeId2String(); any id
// missing here falls back to TypeIdLabel() there.
const std::map<TypeId, std::string> type_id_str_map = {
  {TypeId::kNumberTypeFloat32, "float32"}, {TypeId::kNumberTypeFloat16, "float16"},
  {TypeId::kNumberTypeFloat, "float"},     {TypeId::kNumberTypeFloat64, "float64"},
  {TypeId::kNumberTypeInt, "int"},         {TypeId::kNumberTypeInt8, "int8"},
  {TypeId::kNumberTypeInt16, "int16"},     {TypeId::kNumberTypeInt32, "int32"},
  {TypeId::kNumberTypeInt64, "int64"},     {TypeId::kNumberTypeUInt, "uint"},
  {TypeId::kNumberTypeUInt8, "uint8"},     {TypeId::kNumberTypeUInt16, "uint16"},
  {TypeId::kNumberTypeUInt32, "uint32"},   {TypeId::kNumberTypeUInt64, "uint64"},
  {TypeId::kNumberTypeBool, "bool"},
};
// Maps capitalized type-label strings ("Float32", "Bool_", ...) to device dtype
// names, used by Dtype2String().
// NOTE(review): "Float64" maps to "double" rather than "float64", unlike every
// other entry — confirm this is intentional before relying on it.
const std::map<std::string, std::string> DATATYPE_STRING_MAP{
  {"Float32", "float32"}, {"Float16", "float16"}, {"Int8", "int8"},   {"Int16", "int16"},
  {"UInt16", "uint16"},   {"UInt8", "uint8"},     {"Int32", "int32"}, {"UInt32", "uint32"},
  {"Int64", "int64"},     {"UInt64", "uint64"},   {"Bool_", "bool"},  {"Float64", "double"},
};
// Maps a dtype name to the abbreviated form used in generated kernel names
// (e.g. "float16" -> "f16"). Consumed by Dtype2ShortType().
const std::unordered_map<std::string, std::string> dtype_shortdtype_map_ = {
  {"float16", "f16"}, {"float32", "f32"}, {"float64", "f64"}, {"int8", "i8"},   {"int16", "i16"}, {"int32", "i32"},
  {"int64", "i64"},   {"uint8", "u8"},    {"uint16", "u16"},  {"uint32", "u32"}, {"uint64", "u64"}, {"bool", "bool"},
};
  56. const std::unordered_map<std::string, size_t> dtype_nbyte_map = {
  57. {"float16", sizeof(float) / 2}, {"float32", sizeof(float)}, {"float64", sizeof(float) * 2},
  58. {"int8", sizeof(int) / 4}, {"int16", sizeof(int) / 2}, {"int32", sizeof(int)},
  59. {"int64", sizeof(int) * 2}, {"uint8", sizeof(int) / 4}, {"uint16", sizeof(int) / 2},
  60. {"uint32", sizeof(int)}, {"uint64", sizeof(int) * 2}, {"bool", sizeof(char)},
  61. };
// Maps the fusion-type string from an op registration to the FusionType enum.
// "CONVLUTION" (sic) mirrors the spelling of the enum value FusionType::CONVLUTION,
// so the key must not be "corrected" here independently of the enum.
const std::unordered_map<std::string, FusionType> fusion_type_maps = {
  {"CONVLUTION", FusionType::CONVLUTION}, {"ELEMWISE", FusionType::ELEMWISE}, {"COMMREDUCE", FusionType::COMMREDUCE},
  {"SEGMENT", FusionType::SEGMENT},       {"OPAQUE", FusionType::OPAQUE},
};
  66. bool IsAtomicNode(const CNodePtr &kernel_node) {
  67. MS_EXCEPTION_IF_NULL(kernel_node);
  68. auto kernel_mod = AnfAlgo::GetKernelMod(kernel_node);
  69. MS_EXCEPTION_IF_NULL(kernel_mod);
  70. auto parameters_indexs = kernel_mod->GenParameters();
  71. if (parameters_indexs.empty()) {
  72. return false;
  73. }
  74. auto atomic_flag = false;
  75. size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
  76. size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
  77. auto workspace_size_list = kernel_mod->GetWorkspaceSizeList();
  78. size_t workspace_num = kernel_mod->GetWorkspaceSizeList().size();
  79. if (input_num + workspace_num + output_num > parameters_indexs.size()) {
  80. size_t lossNum = (input_num + workspace_num + output_num) - parameters_indexs.size();
  81. for (size_t i = 0; i < lossNum; i++) {
  82. parameters_indexs.push_back(0);
  83. }
  84. }
  85. std::vector<int> clean_output_indexs;
  86. // in parameters data sort as input->workspace->output
  87. size_t index = 0;
  88. while (index < output_num) {
  89. if (parameters_indexs[input_num + workspace_num + index] == 1) {
  90. atomic_flag = true;
  91. clean_output_indexs.push_back(SizeToInt(index));
  92. }
  93. index++;
  94. }
  95. if (atomic_flag) {
  96. AnfAlgo::SetNodeAttr(kAttrAutomicOutputIndexs, MakeValue(clean_output_indexs), kernel_node);
  97. }
  98. for (size_t i = 0; i < workspace_num; ++i) {
  99. if (parameters_indexs[input_num + i] == 1) {
  100. atomic_flag = true;
  101. AnfAlgo::SetNodeAttr(kAttrAutomicWorkspaceSize,
  102. MakeValue(std::accumulate(workspace_size_list.begin(), workspace_size_list.end(), 0)),
  103. kernel_node);
  104. break;
  105. }
  106. }
  107. return atomic_flag;
  108. }
  109. void KernelMeta::Initialize() {
  110. kernel_meta_path_ = std::string(kGpuKernelMeta) + "_" + std::to_string(getpid()) + "/";
  111. // remove old kernel cache
  112. RemoveKernelCache();
  113. #if defined(_WIN32) || defined(_WIN64)
  114. auto ret = mkdir(kernel_meta_path_.c_str());
  115. #else
  116. auto ret = mkdir(kernel_meta_path_.c_str(), S_IRWXG | S_IRWXU);
  117. #endif
  118. if (ret != 0) {
  119. MS_LOG(INFO) << "kernel dir [" << kernel_meta_path_ << "], will be created later";
  120. }
  121. initialized_ = true;
  122. }
  123. void KernelMeta::RemoveKernelCache() {
  124. DIR *dir = opendir(kernel_meta_path_.c_str());
  125. if (dir == nullptr) {
  126. return;
  127. }
  128. struct dirent *entry;
  129. while ((entry = readdir(dir)) != nullptr) {
  130. std::string kernel_file = entry->d_name;
  131. std::string kernel_file_realpath = kernel_meta_path_ + kernel_file;
  132. (void)remove(kernel_file_realpath.c_str());
  133. }
  134. (void)closedir(dir);
  135. (void)rmdir(kernel_meta_path_.c_str());
  136. }
  137. std::string KernelMeta::Search(const std::string &kernel_name) const {
  138. if (!initialized_) {
  139. return "";
  140. }
  141. auto iter = kernel_meta_map_.find(kernel_name);
  142. if (iter == kernel_meta_map_.end()) {
  143. return "";
  144. } else {
  145. return iter->second;
  146. }
  147. }
  148. bool KernelMeta::Insert(const std::string &kernel_name, const std::string &kernel_json) {
  149. if (!initialized_) {
  150. return false;
  151. }
  152. kernel_meta_map_[kernel_name] = kernel_json;
  153. return true;
  154. }
  155. bool CheckCache(const std::string &kernel_name) {
  156. // check cache.
  157. KernelMeta *bin_map = KernelMeta::GetInstance();
  158. if (bin_map == nullptr) {
  159. MS_LOG(DEBUG) << "kernel cache is invalid.";
  160. return false;
  161. }
  162. std::string kernel_json = bin_map->Search(kernel_name);
  163. bool ret = (!kernel_json.empty());
  164. if (ret) {
  165. MS_LOG(INFO) << "Kernel name:" << kernel_name << " has registed.";
  166. } else {
  167. MS_LOG(INFO) << "Kernel name:" << kernel_name << " will been registed.";
  168. }
  169. return ret;
  170. }
  171. KernelPackPtr SearchCache(const std::string &kernel_name, const std::string &processor) {
  172. // search cache.
  173. KernelMeta *bin_map = KernelMeta::GetInstance();
  174. if (bin_map == nullptr) {
  175. MS_LOG(DEBUG) << "kernel cache is invalid.";
  176. return nullptr;
  177. }
  178. std::string kernel_json = bin_map->Search(kernel_name);
  179. if (!kernel_json.empty()) {
  180. KernelPackPtr kernel_pack = std::make_shared<KernelPack>();
  181. // just a tmp solution.
  182. if (!kernel_pack->ReadFromJsonFile(kernel_json, processor)) {
  183. MS_LOG(DEBUG) << "Read cache json and bin file failed[" << kernel_json << "].";
  184. return nullptr;
  185. } else {
  186. return kernel_pack;
  187. }
  188. } else {
  189. MS_LOG(INFO) << "cache kernel not found[" << kernel_name << "].";
  190. return nullptr;
  191. }
  192. }
  193. KernelPackPtr InsertCache(const std::string &kernel_name, const std::string &processor) {
  194. MS_LOG(INFO) << "kernel name:" << kernel_name << ", processr:" << processor;
  195. KernelMeta *bin_map = KernelMeta::GetInstance();
  196. std::string kernel_json;
  197. if (processor == kProcessorAiCore || processor == kProcessorAiCpu) {
  198. kernel_json = kCceKernelMeta;
  199. } else {
  200. kernel_json = bin_map->GetKernelMetaPath();
  201. }
  202. (void)kernel_json.append(kernel_name).append(kJsonSuffix);
  203. KernelPackPtr kernel_pack = std::make_shared<KernelPack>();
  204. if (!kernel_pack->ReadFromJsonFile(kernel_json, processor)) {
  205. MS_LOG(DEBUG) << "Read json and bin file failed[" << kernel_json << "].";
  206. return nullptr;
  207. }
  208. if (bin_map == nullptr) {
  209. MS_LOG(DEBUG) << "kernel cache is invalid.";
  210. return nullptr;
  211. }
  212. if (bin_map->Insert(kernel_name, kernel_json)) {
  213. MS_LOG(INFO) << "Insert to cache success[" << kernel_json << "], kernelname[" << kernel_name << "].";
  214. }
  215. return kernel_pack;
  216. }
  217. TypeId DtypeToTypeId(const std::string &dtypes) {
  218. auto iter = type_id_maps.find(dtypes);
  219. if (iter != type_id_maps.end()) {
  220. return iter->second;
  221. } else {
  222. MS_EXCEPTION(ArgumentError) << "Illegal input device dtype:" << dtypes;
  223. }
  224. }
  225. std::string Dtype2String(const std::string &dtypes) {
  226. auto iter = DATATYPE_STRING_MAP.find(dtypes);
  227. if (iter == DATATYPE_STRING_MAP.end()) {
  228. MS_EXCEPTION(ArgumentError) << "Illegal input dtype:" << dtypes;
  229. }
  230. return iter->second;
  231. }
  232. std::string TypeId2String(TypeId type_id) {
  233. auto iter = type_id_str_map.find(type_id);
  234. if (iter == type_id_str_map.end()) {
  235. return std::string(TypeIdLabel(type_id));
  236. }
  237. return iter->second;
  238. }
  239. std::string Dtype2ShortType(const std::string &dtypes) {
  240. auto iter = dtype_shortdtype_map_.find(dtypes);
  241. if (iter != dtype_shortdtype_map_.end()) {
  242. return iter->second;
  243. } else {
  244. MS_EXCEPTION(ArgumentError) << "Illegal input dtype:" << dtypes;
  245. }
  246. }
  247. size_t GetDtypeNbyte(const std::string &dtypes) {
  248. auto iter = dtype_nbyte_map.find(dtypes);
  249. if (iter != dtype_nbyte_map.end()) {
  250. return iter->second;
  251. } else {
  252. MS_EXCEPTION(ArgumentError) << "Illegal input dtype:" << dtypes;
  253. }
  254. }
  255. bool SetInputKernelBuilderInfo(const std::vector<std::shared_ptr<OpIOInfo>> &inputs, size_t real_input_num,
  256. size_t builder_idex, const std::vector<int> &dyn_input_sizes,
  257. const std::shared_ptr<KernelBuildInfo::KernelBuildInfoBuilder> &builder) {
  258. MS_EXCEPTION_IF_NULL(builder);
  259. std::vector<TypeId> inputs_device_type;
  260. std::vector<std::string> inputs_format;
  261. size_t dyn_input_idx = 0;
  262. size_t kernel_info_index = 0;
  263. MS_EXCEPTION_IF_NULL(inputs[0]);
  264. size_t kernel_info_cnt = inputs[0]->dtypes().size();
  265. for (const auto &input : inputs) {
  266. MS_EXCEPTION_IF_NULL(input);
  267. std::string param_type = input->param_type();
  268. std::vector<std::string> dtypes = input->dtypes();
  269. std::vector<std::string> formats = input->formats();
  270. if (dtypes.size() != kernel_info_cnt || formats.size() != kernel_info_cnt) {
  271. MS_LOG(DEBUG) << "Set input kernel builder info, dtyps size != formats size.";
  272. return false;
  273. }
  274. if (param_type == "dynamic") {
  275. if (dyn_input_sizes.empty()) {
  276. MS_LOG(DEBUG) << "Set input kernel builder info, dyn_input_sizes's size is 0 when param_type is dynamic";
  277. return false;
  278. }
  279. for (int t = 0; t < dyn_input_sizes[dyn_input_idx]; t++) {
  280. kernel_info_index++;
  281. auto type_id = DtypeToTypeId(dtypes[builder_idex]);
  282. inputs_device_type.push_back(type_id);
  283. inputs_format.push_back(formats[builder_idex]);
  284. }
  285. dyn_input_idx++;
  286. } else if (param_type == "required") {
  287. kernel_info_index++;
  288. auto type_id = DtypeToTypeId(dtypes[builder_idex]);
  289. inputs_device_type.push_back(type_id);
  290. inputs_format.push_back(formats[builder_idex]);
  291. } else {
  292. if (kernel_info_index < real_input_num) {
  293. MS_LOG(INFO) << "Set input kernel builder info, input type is optional, input index is :" << kernel_info_index;
  294. kernel_info_index++;
  295. auto type_id = DtypeToTypeId(dtypes[builder_idex]);
  296. inputs_device_type.push_back(type_id);
  297. inputs_format.push_back(formats[builder_idex]);
  298. }
  299. }
  300. }
  301. builder->SetInputsDeviceType(inputs_device_type);
  302. builder->SetInputsFormat(inputs_format);
  303. return true;
  304. }
  305. bool SetOutputKernelBuilderInfo(const std::vector<std::shared_ptr<OpIOInfo>> &outputs, size_t builder_idex,
  306. const size_t &real_output_num,
  307. const std::shared_ptr<KernelBuildInfo::KernelBuildInfoBuilder> &builder) {
  308. // not now but in the next we need to support dynamic output case
  309. MS_EXCEPTION_IF_NULL(builder);
  310. size_t output_idx = 0;
  311. std::vector<TypeId> outputs_device_type;
  312. std::vector<std::string> outputs_format;
  313. MS_EXCEPTION_IF_NULL(outputs[0]);
  314. size_t kernel_info_cnt = outputs[0]->dtypes().size();
  315. for (const auto &output : outputs) {
  316. MS_EXCEPTION_IF_NULL(output);
  317. if (output_idx >= real_output_num) {
  318. MS_LOG(DEBUG) << "real_output_num:" << real_output_num << ", output_idx:" << output_idx << " is out of limit!";
  319. continue;
  320. }
  321. size_t output_num = 0;
  322. if (output->param_type() == "dynamic") {
  323. if (outputs.size() > 1) {
  324. MS_EXCEPTION(ArgumentError) << "Dynamic output is unsupported multi output!";
  325. }
  326. output_num = real_output_num;
  327. } else if (output->param_type() == "required") {
  328. output_num = 1;
  329. } else {
  330. if (output_idx < real_output_num) {
  331. MS_LOG(INFO) << "Set output kernel builder info, output type is optional, output index is :" << output_idx;
  332. output_num = 1;
  333. }
  334. }
  335. for (size_t i = 0; i < output_num; i++) {
  336. std::vector<std::string> dtypes = output->dtypes();
  337. std::vector<std::string> formats = output->formats();
  338. if (dtypes.size() != kernel_info_cnt || formats.size() != kernel_info_cnt) {
  339. MS_LOG(DEBUG) << "Set output kernel builder info, dtyps size != formats size.";
  340. return false;
  341. }
  342. auto type_id = DtypeToTypeId(dtypes[builder_idex]);
  343. outputs_device_type.push_back(type_id);
  344. outputs_format.push_back(formats[builder_idex]);
  345. output_idx++;
  346. }
  347. }
  348. builder->SetOutputsFormat(outputs_format);
  349. builder->SetOutputsDeviceType(outputs_device_type);
  350. return true;
  351. }
  352. void SetKernelBuildInfo(const std::shared_ptr<KernelBuildInfo::KernelBuildInfoBuilder> &builder, Processor processor,
  353. const std::shared_ptr<const OpInfo> &op_info_ptr) {
  354. MS_EXCEPTION_IF_NULL(builder);
  355. MS_EXCEPTION_IF_NULL(op_info_ptr);
  356. auto imply_type = op_info_ptr->imply_type();
  357. builder->SetProcessor(processor);
  358. std::string fusion_type = op_info_ptr->fusion_type();
  359. auto iter = fusion_type_maps.find(fusion_type);
  360. if (iter != fusion_type_maps.end()) {
  361. builder->SetFusionType(iter->second);
  362. } else {
  363. if (imply_type == kAKG) {
  364. MS_EXCEPTION(NotExistsError) << "Illegal fusion type from dsl register:" << fusion_type;
  365. }
  366. }
  367. if (imply_type == kAKG) {
  368. builder->SetKernelType(AUTO_DIFF_KERNEL);
  369. } else if (imply_type == kAICPU) {
  370. builder->SetKernelType(AICPU_KERNEL);
  371. } else {
  372. builder->SetKernelType(TBE_KERNEL);
  373. }
  374. }
  375. bool ParseMetadata(const CNodePtr &kernel_node, const std::shared_ptr<const OpInfo> &op_info_ptr, Processor processor,
  376. std::vector<std::shared_ptr<KernelBuildInfo>> *const kernel_info_list) {
  377. MS_EXCEPTION_IF_NULL(kernel_node);
  378. MS_EXCEPTION_IF_NULL(kernel_info_list);
  379. size_t real_input_num = AnfAlgo::GetInputTensorNum(kernel_node);
  380. size_t real_output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
  381. std::vector<std::shared_ptr<OpIOInfo>> inputs = op_info_ptr->inputs_ptr();
  382. std::vector<std::shared_ptr<OpIOInfo>> outputs = op_info_ptr->outputs_ptr();
  383. std::vector<int> dyn_input_sizes;
  384. auto primitive = AnfAlgo::GetCNodePrimitive(kernel_node);
  385. MS_EXCEPTION_IF_NULL(primitive);
  386. if (primitive->GetAttr("dyn_input_sizes") != nullptr) {
  387. dyn_input_sizes = GetValue<std::vector<int>>(primitive->GetAttr("dyn_input_sizes"));
  388. }
  389. if (inputs.size() > 0) {
  390. MS_EXCEPTION_IF_NULL(inputs[0]);
  391. size_t kernel_info_cnt = inputs[0]->dtypes().size();
  392. for (size_t j = 0; j < kernel_info_cnt; j++) {
  393. auto builder = std::make_shared<KernelBuildInfo::KernelBuildInfoBuilder>();
  394. MS_EXCEPTION_IF_NULL(builder);
  395. SetKernelBuildInfo(builder, processor, op_info_ptr);
  396. if (!SetInputKernelBuilderInfo(inputs, real_input_num, j, dyn_input_sizes, builder)) {
  397. MS_LOG(DEBUG) << "Parse kernel metadata, set inputs kernel builder info failed.";
  398. return false;
  399. }
  400. if (outputs.size() > 0) {
  401. if (!SetOutputKernelBuilderInfo(outputs, j, real_output_num, builder)) {
  402. MS_LOG(DEBUG) << "Parse kernel metadata, set outputs kernel builder info failed.";
  403. return false;
  404. }
  405. }
  406. kernel_info_list->push_back(builder->Build());
  407. }
  408. } else if (outputs.size() > 0) {
  409. MS_EXCEPTION_IF_NULL(outputs[0]);
  410. size_t kernel_info_cnt = outputs[0]->dtypes().size();
  411. for (size_t j = 0; j < kernel_info_cnt; j++) {
  412. auto builder = std::make_shared<KernelBuildInfo::KernelBuildInfoBuilder>();
  413. MS_EXCEPTION_IF_NULL(builder);
  414. SetKernelBuildInfo(builder, processor, op_info_ptr);
  415. if (!SetOutputKernelBuilderInfo(outputs, j, real_output_num, builder)) {
  416. MS_LOG(DEBUG) << "Parse kernel metadata, set outputs kernel builder info failed.";
  417. return false;
  418. }
  419. kernel_info_list->push_back(builder->Build());
  420. }
  421. } else {
  422. if (processor == AICPU) {
  423. auto builder = std::make_shared<KernelBuildInfo::KernelBuildInfoBuilder>();
  424. MS_EXCEPTION_IF_NULL(builder);
  425. SetKernelBuildInfo(builder, processor, op_info_ptr);
  426. kernel_info_list->push_back(builder->Build());
  427. }
  428. }
  429. return true;
  430. }
  431. void SaveJsonInfo(const std::string &json_name, const std::string &info) {
  432. char real_path[PATH_MAX] = {0};
  433. std::string path = kCceKernelMeta + json_name + kInfoSuffix;
  434. if (path.size() > PATH_MAX) {
  435. MS_LOG(DEBUG) << "file path " << path << " is too long.";
  436. return;
  437. }
  438. std::ofstream filewrite;
  439. filewrite.open(path);
  440. if (!filewrite.is_open()) {
  441. return;
  442. }
  443. filewrite << info << std::endl;
  444. filewrite.close();
  445. #if defined(_WIN32) || defined(_WIN64)
  446. if (nullptr == _fullpath(real_path, path.c_str(), PATH_MAX)) {
  447. MS_LOG(DEBUG) << "dir " << path << " does not exit.";
  448. return;
  449. }
  450. #else
  451. if (nullptr == realpath(path.c_str(), real_path)) {
  452. MS_LOG(DEBUG) << "dir " << path << " does not exit.";
  453. return;
  454. }
  455. #endif
  456. MS_LOG(INFO) << "real path is :" << real_path;
  457. if (chmod(real_path, S_IRUSR) == -1) {
  458. MS_LOG(DEBUG) << "modify file:" << real_path << " to read only fail.";
  459. }
  460. }
  461. std::string GetProcessor(const AnfNodePtr &anf_node) {
  462. MS_EXCEPTION_IF_NULL(anf_node);
  463. std::string device;
  464. switch (AnfAlgo::GetProcessor(anf_node)) {
  465. case Processor::AICORE:
  466. device = kProcessorAiCore;
  467. break;
  468. case Processor::AICPU:
  469. device = kProcessorAiCpu;
  470. break;
  471. case Processor::CUDA:
  472. device = kProcessorCuda;
  473. break;
  474. default:
  475. MS_LOG(DEBUG) << "Unknown processor type.";
  476. break;
  477. }
  478. return device;
  479. }
  480. bool IsSameShape(const std::vector<size_t> &shape_a, const std::vector<size_t> &shape_b) {
  481. if (shape_a.size() != shape_b.size()) {
  482. return false;
  483. }
  484. for (size_t i = 0; i < shape_a.size(); ++i) {
  485. if (shape_a[i] != shape_b[i]) {
  486. return false;
  487. }
  488. }
  489. return true;
  490. }
  491. int Sign(float x) {
  492. if (x > 0) {
  493. return 1;
  494. }
  495. if (x < 0) {
  496. return -1;
  497. }
  498. return 0;
  499. }
// Merges duplicate rows of a sparse gradient into *unique_grad.
// Rows whose index falls outside [0, first_dim) are silently dropped; rows
// sharing an index have their outer_dim-long value slices summed. First-seen
// order of indices is preserved. unique_grad's buffers are assumed to be
// preallocated at least as large as the origin's — TODO confirm with callers.
void DeduplicateIndexedSlices(const SparseGradient &origin_sparse_grad, SparseGradient *unique_grad, size_t first_dim,
                              size_t outer_dim) {
  MS_EXCEPTION_IF_NULL(origin_sparse_grad.value_);
  MS_EXCEPTION_IF_NULL(origin_sparse_grad.indices_);
  MS_EXCEPTION_IF_NULL(unique_grad);
  MS_EXCEPTION_IF_NULL(unique_grad->value_);
  MS_EXCEPTION_IF_NULL(unique_grad->indices_);
  std::unordered_map<int, size_t> index_map;  // original index -> slot in unique_grad
  size_t unique_indices_size = 0;
  for (size_t i = 0; i < origin_sparse_grad.indices_size_; ++i) {
    int index = origin_sparse_grad.indices_[i];
    if (index < 0 || IntToSize(index) >= first_dim) {
      continue;  // out-of-range index: skip this row entirely
    }
    auto iter = index_map.find(index);
    if (iter == index_map.end()) {
      // First occurrence: assign the next slot and copy the value slice.
      index_map[index] = unique_indices_size;
      unique_grad->indices_[unique_indices_size] = index;
      size_t start_index = unique_indices_size * outer_dim;
      size_t end_index = start_index + outer_dim;
      for (size_t j = start_index, k = i * outer_dim; j < end_index; ++j, ++k) {
        unique_grad->value_[j] = origin_sparse_grad.value_[k];
      }
      unique_indices_size++;
    } else {
      // Duplicate: accumulate into the slot assigned at first occurrence.
      size_t first_index = iter->second;
      size_t start_index = first_index * outer_dim;
      size_t end_index = start_index + outer_dim;
      for (size_t j = start_index, k = i * outer_dim; j < end_index; ++j, ++k) {
        unique_grad->value_[j] += origin_sparse_grad.value_[k];
      }
    }
  }
  unique_grad->indices_size_ = unique_indices_size;
}
  535. void ReduceSparseGradient(const SparseGradient &origin_sparse_grad, SparseGradient *unique_grad, size_t first_dim,
  536. size_t outer_dim) {
  537. MS_EXCEPTION_IF_NULL(origin_sparse_grad.value_);
  538. MS_EXCEPTION_IF_NULL(origin_sparse_grad.indices_);
  539. MS_EXCEPTION_IF_NULL(unique_grad);
  540. MS_EXCEPTION_IF_NULL(unique_grad->value_);
  541. MS_EXCEPTION_IF_NULL(unique_grad->indices_);
  542. size_t unique_indices_size = 0;
  543. std::vector<std::pair<int, size_t>> sorted_indices;
  544. sorted_indices.reserve(origin_sparse_grad.indices_size_);
  545. for (size_t i = 0; i < origin_sparse_grad.indices_size_; ++i) {
  546. int index = origin_sparse_grad.indices_[i];
  547. if (index < 0 || IntToSize(index) >= first_dim) {
  548. continue;
  549. }
  550. sorted_indices.emplace_back(std::pair<int, size_t>(index, i * outer_dim));
  551. }
  552. std::sort(
  553. sorted_indices.begin(), sorted_indices.end(),
  554. [](const std::pair<int, size_t> &left, const std::pair<int, size_t> &right) { return left.first < right.first; });
  555. int last_index = 0;
  556. size_t indices_size = sorted_indices.size();
  557. size_t start_index = 0;
  558. size_t end_index = outer_dim;
  559. size_t dst_len = indices_size * outer_dim;
  560. for (size_t i = 0; i < indices_size; ++i) {
  561. int index = sorted_indices[i].first;
  562. if (i == 0 || last_index != index) {
  563. if (i > 0 && last_index != index) {
  564. unique_indices_size++;
  565. start_index += outer_dim;
  566. end_index += outer_dim;
  567. }
  568. unique_grad->indices_[unique_indices_size] = index;
  569. auto ret_code = memcpy_s(unique_grad->value_ + start_index, dst_len - start_index,
  570. origin_sparse_grad.value_ + sorted_indices[i].second, outer_dim);
  571. if (ret_code != EOK) {
  572. MS_LOG(EXCEPTION) << "Failed to copy data!";
  573. }
  574. } else {
  575. for (size_t j = start_index, k = sorted_indices[i].second; j < end_index; ++j, ++k) {
  576. unique_grad->value_[j] += origin_sparse_grad.value_[k];
  577. }
  578. }
  579. last_index = index;
  580. }
  581. unique_grad->indices_size_ = unique_indices_size + 1;
  582. }
  583. } // namespace kernel
  584. } // namespace mindspore