You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number; they can include dashes ('-') and can be up to 35 characters long.

benchmark_unified_api.cc 37 kB

4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949
  1. /**
  2. * Copyright 2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "tools/benchmark/benchmark_unified_api.h"
  17. #define __STDC_FORMAT_MACROS
  18. #include <cinttypes>
  19. #undef __STDC_FORMAT_MACROS
  20. #include <algorithm>
  21. #include <utility>
  22. #include <functional>
  23. #include "include/context.h"
  24. #include "include/ms_tensor.h"
  25. #include "include/version.h"
  26. #include "schema/model_generated.h"
  27. #include "src/common/common.h"
  28. #include "src/tensor.h"
  29. #include "tools/common/string_util.h"
  30. #ifdef ENABLE_ARM64
  31. #include <linux/perf_event.h>
  32. #include <sys/ioctl.h>
  33. #include <asm/unistd.h>
  34. #include <unistd.h>
  35. #endif
  36. namespace mindspore {
// Upper bound on how many tensor elements DataToString renders.
constexpr size_t kDataToStringMaxNum = 40;
// Upper bound on how many input elements PrintInputData prints per tensor.
constexpr int kPrintDataNum = 20;
// Default frequency level applied to the Kirin NPU device.
constexpr int kFrequencyDefault = 3;
// Multiplier converting a [0,1] bias ratio into a percentage.
constexpr int kPercentageDivisor = 100;
// Dump-mode selector values (names suggest 0 = inputs+outputs, 2 = outputs
// only; usage is outside this chunk — confirm against the dump config code).
constexpr int kDumpInputsAndOutputs = 0;
constexpr int kDumpOutputs = 2;
  43. namespace lite {
  44. int BenchmarkUnifiedApi::GenerateInputData() {
  45. for (auto &tensor : ms_inputs_for_api_) {
  46. if (static_cast<int>(tensor.DataType()) == kObjectTypeString) {
  47. MSTensor *input = MSTensor::StringsToTensor(tensor.Name(), {"you're the best."});
  48. if (input == nullptr) {
  49. std::cerr << "StringsToTensor failed" << std::endl;
  50. MS_LOG(ERROR) << "StringsToTensor failed";
  51. return RET_ERROR;
  52. }
  53. tensor = *input;
  54. } else {
  55. auto input_data = tensor.MutableData();
  56. if (input_data == nullptr) {
  57. MS_LOG(ERROR) << "MallocData for inTensor failed";
  58. return RET_ERROR;
  59. }
  60. int status = GenerateRandomData(tensor.DataSize(), input_data, static_cast<int>(tensor.DataType()));
  61. if (status != RET_OK) {
  62. std::cerr << "GenerateRandomData for inTensor failed: " << status << std::endl;
  63. MS_LOG(ERROR) << "GenerateRandomData for inTensor failed:" << status;
  64. return status;
  65. }
  66. }
  67. }
  68. return RET_OK;
  69. }
  70. int BenchmarkUnifiedApi::ReadInputFile() {
  71. if (ms_inputs_for_api_.empty()) {
  72. return RET_OK;
  73. }
  74. if (this->flags_->in_data_type_ == kImage) {
  75. MS_LOG(ERROR) << "Not supported image input";
  76. return RET_ERROR;
  77. } else {
  78. for (size_t i = 0; i < flags_->input_data_list_.size(); i++) {
  79. auto &cur_tensor = ms_inputs_for_api_.at(i);
  80. MS_ASSERT(cur_tensor != nullptr);
  81. size_t size;
  82. char *bin_buf = ReadFile(flags_->input_data_list_[i].c_str(), &size);
  83. if (bin_buf == nullptr) {
  84. MS_LOG(ERROR) << "ReadFile return nullptr";
  85. return RET_ERROR;
  86. }
  87. if (static_cast<int>(cur_tensor.DataType()) == kObjectTypeString) {
  88. std::string str(bin_buf, size);
  89. MSTensor *input = MSTensor::StringsToTensor(cur_tensor.Name(), {str});
  90. if (input == nullptr) {
  91. std::cerr << "StringsToTensor failed" << std::endl;
  92. MS_LOG(ERROR) << "StringsToTensor failed";
  93. delete[] bin_buf;
  94. return RET_ERROR;
  95. }
  96. cur_tensor = *input;
  97. } else {
  98. auto tensor_data_size = cur_tensor.DataSize();
  99. if (size != tensor_data_size) {
  100. std::cerr << "Input binary file size error, required: " << tensor_data_size << ", in fact: " << size
  101. << std::endl;
  102. MS_LOG(ERROR) << "Input binary file size error, required: " << tensor_data_size << ", in fact: " << size;
  103. delete[] bin_buf;
  104. return RET_ERROR;
  105. }
  106. auto input_data = cur_tensor.MutableData();
  107. if (input_data == nullptr) {
  108. MS_LOG(ERROR) << "input_data is nullptr.";
  109. return RET_ERROR;
  110. }
  111. memcpy(input_data, bin_buf, tensor_data_size);
  112. }
  113. delete[] bin_buf;
  114. }
  115. }
  116. return RET_OK;
  117. }
  118. int BenchmarkUnifiedApi::ReadTensorData(std::ifstream &in_file_stream, const std::string &tensor_name,
  119. const std::vector<size_t> &dims) {
  120. std::string line;
  121. getline(in_file_stream, line);
  122. std::stringstream line_stream(line);
  123. if (this->benchmark_data_.find(tensor_name) != this->benchmark_data_.end()) {
  124. return RET_OK;
  125. }
  126. mindspore::MSTensor tensor = ms_model_.GetOutputByTensorName(tensor_name);
  127. if (tensor == nullptr) {
  128. MS_LOG(ERROR) << "Get tensor failed, tensor name: " << tensor_name;
  129. return RET_ERROR;
  130. }
  131. std::vector<float> data;
  132. std::vector<std::string> strings_data;
  133. size_t shape_size = std::accumulate(dims.begin(), dims.end(), 1, std::multiplies<size_t>());
  134. if (static_cast<int>(tensor.DataType()) == kObjectTypeString) {
  135. strings_data.push_back(line);
  136. for (size_t i = 1; i < shape_size; i++) {
  137. getline(in_file_stream, line);
  138. strings_data.push_back(line);
  139. }
  140. } else {
  141. for (size_t i = 0; i < shape_size; i++) {
  142. float tmp_data;
  143. line_stream >> tmp_data;
  144. data.push_back(tmp_data);
  145. }
  146. }
  147. auto *check_tensor = new (std::nothrow) CheckTensor(dims, data, strings_data);
  148. if (check_tensor == nullptr) {
  149. MS_LOG(ERROR) << "New CheckTensor failed, tensor name: " << tensor_name;
  150. return RET_ERROR;
  151. }
  152. this->benchmark_tensor_names_.push_back(tensor_name);
  153. this->benchmark_data_.insert(std::make_pair(tensor_name, check_tensor));
  154. return RET_OK;
  155. }
  156. void BenchmarkUnifiedApi::UpdateDistributionModelName(const std::shared_ptr<mindspore::Context> &context,
  157. std::string *name) {
  158. if (flags_->device_ != "GPU") {
  159. return;
  160. }
  161. auto device_info = context->MutableDeviceInfo().front();
  162. GPUDeviceInfo *gpu_info = reinterpret_cast<GPUDeviceInfo *>(device_info.get());
  163. auto rank_id = gpu_info->GetRankID();
  164. if (rank_id == 0) {
  165. return;
  166. }
  167. *name = name->replace(name->find("."), sizeof('.'), to_string(rank_id) + ".");
  168. return;
  169. }
  170. void BenchmarkUnifiedApi::InitMSContext(const std::shared_ptr<mindspore::Context> &context) {
  171. context->SetThreadNum(flags_->num_threads_);
  172. context->SetEnableParallel(flags_->enable_parallel_);
  173. context->SetThreadAffinity(flags_->cpu_bind_mode_);
  174. auto &device_list = context->MutableDeviceInfo();
  175. if (flags_->device_ == "GPU") {
  176. std::shared_ptr<GPUDeviceInfo> gpu_device_info = std::make_shared<GPUDeviceInfo>();
  177. gpu_device_info->SetEnableFP16(flags_->enable_fp16_);
  178. device_list.push_back(gpu_device_info);
  179. }
  180. if (flags_->device_ == "NPU") {
  181. std::shared_ptr<KirinNPUDeviceInfo> npu_device_info = std::make_shared<KirinNPUDeviceInfo>();
  182. npu_device_info->SetFrequency(kFrequencyDefault);
  183. device_list.push_back(npu_device_info);
  184. }
  185. if (flags_->device_ == "Ascend310") {
  186. std::shared_ptr<Ascend310DeviceInfo> ascend310_device_info = std::make_shared<Ascend310DeviceInfo>();
  187. ascend310_device_info->SetDeviceID(0);
  188. device_list.push_back(ascend310_device_info);
  189. }
  190. // CPU priority is behind GPU and NPU
  191. std::shared_ptr<CPUDeviceInfo> device_info = std::make_shared<CPUDeviceInfo>();
  192. device_info->SetEnableFP16(flags_->enable_fp16_);
  193. device_list.push_back(device_info);
  194. }
  195. int BenchmarkUnifiedApi::CompareOutput() {
  196. std::cout << "================ Comparing Output data ================" << std::endl;
  197. float total_bias = 0;
  198. int total_size = 0;
  199. // check the output tensor name.
  200. if (this->benchmark_tensor_names_ != ms_model_.GetOutputTensorNames()) {
  201. MS_LOG(ERROR) << "The output tensor name is wrong.";
  202. return RET_ERROR;
  203. }
  204. for (const auto &calib_tensor : benchmark_data_) {
  205. std::string tensor_name = calib_tensor.first;
  206. mindspore::MSTensor tensor = ms_model_.GetOutputByTensorName(tensor_name);
  207. if (tensor == nullptr) {
  208. MS_LOG(ERROR) << "Get tensor failed, tensor name: " << tensor_name;
  209. return RET_ERROR;
  210. }
  211. int ret;
  212. if (static_cast<int>(tensor.DataType()) == kObjectTypeString) {
  213. std::vector<std::string> output_strings = MSTensor::TensorToStrings(tensor);
  214. ret = CompareStringData(tensor_name, calib_tensor.second->strings_data, output_strings);
  215. } else {
  216. ret = CompareDataGetTotalBiasAndSize(tensor_name, &tensor, &total_bias, &total_size);
  217. }
  218. if (ret != RET_OK) {
  219. MS_LOG(ERROR) << "Error in CompareData";
  220. std::cerr << "Error in CompareData" << std::endl;
  221. std::cout << "=======================================================" << std::endl << std::endl;
  222. return ret;
  223. }
  224. }
  225. float mean_bias;
  226. if (total_size != 0) {
  227. mean_bias = ((total_bias / float_t(total_size)) * kPercentageDivisor);
  228. } else {
  229. mean_bias = 0;
  230. }
  231. std::cout << "Mean bias of all nodes/tensors: " << mean_bias << "%" << std::endl;
  232. std::cout << "=======================================================" << std::endl << std::endl;
  233. if (mean_bias > this->flags_->accuracy_threshold_) {
  234. MS_LOG(ERROR) << "Mean bias of all nodes/tensors is too big: " << mean_bias << "%";
  235. std::cerr << "Mean bias of all nodes/tensors is too big: " << mean_bias << "%" << std::endl;
  236. return RET_ERROR;
  237. }
  238. return RET_OK;
  239. }
  240. int BenchmarkUnifiedApi::CompareDataGetTotalBiasAndSize(const std::string &name, mindspore::MSTensor *tensor,
  241. float *total_bias, int *total_size) {
  242. float bias = 0;
  243. auto mutableData = tensor->MutableData();
  244. if (mutableData == nullptr) {
  245. MS_LOG(ERROR) << "mutableData is nullptr.";
  246. return RET_ERROR;
  247. }
  248. switch (static_cast<int>(tensor->DataType())) {
  249. case TypeId::kNumberTypeFloat:
  250. case TypeId::kNumberTypeFloat32: {
  251. bias = CompareData<float, int64_t>(name, tensor->Shape(), mutableData);
  252. break;
  253. }
  254. case TypeId::kNumberTypeInt8: {
  255. bias = CompareData<int8_t, int64_t>(name, tensor->Shape(), mutableData);
  256. break;
  257. }
  258. case TypeId::kNumberTypeUInt8: {
  259. bias = CompareData<uint8_t, int64_t>(name, tensor->Shape(), mutableData);
  260. break;
  261. }
  262. case TypeId::kNumberTypeInt32: {
  263. bias = CompareData<int32_t, int64_t>(name, tensor->Shape(), mutableData);
  264. break;
  265. }
  266. case TypeId::kNumberTypeInt16: {
  267. bias = CompareData<int16_t, int64_t>(name, tensor->Shape(), mutableData);
  268. break;
  269. }
  270. case TypeId::kNumberTypeBool: {
  271. bias = CompareData<bool, int64_t>(name, tensor->Shape(), mutableData);
  272. break;
  273. }
  274. default:
  275. MS_LOG(ERROR) << "Datatype " << static_cast<int>(tensor->DataType()) << " is not supported.";
  276. return RET_ERROR;
  277. }
  278. if (bias < 0) {
  279. MS_LOG(ERROR) << "CompareData failed, name: " << name;
  280. return RET_ERROR;
  281. }
  282. *total_bias += bias;
  283. *total_size += 1;
  284. return RET_OK;
  285. }
  286. int BenchmarkUnifiedApi::MarkPerformance() {
  287. MS_LOG(INFO) << "Running warm up loops...";
  288. std::cout << "Running warm up loops..." << std::endl;
  289. std::vector<MSTensor> outputs;
  290. for (int i = 0; i < flags_->warm_up_loop_count_; i++) {
  291. auto status = ms_model_.Predict(ms_inputs_for_api_, &outputs);
  292. if (status != kSuccess) {
  293. MS_LOG(ERROR) << "Inference error ";
  294. std::cerr << "Inference error " << std::endl;
  295. return RET_ERROR;
  296. }
  297. }
  298. MS_LOG(INFO) << "Running benchmark loops...";
  299. std::cout << "Running benchmark loops..." << std::endl;
  300. uint64_t time_min = 1000000;
  301. uint64_t time_max = 0;
  302. uint64_t time_avg = 0;
  303. for (int i = 0; i < flags_->loop_count_; i++) {
  304. auto inputs = ms_model_.GetInputs();
  305. for (auto tensor : inputs) {
  306. tensor.MutableData(); // prepare data
  307. }
  308. auto start = GetTimeUs();
  309. auto status = ms_model_.Predict(ms_inputs_for_api_, &outputs, ms_before_call_back_, ms_after_call_back_);
  310. if (status != kSuccess) {
  311. MS_LOG(ERROR) << "Inference error ";
  312. std::cerr << "Inference error ";
  313. return RET_ERROR;
  314. }
  315. auto end = GetTimeUs();
  316. auto time = end - start;
  317. time_min = std::min(time_min, time);
  318. time_max = std::max(time_max, time);
  319. time_avg += time;
  320. }
  321. if (flags_->time_profiling_) {
  322. const std::vector<std::string> per_op_name = {"opName", "avg(ms)", "percent", "calledTimes", "opTotalTime"};
  323. const std::vector<std::string> per_op_type = {"opType", "avg(ms)", "percent", "calledTimes", "opTotalTime"};
  324. PrintResult(per_op_name, op_times_by_name_);
  325. PrintResult(per_op_type, op_times_by_type_);
  326. #ifdef ENABLE_ARM64
  327. } else if (flags_->perf_profiling_) {
  328. if (flags_->perf_event_ == "CACHE") {
  329. const std::vector<std::string> per_op_name = {"opName", "cache ref(k)", "cache ref(%)", "miss(k)", "miss(%)"};
  330. const std::vector<std::string> per_op_type = {"opType", "cache ref(k)", "cache ref(%)", "miss(k)", "miss(%)"};
  331. PrintPerfResult(per_op_name, op_perf_by_name_);
  332. PrintPerfResult(per_op_type, op_perf_by_type_);
  333. } else if (flags_->perf_event_ == "STALL") {
  334. const std::vector<std::string> per_op_name = {"opName", "frontend(k)", "frontend(%)", "backendend(k)",
  335. "backendend(%)"};
  336. const std::vector<std::string> per_op_type = {"opType", "frontend(k)", "frontend(%)", "backendend(k)",
  337. "backendend(%)"};
  338. PrintPerfResult(per_op_name, op_perf_by_name_);
  339. PrintPerfResult(per_op_type, op_perf_by_type_);
  340. } else {
  341. const std::vector<std::string> per_op_name = {"opName", "cycles(k)", "cycles(%)", "ins(k)", "ins(%)"};
  342. const std::vector<std::string> per_op_type = {"opType", "cycles(k)", "cycles(%)", "ins(k)", "ins(%)"};
  343. PrintPerfResult(per_op_name, op_perf_by_name_);
  344. PrintPerfResult(per_op_type, op_perf_by_type_);
  345. }
  346. #endif
  347. }
  348. if (flags_->loop_count_ > 0) {
  349. time_avg /= flags_->loop_count_;
  350. MS_LOG(INFO) << "Model = " << flags_->model_file_.substr(flags_->model_file_.find_last_of(DELIM_SLASH) + 1).c_str()
  351. << ", NumThreads = " << flags_->num_threads_ << ", MinRunTime = " << time_min / kFloatMSEC
  352. << ", MaxRuntime = " << time_max / kFloatMSEC << ", AvgRunTime = " << time_avg / kFloatMSEC;
  353. printf("Model = %s, NumThreads = %d, MinRunTime = %f ms, MaxRuntime = %f ms, AvgRunTime = %f ms\n",
  354. flags_->model_file_.substr(flags_->model_file_.find_last_of(DELIM_SLASH) + 1).c_str(), flags_->num_threads_,
  355. time_min / kFloatMSEC, time_max / kFloatMSEC, time_avg / kFloatMSEC);
  356. }
  357. return RET_OK;
  358. }
  359. int BenchmarkUnifiedApi::MarkAccuracy() {
  360. MS_LOG(INFO) << "MarkAccuracy";
  361. std::cout << "MarkAccuracy" << std::endl;
  362. auto status = PrintInputData();
  363. if (status != RET_OK) {
  364. MS_LOG(ERROR) << "PrintInputData error " << status;
  365. std::cerr << "PrintInputData error " << status << std::endl;
  366. return status;
  367. }
  368. std::vector<MSTensor> outputs;
  369. auto ret = ms_model_.Predict(ms_inputs_for_api_, &outputs, ms_before_call_back_, ms_after_call_back_);
  370. if (ret != kSuccess) {
  371. MS_LOG(ERROR) << "Inference error ";
  372. std::cerr << "Inference error " << std::endl;
  373. return RET_ERROR;
  374. }
  375. status = ReadCalibData();
  376. if (status != RET_OK) {
  377. MS_LOG(ERROR) << "Read calib data error " << status;
  378. std::cerr << "Read calib data error " << status << std::endl;
  379. return status;
  380. }
  381. auto use_cosine_distance_threshold = getenv("COSINE_DISTANCE_THRESHOLD");
  382. if (use_cosine_distance_threshold != nullptr) {
  383. double cosine_distance_threshold;
  384. auto ret_bool = lite::ConvertDoubleNum(use_cosine_distance_threshold, &cosine_distance_threshold);
  385. if (!ret_bool) {
  386. MS_LOG(ERROR) << "Compare output error " << status;
  387. std::cerr << "Compare output error " << status << std::endl;
  388. return RET_ERROR;
  389. }
  390. status = CompareOutputByCosineDistance(static_cast<float>(cosine_distance_threshold));
  391. if (status != RET_OK) {
  392. MS_LOG(ERROR) << "Compare output error by consine distance" << status;
  393. std::cerr << "Compare output error by consine distance" << status << std::endl;
  394. return status;
  395. }
  396. } else {
  397. status = CompareOutput();
  398. if (status != RET_OK) {
  399. MS_LOG(ERROR) << "Compare output error " << status;
  400. std::cerr << "Compare output error " << status << std::endl;
  401. return status;
  402. }
  403. }
  404. return RET_OK;
  405. }
  406. int BenchmarkUnifiedApi::PrintInputData() {
  407. for (size_t i = 0; i < ms_inputs_for_api_.size(); i++) {
  408. auto input = ms_inputs_for_api_[i];
  409. MS_ASSERT(input != nullptr);
  410. auto tensor_data_type = static_cast<int>(input.DataType());
  411. std::cout << "InData" << i << ": ";
  412. if (tensor_data_type == TypeId::kObjectTypeString) {
  413. std::vector<std::string> output_strings = MSTensor::TensorToStrings(input);
  414. size_t print_num = std::min(output_strings.size(), static_cast<size_t>(20));
  415. for (size_t j = 0; j < print_num; j++) {
  416. std::cout << output_strings[j] << std::endl;
  417. }
  418. continue;
  419. }
  420. size_t print_num = std::min(static_cast<int>(input.ElementNum()), kPrintDataNum);
  421. const void *in_data = input.MutableData();
  422. if (in_data == nullptr) {
  423. MS_LOG(ERROR) << "in_data is nullptr.";
  424. return RET_ERROR;
  425. }
  426. for (size_t j = 0; j < print_num; j++) {
  427. if (tensor_data_type == TypeId::kNumberTypeFloat32 || tensor_data_type == TypeId::kNumberTypeFloat) {
  428. std::cout << static_cast<const float *>(in_data)[j] << " ";
  429. } else if (tensor_data_type == TypeId::kNumberTypeInt8) {
  430. std::cout << static_cast<const int8_t *>(in_data)[j] << " ";
  431. } else if (tensor_data_type == TypeId::kNumberTypeUInt8) {
  432. std::cout << static_cast<const uint8_t *>(in_data)[j] << " ";
  433. } else if (tensor_data_type == TypeId::kNumberTypeInt32) {
  434. std::cout << static_cast<const int32_t *>(in_data)[j] << " ";
  435. } else if (tensor_data_type == TypeId::kNumberTypeInt64) {
  436. std::cout << static_cast<const int64_t *>(in_data)[j] << " ";
  437. } else if (tensor_data_type == TypeId::kNumberTypeBool) {
  438. std::cout << static_cast<const bool *>(in_data)[j] << " ";
  439. } else {
  440. MS_LOG(ERROR) << "Datatype: " << tensor_data_type << " is not supported.";
  441. return RET_ERROR;
  442. }
  443. }
  444. std::cout << std::endl;
  445. }
  446. return RET_OK;
  447. }
  448. int BenchmarkUnifiedApi::RunBenchmark() {
  449. auto start_prepare_time = GetTimeUs();
  450. // Load graph
  451. std::string model_name = flags_->model_file_.substr(flags_->model_file_.find_last_of(DELIM_SLASH) + 1);
  452. mindspore::ModelType model_type = ModelTypeMap.at(flags_->model_type_);
  453. MS_LOG(INFO) << "start unified benchmark run";
  454. std::cout << "start unified benchmark run" << std::endl;
  455. auto context = std::make_shared<mindspore::Context>();
  456. if (context == nullptr) {
  457. MS_LOG(ERROR) << "New context failed while running " << model_name.c_str();
  458. std::cerr << "New context failed while running " << model_name.c_str() << std::endl;
  459. return RET_ERROR;
  460. }
  461. (void)InitMSContext(context);
  462. (void)UpdateDistributionModelName(context, &model_name);
  463. if (!flags_->config_file_.empty()) {
  464. auto config_ret = ms_model_.LoadConfig(flags_->config_file_);
  465. if (config_ret != kSuccess) {
  466. MS_LOG(ERROR) << "ms_model_.LoadConfig failed while running ", model_name.c_str();
  467. std::cout << "ms_model_.LoadConfig failed while running ", model_name.c_str();
  468. }
  469. }
  470. auto ret = ms_model_.Build(model_name, model_type, context);
  471. if (ret != kSuccess) {
  472. MS_LOG(ERROR) << "ms_model_.Build failed while running ", model_name.c_str();
  473. std::cout << "ms_model_.Build failed while running ", model_name.c_str();
  474. return RET_ERROR;
  475. }
  476. if (!flags_->resize_dims_.empty()) {
  477. std::vector<std::vector<int64_t>> resize_dims;
  478. (void)std::transform(flags_->resize_dims_.begin(), flags_->resize_dims_.end(), std::back_inserter(resize_dims),
  479. [&](auto &shapes) { return this->ConverterToInt64Vector<int>(shapes); });
  480. ret = ms_model_.Resize(ms_model_.GetInputs(), resize_dims);
  481. if (ret != kSuccess) {
  482. MS_LOG(ERROR) << "Input tensor resize failed.";
  483. std::cout << "Input tensor resize failed.";
  484. return RET_ERROR;
  485. }
  486. }
  487. ms_inputs_for_api_ = ms_model_.GetInputs();
  488. auto end_prepare_time = GetTimeUs();
  489. MS_LOG(INFO) << "PrepareTime = " << ((end_prepare_time - start_prepare_time) / kFloatMSEC) << " ms";
  490. std::cout << "PrepareTime = " << ((end_prepare_time - start_prepare_time) / kFloatMSEC) << " ms" << std::endl;
  491. // Load input
  492. MS_LOG(INFO) << "start generate input data";
  493. auto status = LoadInput();
  494. if (status != 0) {
  495. MS_LOG(ERROR) << "Generate input data error";
  496. return status;
  497. }
  498. if (!flags_->benchmark_data_file_.empty()) {
  499. status = MarkAccuracy();
  500. for (auto &data : benchmark_data_) {
  501. data.second->shape.clear();
  502. data.second->data.clear();
  503. delete data.second;
  504. data.second = nullptr;
  505. }
  506. benchmark_data_.clear();
  507. if (status != 0) {
  508. MS_LOG(ERROR) << "Run MarkAccuracy error: " << status;
  509. std::cout << "Run MarkAccuracy error: " << status << std::endl;
  510. return status;
  511. }
  512. } else {
  513. status = MarkPerformance();
  514. if (status != 0) {
  515. MS_LOG(ERROR) << "Run MarkPerformance error: " << status;
  516. std::cout << "Run MarkPerformance error: " << status << std::endl;
  517. return status;
  518. }
  519. }
  520. if (flags_->dump_tensor_data_) {
  521. std::cout << "Dumped file is saved to : " + dump_file_output_dir_ << std::endl;
  522. }
  523. return RET_OK;
  524. }
  525. int BenchmarkUnifiedApi::InitTimeProfilingCallbackParameter() {
  526. // before callback
  527. ms_before_call_back_ = [&](const std::vector<mindspore::MSTensor> &before_inputs,
  528. const std::vector<mindspore::MSTensor> &before_outputs,
  529. const MSCallBackParam &call_param) {
  530. if (before_inputs.empty()) {
  531. MS_LOG(INFO) << "The num of beforeInputs is empty";
  532. }
  533. if (before_outputs.empty()) {
  534. MS_LOG(INFO) << "The num of beforeOutputs is empty";
  535. }
  536. if (op_times_by_type_.find(call_param.node_type) == op_times_by_type_.end()) {
  537. op_times_by_type_.insert(std::make_pair(call_param.node_type, std::make_pair(0, 0.0f)));
  538. }
  539. if (op_times_by_name_.find(call_param.node_name) == op_times_by_name_.end()) {
  540. op_times_by_name_.insert(std::make_pair(call_param.node_name, std::make_pair(0, 0.0f)));
  541. }
  542. op_call_times_total_++;
  543. op_begin_ = GetTimeUs();
  544. return true;
  545. };
  546. // after callback
  547. ms_after_call_back_ = [&](const std::vector<mindspore::MSTensor> &after_inputs,
  548. const std::vector<mindspore::MSTensor> &after_outputs, const MSCallBackParam &call_param) {
  549. uint64_t opEnd = GetTimeUs();
  550. if (after_inputs.empty()) {
  551. MS_LOG(INFO) << "The num of after inputs is empty";
  552. }
  553. if (after_outputs.empty()) {
  554. MS_LOG(INFO) << "The num of after outputs is empty";
  555. }
  556. float cost = static_cast<float>(opEnd - op_begin_) / kFloatMSEC;
  557. if (flags_->device_ == "GPU") {
  558. auto gpu_param = reinterpret_cast<const GPUCallBackParam &>(call_param);
  559. cost = static_cast<float>(gpu_param.execute_time);
  560. }
  561. op_cost_total_ += cost;
  562. op_times_by_type_[call_param.node_type].first++;
  563. op_times_by_type_[call_param.node_type].second += cost;
  564. op_times_by_name_[call_param.node_name].first++;
  565. op_times_by_name_[call_param.node_name].second += cost;
  566. return true;
  567. };
  568. return RET_OK;
  569. }
  570. int BenchmarkUnifiedApi::InitPerfProfilingCallbackParameter() {
  571. #ifndef ENABLE_ARM64
  572. MS_LOG(ERROR) << "Only support perf_profiling on arm64.";
  573. return RET_ERROR;
  574. #else
  575. struct perf_event_attr pe, pe2;
  576. memset(&pe, 0, sizeof(struct perf_event_attr));
  577. memset(&pe2, 0, sizeof(struct perf_event_attr));
  578. pe.type = PERF_TYPE_HARDWARE;
  579. pe2.type = PERF_TYPE_HARDWARE;
  580. pe.size = sizeof(struct perf_event_attr);
  581. pe2.size = sizeof(struct perf_event_attr);
  582. pe.disabled = 1;
  583. pe2.disabled = 1;
  584. pe.exclude_kernel = 1; // don't count kernel
  585. pe2.exclude_kernel = 1; // don't count kernel
  586. pe.exclude_hv = 1; // don't count hypervisor
  587. pe2.exclude_hv = 1; // don't count hypervisor
  588. pe.read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID;
  589. pe2.read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID;
  590. if (flags_->perf_event_ == "CACHE") {
  591. pe.config = PERF_COUNT_HW_CACHE_REFERENCES;
  592. pe2.config = PERF_COUNT_HW_CACHE_MISSES;
  593. } else if (flags_->perf_event_ == "STALL") {
  594. pe.config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND;
  595. pe2.config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND;
  596. } else {
  597. pe.config = PERF_COUNT_HW_CPU_CYCLES;
  598. pe2.config = PERF_COUNT_HW_INSTRUCTIONS;
  599. }
  600. perf_fd = syscall(__NR_perf_event_open, pe, 0, -1, -1, 0);
  601. if (perf_fd == -1) {
  602. MS_LOG(ERROR) << "Failed to open perf event " << pe.config;
  603. return RET_ERROR;
  604. }
  605. perf_fd2 = syscall(__NR_perf_event_open, pe2, 0, -1, perf_fd, 0);
  606. if (perf_fd2 == -1) {
  607. MS_LOG(ERROR) << "Failed to open perf event " << pe2.config;
  608. return RET_ERROR;
  609. }
  610. struct PerfCount zero;
  611. zero.value[0] = 0;
  612. zero.value[1] = 0;
  613. // before callback
  614. ms_before_call_back_ = [&](const std::vector<mindspore::MSTensor> &before_inputs,
  615. const std::vector<mindspore::MSTensor> &before_outputs,
  616. const MSCallBackParam &call_param) {
  617. if (before_inputs.empty()) {
  618. MS_LOG(INFO) << "The num of beforeInputs is empty";
  619. }
  620. if (before_outputs.empty()) {
  621. MS_LOG(INFO) << "The num of beforeOutputs is empty";
  622. }
  623. if (op_perf_by_type_.find(call_param.node_type) == op_perf_by_type_.end()) {
  624. op_perf_by_type_.insert(std::make_pair(call_param.node_type, std::make_pair(0, zero)));
  625. }
  626. if (op_perf_by_name_.find(call_param.node_name) == op_perf_by_name_.end()) {
  627. op_perf_by_name_.insert(std::make_pair(call_param.node_name, std::make_pair(0, zero)));
  628. }
  629. op_call_times_total_++;
  630. ioctl(perf_fd, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP);
  631. ioctl(perf_fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP);
  632. return true;
  633. };
  634. // after callback
  635. ms_after_call_back_ = [&](const std::vector<mindspore::MSTensor> &after_inputs,
  636. const std::vector<mindspore::MSTensor> &after_outputs, const MSCallBackParam &call_param) {
  637. struct PerfResult res;
  638. ioctl(perf_fd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP);
  639. if (read(perf_fd, &res, sizeof(struct PerfResult)) == -1) {
  640. MS_LOG(ERROR) << "Failed to read perf_fd";
  641. return false;
  642. }
  643. if (after_inputs.empty()) {
  644. MS_LOG(INFO) << "The num of after inputs is empty";
  645. }
  646. if (after_outputs.empty()) {
  647. MS_LOG(INFO) << "The num of after outputs is empty";
  648. }
  649. float cost1 = static_cast<float>(res.values[0].value);
  650. float cost2 = static_cast<float>(res.values[1].value);
  651. op_cost_total_ += cost1;
  652. op_cost2_total_ += cost2;
  653. op_perf_by_type_[call_param.node_type].first++;
  654. op_perf_by_type_[call_param.node_type].second.value[0] += cost1;
  655. op_perf_by_type_[call_param.node_type].second.value[1] += cost2;
  656. op_perf_by_name_[call_param.node_name].first++;
  657. op_perf_by_name_[call_param.node_name].second.value[0] += cost1;
  658. op_perf_by_name_[call_param.node_name].second.value[1] += cost2;
  659. return true;
  660. };
  661. #endif
  662. return RET_OK;
  663. }
  664. namespace {
  665. template <typename T>
  666. std::string DataToString(void *data, size_t data_number) {
  667. if (data == nullptr) {
  668. return "Data of tensor is nullptr";
  669. }
  670. std::ostringstream oss;
  671. auto casted_data = static_cast<T *>(data);
  672. for (size_t i = 0; i < kDataToStringMaxNum && i < data_number; i++) {
  673. oss << " " << casted_data[i];
  674. }
  675. return oss.str();
  676. }
  677. std::string DumpMSTensor(mindspore::MSTensor *tensor) {
  678. if (tensor == nullptr) {
  679. return "Tensor is nullptr";
  680. }
  681. std::ostringstream oss;
  682. oss << " DataType: " << static_cast<int>(tensor->DataType());
  683. oss << " Shape:";
  684. for (auto &dim : tensor->Shape()) {
  685. oss << " " << dim;
  686. }
  687. oss << std::endl << " Data:";
  688. switch (static_cast<int>(tensor->DataType())) {
  689. case kNumberTypeFloat32: {
  690. oss << DataToString<float>(tensor->MutableData(), tensor->ElementNum());
  691. } break;
  692. case kNumberTypeFloat16: {
  693. oss << DataToString<int16_t>(tensor->MutableData(), tensor->ElementNum());
  694. } break;
  695. case kNumberTypeInt32: {
  696. oss << DataToString<int32_t>(tensor->MutableData(), tensor->ElementNum());
  697. } break;
  698. case kNumberTypeInt16: {
  699. oss << DataToString<int16_t>(tensor->MutableData(), tensor->ElementNum());
  700. } break;
  701. case kNumberTypeInt8: {
  702. oss << DataToString<int8_t>(tensor->MutableData(), tensor->ElementNum());
  703. } break;
  704. default:
  705. oss << "Unsupported data type to print";
  706. break;
  707. }
  708. return oss.str();
  709. }
  710. std::string GenerateOutputFileName(mindspore::MSTensor *tensor, const std::string &op_name,
  711. const std::string &file_type, const size_t &idx) {
  712. std::string file_name = op_name;
  713. auto pos = file_name.find_first_of('/');
  714. while (pos != std::string::npos) {
  715. file_name.replace(pos, 1, ".");
  716. pos = file_name.find_first_of('/');
  717. }
  718. file_name += "_" + file_type + "_" + std::to_string(idx) + "_shape_";
  719. for (const auto &dim : tensor->Shape()) {
  720. file_name += std::to_string(dim) + "_";
  721. }
  722. if (kTypeIdMap.find(static_cast<int>(tensor->DataType())) != kTypeIdMap.end()) {
  723. file_name += kTypeIdMap.at(static_cast<int>(tensor->DataType()));
  724. }
  725. file_name += +".bin";
  726. return file_name;
  727. }
  728. } // namespace
  729. int BenchmarkUnifiedApi::InitPrintTensorDataCallbackParameter() {
  730. // before callback
  731. ms_before_call_back_ = [&](const std::vector<mindspore::MSTensor> &before_inputs,
  732. const std::vector<mindspore::MSTensor> &before_outputs,
  733. const MSCallBackParam &call_param) { return true; };
  734. // after callback
  735. ms_after_call_back_ = [&](const std::vector<mindspore::MSTensor> &after_inputs,
  736. const std::vector<mindspore::MSTensor> &after_outputs, const MSCallBackParam &call_param) {
  737. std::cout << "================================================================" << std::endl;
  738. std::cout << call_param.node_name << " inputs : " << std::endl;
  739. for (auto ms_tensor : after_inputs) {
  740. std::cout << DumpMSTensor(&ms_tensor) << std::endl;
  741. }
  742. std::cout << "----------------------------------------------------------------" << std::endl;
  743. std::cout << call_param.node_name << " outputs : " << std::endl;
  744. for (auto ms_tensor : after_outputs) {
  745. std::cout << DumpMSTensor(&ms_tensor) << std::endl;
  746. }
  747. std::cout << "================================================================" << std::endl;
  748. return true;
  749. };
  750. return RET_OK;
  751. }
// Install session-run callbacks that dump selected kernels' input/output
// tensors as raw binary files under dump_file_output_dir_. Which kernels and
// which tensors are dumped is driven by the parsed dump config json
// (dump_cfg_json_). Returns RET_OK after installing both callbacks.
int BenchmarkUnifiedApi::InitDumpTensorDataCallbackParameter() {
  // before callback: dumps the kernel's input tensors
  ms_before_call_back_ = [&](const std::vector<mindspore::MSTensor> &before_inputs,
                             const std::vector<mindspore::MSTensor> &before_outputs,
                             const MSCallBackParam &call_param) {
    // Re-read the dump settings from the config json on every invocation.
    auto dump_mode = dump_cfg_json_[dump::kSettings][dump::kMode].get<int>();
    auto input_output_mode = dump_cfg_json_[dump::kSettings][dump::kInputOutput].get<int>();
    auto kernels = dump_cfg_json_[dump::kSettings][dump::kKernels].get<std::vector<std::string>>();
    // NOTE(review): the after-callback below spells these same checks with
    // named constants (kDumpInputsAndOutputs / kDumpOutputs); the raw 0/1 here
    // presumably mean "dump all kernels" and "inputs(+outputs)" — confirm the
    // constant values and unify both callbacks on the named constants.
    if (dump_mode == 0 || std::find(kernels.begin(), kernels.end(), call_param.node_name) != kernels.end()) {
      if (input_output_mode == 0 || input_output_mode == 1) {
        for (size_t i = 0; i < before_inputs.size(); i++) {
          // Copy is required: the vector is const but GenerateOutputFileName /
          // MutableData need a non-const tensor.
          auto ms_tensor = before_inputs.at(i);
          auto file_name = GenerateOutputFileName(&ms_tensor, call_param.node_name, "input", i);
          auto abs_file_path = dump_file_output_dir_ + "/" + file_name;
          if (WriteToBin(abs_file_path, ms_tensor.MutableData(), ms_tensor.DataSize()) != RET_OK) {  // save to file
            MS_LOG(ERROR) << "write tensor data to file failed.";
            return false;
          }
        }
      }
    }
    return true;
  };
  // after callback: dumps the kernel's output tensors
  ms_after_call_back_ = [&](const std::vector<mindspore::MSTensor> &after_inputs,
                            const std::vector<mindspore::MSTensor> &after_outputs,
                            const MSCallBackParam &call_param) {
    auto dump_mode = dump_cfg_json_[dump::kSettings][dump::kMode].get<int>();
    auto input_output_mode = dump_cfg_json_[dump::kSettings][dump::kInputOutput].get<int>();
    auto kernels = dump_cfg_json_[dump::kSettings][dump::kKernels].get<std::vector<std::string>>();
    // Dump when dumping everything, or when this kernel is explicitly listed.
    if (dump_mode == kDumpInputsAndOutputs ||
        std::find(kernels.begin(), kernels.end(), call_param.node_name) != kernels.end()) {
      if (input_output_mode == kDumpInputsAndOutputs || input_output_mode == kDumpOutputs) {
        for (size_t i = 0; i < after_outputs.size(); i++) {
          auto ms_tensor = after_outputs.at(i);
          auto file_name = GenerateOutputFileName(&ms_tensor, call_param.node_name, "output", i);
          auto abs_file_path = dump_file_output_dir_ + "/" + file_name;
          if (WriteToBin(abs_file_path, ms_tensor.MutableData(), ms_tensor.DataSize()) != RET_OK) {  // save to file
            MS_LOG(ERROR) << "write tensor data to file failed.";
            return false;
          }
        }
      }
    }
    return true;
  };
  return RET_OK;
}
  799. int BenchmarkUnifiedApi::CompareOutputByCosineDistance(float cosine_distance_threshold) {
  800. std::cout << "================ Comparing Output data by Cosine Distance================" << std::endl;
  801. float total_cosine_distance = 0;
  802. int total_size = 0;
  803. for (const auto &calib_tensor : benchmark_data_) {
  804. std::string tensor_name = calib_tensor.first;
  805. mindspore::MSTensor tensor = ms_model_.GetOutputByTensorName(tensor_name);
  806. if (tensor == nullptr) {
  807. MS_LOG(ERROR) << "Get tensor failed, tensor name: " << tensor_name;
  808. return RET_ERROR;
  809. }
  810. int ret;
  811. if (static_cast<int>(tensor.DataType()) == kObjectTypeString) {
  812. std::cerr << tensor.Name() << " data type is kObjectTypeString, can not compared data " << std::endl;
  813. return RET_ERROR;
  814. } else {
  815. ret = CompareDataGetTotalCosineDistanceAndSize(tensor_name, &tensor, &total_cosine_distance, &total_size);
  816. }
  817. if (ret != RET_OK) {
  818. MS_LOG(ERROR) << "Error in CompareData";
  819. std::cerr << "Error in CompareData" << std::endl;
  820. std::cout << "=======================================================" << std::endl << std::endl;
  821. return ret;
  822. }
  823. }
  824. float mean_cosine_distance;
  825. if (total_size != 0) {
  826. mean_cosine_distance = total_cosine_distance / float_t(total_size);
  827. } else {
  828. mean_cosine_distance = 0;
  829. }
  830. std::cout << "Cosine distance of all nodes/tensors: " << mean_cosine_distance << std::endl;
  831. std::cout << "=======================================================" << std::endl << std::endl;
  832. if (mean_cosine_distance < cosine_distance_threshold) {
  833. MS_LOG(ERROR) << "cosine distance of all nodes/tensors is too big: " << mean_cosine_distance;
  834. std::cerr << "Mean cosine distance of all nodes/tensors is too big: " << mean_cosine_distance << std::endl;
  835. return RET_ERROR;
  836. }
  837. return RET_OK;
  838. }
  839. int BenchmarkUnifiedApi::CompareDataGetTotalCosineDistanceAndSize(const std::string &name, mindspore::MSTensor *tensor,
  840. float *total_cosine_distance, int *total_size) {
  841. float cos = 0;
  842. auto mutableData = tensor->MutableData();
  843. if (mutableData == nullptr) {
  844. MS_LOG(ERROR) << "mutableData is nullptr.";
  845. return RET_ERROR;
  846. }
  847. int ret = RET_ERROR;
  848. switch (static_cast<int>(tensor->DataType())) {
  849. case TypeId::kNumberTypeFloat:
  850. case TypeId::kNumberTypeFloat32: {
  851. ret = CompareDatabyCosineDistance<float>(name, tensor->Shape(), mutableData, &cos);
  852. break;
  853. }
  854. case TypeId::kNumberTypeInt8: {
  855. ret = CompareDatabyCosineDistance<int8_t>(name, tensor->Shape(), mutableData, &cos);
  856. break;
  857. }
  858. case TypeId::kNumberTypeUInt8: {
  859. ret = CompareDatabyCosineDistance<uint8_t>(name, tensor->Shape(), mutableData, &cos);
  860. break;
  861. }
  862. case TypeId::kNumberTypeInt32: {
  863. ret = CompareDatabyCosineDistance<int32_t>(name, tensor->Shape(), mutableData, &cos);
  864. break;
  865. }
  866. case TypeId::kNumberTypeInt16: {
  867. ret = CompareDatabyCosineDistance<int16_t>(name, tensor->Shape(), mutableData, &cos);
  868. break;
  869. }
  870. case TypeId::kNumberTypeBool: {
  871. ret = CompareDatabyCosineDistance<bool>(name, tensor->Shape(), mutableData, &cos);
  872. break;
  873. }
  874. default:
  875. MS_LOG(ERROR) << "Datatype " << static_cast<int>(tensor->DataType()) << " is not supported.";
  876. return RET_ERROR;
  877. }
  878. if (ret != RET_OK) {
  879. MS_LOG(ERROR) << "CompareData failed, name: " << name;
  880. return RET_ERROR;
  881. }
  882. *total_cosine_distance += cos;
  883. *total_size += 1;
  884. return RET_OK;
  885. }
  886. BenchmarkUnifiedApi::~BenchmarkUnifiedApi() {}
  887. } // namespace lite
  888. } // namespace mindspore