You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

data_saver.cc 8.8 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223
  1. /**
  2. * Copyright 2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "profiler/device/gpu/data_saver.h"
  17. #include <fstream>
  18. #include <numeric>
  19. #include "utils/log_adapter.h"
  20. namespace mindspore {
  21. namespace profiler {
  22. namespace gpu {
  23. OpDetailInfo::OpDetailInfo(std::shared_ptr<OpInfo> op_info, float proportion)
  24. : op_info_(op_info), proportion_(proportion) {
  25. // op_full_name is like 'xxx/xxx/{op_type}-op{node_id}'
  26. op_full_name_ = op_info->op_name;
  27. auto op_type_begin_iter = op_full_name_.rfind('/') + 1;
  28. auto op_type_end_iter = op_full_name_.rfind('-');
  29. op_type_ = op_full_name_.substr(op_type_begin_iter, op_type_end_iter - op_type_begin_iter);
  30. op_name_ = op_full_name_.substr(op_type_begin_iter);
  31. op_avg_time_ = op_info->op_host_cost_time / op_info->op_count;
  32. }
  33. ActivityData::ActivityData(std::shared_ptr<Event> data) : basic_info_(data) {
  34. grid_dim_ = basic_info_->activity_type == ActivityType::kKernel
  35. ? "\"" + std::to_string(basic_info_->kernel_info.grid_x) + ',' +
  36. std::to_string(basic_info_->kernel_info.grid_y) + ',' +
  37. std::to_string(basic_info_->kernel_info.grid_z) + "\""
  38. : "";
  39. block_dim_ = basic_info_->activity_type == ActivityType::kKernel
  40. ? "\"" + std::to_string(basic_info_->kernel_info.block_x) + ',' +
  41. std::to_string(basic_info_->kernel_info.block_y) + ',' +
  42. std::to_string(basic_info_->kernel_info.block_z) + "\""
  43. : "";
  44. count_ = 1;
  45. total_duration_ = (basic_info_->end_time_stamp - basic_info_->start_time_stamp) / kTimeUnit;
  46. avg_duration_ = total_duration_;
  47. max_duration_ = total_duration_;
  48. min_duration_ = total_duration_;
  49. }
  50. ActivityData &ActivityData::operator+=(const ActivityData &other) {
  51. this->count_ += other.count_;
  52. this->total_duration_ += other.total_duration_;
  53. // update max or min duration
  54. if (other.total_duration_ > this->max_duration_) {
  55. this->max_duration_ = other.total_duration_;
  56. } else if (other.max_duration_ < this->min_duration_) {
  57. this->min_duration_ = other.total_duration_;
  58. }
  59. return *this;
  60. }
  61. void DataSaver::ParseOpInfo(const OpInfoMap &op_info_maps) {
  62. op_detail_infos_.reserve(op_info_maps.size());
  63. float total_time_sum = GetTotalOpTime(op_info_maps);
  64. for (auto item : op_info_maps) {
  65. float proportion = item.second.op_host_cost_time / total_time_sum;
  66. auto op_info = std::make_shared<OpInfo>(item.second);
  67. OpDetailInfo op_detail_info = OpDetailInfo(op_info, proportion);
  68. op_detail_infos_.emplace_back(op_detail_info);
  69. AddOpDetailInfoForType(op_detail_info);
  70. }
  71. // update average time of op type
  72. for (auto &op_type : op_type_infos_) {
  73. // device_infos: <type_name, op_type_info>
  74. op_type.second.avg_time_ = op_type.second.total_time_ / op_type.second.count_;
  75. }
  76. MS_LOG(DEBUG) << "Get " << op_detail_infos_.size() << " operation items.";
  77. MS_LOG(DEBUG) << "Get " << op_type_infos_.size() << " operation type items.";
  78. }
  79. void DataSaver::AddOpDetailInfoForType(const OpDetailInfo &op_detail_info) {
  80. // Construct OpType object according to op detail info
  81. OpType op_type = OpType{op_detail_info.op_type_, op_detail_info.op_info_->op_count,
  82. op_detail_info.op_info_->op_host_cost_time, 0, op_detail_info.proportion_};
  83. // Set the OpType into op_type_infos_ map
  84. std::string type_name = op_detail_info.op_type_;
  85. auto iter = op_type_infos_.find(type_name);
  86. if (iter == op_type_infos_.end()) {
  87. op_type_infos_.emplace(type_name, op_type);
  88. } else {
  89. iter->second += op_type;
  90. }
  91. }
  92. float DataSaver::GetTotalOpTime(const OpInfoMap &op_info_maps) {
  93. float sum = 0;
  94. sum = std::accumulate(op_info_maps.begin(), op_info_maps.end(), sum,
  95. [](float i, auto iter) { return i + iter.second.op_host_cost_time; });
  96. MS_LOG(DEBUG) << "The total op time is " << sum;
  97. return sum;
  98. }
  99. void DataSaver::ParseEvent(const std::vector<Event> &events) {
  100. // Put Kernel activity events into activity_infos_
  101. for (const auto &event : events) {
  102. if (event.op_name.empty() || event.api_type != CUPTIApiType::kActivity ||
  103. event.activity_type != ActivityType::kKernel) {
  104. continue;
  105. }
  106. AddKernelEvent(event);
  107. }
  108. // update average time of kernel op cost
  109. for (auto &device_infos : activity_infos_) {
  110. // device_infos: <device_id, DeviceActivityInfos>
  111. for (auto &activity_info : device_infos.second) {
  112. // activity_info: <kernel_name, Activity>
  113. activity_info.second.avg_duration_ = activity_info.second.total_duration_ / activity_info.second.count_;
  114. }
  115. MS_LOG(DEBUG) << "Get " << device_infos.second.size() << " activity items for device:" << device_infos.first;
  116. }
  117. }
  118. void DataSaver::AddKernelEvent(const Event &event) {
  119. // Put kernel event to activity_infos according to device id
  120. uint32_t device_id = event.device_id;
  121. auto iter = activity_infos_.find(device_id);
  122. if (iter == activity_infos_.end()) {
  123. auto res_flag = activity_infos_.emplace(device_id, DeviceActivityInfos());
  124. AddKernelEventToDevice(event, &res_flag.first->second);
  125. } else {
  126. AddKernelEventToDevice(event, &iter->second);
  127. }
  128. }
  129. void DataSaver::AddKernelEventToDevice(const Event &event, DeviceActivityInfos *device_activity_infos) {
  130. // Combine kernel activity with same kernel name
  131. auto event_ptr = std::make_shared<Event>(event);
  132. ActivityData activity_data = ActivityData(event_ptr);
  133. std::string kernel_name = event.kernel_name;
  134. auto iter = device_activity_infos->find(kernel_name);
  135. if (iter == device_activity_infos->end()) {
  136. device_activity_infos->emplace(kernel_name, activity_data);
  137. } else {
  138. iter->second += activity_data;
  139. }
  140. }
  141. void DataSaver::WriteFile(std::string out_path_dir) {
  142. if (out_path_dir.empty()) {
  143. MS_LOG(WARNING) << "Output directory. Ignore the writing data.";
  144. return;
  145. }
  146. if (op_detail_infos_.empty() || op_type_infos_.empty() || activity_infos_.empty()) {
  147. MS_LOG(WARNING) << "No operation detail infos to write.";
  148. return;
  149. }
  150. // not support multi-device for operator info per process yet
  151. device_id_ = std::to_string(activity_infos_.begin()->first);
  152. WriteOpDetail(out_path_dir);
  153. WriteOpType(out_path_dir);
  154. WriteActivity(out_path_dir);
  155. }
  156. void DataSaver::WriteOpType(const std::string &saver_base_dir) {
  157. std::string file_path = saver_base_dir + "/gpu_op_type_info_" + device_id_ + ".csv";
  158. std::ofstream ofs(file_path);
  159. // check if the file is writable
  160. if (!ofs.is_open()) {
  161. MS_LOG(WARNING) << "Open file '" << file_path << "' failed!";
  162. return;
  163. }
  164. // write op type info into file
  165. ofs << OpType().GetHeader() << std::endl;
  166. for (auto op_type_info : op_type_infos_) {
  167. ofs << op_type_info.second << std::endl;
  168. }
  169. ofs.close();
  170. MS_LOG(INFO) << "Write " << op_type_infos_.size() << " op type infos into file: " << file_path;
  171. }
  172. void DataSaver::WriteOpDetail(const std::string &saver_base_dir) {
  173. std::string file_path = saver_base_dir + "/gpu_op_detail_info_" + device_id_ + ".csv";
  174. std::ofstream ofs(file_path);
  175. if (!ofs.is_open()) {
  176. MS_LOG(WARNING) << "Open file '" << file_path << "' failed!";
  177. return;
  178. }
  179. // write op detail info into file
  180. ofs << OpDetailInfo().GetHeader() << std::endl;
  181. for (auto op_detail : op_detail_infos_) {
  182. ofs << op_detail << std::endl;
  183. }
  184. ofs.close();
  185. MS_LOG(INFO) << "Write " << op_detail_infos_.size() << " op detail infos into file: " << file_path;
  186. }
  187. void DataSaver::WriteActivity(const std::string &saver_base_dir) {
  188. std::string file_path_base = saver_base_dir + "/gpu_activity_data_";
  189. for (auto device_info : activity_infos_) {
  190. std::string file_path = file_path_base + std::to_string(device_info.first) + ".csv";
  191. std::ofstream ofs(file_path);
  192. if (!ofs.is_open()) {
  193. MS_LOG(WARNING) << "Open file '" << file_path << "' failed!";
  194. return;
  195. }
  196. // write activity data into file
  197. ofs << ActivityData().GetHeader() << std::endl;
  198. for (auto activity_data : device_info.second) {
  199. ofs << activity_data.second << std::endl;
  200. }
  201. ofs.close();
  202. MS_LOG(INFO) << "Write " << device_info.second.size() << " activity infos into file: " << file_path;
  203. }
  204. }
  205. } // namespace gpu
  206. } // namespace profiler
  207. } // namespace mindspore