You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

dump_data_builder.h 4.4 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117
  1. /**
  2. * Copyright 2021-2022 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef MINDSPORE_CCSRC_DEBUG_DUMP_DATA_BUILDER_H_
  17. #define MINDSPORE_CCSRC_DEBUG_DUMP_DATA_BUILDER_H_
  18. #include <vector>
  19. #include <string>
  20. #include <iostream>
  21. #include <algorithm>
  22. #include "utils/log_adapter.h"
  23. #ifdef ENABLE_D
  24. #include "proto/dump_data.pb.h"
  25. #include "toolchain/adx_datadump_callback.h"
  26. using Adx::DumpChunk;
  27. #endif
  28. // This class builds dump data received from the adx server. The tensor data for each kernel is divided into pieces,
  29. // and each piece is wrapped in a DumpChunk struct. This class provides functions to merge the dump chunks and
  30. // construct the dump data object.
  31. class DumpDataBuilder {
  32. public:
  33. DumpDataBuilder() {}
  34. ~DumpDataBuilder() = default;
  35. #ifdef ENABLE_D
  36. /*
  37. * Feature group: Dump.
  38. * Target device group: Ascend.
  39. * Runtime category: Old runtime, MindRT.
  40. * Description: This function is for A+M dump only. In each callback, allocate memory and copy the dump chunk from
  41. * adx. Return false if OOM.
  42. */
  43. bool CopyDumpChunk(const DumpChunk *dump_chunk) {
  44. try {
  45. uint32_t buf_sz = dump_chunk->bufLen;
  46. std::string buffer_str(reinterpret_cast<const char *>(dump_chunk->dataBuf), buf_sz);
  47. chunk_list_.push_back(buffer_str);
  48. total_sz_ += buf_sz;
  49. } catch (std::bad_alloc &err) {
  50. MS_LOG(ERROR) << "Failed to allocate memory for " << dump_chunk->fileName << ", reason: " << err.what();
  51. return false;
  52. }
  53. return true;
  54. }
  55. /*
  56. * Feature group: Dump.
  57. * Target device group: Ascend.
  58. * Runtime category: Old runtime, MindRT.
  59. * Description: This function is for A+M dump only. When receiving the last chunk of the node (is_last_chunk = true),
  60. * parse and construct the dump data for dumping. It does the these steps: 1) merge all chunks for the node; 2)
  61. * parse header and protobuf string; 3) memcpy tensor data to contiguous memory segment.
  62. */
  63. bool ConstructDumpData(debugger::dump::DumpData *dump_data_proto, std::vector<char> *data_ptr) {
  64. if (chunk_list_.empty()) {
  65. return false;
  66. }
  67. // merge several chunks into one piece.
  68. std::string dump_proto_str;
  69. dump_proto_str.reserve(total_sz_);
  70. for (auto item : chunk_list_) {
  71. dump_proto_str += item;
  72. }
  73. chunk_list_.clear();
  74. const int8_t header_len_offset = 8;
  75. uint64_t header_len = *reinterpret_cast<const uint64_t *>(dump_proto_str.c_str());
  76. std::string header = dump_proto_str.substr(header_len_offset, header_len);
  77. if (!(*dump_data_proto).ParseFromString(header)) {
  78. MS_LOG(ERROR) << "Failed to parse dump proto file.";
  79. return false;
  80. }
  81. auto data_sz = total_sz_ - header_len_offset - header_len;
  82. data_ptr->resize(data_sz);
  83. // The security memory copy function 'memcpy_s' has a size limit (SECUREC_MEM_MAX_LEN). If the data size is greater
  84. // than that, it should be cut into segments to copy. Otherwise, memcpy_s will fail.
  85. int ret;
  86. if (data_sz < SECUREC_MEM_MAX_LEN) {
  87. ret = memcpy_s(data_ptr->data(), data_sz, dump_proto_str.c_str() + header_len_offset + header_len, data_sz);
  88. } else {
  89. size_t mem_cpy_len;
  90. for (size_t pos = 0; pos < data_sz; pos += SECUREC_MEM_MAX_LEN) {
  91. mem_cpy_len = std::min(data_sz - pos, SECUREC_MEM_MAX_LEN);
  92. ret = memcpy_s(data_ptr->data() + pos, mem_cpy_len,
  93. dump_proto_str.c_str() + header_len_offset + header_len + pos, mem_cpy_len);
  94. if (ret != 0) {
  95. break;
  96. }
  97. }
  98. }
  99. if (ret != 0) {
  100. MS_LOG(ERROR) << "Failed to memcpy: error code (" << ret << ").";
  101. return false;
  102. }
  103. return true;
  104. }
  105. #endif
  106. private:
  107. std::vector<std::string> chunk_list_;
  108. uint64_t total_sz_{0};
  109. };
  110. #endif // MINDSPORE_CCSRC_DEBUG_DUMP_DATA_BUILDER_H_