You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

aicpu_kernel_mod.cc 6.5 kB

5 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154
  1. /**
  2. * Copyright 2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "kernel/aicpu/aicpu_kernel_mod.h"
  17. #include <memory>
  18. #include <vector>
  19. #include <string>
  20. #include <algorithm>
  21. #include "runtime/mem.h"
  22. #include "runtime/rt.h"
  23. #include "kernel/aicpu/aicpu_kernel_build.h"
  24. #include "utils/convert_utils.h"
  25. #include "kernel/aicpu/aicpu_util.h"
  26. using AicpuTaskInfoPtr = std::shared_ptr<ge::model_runner::AicpuTaskInfo>;
  27. namespace mindspore {
  28. namespace kernel {
  29. constexpr auto AICPU_OPS_SO_NAME = "libaicpu_kernels.so";
  30. AicpuOpKernelMod::AicpuOpKernelMod() : anf_node_(nullptr) {}
  31. AicpuOpKernelMod::~AicpuOpKernelMod() {
  32. args_.clear();
  33. inputList_.clear();
  34. outputList_.clear();
  35. anf_node_ = nullptr;
  36. input_size_list_.clear();
  37. output_size_list_.clear();
  38. workspace_size_list_.clear();
  39. }
  40. void AicpuOpKernelMod::SetInputSizeList(const std::vector<size_t> &size_list) { input_size_list_ = size_list; }
  41. const std::vector<size_t> &AicpuOpKernelMod::GetInputSizeList() const { return input_size_list_; }
  42. void AicpuOpKernelMod::SetOutputSizeList(const std::vector<size_t> &size_list) { output_size_list_ = size_list; }
  43. const std::vector<size_t> &AicpuOpKernelMod::GetOutputSizeList() const { return output_size_list_; }
  44. void AicpuOpKernelMod::SetWorkspaceSizeList(const std::vector<size_t> &size_list) { workspace_size_list_ = size_list; }
  45. const std::vector<size_t> &AicpuOpKernelMod::GetWorkspaceSizeList() const { return workspace_size_list_; }
  46. void AicpuOpKernelMod::SetInputList(const std::vector<int64_t> &inputList) { inputList_ = inputList; }
  47. void AicpuOpKernelMod::SetOutputList(const std::vector<int64_t> &outputList) { outputList_ = outputList; }
  48. void AicpuOpKernelMod::SetNodeDef(const std::string &nodeDef) { (void)node_def_str_.assign(nodeDef); }
  49. void AicpuOpKernelMod::SetNodeName(const std::string &node_name) { node_name_ = node_name; }
// Bind the graph node this kernel mod belongs to. Raises (via MS_EXCEPTION_IF_NULL)
// when the caller passes a null node, so anf_node_ is never null after this call.
void AicpuOpKernelMod::SetAnfNode(const mindspore::AnfNodePtr &anf_node) {
  MS_EXCEPTION_IF_NULL(anf_node);
  anf_node_ = anf_node;
}
  54. void AicpuOpKernelMod::CreateCpuKernelInfo(const std::vector<AddressPtr> &inputs,
  55. const std::vector<AddressPtr> &outputs) {
  56. MS_LOG(INFO) << "CreateCpuKernelInfoOffline start";
  57. node_so_ = AICPU_OPS_SO_NAME;
  58. // InputOutputAddr
  59. vector<void *> io_addrs;
  60. (void)std::transform(std::begin(inputs), std::end(inputs), std::back_inserter(io_addrs),
  61. [](const AddressPtr &input) -> void * { return input->addr; });
  62. (void)std::transform(std::begin(outputs), std::end(outputs), std::back_inserter(io_addrs),
  63. [](const AddressPtr &output) -> void * { return output->addr; });
  64. auto io_addrs_num = io_addrs.size();
  65. // calculate paramLen: AicpuParamHead.len + ioAddrsSize + notifyId.len + customizedAttr.len
  66. auto param_len = sizeof(AicpuParamHead);
  67. // get input and output addrs size, no need to check overflow
  68. auto io_addrs_size = io_addrs_num * sizeof(uint64_t);
  69. // refresh paramLen, no need to check overflow
  70. param_len += io_addrs_size;
  71. auto node_def_len = node_def_str_.length();
  72. param_len += node_def_len;
  73. // Create taskArgs: AicpuParamHead + ioAddrs + notifyId + customizedAttr
  74. AicpuParamHead paramHead = {static_cast<uint32_t>(param_len), static_cast<uint32_t>(io_addrs_num)};
  75. args_.clear();
  76. (void)args_.append(reinterpret_cast<const char *>(&paramHead), sizeof(AicpuParamHead));
  77. // TaskArgs append ioAddrs
  78. if (io_addrs_size != 0) {
  79. (void)args_.append(reinterpret_cast<const char *>(io_addrs.data()), io_addrs_size);
  80. }
  81. // When it's aicpu customized ops, taskArgs should append customized attr
  82. if (node_def_len != 0) {
  83. (void)args_.append(reinterpret_cast<const char *>(node_def_str_.data()), node_def_len);
  84. }
  85. MS_LOG(INFO) << "CreateCpuKernelInfoOffline end";
  86. }
  87. bool AicpuOpKernelMod::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
  88. const std::vector<AddressPtr> &outputs, void *stream_ptr) {
  89. if (stream_ptr == nullptr) {
  90. MS_LOG(ERROR) << "stream_ptr should not be nullptr.";
  91. return false;
  92. }
  93. CreateCpuKernelInfo(inputs, outputs);
  94. if (node_name_ == kTopK) {
  95. node_name_ = kTopKV2;
  96. }
  97. MS_LOG(INFO) << "Aicpu launch, node_so_:" << node_so_ << ", node name:" << node_name_
  98. << ", args_size:" << args_.length();
  99. if (rtCpuKernelLaunch(reinterpret_cast<const void *>(node_so_.c_str()),
  100. reinterpret_cast<const void *>(node_name_.c_str()), 1,
  101. reinterpret_cast<const void *>(args_.data()), static_cast<uint32_t>(args_.length()), nullptr,
  102. stream_ptr) != RT_ERROR_NONE) {
  103. MS_LOG(ERROR) << "Aicpu op launch failed!";
  104. return false;
  105. }
  106. return true;
  107. }
  108. std::vector<TaskInfoPtr> AicpuOpKernelMod::GenTask(const std::vector<AddressPtr> &inputs,
  109. const std::vector<AddressPtr> &,
  110. const std::vector<AddressPtr> &outputs, uint32_t stream_id) {
  111. MS_LOG(INFO) << "AicpuOpKernelMod GenTask start";
  112. stream_id_ = stream_id;
  113. node_so_ = AICPU_OPS_SO_NAME;
  114. std::vector<void *> input_data_addrs;
  115. (void)std::transform(std::begin(inputs), std::end(inputs), std::back_inserter(input_data_addrs),
  116. [](const AddressPtr &input) -> void * { return input->addr; });
  117. std::vector<void *> output_data_addrs;
  118. (void)std::transform(std::begin(outputs), std::end(outputs), std::back_inserter(output_data_addrs),
  119. [](const AddressPtr &output) -> void * { return output->addr; });
  120. if (node_name_ == kTopK) {
  121. node_name_ = kTopKV2;
  122. }
  123. AicpuTaskInfoPtr task_info_ptr = make_shared<ge::model_runner::AicpuTaskInfo>(
  124. stream_id, node_so_, node_name_, node_def_str_, input_data_addrs, output_data_addrs);
  125. MS_LOG(INFO) << "AicpuOpKernelMod GenTask end";
  126. return {task_info_ptr};
  127. }
  128. } // namespace kernel
  129. } // namespace mindspore