You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

launch_kernel.cc 4.3 kB

4 years ago
4 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112
  1. /**
  2. * Copyright 2021 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "runtime/device/launch_kernel.h"
  17. #include <vector>
  18. #include <memory>
  19. namespace mindspore::device {
  20. std::vector<kernel::AddressPtr> LaunchKernel::ObtainKernelAddress(const std::vector<size_t> &list,
  21. std::vector<uint8_t *> *addr) {
  22. MS_EXCEPTION_IF_NULL(addr);
  23. std::vector<kernel::AddressPtr> kernel_address;
  24. if (addr->size() < list.size()) {
  25. MS_LOG_EXCEPTION << "Error addr size!";
  26. }
  27. for (size_t i = 0; i < list.size(); ++i) {
  28. auto size = AlignSizeForLaunchKernel(list[i]);
  29. (*addr)[i] = AllocDeviceMem(size);
  30. auto address = std::make_shared<kernel::Address>();
  31. MS_EXCEPTION_IF_NULL(address);
  32. address->addr = (*addr)[i];
  33. MS_EXCEPTION_IF_NULL(address->addr);
  34. address->size = size;
  35. kernel_address.push_back(address);
  36. }
  37. return kernel_address;
  38. }
  39. std::vector<kernel::AddressPtr> LaunchKernel::ObtainKernelInputs(const std::vector<size_t> &inputs_list,
  40. const std::vector<uint8_t *> &inputs_addr) {
  41. std::vector<kernel::AddressPtr> kernel_inputs;
  42. if (inputs_list.size() != inputs_addr.size()) {
  43. MS_LOG(ERROR) << "input_list size should equal to input_addr_ size, input_list size: " << inputs_list.size()
  44. << ", input_addr_ size: " << inputs_addr.size();
  45. }
  46. for (size_t i = 0; i < inputs_list.size(); ++i) {
  47. auto input_size = AlignSizeForLaunchKernel(inputs_list[i]);
  48. auto input = std::make_shared<kernel::Address>();
  49. MS_EXCEPTION_IF_NULL(input);
  50. input->addr = inputs_addr[i];
  51. MS_EXCEPTION_IF_NULL(input->addr);
  52. input->size = input_size;
  53. kernel_inputs.push_back(input);
  54. }
  55. return kernel_inputs;
  56. }
  57. std::vector<kernel::AddressPtr> LaunchKernel::ObtainKernelOutputs(const std::vector<size_t> &outputs_list) {
  58. // init output_addr_
  59. outputs_addr_ = std::vector<uint8_t *>(outputs_list.size(), nullptr);
  60. auto kernel_outputs = ObtainKernelAddress(outputs_list, &outputs_addr_);
  61. return kernel_outputs;
  62. }
  63. std::vector<kernel::AddressPtr> LaunchKernel::ObtainKernelWorkspaces(const std::vector<size_t> &workspaces_list) {
  64. std::vector<kernel::AddressPtr> kernel_workspace;
  65. if (workspaces_list.empty()) {
  66. return kernel_workspace;
  67. }
  68. // init workspace_addr_
  69. workspaces_addr_ = std::vector<uint8_t *>(workspaces_list.size(), nullptr);
  70. kernel_workspace = ObtainKernelAddress(workspaces_list, &workspaces_addr_);
  71. return kernel_workspace;
  72. }
  73. void LaunchKernel::LaunchSingleKernel(const std::vector<uint8_t *> &inputs_addr) {
  74. MS_EXCEPTION_IF_NULL(kernel_mod_);
  75. // obtain kernel inputs
  76. auto kernel_inputs = ObtainKernelInputs(kernel_mod_->GetInputSizeList(), inputs_addr);
  77. // obtain kernel outputs
  78. auto kernel_outputs = ObtainKernelOutputs(kernel_mod_->GetOutputSizeList());
  79. // obtain kernel workspace
  80. auto kernel_workspaces = ObtainKernelWorkspaces(kernel_mod_->GetWorkspaceSizeList());
  81. // launch
  82. auto ret_status = kernel_mod_->Launch(kernel_inputs, kernel_workspaces, kernel_outputs, stream_);
  83. if (!ret_status) {
  84. MS_LOG(ERROR) << "Launch single kernel failed.";
  85. }
  86. }
  87. void LaunchKernel::FreeOutputAndWorkspaceDeviceMem() {
  88. // free outputs_addr and workspaces_addr_
  89. for (size_t i = 0; i < outputs_addr_.size(); ++i) {
  90. if (outputs_addr_[i] != nullptr) {
  91. FreeDeviceMem(outputs_addr_[i]);
  92. outputs_addr_[i] = nullptr;
  93. }
  94. }
  95. for (size_t i = 0; i < workspaces_addr_.size(); ++i) {
  96. if (workspaces_addr_[i] != nullptr) {
  97. FreeDeviceMem(workspaces_addr_[i]);
  98. workspaces_addr_[i] = nullptr;
  99. }
  100. }
  101. outputs_addr_.clear();
  102. workspaces_addr_.clear();
  103. }
  104. } // namespace mindspore::device