You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

kernel_info_setter.cc 8.5 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199
  1. /**
  2. * Copyright 2019 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "device/gpu/kernel_info_setter.h"
  17. #include <string>
  18. #include <memory>
  19. #include "kernel/kernel.h"
  20. #include "utils/utils.h"
  21. #include "kernel/gpu/gpu_kernel_factory.h"
  22. #include "kernel/kernel_build_info.h"
  23. #include "session/anf_runtime_algorithm.h"
  24. #include "kernel/common_utils.h"
  25. #include "common/utils.h"
  26. #include "kernel/oplib/oplib.h"
  27. #include "kernel/oplib/opinfo.h"
  28. namespace mindspore {
  29. namespace device {
  30. namespace gpu {
  31. using AnfAlgo = mindspore::session::AnfRuntimeAlgorithm;
  32. using mindspore::kernel::KernelBuildInfo;
  33. namespace {
  34. bool CheckKernelInfo(const std::shared_ptr<KernelBuildInfo> &alternative_kernel_info,
  35. const std::shared_ptr<KernelBuildInfo> &selected_kernel_info) {
  36. MS_EXCEPTION_IF_NULL(selected_kernel_info);
  37. MS_EXCEPTION_IF_NULL(alternative_kernel_info);
  38. size_t selected_input_num = selected_kernel_info->GetInputNum();
  39. size_t alternative_input_num = alternative_kernel_info->GetInputNum();
  40. if (selected_input_num != alternative_input_num) {
  41. return false;
  42. }
  43. for (size_t i = 0; i < selected_input_num; i++) {
  44. if (selected_kernel_info->GetInputFormat(i) != alternative_kernel_info->GetInputFormat(i)) {
  45. return false;
  46. }
  47. if (selected_kernel_info->GetInputDeviceType(i) != alternative_kernel_info->GetInputDeviceType(i)) {
  48. return false;
  49. }
  50. }
  51. size_t selected_output_num = selected_kernel_info->GetOutputNum();
  52. size_t alternative_output_num = alternative_kernel_info->GetOutputNum();
  53. if (selected_output_num != alternative_output_num) {
  54. return false;
  55. }
  56. for (size_t i = 0; i < selected_output_num; i++) {
  57. if (selected_kernel_info->GetOutputFormat(i) != alternative_kernel_info->GetOutputFormat(i)) {
  58. return false;
  59. }
  60. if (selected_kernel_info->GetOutputDeviceType(i) != alternative_kernel_info->GetOutputDeviceType(i)) {
  61. return false;
  62. }
  63. }
  64. return true;
  65. }
  66. std::string SupportedTypeList(const CNodePtr &kernel_node) {
  67. std::string supported_type_lists =
  68. kernel::GpuKernelFactory::GetInstance().SupportedTypeList(AnfAlgo::GetCNodeName(kernel_node));
  69. if (!supported_type_lists.empty()) {
  70. return supported_type_lists;
  71. }
  72. std::vector<std::shared_ptr<KernelBuildInfo>> kernel_info_list;
  73. std::string op_name = AnfAlgo::GetCNodeName(kernel_node);
  74. auto op_info_ptr = mindspore::kernel::OpLib::FindOp(op_name, kernel::OpImplyType::kAKG);
  75. if (op_info_ptr == nullptr) {
  76. MS_LOG(EXCEPTION) << "Unsupported op [" << op_name << "]";
  77. }
  78. (void)ParseMetadata(kernel_node, op_info_ptr, kernel::Processor::CUDA, &kernel_info_list);
  79. for (size_t i = 0; i < kernel_info_list.size(); i++) {
  80. auto supported_akg_type = kernel_info_list[i]->GetAllInputDeviceTypes();
  81. std::string supported_akg_type_list = "[";
  82. for (auto type : supported_akg_type) {
  83. supported_akg_type_list = supported_akg_type_list + mindspore::kernel::TypeId2String(type);
  84. }
  85. supported_type_lists = supported_type_lists + supported_akg_type_list + "] ";
  86. }
  87. return supported_type_lists;
  88. }
  89. bool SelectAkgKernel(const CNodePtr &kernel_node, const std::shared_ptr<KernelBuildInfo> &selected_kernel_info) {
  90. MS_EXCEPTION_IF_NULL(kernel_node);
  91. MS_EXCEPTION_IF_NULL(selected_kernel_info);
  92. std::vector<std::shared_ptr<KernelBuildInfo>> kernel_info_list;
  93. std::string op_name = AnfAlgo::GetCNodeName(kernel_node);
  94. auto op_info_ptr = mindspore::kernel::OpLib::FindOp(op_name, kernel::OpImplyType::kAKG);
  95. if (op_info_ptr == nullptr) {
  96. MS_LOG(ERROR) << "Not find op[" << op_name << "] in akg";
  97. return false;
  98. }
  99. if (!ParseMetadata(kernel_node, op_info_ptr, kernel::Processor::CUDA, &kernel_info_list)) {
  100. MS_LOG(EXCEPTION) << "Parsed metadata of op[" << op_name << "] failed.";
  101. }
  102. if (kernel_info_list.empty()) {
  103. MS_LOG(EXCEPTION) << "Akg dose not has metadata of op[" << op_name << "].";
  104. }
  105. bool match = std::any_of(kernel_info_list.begin(), kernel_info_list.end(),
  106. [&](const std::shared_ptr<KernelBuildInfo> &alternative_kernel_info) {
  107. return CheckKernelInfo(alternative_kernel_info, selected_kernel_info);
  108. });
  109. if (!match) {
  110. MS_LOG(ERROR) << "Not find op[" << op_name << "] in akg";
  111. return false;
  112. }
  113. return true;
  114. }
// Propagates the selected kernel's device info onto the node's Parameter
// inputs, so each parameter carries a concrete output format/device type for
// later passes. Non-Parameter inputs are skipped (their build info is set when
// their own producing node is processed).
void SetTensorDeviceInfo(const kernel::KernelBuildInfo &selected_kernel_info, const CNodePtr &kernel_node) {
  MS_EXCEPTION_IF_NULL(kernel_node);
  for (size_t input_index = 0; input_index < AnfAlgo::GetInputTensorNum(kernel_node); ++input_index) {
    // Tensor input i is cnode input i + 1 (input 0 is the primitive).
    auto input_kernel_node = kernel_node->input(input_index + 1);
    MS_EXCEPTION_IF_NULL(input_kernel_node);
    if (!input_kernel_node->isa<Parameter>()) {
      continue;
    }
    std::shared_ptr<kernel::KernelBuildInfo::KernelBuildInfoBuilder> builder =
      std::make_shared<kernel::KernelBuildInfo::KernelBuildInfoBuilder>();
    auto param = input_kernel_node->cast<ParameterPtr>();
    MS_EXCEPTION_IF_NULL(param);
    if (!AnfAlgo::IsParameterWeight(param)) {
      // Non-weight parameter: default format, type taken from shape inference.
      std::vector<std::string> output_format = {kOpFormat_DEFAULT};
      builder->SetOutputsFormat(output_format);
      std::vector<TypeId> output_type = {AnfAlgo::GetOutputInferDataType(input_kernel_node, 0)};
      builder->SetOutputsDeviceType(output_type);
      AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), input_kernel_node.get());
      continue;
    }
    // Weight parameter: adopt the format/type the selected kernel expects for
    // this input slot, but only if the weight has no device type yet.
    // NOTE(review): ApplyMomentum is special-cased to always overwrite —
    // presumably so the optimizer state matches the kernel's expectation;
    // confirm against the ApplyMomentum GPU kernel before changing.
    if ((AnfAlgo::GetOutputDeviceDataType(input_kernel_node, 0) == kTypeUnknown) ||
        (AnfAlgo::GetCNodeName(kernel_node) == "ApplyMomentum")) {
      std::vector<std::string> output_format = {selected_kernel_info.GetInputFormat(input_index)};
      builder->SetOutputsFormat(output_format);
      std::vector<TypeId> output_type = {selected_kernel_info.GetInputDeviceType(input_index)};
      builder->SetOutputsDeviceType(output_type);
      AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), input_kernel_node.get());
    }
  }
}
  145. } // namespace
  146. void SetKernelInfo(const CNodePtr &kernel_node) {
  147. std::vector<std::string> inputs_format;
  148. std::vector<TypeId> inputs_type;
  149. std::shared_ptr<KernelBuildInfo::KernelBuildInfoBuilder> builder =
  150. std::make_shared<KernelBuildInfo::KernelBuildInfoBuilder>();
  151. for (size_t input_index = 0; input_index < AnfAlgo::GetInputTensorNum(kernel_node); ++input_index) {
  152. inputs_format.emplace_back(kOpFormat_DEFAULT);
  153. inputs_type.push_back(AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, input_index));
  154. }
  155. builder->SetInputsFormat(inputs_format);
  156. builder->SetInputsDeviceType(inputs_type);
  157. std::vector<std::string> outputs_format;
  158. std::vector<TypeId> outputs_type;
  159. for (size_t output_index = 0; output_index < AnfAlgo::GetOutputTensorNum(kernel_node); ++output_index) {
  160. outputs_format.emplace_back(kOpFormat_DEFAULT);
  161. outputs_type.push_back(AnfAlgo::GetOutputInferDataType(kernel_node, output_index));
  162. }
  163. builder->SetOutputsFormat(outputs_format);
  164. builder->SetOutputsDeviceType(outputs_type);
  165. bool result =
  166. kernel::GpuKernelFactory::GetInstance().SearchRegistered(AnfAlgo::GetCNodeName(kernel_node), builder->Build());
  167. KernelType kernel_type = UNKNOWN_KERNEL_TYPE;
  168. if (!result) {
  169. result = SelectAkgKernel(kernel_node, builder->Build());
  170. kernel_type = AUTO_DIFF_KERNEL;
  171. }
  172. if (!result) {
  173. auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
  174. auto supported_type_lists = SupportedTypeList(kernel_node);
  175. MS_LOG(EXCEPTION) << "Select GPU kernel op[" << kernel_name
  176. << "] fail! Incompatible data type!\nThe supported data types are " << supported_type_lists;
  177. }
  178. builder->SetKernelType(kernel_type);
  179. builder->SetProcessor(kernel::Processor::CUDA);
  180. AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), kernel_node.get());
  181. SetTensorDeviceInfo(*(builder->Build()), kernel_node);
  182. }
  183. } // namespace gpu
  184. } // namespace device
  185. } // namespace mindspore