You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

kernel_info_setter.cc 9.2 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211
  1. /**
  2. * Copyright 2019 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "device/gpu/kernel_info_setter.h"
  17. #include <string>
  18. #include <memory>
  19. #include "kernel/kernel.h"
  20. #include "utils/utils.h"
  21. #include "kernel/gpu/gpu_kernel_factory.h"
  22. #include "kernel/kernel_build_info.h"
  23. #include "session/anf_runtime_algorithm.h"
  24. #include "kernel/common_utils.h"
  25. #include "common/utils.h"
  26. #include "kernel/oplib/oplib.h"
  27. #include "kernel/oplib/opinfo.h"
  28. namespace mindspore {
  29. namespace device {
  30. namespace gpu {
  31. using AnfAlgo = mindspore::session::AnfRuntimeAlgorithm;
  32. using mindspore::kernel::KernelBuildInfo;
  33. namespace {
  34. bool CheckKernelInfo(const std::shared_ptr<KernelBuildInfo> &alternative_kernel_info,
  35. const std::shared_ptr<KernelBuildInfo> &selected_kernel_info) {
  36. MS_EXCEPTION_IF_NULL(selected_kernel_info);
  37. MS_EXCEPTION_IF_NULL(alternative_kernel_info);
  38. size_t selected_input_num = selected_kernel_info->GetInputNum();
  39. size_t alternative_input_num = alternative_kernel_info->GetInputNum();
  40. if (selected_input_num != alternative_input_num) {
  41. return false;
  42. }
  43. for (size_t i = 0; i < selected_input_num; i++) {
  44. if (selected_kernel_info->GetInputFormat(i) != alternative_kernel_info->GetInputFormat(i)) {
  45. return false;
  46. }
  47. if (selected_kernel_info->GetInputDeviceType(i) != alternative_kernel_info->GetInputDeviceType(i)) {
  48. return false;
  49. }
  50. }
  51. size_t selected_output_num = selected_kernel_info->GetOutputNum();
  52. size_t alternative_output_num = alternative_kernel_info->GetOutputNum();
  53. if (selected_output_num != alternative_output_num) {
  54. return false;
  55. }
  56. for (size_t i = 0; i < selected_output_num; i++) {
  57. if (selected_kernel_info->GetOutputFormat(i) != alternative_kernel_info->GetOutputFormat(i)) {
  58. return false;
  59. }
  60. if (selected_kernel_info->GetOutputDeviceType(i) != alternative_kernel_info->GetOutputDeviceType(i)) {
  61. return false;
  62. }
  63. }
  64. return true;
  65. }
  66. std::string SupportedTypeList(const CNodePtr &kernel_node) {
  67. std::string supported_type_lists =
  68. kernel::GpuKernelFactory::GetInstance().SupportedTypeList(AnfAlgo::GetCNodeName(kernel_node));
  69. if (!supported_type_lists.empty()) {
  70. return supported_type_lists;
  71. }
  72. std::vector<std::shared_ptr<KernelBuildInfo>> kernel_info_list;
  73. std::string op_name = AnfAlgo::GetCNodeName(kernel_node);
  74. auto op_info_ptr = mindspore::kernel::OpLib::FindOp(op_name, kernel::OpImplyType::kAKG);
  75. if (op_info_ptr == nullptr) {
  76. MS_LOG(EXCEPTION) << "Unsupported op [" << op_name << "]";
  77. }
  78. (void)ParseMetadata(kernel_node, op_info_ptr, kernel::Processor::CUDA, &kernel_info_list);
  79. for (size_t i = 0; i < kernel_info_list.size(); i++) {
  80. auto supported_akg_type = kernel_info_list[i]->GetAllInputDeviceTypes();
  81. auto supported_akg_type_out = kernel_info_list[i]->GetAllOutputDeviceTypes();
  82. std::string supported_akg_type_list = "in[";
  83. for (auto type : supported_akg_type) {
  84. supported_akg_type_list = supported_akg_type_list + mindspore::kernel::TypeId2String(type);
  85. }
  86. supported_type_lists = supported_type_lists + supported_akg_type_list + "], out[";
  87. for (auto type : supported_akg_type_out) {
  88. supported_akg_type_list = supported_akg_type_list + mindspore::kernel::TypeId2String(type);
  89. }
  90. supported_type_lists += "]; ";
  91. }
  92. return supported_type_lists;
  93. }
  94. bool SelectAkgKernel(const CNodePtr &kernel_node, const std::shared_ptr<KernelBuildInfo> &selected_kernel_info) {
  95. MS_EXCEPTION_IF_NULL(kernel_node);
  96. MS_EXCEPTION_IF_NULL(selected_kernel_info);
  97. std::vector<std::shared_ptr<KernelBuildInfo>> kernel_info_list;
  98. std::string op_name = AnfAlgo::GetCNodeName(kernel_node);
  99. auto op_info_ptr = mindspore::kernel::OpLib::FindOp(op_name, kernel::OpImplyType::kAKG);
  100. if (op_info_ptr == nullptr) {
  101. MS_LOG(ERROR) << "Not find op[" << op_name << "] in akg";
  102. return false;
  103. }
  104. if (!ParseMetadata(kernel_node, op_info_ptr, kernel::Processor::CUDA, &kernel_info_list)) {
  105. MS_LOG(EXCEPTION) << "Parsed metadata of op[" << op_name << "] failed.";
  106. }
  107. if (kernel_info_list.empty()) {
  108. MS_LOG(EXCEPTION) << "Akg dose not has metadata of op[" << op_name << "].";
  109. }
  110. bool match = std::any_of(kernel_info_list.begin(), kernel_info_list.end(),
  111. [&](const std::shared_ptr<KernelBuildInfo> &alternative_kernel_info) {
  112. return CheckKernelInfo(alternative_kernel_info, selected_kernel_info);
  113. });
  114. if (!match) {
  115. MS_LOG(ERROR) << "Not find op[" << op_name << "] in akg";
  116. return false;
  117. }
  118. return true;
  119. }
  120. void SetTensorDeviceInfo(const kernel::KernelBuildInfo &selected_kernel_info, const CNodePtr &kernel_node) {
  121. MS_EXCEPTION_IF_NULL(kernel_node);
  122. for (size_t input_index = 0; input_index < AnfAlgo::GetInputTensorNum(kernel_node); ++input_index) {
  123. auto input_kernel_node = kernel_node->input(input_index + 1);
  124. MS_EXCEPTION_IF_NULL(input_kernel_node);
  125. if (!input_kernel_node->isa<Parameter>()) {
  126. continue;
  127. }
  128. std::shared_ptr<kernel::KernelBuildInfo::KernelBuildInfoBuilder> builder =
  129. std::make_shared<kernel::KernelBuildInfo::KernelBuildInfoBuilder>();
  130. auto param = input_kernel_node->cast<ParameterPtr>();
  131. MS_EXCEPTION_IF_NULL(param);
  132. if (!AnfAlgo::IsParameterWeight(param)) {
  133. std::vector<std::string> output_format = {kOpFormat_DEFAULT};
  134. builder->SetOutputsFormat(output_format);
  135. std::vector<TypeId> output_type = {AnfAlgo::GetOutputInferDataType(input_kernel_node, 0)};
  136. builder->SetOutputsDeviceType(output_type);
  137. AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), input_kernel_node.get());
  138. continue;
  139. }
  140. if ((AnfAlgo::GetOutputDeviceDataType(input_kernel_node, 0) == kTypeUnknown) ||
  141. (AnfAlgo::GetCNodeName(kernel_node) == "ApplyMomentum")) {
  142. std::vector<std::string> output_format = {selected_kernel_info.GetInputFormat(input_index)};
  143. builder->SetOutputsFormat(output_format);
  144. std::vector<TypeId> output_type = {selected_kernel_info.GetInputDeviceType(input_index)};
  145. builder->SetOutputsDeviceType(output_type);
  146. AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), input_kernel_node.get());
  147. }
  148. }
  149. }
  150. } // namespace
  151. void SetKernelInfo(const CNodePtr &kernel_node) {
  152. std::vector<std::string> inputs_format;
  153. std::vector<TypeId> inputs_type;
  154. std::shared_ptr<KernelBuildInfo::KernelBuildInfoBuilder> builder =
  155. std::make_shared<KernelBuildInfo::KernelBuildInfoBuilder>();
  156. for (size_t input_index = 0; input_index < AnfAlgo::GetInputTensorNum(kernel_node); ++input_index) {
  157. inputs_format.emplace_back(kOpFormat_DEFAULT);
  158. inputs_type.push_back(AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, input_index));
  159. }
  160. builder->SetInputsFormat(inputs_format);
  161. builder->SetInputsDeviceType(inputs_type);
  162. std::vector<std::string> outputs_format;
  163. std::vector<TypeId> outputs_type;
  164. for (size_t output_index = 0; output_index < AnfAlgo::GetOutputTensorNum(kernel_node); ++output_index) {
  165. outputs_format.emplace_back(kOpFormat_DEFAULT);
  166. outputs_type.push_back(AnfAlgo::GetOutputInferDataType(kernel_node, output_index));
  167. }
  168. builder->SetOutputsFormat(outputs_format);
  169. builder->SetOutputsDeviceType(outputs_type);
  170. bool result =
  171. kernel::GpuKernelFactory::GetInstance().SearchRegistered(AnfAlgo::GetCNodeName(kernel_node), builder->Build());
  172. KernelType kernel_type = UNKNOWN_KERNEL_TYPE;
  173. if (!result) {
  174. result = SelectAkgKernel(kernel_node, builder->Build());
  175. kernel_type = AKG_KERNEL;
  176. }
  177. if (!result) {
  178. auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
  179. std::string build_type = "in [";
  180. std::for_each(std::begin(inputs_type), std::end(inputs_type),
  181. [&build_type](auto i) { build_type += mindspore::kernel::TypeId2String(i) + " "; });
  182. build_type += "] out [";
  183. std::for_each(std::begin(outputs_type), std::end(outputs_type),
  184. [&build_type](auto i) { build_type += mindspore::kernel::TypeId2String(i) + " "; });
  185. build_type += "]";
  186. auto supported_type_lists = SupportedTypeList(kernel_node);
  187. MS_EXCEPTION(TypeError) << "Select GPU kernel op[" << kernel_name
  188. << "] fail! Incompatible data type!\nThe supported data types are " << supported_type_lists
  189. << ", but get " << build_type;
  190. }
  191. builder->SetKernelType(kernel_type);
  192. builder->SetProcessor(kernel::Processor::CUDA);
  193. AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), kernel_node.get());
  194. SetTensorDeviceInfo(*(builder->Build()), kernel_node);
  195. }
  196. } // namespace gpu
  197. } // namespace device
  198. } // namespace mindspore