You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

kernel_select_cpu.cc 7.2 kB

5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170
  1. /**
  2. * Copyright 2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "device/cpu/kernel_select_cpu.h"
  17. #include <string>
  18. #include <memory>
  19. #include <algorithm>
  20. #include "kernel/cpu/cpu_kernel_factory.h"
  21. namespace mindspore {
  22. namespace device {
  23. namespace cpu {
  24. using AnfAlgo = mindspore::session::AnfRuntimeAlgorithm;
  25. using mindspore::kernel::KernelBuildInfo;
  26. namespace {
  27. bool IsInputNotCNode(const CNodePtr &kernel_node, size_t input_index) {
  28. auto input_node = AnfAlgo::VisitKernel(kernel_node->input(input_index + 1), 0).first;
  29. MS_EXCEPTION_IF_NULL(input_node);
  30. if (input_node->isa<Parameter>() || input_node->isa<ValueNode>()) {
  31. return true;
  32. }
  33. return false;
  34. }
  35. void UpdatePrevNotCNodeFormatDtype(const KernelAttr &kernel_attr, const std::vector<size_t> &input_not_cnode_indexes,
  36. const CNodePtr kernel_node) {
  37. for (auto &input_index : input_not_cnode_indexes) {
  38. auto input_node = AnfAlgo::VisitKernel(kernel_node->input(input_index + 1), 0).first;
  39. MS_EXCEPTION_IF_NULL(input_node);
  40. std::vector<TypeId> output_types;
  41. output_types.emplace_back(kernel_attr.GetInputAttr(input_index).first);
  42. auto builder = std::make_shared<kernel::KernelBuildInfo::KernelBuildInfoBuilder>();
  43. MS_EXCEPTION_IF_NULL(builder);
  44. builder->SetOutputsFormat({kOpFormat_DEFAULT});
  45. builder->SetOutputsDeviceType(output_types);
  46. AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), input_node.get());
  47. }
  48. }
  49. void GetInputFormatsAndDtypes(const CNodePtr &kernel_node, std::vector<std::string> *input_formats,
  50. std::vector<TypeId> *input_types, std::vector<size_t> *input_no_cnode_indexes) {
  51. size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
  52. for (size_t input_index = 0; input_index < input_num; ++input_index) {
  53. TypeId dtype = kTypeUnknown;
  54. if (IsInputNotCNode(kernel_node, input_index)) {
  55. input_no_cnode_indexes->emplace_back(input_index);
  56. dtype = AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, input_index);
  57. } else {
  58. dtype = AnfAlgo::GetPrevNodeOutputDeviceDataType(kernel_node, input_index);
  59. }
  60. input_formats->emplace_back(kOpFormat_DEFAULT);
  61. input_types->emplace_back(dtype);
  62. }
  63. }
  64. void GetOutputFormatsAndDtypes(const CNodePtr &kernel_node, const KernelAttr &kernel_attr,
  65. std::vector<std::string> *output_formats, std::vector<TypeId> *output_types) {
  66. size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
  67. for (size_t output_index = 0; output_index < output_num; ++output_index) {
  68. output_formats->emplace_back(kernel_attr.GetOutputAttr(output_index).second);
  69. auto dtype = kernel_attr.GetOutputAttr(output_index).first;
  70. output_types->emplace_back(dtype);
  71. }
  72. }
  73. bool IsInputFormatDtypeMatched(const KernelAttr &kernel_attr, const std::vector<std::string> &input_formats,
  74. const std::vector<TypeId> &input_types,
  75. const std::vector<size_t> &input_not_cnode_indexes) {
  76. if (kernel_attr.GetInputSize() != input_types.size()) {
  77. MS_LOG(DEBUG) << "required input num:" << kernel_attr.GetInputSize() << ", actual input num:" << input_types.size();
  78. return false;
  79. }
  80. auto input_num = input_types.size();
  81. for (size_t i = 0; i < input_num; ++i) {
  82. bool is_not_cnode_idx = std::any_of(input_not_cnode_indexes.begin(), input_not_cnode_indexes.end(),
  83. [i](size_t index) { return index == i; });
  84. bool have_cnode_input = (input_types.size() != input_not_cnode_indexes.size());
  85. if (have_cnode_input && is_not_cnode_idx) {
  86. continue;
  87. }
  88. if (kernel_attr.GetInputAttr(i).first != input_types[i]) {
  89. MS_LOG(DEBUG) << "required dtype:" << kernel_attr.GetInputAttr(i).first
  90. << ", actual input dtype:" << input_types[i];
  91. return false;
  92. }
  93. if (kernel_attr.GetInputAttr(i).second != input_formats[i]) {
  94. MS_LOG(DEBUG) << "required format:" << kernel_attr.GetInputAttr(i).second
  95. << ", actual input format:" << input_formats[i];
  96. return false;
  97. }
  98. }
  99. return true;
  100. }
  101. void ExpandKernelAttr(const CNodePtr &kernel_node, KernelAttr *kernel_attr) {
  102. MS_EXCEPTION_IF_NULL(kernel_attr);
  103. TypeId input_dtype = kernel_attr->GetInputAttr(0).first;
  104. size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
  105. for (size_t i = 1; i < input_num; ++i) {
  106. kernel_attr->AddInputAttr(input_dtype);
  107. }
  108. TypeId output_dtype = kernel_attr->GetOutputAttr(0).first;
  109. size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
  110. for (size_t i = 1; i < output_num; ++i) {
  111. kernel_attr->AddOutputAttr(output_dtype);
  112. }
  113. }
  114. } // namespace
  115. void SetKernelInfo(const CNodePtr &kernel_node) {
  116. std::vector<std::string> input_formats;
  117. std::vector<TypeId> input_types;
  118. std::vector<size_t> input_not_cnode_indexes;
  119. std::vector<std::string> output_formats;
  120. std::vector<TypeId> output_types;
  121. MS_LOG(INFO) << "SetKernelInfo, CNode Name: " << AnfAlgo::GetCNodeName(kernel_node);
  122. GetInputFormatsAndDtypes(kernel_node, &input_formats, &input_types, &input_not_cnode_indexes);
  123. auto kernel_attrs =
  124. kernel::CPUKernelFactory::GetInstance().GetSupportedKernelAttrList(AnfAlgo::GetCNodeName(kernel_node));
  125. for (size_t index = 0; index < kernel_attrs.size(); ++index) {
  126. auto kernel_attr = kernel_attrs[index];
  127. if (kernel_attr.GetAllSame()) {
  128. ExpandKernelAttr(kernel_node, &kernel_attr);
  129. }
  130. if (IsInputFormatDtypeMatched(kernel_attr, input_formats, input_types, input_not_cnode_indexes)) {
  131. size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
  132. if (kernel_attr.GetOutputSize() != output_num) {
  133. MS_LOG(DEBUG) << "Output num is not equal!";
  134. continue;
  135. }
  136. MS_LOG(INFO) << "Input format and dtype is matched, index: " << index;
  137. GetOutputFormatsAndDtypes(kernel_node, kernel_attr, &output_formats, &output_types);
  138. UpdatePrevNotCNodeFormatDtype(kernel_attr, input_not_cnode_indexes, kernel_node);
  139. for (auto &input_index : input_not_cnode_indexes) {
  140. input_types[input_index] = kernel_attr.GetInputAttr(input_index).first;
  141. }
  142. break;
  143. }
  144. }
  145. auto builder = std::make_shared<kernel::KernelBuildInfo::KernelBuildInfoBuilder>();
  146. MS_EXCEPTION_IF_NULL(builder);
  147. builder->SetInputsFormat(input_formats);
  148. builder->SetInputsDeviceType(input_types);
  149. builder->SetOutputsFormat(output_formats);
  150. builder->SetOutputsDeviceType(output_types);
  151. AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), kernel_node.get());
  152. }
  153. } // namespace cpu
  154. } // namespace device
  155. } // namespace mindspore