You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

kernel.cc 8.3 kB

4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223
  1. /**
  2. * Copyright 2021 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "kernel/kernel.h"
  17. #include <algorithm>
  18. #include <stack>
  19. #include "utils/ms_context.h"
  20. #include "utils/anf_utils.h"
  21. #include "runtime/device/ms_device_shape_transfer.h"
  22. #include "backend/common/session/anf_runtime_algorithm.h"
  23. #include "include/common/utils/anfalgo.h"
  24. #include "backend/common/optimizer/helper.h"
  25. namespace mindspore {
  26. namespace kernel {
  // Sentinel dimension value. InferShapeForDefiniteOutputNode gives up its fast path as soon as any dimension of
  // the node's current shape equals this value (presumably it marks a shape that is not yet known -- confirm).
  constexpr int64_t kInvalidShape{-2};
  28. void KernelMod::SetAtomicCleanNodes(const std::vector<CNodePtr> &atomic_clean_node) {
  29. atomic_clean_nodes_.resize(atomic_clean_node.size());
  30. for (size_t i = 0; i < atomic_clean_node.size(); ++i) {
  31. atomic_clean_nodes_[i] = atomic_clean_node[i];
  32. }
  33. }
  34. void KernelMod::InferShape() {
  35. auto node = anf_node_.lock();
  36. MS_EXCEPTION_IF_NULL(node);
  37. auto cnode = node->cast<CNodePtr>();
  38. MS_EXCEPTION_IF_NULL(cnode);
  39. MS_LOG(INFO) << "InferShape start, node:" << cnode->fullname_with_scope();
  40. GetDepndLists(cnode);
  41. auto ret = InferShapeForDefiniteOutputNode(cnode);
  42. if (ret) {
  43. return;
  44. }
  45. depend_tensor_map_.clear();
  46. auto &inputs = cnode->inputs();
  47. if (inputs.empty()) {
  48. MS_LOG(EXCEPTION) << "Invalid inputs.";
  49. }
  50. auto context = MsContext::GetInstance();
  51. MS_EXCEPTION_IF_NULL(context);
  52. AbstractBasePtrList args_spec_list;
  53. auto primitive = GetValueNode<PrimitivePtr>(inputs[0]);
  54. auto input_size = common::AnfAlgo::GetInputTensorNum(cnode);
  55. bool skip_nop_node = !context->get_param<bool>(MS_CTX_ENABLE_MINDRT);
  56. for (size_t i = 0; i < input_size; i++) {
  57. AnfNodePtr real_input = nullptr;
  58. size_t real_input_index = 0;
  59. if (real_input_nodes_.count(i) > 0) {
  60. real_input = real_input_nodes_[i].first.lock();
  61. real_input_index = real_input_nodes_[i].second;
  62. } else {
  63. auto input_node_with_index = common::AnfAlgo::GetPrevNodeOutput(cnode, i);
  64. real_input = input_node_with_index.first;
  65. real_input_index = input_node_with_index.second;
  66. }
  67. MS_EXCEPTION_IF_NULL(real_input);
  68. if (skip_nop_node) {
  69. InferShapeForNopNode(real_input);
  70. }
  71. if (depend_list_.find(i) != depend_list_.end()) {
  72. auto output_addr = AnfAlgo::GetMutableOutputAddr(real_input, real_input_index, skip_nop_node);
  73. auto shapes = trans::GetRuntimePaddingShape(real_input, real_input_index);
  74. auto host_type = common::AnfAlgo::GetOutputInferDataType(real_input, real_input_index);
  75. auto out_tensor = std::make_shared<tensor::Tensor>(host_type, shapes);
  76. MS_EXCEPTION_IF_NULL(out_tensor);
  77. // The second parameter must be false, otherwise the device address cannot be released and allocated, and the
  78. // address size will be wrong in the dynamic shape scenario.
  79. out_tensor->set_device_address(output_addr, false);
  80. auto ret2 = depend_tensor_map_.try_emplace(i, out_tensor);
  81. if (!ret2.second) {
  82. MS_LOG(EXCEPTION) << "Insert map failed.";
  83. }
  84. out_tensor->data_sync();
  85. // cppcheck-suppress unreadVariable
  86. auto lock = AnfUtils::GetAbstractLock(real_input.get());
  87. auto real_abs = real_input->abstract();
  88. if (real_abs->isa<abstract::AbstractTensor>()) {
  89. real_abs->set_value(out_tensor);
  90. } else if (real_abs->isa<abstract::AbstractTuple>()) {
  91. auto abstract_tuple = real_abs->cast<abstract::AbstractTuplePtr>();
  92. MS_EXCEPTION_IF_NULL(abstract_tuple);
  93. MS_EXCEPTION_IF_CHECK_FAIL((real_input_index < abstract_tuple->elements().size()), "Index is out of range.");
  94. auto tuple_elements = abstract_tuple->elements()[real_input_index];
  95. tuple_elements->set_value(out_tensor);
  96. }
  97. }
  98. common::AnfAlgo::AddArgList(&args_spec_list, real_input, real_input_index);
  99. }
  100. auto eval_result = opt::CppInferShape(primitive, args_spec_list);
  101. cnode->set_abstract(eval_result);
  102. }
  103. void KernelMod::UpdateOutputSizeList() {
  104. auto node = anf_node_.lock();
  105. MS_EXCEPTION_IF_NULL(node);
  106. auto cnode = node->cast<CNodePtr>();
  107. for (size_t i = 0; i < output_size_list_.size(); ++i) {
  108. auto ori_output_size = output_size_list_[i];
  109. auto real_output_size = AnfAlgo::GetOutputTensorMemSize(cnode, i);
  110. if (ori_output_size != real_output_size) {
  111. output_size_list_[i] = real_output_size;
  112. }
  113. }
  114. }
  115. bool KernelMod::InferShapeForDefiniteOutputNode(const CNodePtr &cnode) {
  116. MS_EXCEPTION_IF_NULL(cnode);
  117. if (!common::AnfAlgo::CheckPrimitiveType(cnode, prim::kPrimShape)) {
  118. return false;
  119. }
  120. auto input_size = common::AnfAlgo::GetInputTensorNum(cnode);
  121. if (input_size != 1) {
  122. MS_LOG(EXCEPTION) << "Node only has one input: " << cnode->fullname_with_scope();
  123. }
  124. auto cur_shape = dynamic_cast<mindspore::abstract::Shape *>(cnode->Shape().get())->shape();
  125. if (std::any_of(cur_shape.begin(), cur_shape.end(), [](int64_t x) { return x == kInvalidShape; })) {
  126. return false;
  127. }
  128. std::vector<int64_t> output_shape = {static_cast<int64_t>(cur_shape.size())};
  129. mindspore::abstract::BaseShapePtr shape = std::make_shared<mindspore::abstract::Shape>(output_shape);
  130. // cppcheck-suppress unreadVariable
  131. auto lock = AnfUtils::GetAbstractLock(cnode.get());
  132. auto abstract = cnode->abstract();
  133. MS_EXCEPTION_IF_NULL(abstract);
  134. abstract->set_shape(shape);
  135. return true;
  136. }
  137. void KernelMod::InferShapeForNopNode(const AnfNodePtr &input_node) {
  138. MS_EXCEPTION_IF_NULL(input_node);
  139. if (!common::AnfAlgo::IsNopNode(input_node) || !common::AnfAlgo::IsDynamicShape(input_node)) {
  140. MS_LOG(INFO) << "Input node is not a nop node, no need infer.";
  141. return;
  142. }
  143. if (!common::AnfAlgo::IsNeedSkipNopOpExecution(input_node)) {
  144. MS_LOG(INFO) << "The Nop node need execution, no need the InferShapeForNopNode.";
  145. return;
  146. }
  147. MS_LOG(INFO) << "Infer shape for nop node.";
  148. std::stack<AnfNodePtr> nop_road;
  149. nop_road.push(input_node);
  150. auto in_node = input_node;
  151. while (true) {
  152. auto input_node_with_idx = common::AnfAlgo::GetPrevNodeOutput(in_node, 0);
  153. in_node = input_node_with_idx.first;
  154. MS_EXCEPTION_IF_NULL(in_node);
  155. if (common::AnfAlgo::IsNopNode(in_node)) {
  156. nop_road.push(in_node);
  157. } else {
  158. break;
  159. }
  160. }
  161. while (!nop_road.empty()) {
  162. auto nop_node = nop_road.top();
  163. MS_EXCEPTION_IF_NULL(nop_node);
  164. AnfAlgo::InferShape(nop_node->cast<CNodePtr>());
  165. nop_road.pop();
  166. }
  167. }
  168. void KernelMod::GetDepndLists(const CNodePtr &cnode) {
  169. MS_EXCEPTION_IF_NULL(cnode);
  170. if (depend_list_.size() != 0) {
  171. return;
  172. }
  173. auto ret = abstract::GetDependsFormMap(cnode);
  174. if (ret.empty()) {
  175. MS_LOG(DEBUG) << "No dynamic_shape_depends found.";
  176. return;
  177. }
  178. MS_LOG(INFO) << "Have depends.";
  179. (void)std::transform(ret.begin(), ret.end(), std::inserter(depend_list_, depend_list_.begin()),
  180. [](const int64_t &value) { return static_cast<int>(value); });
  181. MS_LOG(INFO) << "Init End.";
  182. }
  183. bool KernelMod::NeedSkipExecute(const CNodePtr &cnode) {
  184. // Skip run ReduceSum when axis is a Empty Tensor
  185. MS_EXCEPTION_IF_NULL(cnode);
  186. auto op_name = common::AnfAlgo::GetCNodeName(cnode);
  187. if (op_name != kReduceSumOpName) {
  188. return false;
  189. }
  190. const size_t axes_index = 1;
  191. if (cnode->inputs().size() <= axes_index + 1) {
  192. return false;
  193. }
  194. auto input_axes = cnode->input(axes_index + 1);
  195. // cppcheck-suppress unreadVariable
  196. auto lock = AnfUtils::GetAbstractLock(input_axes.get());
  197. auto axes_abs = input_axes->abstract()->Clone();
  198. MS_EXCEPTION_IF_NULL(axes_abs);
  199. auto axes_shape = AnfAlgo::GetInputDeviceShape(cnode, axes_index);
  200. if (axes_abs->isa<abstract::AbstractTensor>()) {
  201. if (std::any_of(axes_shape.begin(), axes_shape.end(), [](ssize_t shape) { return shape == 0; })) {
  202. return true;
  203. }
  204. }
  205. return false;
  206. }
  207. } // namespace kernel
  208. } // namespace mindspore