You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

gpu_stream_assign.h 3.3 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273
  1. /**
  2. * Copyright 2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef MINDSPORE_CCSRC_DEVICE_GPU_GPU_STREAM_ASSIGN_H_
  17. #define MINDSPORE_CCSRC_DEVICE_GPU_GPU_STREAM_ASSIGN_H_
  18. #include <vector>
  19. #include <string>
  20. #include <memory>
  21. #include "session/kernel_graph.h"
  22. #include "session/anf_runtime_algorithm.h"
  23. namespace mindspore {
  24. namespace device {
  25. namespace gpu {
  // Kinds of stream switch known to GPU stream assignment.
  enum StreamSwitchType {
    kAllReduceStreamSwitch = 0,      // stream switch around AllReduce nodes
    kStreamSwitchInvalidType = 255,  // marker for an invalid / unknown type
  };
  27. struct SendRecvPair {
  28. StreamSwitchType stream_switch_type;
  29. CNodePtr mock_send_node;
  30. CNodePtr mock_recv_node;
  31. size_t send_node_offset;
  32. size_t recv_node_offset;
  33. };
  34. struct StreamSwitchNode {
  35. size_t offset;
  36. CNodePtr cnode;
  37. bool operator<(const StreamSwitchNode &n) const {
  38. if (offset < n.offset) {
  39. return true;
  40. } else if (offset == n.offset) {
  41. return AnfAlgo::GetCNodeName(cnode) == kSendOpName ? true : false;
  42. } else {
  43. return false;
  44. }
  45. }
  46. };
  47. void AssignGpuStream(const std::shared_ptr<session::KernelGraph> &kernel_graph);
  48. bool FindAllReduceStreamSwitchPos(const std::shared_ptr<session::KernelGraph> &kernel_graph,
  49. std::vector<SendRecvPair> *send_recv_pairs);
  50. // Find Send node position according to "mock" recv node.
  51. // "mock" recv node is a gpu kernel node after a real Recv node, e.g. AllReduce node.
  52. std::vector<CNodePtr>::iterator FindSendNodePos(std::vector<CNodePtr>::iterator begin,
  53. std::vector<CNodePtr>::iterator end, const CNodePtr mock_recv_node,
  54. StreamSwitchType stream_switch_type);
  55. // Find Recv node position according to "mock" send node.
  56. // "mock" send node is a gpu kernel node before a real send node, e.g. AllReduce node.
  57. std::vector<CNodePtr>::iterator FindRecvNodePos(std::vector<CNodePtr>::iterator begin,
  58. std::vector<CNodePtr>::iterator end, const CNodePtr mock_send_node,
  59. StreamSwitchType stream_switch_type);
  60. void InsertStreamSwitchNode(const std::shared_ptr<session::KernelGraph> &kernel_graph,
  61. const std::vector<SendRecvPair> &send_recv_pairs);
  62. bool GenSendRecvCNodesForAllReduce(const std::shared_ptr<session::KernelGraph> &kernel_graph,
  63. const CNodePtr &mock_send_node, const CNodePtr &mock_recv_node, CNodePtr *send_node,
  64. CNodePtr *recv_node);
  65. CNodePtr CreateStreamSwitchNode(const std::shared_ptr<session::KernelGraph> &kernel_graph, const std::string &name);
  66. } // namespace gpu
  67. } // namespace device
  68. } // namespace mindspore
  69. #endif // MINDSPORE_CCSRC_DEVICE_GPU_GPU_STREAM_ASSIGN_H_