You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

init.cc 2.6 kB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374
  1. /**
  2. * Copyright 2021 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "distributed/init.h"
  17. #include <vector>
  18. #include <string>
  19. namespace mindspore {
  20. namespace distributed {
  21. bool Initialize() {
  22. if (!InitializeCluster()) {
  23. MS_LOG(ERROR) << "Failed to initialize cluster.";
  24. return false;
  25. }
  26. #if ((defined ENABLE_CPU) && (!defined _WIN32))
  27. if (cluster::ClusterContext::instance()->initialized()) {
  28. // Server and Scheduler don't use collective communication library.
  29. auto node = cluster::ClusterContext::instance()->node();
  30. MS_EXCEPTION_IF_NULL(node);
  31. if (node->role() != ps::core::NodeRole::SCHEDULER && node->role() != ps::core::NodeRole::SERVER) {
  32. // Global rank id and size should be manually set if cluster is initialized by MindSpore communication framework.
  33. auto abstract_node =
  34. std::dynamic_pointer_cast<ps::core::AbstractNode>(cluster::ClusterContext::instance()->node());
  35. MS_EXCEPTION_IF_NULL(abstract_node);
  36. collective::CollectiveManager::instance()->set_global_rank_id(abstract_node->rank_id());
  37. collective::CollectiveManager::instance()->set_global_rank_size(abstract_node->worker_num());
  38. if (!InitializeCollective()) {
  39. MS_LOG(ERROR) << "Failed to initialize collective communication.";
  40. return false;
  41. }
  42. }
  43. }
  44. #endif
  45. return true;
  46. }
  47. bool Finalize() {
  48. if (!FinalizeCollective()) {
  49. MS_LOG(ERROR) << "Failed to finalize collective communication.";
  50. return false;
  51. }
  52. if (!FinalizeCluster()) {
  53. MS_LOG(ERROR) << "Failed to finalize cluster.";
  54. return false;
  55. }
  56. return true;
  57. }
  58. bool InitializeCluster() { return cluster::ClusterContext::instance()->Initialize(); }
  59. bool FinalizeCluster() { return cluster::ClusterContext::instance()->Finalize(); }
  60. bool InitializeCollective() { return collective::CollectiveManager::instance()->Initialize(); }
  61. bool FinalizeCollective() { return collective::CollectiveManager::instance()->Finalize(); }
  62. } // namespace distributed
  63. } // namespace mindspore