You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

init.cc 2.5 kB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071
  1. /**
  2. * Copyright 2021 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "distributed/init.h"
  17. #include <vector>
  18. #include <string>
  19. namespace mindspore {
  20. namespace distributed {
  21. bool Initialize() {
  22. if (!InitializeCluster()) {
  23. MS_LOG(ERROR) << "Failed to initialize cluster.";
  24. return false;
  25. }
  26. #if ((defined ENABLE_CPU) && (!defined _WIN32))
  27. // Server and Scheduler don't use collective communication library.
  28. auto node = cluster::ClusterContext::instance()->node();
  29. MS_EXCEPTION_IF_NULL(node);
  30. if (node->role() != ps::core::NodeRole::SERVER && node->role() != ps::core::NodeRole::SCHEDULER) {
  31. // Global rank id and size should be manually set if cluster is initialized by MindSpore communication framework.
  32. auto abstract_node = std::dynamic_pointer_cast<ps::core::AbstractNode>(cluster::ClusterContext::instance()->node());
  33. MS_EXCEPTION_IF_NULL(abstract_node);
  34. collective::CollectiveManager::instance()->set_global_rank_id(abstract_node->rank_id());
  35. collective::CollectiveManager::instance()->set_global_rank_size(abstract_node->worker_num());
  36. if (!InitializeCollective()) {
  37. MS_LOG(ERROR) << "Failed to initialize collective communication.";
  38. return false;
  39. }
  40. }
  41. #endif
  42. return true;
  43. }
  44. bool Finalize() {
  45. if (!FinalizeCollective()) {
  46. MS_LOG(ERROR) << "Failed to finalize collective communication.";
  47. return false;
  48. }
  49. if (!FinalizeCluster()) {
  50. MS_LOG(ERROR) << "Failed to finalize cluster.";
  51. return false;
  52. }
  53. return true;
  54. }
  55. bool InitializeCluster() { return cluster::ClusterContext::instance()->Initialize(); }
  56. bool FinalizeCluster() { return cluster::ClusterContext::instance()->Finalize(); }
  57. bool InitializeCollective() { return collective::CollectiveManager::instance()->Initialize(); }
  58. bool FinalizeCollective() { return collective::CollectiveManager::instance()->Finalize(); }
  59. } // namespace distributed
  60. } // namespace mindspore