You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

collective_init.cc 2.1 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657
  1. /**
  2. * Copyright 2019 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "device/gpu/distribution/collective_init.h"
  17. #include "utils/log_adapter.h"
  18. namespace mindspore {
  19. namespace device {
  20. namespace gpu {
  21. CollectiveInitializer &CollectiveInitializer::instance() {
  22. static CollectiveInitializer instance = {};
  23. return instance;
  24. }
  25. bool CollectiveInitializer::collective_inited() const { return collective_inited_; }
  26. const void *CollectiveInitializer::collective_handle() const { return collective_handle_; }
  27. void CollectiveInitializer::InitCollective() {
  28. void *handle = dlopen("libgpu_collective.so", RTLD_LAZY);
  29. if (handle == nullptr) {
  30. MS_LOG(EXCEPTION)
  31. << "Loading libgpu_collective.so failed. Many reasons could cause this:\n1.libgpu_collective.so is not "
  32. "installed.\n2.nccl is not "
  33. "installed or found.\n3.mpi is not installed or found";
  34. }
  35. auto mpi_init_funcptr = reinterpret_cast<InitMPI>(dlsym(handle, "InitMPI"));
  36. MS_EXCEPTION_IF_NULL(mpi_init_funcptr);
  37. (*mpi_init_funcptr)();
  38. CollectiveInitializer::instance().collective_inited_ = true;
  39. CollectiveInitializer::instance().collective_handle_ = handle;
  40. }
  41. void CollectiveInitializer::FinalizeCollective() {
  42. if (CollectiveInitializer::instance().collective_handle_ != nullptr) {
  43. if (dlclose(CollectiveInitializer::instance().collective_handle_) != 0) {
  44. MS_LOG(EXCEPTION) << "Closing libgpu_collective.so handle failed.";
  45. }
  46. }
  47. }
  48. } // namespace gpu
  49. } // namespace device
  50. } // namespace mindspore