Browse Source

!13884 Add RANK_TABLE_FILE for PyNative

From: @jojobugfree
Reviewed-by: @kisnwang,@chujinjin
Signed-off-by: @chujinjin
pull/13884/MERGE
mindspore-ci-bot Gitee 5 years ago
parent
commit
2dc88b2cc0
1 changed file with 18 additions and 2 deletions
  1. +18
    -2
      mindspore/ccsrc/backend/kernel_compiler/hccl/hccl_context.cc

+ 18
- 2
mindspore/ccsrc/backend/kernel_compiler/hccl/hccl_context.cc View File

@@ -19,6 +19,7 @@
#include "hccl/hccl.h"

// Name of the environment variable that InitHccl() queries first (via std::getenv) to obtain
// the config file path it passes to HcclCommInitClusterInfo.
constexpr auto kHcclConfigFile = "MINDSPORE_HCCL_CONFIG_PATH";
// Name of the fallback environment variable that InitHccl() queries when the primary variable
// is not set; if neither is set, InitHccl() logs an error and returns false.
constexpr auto kHcclConfigFileOld = "RANK_TABLE_FILE";

namespace mindspore {
namespace kernel {
@@ -37,10 +38,25 @@ bool HcclContext::InitHccl() {
}
auto config_file = std::getenv(kHcclConfigFile);
if (config_file == nullptr) {
MS_LOG(ERROR) << "Get hccl config file failed";
config_file = std::getenv(kHcclConfigFileOld);
if (config_file == nullptr) {
MS_LOG(ERROR) << "Get hccl rank table file failed. Please export MINDSPORE_HCCL_CONFIG_PATH or RANK_TABLE_FILE";
return false;
}
}

auto rank_id = GetRankId();
try {
rank_id_ = std::stoi(rank_id);
} catch (std::invalid_argument &e) {
MS_LOG(ERROR) << "Invalid rankd id env:" << rank_id;
return false;
}

if (rank_id_ < 0 || rank_id_ > 7) {
MS_LOG(ERROR) << "rank_id needs to be between 0-7";
return false;
}
rank_id_ = std::stoi(GetRankId());

auto hccl_result = HcclCommInitClusterInfo(config_file, rank_id_, &hccl_comm_);
if (hccl_result != HCCL_SUCCESS) {


Loading…
Cancel
Save