Merge pull request !5140 from zyli2020/add_cuda_path_check
@@ -177,7 +177,7 @@ KernelPackPtr InsertCache(const std::string &kernel_name, const std::string &processor) {
   if (processor == kProcessorAiCore || processor == kProcessorAiCpu) {
     kernel_json = kCceKernelMeta;
   } else {
-    kernel_json = bin_map->GetKernelMetaPath();
+    kernel_json = bin_map->kernel_meta_path();
   }
   (void)kernel_json.append(kernel_name).append(kJsonSuffix);
   KernelPackPtr kernel_pack = std::make_shared<KernelPack>();
@@ -57,8 +57,8 @@ class KernelMeta {
   void RemoveKernelCache();
   std::string Search(const std::string &kernel_name) const;
   bool Insert(const std::string &kernel_name, const std::string &kernel_json);
-  std::string GetKernelMetaPath() { return kernel_meta_path_; }
+  std::string kernel_meta_path() const { return kernel_meta_path_; }
   bool initialized() const { return initialized_; }
   static KernelMeta *GetInstance() {
     static KernelMeta kernel_meta;
     return &kernel_meta;
@@ -0,0 +1,124 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "runtime/device/gpu/cuda_env_checker.h"
+#include <dirent.h>
+#include <cstdlib>
+#include <algorithm>
+#include "utils/log_adapter.h"
+
+namespace mindspore {
+namespace device {
+namespace gpu {
+bool CudaEnvChecker::CheckNvccInPath() {
+  if (already_check_nvcc_) {
+    return find_nvcc_;
+  }
+
+  auto checker = [](const std::string &cuda_path) {
+    bool find_nvcc = false;
+    DIR *dir = opendir(cuda_path.c_str());
+    if (dir == nullptr) {
+      return find_nvcc;
+    }
+    struct dirent *entry;
+    while ((entry = readdir(dir)) != nullptr) {
+      std::string bin_file = entry->d_name;
+      if (bin_file == kNvcc) {
+        find_nvcc = true;
+        break;
+      }
+    }
+    (void)closedir(dir);
+    return find_nvcc;
+  };
+  auto cuda_paths = GetCudaRealPaths();
+  find_nvcc_ = std::any_of(cuda_paths.begin(), cuda_paths.end(), checker);
+  already_check_nvcc_ = true;
+  return find_nvcc_;
+}
+
+std::vector<std::string> CudaEnvChecker::GetCudaRealPaths() const {
+  std::vector<std::string> res;
+  auto env_paths_ptr = std::getenv(kPathEnv);
+  if (env_paths_ptr == nullptr) {
+    MS_LOG(ERROR) << "Please export env: PATH";
+    return res;
+  }
+  std::string env_paths = env_paths_ptr;
+  if (env_paths.empty()) {
+    MS_LOG(ERROR) << "env PATH is empty";
+    return res;
+  }
+
+  std::string cur_path;
+  for (const auto &ch : env_paths) {
+    if (ch != ':') {
+      cur_path += ch;
+      continue;
+    }
+    auto real_path_pair = IsCudaRealPath(cur_path);
+    if (real_path_pair.second) {
+      res.push_back(real_path_pair.first);
+    }
+    cur_path.clear();
+  }
+  if (!cur_path.empty()) {
+    auto last_real_path_pair = IsCudaRealPath(cur_path);
+    if (last_real_path_pair.second) {
+      res.push_back(last_real_path_pair.first);
+    }
+  }
+  return res;
+}
+
+std::pair<std::string, bool> CudaEnvChecker::IsCudaRealPath(const std::string &path) const {
+  std::string real_path = path;
+  bool valid_path = false;
+  // 8: string length of kCudaSoftLinkPath
+  if (real_path.size() < 8) {
+    return {"", false};
+  }
+
+  // remove redundant spaces at both ends of the path
+  auto front_space_pos = real_path.find_first_not_of(' ');
+  if (front_space_pos != 0) {
+    real_path.erase(0, front_space_pos);
+  }
+  auto back_space_pos = real_path.find_last_not_of(' ');
+  if (back_space_pos != real_path.size() - 1) {
+    real_path.erase(back_space_pos + 1);
+  }
+
+  auto cuda_softlink_path_pos = real_path.rfind(kCudaSoftLinkPath);
+  auto cuda_real_path_pos = real_path.rfind(kCudaRealPath);
+  auto start = (cuda_softlink_path_pos == std::string::npos || cuda_real_path_pos == std::string::npos)
+                 ? std::min(cuda_softlink_path_pos, cuda_real_path_pos)
+                 : std::max(cuda_softlink_path_pos, cuda_real_path_pos);
+  if (start == std::string::npos) {
+    return {"", false};
+  }
+
+  // the entry is valid only when it ends right after "bin" (an optional trailing '/' is allowed)
+  auto end = real_path.find('n', start);
+  valid_path = (end == real_path.size() - 1) ? true : ((end == real_path.size() - 2) && (real_path.back() == '/'));
+  return {real_path.substr(0, end + 1), valid_path};
+}
+}  // namespace gpu
+}  // namespace device
+}  // namespace mindspore
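Note: IsCudaRealPath only accepts a PATH entry that ends with the soft-link layout "cuda/bin" or the versioned layout "cuda-10.1/bin", with an optional trailing '/'. A minimal standalone sketch of that PATH-splitting idea, for illustration only and not part of the patch (the EndsWith helper and the main driver are hypothetical):

#include <cstdlib>
#include <iostream>
#include <sstream>
#include <string>

// Hypothetical helper: true when `path` ends with `suffix`.
bool EndsWith(const std::string &path, const std::string &suffix) {
  return path.size() >= suffix.size() &&
         path.compare(path.size() - suffix.size(), suffix.size(), suffix) == 0;
}

int main() {
  const char *env = std::getenv("PATH");
  if (env == nullptr) {
    std::cerr << "PATH is not set" << std::endl;
    return 1;
  }
  std::istringstream paths(env);
  std::string entry;
  while (std::getline(paths, entry, ':')) {
    // Same idea as IsCudaRealPath: accept the soft-link layout ("cuda/bin")
    // and the versioned layout ("cuda-10.1/bin"), plus an optional trailing '/'.
    if (EndsWith(entry, "cuda/bin") || EndsWith(entry, "cuda/bin/") ||
        EndsWith(entry, "cuda-10.1/bin") || EndsWith(entry, "cuda-10.1/bin/")) {
      std::cout << "candidate CUDA bin directory: " << entry << std::endl;
    }
  }
  return 0;
}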
@@ -0,0 +1,54 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_CCSRC_RUNTIME_DEVICE_GPU_CUDA_ENV_CHECKER_H_
+#define MINDSPORE_CCSRC_RUNTIME_DEVICE_GPU_CUDA_ENV_CHECKER_H_
+
+#include <vector>
+#include <string>
+#include <utility>
+
+namespace mindspore {
+namespace device {
+namespace gpu {
+class CudaEnvChecker {
+ public:
+  bool CheckNvccInPath();
+
+  static CudaEnvChecker &GetInstance() {
+    static CudaEnvChecker instance;
+    return instance;
+  }
+
+ private:
+  CudaEnvChecker() = default;
+  ~CudaEnvChecker() = default;
+  CudaEnvChecker(const CudaEnvChecker &);
+  CudaEnvChecker &operator=(const CudaEnvChecker &);
+
+  std::vector<std::string> GetCudaRealPaths() const;
+  std::pair<std::string, bool> IsCudaRealPath(const std::string &path) const;
+
+  bool already_check_nvcc_ = false;
+  bool find_nvcc_ = false;
+
+  static constexpr auto kPathEnv = "PATH";
+  static constexpr auto kNvcc = "nvcc";
+  static constexpr auto kCudaSoftLinkPath = "cuda/bin";
+  static constexpr auto kCudaRealPath = "cuda-10.1/bin";
+};
+}  // namespace gpu
+}  // namespace device
+}  // namespace mindspore
+#endif  // MINDSPORE_CCSRC_RUNTIME_DEVICE_GPU_CUDA_ENV_CHECKER_H_
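Note: the checker is a lazy singleton, and CheckNvccInPath caches its result after the first scan of PATH. A minimal usage sketch under that assumption (the NvccAvailable wrapper is hypothetical, not part of the patch):

// Hypothetical wrapper showing how call sites reach the singleton; repeated
// calls only pay the directory scan once because the result is cached.
#include "runtime/device/gpu/cuda_env_checker.h"

bool NvccAvailable() {
  return mindspore::device::gpu::CudaEnvChecker::GetInstance().CheckNvccInPath();
}

The same call appears in the GpuBuild change below, guarded so it runs at most once per build pass.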
@@ -22,6 +22,7 @@
 #include "frontend/operator/ops.h"
 #include "backend/session/anf_runtime_algorithm.h"
 #include "backend/session/kernel_build_client.h"
+#include "runtime/device/gpu/cuda_env_checker.h"
 namespace mindspore {
 namespace device {
@@ -29,9 +30,8 @@ namespace gpu {
 void GpuBuild(const KernelGraphPtr &kernel_graph) {
   kernel::KernelMeta *bin_map = kernel::KernelMeta::GetInstance();
   MS_EXCEPTION_IF_NULL(bin_map);
-  auto pid = mindspore::kernel::GpuKernelBuildClient::Instance().AkgGetPid();
-  bin_map->Initialize(pid);
   MS_EXCEPTION_IF_NULL(kernel_graph);
+  bool already_check_nvcc = false;
   auto kernels = kernel_graph->execution_order();
   for (const auto &kernel : kernels) {
     std::string kernel_name = session::AnfRuntimeAlgorithm::GetCNodeName(kernel);
@@ -41,6 +41,19 @@ void GpuBuild(const KernelGraphPtr &kernel_graph) {
     }
     if (session::AnfRuntimeAlgorithm::GetKernelType(kernel) == KernelType::AKG_KERNEL) {
+      if (!bin_map->initialized()) {
+        auto pid = mindspore::kernel::GpuKernelBuildClient::Instance().AkgGetPid();
+        bin_map->Initialize(pid);
+      }
+      if (!already_check_nvcc) {
+        already_check_nvcc = true;
+        if (!CudaEnvChecker::GetInstance().CheckNvccInPath()) {
+          MS_LOG(EXCEPTION)
+            << "Failed to find the nvcc compiler. Please add the directory that contains nvcc to the PATH "
+               "environment variable, e.g. run: export PATH=${CUDA_PATH}/bin:${PATH}, where CUDA_PATH is the "
+               "CUDA installation path (e.g. /usr/local/cuda).";
+        }
+      }
       auto gpu_kernel_ptr = kernel::AkgGpuKernelBuild(kernel);
       if (!gpu_kernel_ptr) {
         MS_LOG(EXCEPTION) << "Build akg kernel op[" << kernel_name << "] failed";