You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

_check_version.py 14 kB

5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333
  1. # Copyright 2020 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ============================================================================
  15. """version and config check"""
  16. import os
  17. import sys
  18. import subprocess
  19. from pathlib import Path
  20. from abc import abstractmethod, ABCMeta
  21. from packaging import version
  22. from . import log as logger
  23. from .version import __version__
  24. from .default_config import __package_name__
  25. class EnvChecker(metaclass=ABCMeta):
  26. """basic class for environment check"""
  27. @abstractmethod
  28. def check_env(self, e):
  29. pass
  30. @abstractmethod
  31. def set_env(self):
  32. pass
  33. @abstractmethod
  34. def check_version(self):
  35. pass
  36. class GPUEnvChecker(EnvChecker):
  37. """gpu environment check"""
  38. def __init__(self):
  39. self.version = ["10.1"]
  40. self.cuda_path = "/usr/local/cuda"
  41. if os.path.exists(self.cuda_path):
  42. # cuda default path
  43. self.cuda_bin = self.cuda_path + "/bin"
  44. self.cuda_lib = self.cuda_path + "/lib64"
  45. self.cuda_version = self.cuda_path + "/version.txt"
  46. else:
  47. # custom or unknown environment
  48. self.cuda_path = ""
  49. self.cuda_bin = ""
  50. self.cuda_lib = ""
  51. self.cuda_version = ""
  52. # env
  53. self.path = os.getenv("PATH")
  54. self.ld_lib_path = os.getenv("LD_LIBRARY_PATH")
  55. # check
  56. self.path_check = "/cuda"
  57. self.ld_lib_path_check = "/cuda"
  58. self.v = "0"
  59. def check_env(self, e):
  60. self._check_env()
  61. raise e
  62. def set_env(self):
  63. if not self.cuda_bin:
  64. self._check_env()
  65. return
  66. if Path(self.cuda_bin).is_dir():
  67. os.environ['PATH'] = self.cuda_bin + ":" + os.environ['PATH']
  68. else:
  69. raise EnvironmentError(
  70. f"No such directory: {self.cuda_bin}, please check if cuda is installed correctly.")
  71. def check_version(self):
  72. if not Path(self.cuda_version).is_file():
  73. logger.warning("Using custom cuda path, cuda version checking is skipped, please make sure "
  74. "cuda version is supported, you can reference to the installation guidelines "
  75. "https://www.mindspore.cn/install")
  76. return
  77. v = self._read_version(self.cuda_version)
  78. v = version.parse(v)
  79. v_str = str(v.major) + "." + str(v.minor)
  80. if v_str not in self.version:
  81. logger.warning(f"MindSpore version {__version__} and cuda version {v_str} does not match, "
  82. "reference to the match info on: https://www.mindspore.cn/install")
  83. def _check_env(self):
  84. """gpu cuda path check"""
  85. if self.path is None or self.path_check not in self.path:
  86. logger.warning("Can not find nvcc compiler(need by mindspore-gpu), please check if you have set env "
  87. "PATH, you can reference to the installation guidelines https://www.mindspore.cn/install")
  88. if self.ld_lib_path is None or self.ld_lib_path_check not in self.ld_lib_path:
  89. logger.warning("Can not find cuda so(need by mindspore-gpu), please check if you have set env "
  90. "LD_LIBRARY_PATH, you can reference to the installation guidelines "
  91. "https://www.mindspore.cn/install")
  92. def _read_version(self, file_path):
  93. """get gpu version info"""
  94. with open(file_path, 'r') as f:
  95. all_info = f.readlines()
  96. for line in all_info:
  97. if line.startswith("CUDA Version"):
  98. self.v = line.strip().split("CUDA Version")[1]
  99. return self.v
  100. return self.v
  101. class AscendEnvChecker(EnvChecker):
  102. """ascend environment check"""
  103. def __init__(self):
  104. self.version = ["1.77.T20.0.B200"]
  105. atlas_nnae_version = "/usr/local/Ascend/nnae/latest/fwkacllib/version.info"
  106. atlas_toolkit_version = "/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/version.info"
  107. hisi_fwk_version = "/usr/local/Ascend/fwkacllib/version.info"
  108. if os.path.exists(atlas_nnae_version):
  109. # atlas default path
  110. self.fwk_path = "/usr/local/Ascend/nnae/latest/fwkacllib"
  111. self.op_impl_path = "/usr/local/Ascend/nnae/latest/opp/op_impl/built-in/ai_core/tbe"
  112. self.tbe_path = self.fwk_path + "/lib64"
  113. self.cce_path = self.fwk_path + "/ccec_compiler/bin"
  114. self.fwk_version = atlas_nnae_version
  115. self.op_path = "/usr/local/Ascend/nnae/latest/opp"
  116. elif os.path.exists(atlas_toolkit_version):
  117. # atlas default path
  118. self.fwk_path = "/usr/local/Ascend/ascend-toolkit/latest/fwkacllib"
  119. self.op_impl_path = "/usr/local/Ascend/ascend-toolkit/latest/opp/op_impl/built-in/ai_core/tbe"
  120. self.tbe_path = self.fwk_path + "/lib64"
  121. self.cce_path = self.fwk_path + "/ccec_compiler/bin"
  122. self.fwk_version = atlas_toolkit_version
  123. self.op_path = "/usr/local/Ascend/ascend-toolkit/latest/opp"
  124. elif os.path.exists(hisi_fwk_version):
  125. # hisi default path
  126. self.fwk_path = "/usr/local/Ascend/fwkacllib"
  127. self.op_impl_path = "/usr/local/Ascend/opp/op_impl/built-in/ai_core/tbe"
  128. self.tbe_path = self.fwk_path + "/lib64"
  129. self.cce_path = self.fwk_path + "/ccec_compiler/bin"
  130. self.fwk_version = hisi_fwk_version
  131. self.op_path = "/usr/local/Ascend/opp"
  132. else:
  133. # custom or unknown environment
  134. self.fwk_path = ""
  135. self.op_impl_path = ""
  136. self.tbe_path = ""
  137. self.cce_path = ""
  138. self.fwk_version = ""
  139. self.op_path = ""
  140. # env
  141. self.path = os.getenv("PATH")
  142. self.python_path = os.getenv("PYTHONPATH")
  143. self.ld_lib_path = os.getenv("LD_LIBRARY_PATH")
  144. self.ascend_opp_path = os.getenv("ASCEND_OPP_PATH")
  145. # check content
  146. self.path_check = "/fwkacllib/ccec_compiler/bin"
  147. self.python_path_check = "opp/op_impl/built-in/ai_core/tbe"
  148. self.ld_lib_path_check_fwk = "/fwkacllib/lib64"
  149. self.ld_lib_path_check_addons = "/add-ons"
  150. self.ascend_opp_path_check = "/op"
  151. self.v = ""
  152. def check_env(self, e):
  153. self._check_env()
  154. raise e
  155. def check_version(self):
  156. if not Path(self.fwk_version).is_file():
  157. logger.warning("Using custom Ascend 910 AI software package path, package version checking is skipped, "
  158. "please make sure Ascend 910 AI software package version is supported, you can reference to "
  159. "the installation guidelines https://www.mindspore.cn/install")
  160. return
  161. v = self._read_version(self.fwk_version)
  162. if v not in self.version:
  163. v_list = str([x for x in self.version])
  164. logger.warning(f"MindSpore version {__version__} and Ascend 910 AI software package version {v} does not "
  165. f"match, the version of software package expect one of {v_list}, "
  166. "please reference to the match info on: https://www.mindspore.cn/install")
  167. def check_deps_version(self):
  168. """
  169. te, topi, hccl wheel package version check
  170. in order to update the change of 'LD_LIBRARY_PATH' env, run a sub process
  171. """
  172. input_args = ["--mindspore_version=" + __version__]
  173. for v in self.version:
  174. input_args.append("--supported_version=" + v)
  175. deps_version_checker = os.path.join(os.path.split(os.path.realpath(__file__))[0], "_check_deps_version.py")
  176. call_cmd = [sys.executable, deps_version_checker] + input_args
  177. try:
  178. process = subprocess.run(call_cmd, timeout=3, text=True, capture_output=True, check=False)
  179. if process.stdout.strip() != "":
  180. logger.warning(process.stdout.strip())
  181. except subprocess.TimeoutExpired:
  182. logger.info("Package te, topi, hccl version check timed out, skip.")
  183. def set_env(self):
  184. if not self.tbe_path:
  185. self._check_env()
  186. return
  187. try:
  188. # pylint: disable=unused-import
  189. import te
  190. # pylint: disable=broad-except
  191. except Exception:
  192. if Path(self.tbe_path).is_dir():
  193. if os.getenv('LD_LIBRARY_PATH'):
  194. os.environ['LD_LIBRARY_PATH'] = self.tbe_path + ":" + os.environ['LD_LIBRARY_PATH']
  195. else:
  196. os.environ['LD_LIBRARY_PATH'] = self.tbe_path
  197. else:
  198. raise EnvironmentError(
  199. f"No such directory: {self.tbe_path}, Please check if Ascend 910 AI software package is "
  200. "installed correctly.")
  201. # check te version after set te env
  202. self.check_deps_version()
  203. if Path(self.op_impl_path).is_dir():
  204. # python path for sub process
  205. if os.getenv('PYTHONPATH'):
  206. os.environ['PYTHONPATH'] = self.op_impl_path + ":" + os.environ['PYTHONPATH']
  207. else:
  208. os.environ['PYTHONPATH'] = self.op_impl_path
  209. # sys path for this process
  210. sys.path.append(self.op_impl_path)
  211. os.environ['TBE_IMPL_PATH'] = self.op_impl_path
  212. else:
  213. raise EnvironmentError(
  214. f"No such directory: {self.op_impl_path}, Please check if Ascend 910 AI software package is "
  215. "installed correctly.")
  216. if Path(self.cce_path).is_dir():
  217. os.environ['PATH'] = self.cce_path + ":" + os.environ['PATH']
  218. else:
  219. raise EnvironmentError(
  220. f"No such directory: {self.cce_path}, Please check if Ascend 910 AI software package is "
  221. "installed correctly.")
  222. if self.op_path is None:
  223. pass
  224. elif Path(self.op_path).is_dir():
  225. os.environ['ASCEND_OPP_PATH'] = self.op_path
  226. else:
  227. raise EnvironmentError(
  228. f"No such directory: {self.op_path}, Please check if Ascend 910 AI software package is "
  229. "installed correctly.")
  230. def _check_env(self):
  231. """ascend dependence path check"""
  232. if self.path is None or self.path_check not in self.path:
  233. logger.warning("Can not find ccec_compiler(need by mindspore-ascend), please check if you have set env "
  234. "PATH, you can reference to the installation guidelines https://www.mindspore.cn/install")
  235. if self.python_path is None or self.python_path_check not in self.python_path:
  236. logger.warning(
  237. "Can not find tbe op implement(need by mindspore-ascend), please check if you have set env "
  238. "PYTHONPATH, you can reference to the installation guidelines "
  239. "https://www.mindspore.cn/install")
  240. if self.ld_lib_path is None or not (self.ld_lib_path_check_fwk in self.ld_lib_path and
  241. self.ld_lib_path_check_addons in self.ld_lib_path):
  242. logger.warning("Can not find driver so(need by mindspore-ascend), please check if you have set env "
  243. "LD_LIBRARY_PATH, you can reference to the installation guidelines "
  244. "https://www.mindspore.cn/install")
  245. if self.ascend_opp_path is None or self.ascend_opp_path_check not in self.ascend_opp_path:
  246. logger.warning(
  247. "Can not find opp path (need by mindspore-ascend), please check if you have set env ASCEND_OPP_PATH, "
  248. "you can reference to the installation guidelines https://www.mindspore.cn/install")
  249. def _read_version(self, file_path):
  250. """get ascend version info"""
  251. with open(file_path, 'r') as f:
  252. all_info = f.readlines()
  253. for line in all_info:
  254. if line.startswith("Version="):
  255. self.v = line.strip().split("=")[1]
  256. return self.v
  257. return self.v
  258. def check_version_and_env_config():
  259. """check version and env config"""
  260. if __package_name__.lower() == "mindspore-ascend":
  261. env_checker = AscendEnvChecker()
  262. elif __package_name__.lower() == "mindspore-gpu":
  263. env_checker = GPUEnvChecker()
  264. else:
  265. logger.info(f"Package version {__package_name__} does not need to check any environment variable, skipping.")
  266. return
  267. try:
  268. # pylint: disable=unused-import
  269. from . import _c_expression
  270. # check version of ascend site or cuda
  271. env_checker.check_version()
  272. env_checker.set_env()
  273. except ImportError as e:
  274. env_checker.check_env(e)
  275. def _set_pb_env():
  276. """Set env variable `PROTOCOL_BUFFERS` to prevent memory overflow."""
  277. if os.getenv("PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION") == "cpp":
  278. logger.info("Current env variable `PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=cpp`. "
  279. "When the checkpoint file is too large, "
  280. "it may cause memory limit error during load checkpoint file. "
  281. "This can be solved by set env `PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python`.")
  282. elif os.getenv("PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION") is None:
  283. logger.info("Setting the env `PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python` to prevent memory overflow "
  284. "during save or load checkpoint file.")
  285. os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"
# Executed at import time: run the backend version/environment check first,
# then pick the protobuf implementation setting.
check_version_and_env_config()
_set_pb_env()