You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

validate_path.py 2.7 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384
  1. # Copyright 2019 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ============================================================================
  15. """Validate the input path."""
  16. import os
  17. import re
  18. def check_valid_character_of_path(file_path):
  19. """
  20. Validates path.
  21. The output path of profiler only supports alphabets(a-zA-Z), digit(0-9) or {'-', '_', '.', '/'}.
  22. Note:
  23. Chinese and other paths are not supported at present.
  24. Args:
  25. path (str): Normalized Path.
  26. Returns:
  27. bool, whether valid.
  28. """
  29. re_path = r'^[/\\_a-zA-Z0-9-_.]+$'
  30. path_valid = re.fullmatch(re_path, file_path)
  31. if not path_valid:
  32. msg = "The output path of profiler only supports alphabets(a-zA-Z), " \
  33. "digit(0-9) or {'-', '_', '.', '/'}, but got the absolute path= " + file_path
  34. raise RuntimeError(msg)
  35. def validate_and_normalize_path(
  36. path,
  37. check_absolute_path=False,
  38. allow_parent_dir=True,
  39. ):
  40. """
  41. Validates path and returns its normalized form.
  42. If path has a valid scheme, treat path as url, otherwise consider path a
  43. unix local path.
  44. Note:
  45. File scheme (rfc8089) is currently not supported.
  46. Args:
  47. path (str): Path to be normalized.
  48. check_absolute_path (bool): Whether check path scheme is supported.
  49. allow_parent_dir (bool): Whether allow parent dir in path.
  50. Returns:
  51. str, normalized path.
  52. """
  53. if not path:
  54. raise RuntimeError("The path is invalid!")
  55. path_str = str(path)
  56. if not allow_parent_dir:
  57. path_components = path_str.split("/")
  58. if ".." in path_components:
  59. raise RuntimeError("The parent path is not allowed!")
  60. # path does not have valid schema, treat it as unix local path.
  61. if check_absolute_path:
  62. if not path_str.startswith("/"):
  63. raise RuntimeError("The path is invalid!")
  64. try:
  65. # most unix systems allow
  66. normalized_path = os.path.realpath(path)
  67. except ValueError:
  68. raise RuntimeError("The path is invalid!")
  69. check_valid_character_of_path(normalized_path)
  70. return normalized_path