You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

preprocess_imagenet_validate_dataset.py 2.5 kB

4 years ago
4 years ago
4 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152
  1. # Copyright 2019 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ==============================================================================
  15. """Process imagenet validate dataset.
  16. """
  17. import os
  18. import stat
  19. from mindspore import log as logger
  20. def preprocess_imagenet_validation_dataset(train_dataset_path, validation_dataset_path, image_label_mapping_file):
  21. """
  22. Call this function before read imagenet validation dataset.
  23. Args:
  24. train_dataset_path (str): train dataset path
  25. validation_dataset_path (str): validation dataset path
  26. image_label_mapping_file (str): imagenet_validate_dataset_2012_image_dir_map.txt file path
  27. """
  28. train_dataset_path = os.path.realpath(train_dataset_path)
  29. sub_dir = [dir_.name for dir_ in os.scandir(train_dataset_path) if dir_.is_dir()]
  30. for sub_dir_name in sub_dir:
  31. validate_sub_dir = os.path.join(validation_dataset_path, sub_dir_name)
  32. validate_sub_dir = os.path.realpath(validate_sub_dir)
  33. if not os.path.exists(validate_sub_dir):
  34. os.makedirs(validate_sub_dir, mode=stat.S_IRWXU)
  35. real_file_path = os.path.realpath(image_label_mapping_file)
  36. mappings = [mapping.strip() for mapping in open(real_file_path).readlines()]
  37. for mapping in mappings:
  38. image_dir = mapping.split(':')
  39. old_image_path = os.path.join(validation_dataset_path, image_dir[0])
  40. old_image_path = os.path.realpath(old_image_path)
  41. if not os.path.exists(old_image_path):
  42. logger.warning('Image is not existed %s', old_image_path)
  43. new_image_sub_dir = os.path.join(validation_dataset_path, image_dir[1])
  44. new_image_sub_dir = os.path.realpath(new_image_sub_dir)
  45. new_image_path = os.path.join(new_image_sub_dir, image_dir[0])
  46. new_image_path = os.path.realpath(new_image_path)
  47. if not os.path.exists(new_image_sub_dir):
  48. logger.warning('Image sub dir is not existed %s', new_image_sub_dir)
  49. os.rename(old_image_path, new_image_path)