You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

preprocess_imagenet_validate_dataset.py 2.5 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152
  1. # Copyright 2019 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ==============================================================================
  15. """Process imagenet validate dataset.
  16. """
  17. import os
  18. from mindspore import log as logger
  19. def preprocess_imagenet_validation_dataset(train_dataset_path, validation_dataset_path, image_label_mapping_file):
  20. """
  21. Call this function before read imagenet validation dataset.
  22. Args:
  23. train_dataset_path (str): train dataset path
  24. validation_dataset_path (str): validation dataset path
  25. image_label_mapping_file (str): imagenet_validate_dataset_2012_image_dir_map.txt file path
  26. """
  27. train_dataset_path = os.path.realpath(train_dataset_path)
  28. sub_dir = [dir.name for dir in os.scandir(train_dataset_path) if dir.is_dir()]
  29. for sub_dir_name in sub_dir:
  30. validate_sub_dir = os.path.join(validation_dataset_path, sub_dir_name)
  31. validate_sub_dir = os.path.realpath(validate_sub_dir)
  32. if not os.path.exists(validate_sub_dir):
  33. os.makedirs(validate_sub_dir)
  34. mappings = [mapping.strip() for mapping in open(image_label_mapping_file).readlines()]
  35. for mapping in mappings:
  36. image_dir = mapping.split(':')
  37. old_image_path = os.path.join(validation_dataset_path, image_dir[0])
  38. old_image_path = os.path.realpath(old_image_path)
  39. if not os.path.exists(old_image_path):
  40. logger.warning('Image is not existed %s', old_image_path)
  41. new_image_sub_dir = os.path.join(validation_dataset_path, image_dir[1])
  42. new_image_sub_dir = os.path.realpath(new_image_sub_dir)
  43. new_image_path = os.path.join(new_image_sub_dir, image_dir[0])
  44. new_image_path = os.path.realpath(new_image_path)
  45. if not os.path.exists(new_image_sub_dir):
  46. logger.warning('Image sub dir is not existed %s', new_image_sub_dir)
  47. os.rename(old_image_path, new_image_path)