You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

dataset.py 2.4 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152
  1. # Copyright 2020 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ============================================================================
  15. """dataset base."""
  16. import os
  17. from mindspore import dataset as ds
  18. from mindspore.common import dtype as mstype
  19. from mindspore.dataset.transforms import c_transforms as C
  20. from mindspore.dataset.vision import Inter
  21. from mindspore.dataset.vision import c_transforms as CV
  def create_mnist_dataset(mode='train', num_samples=2, batch_size=2, *,
                           mnist_path='/home/workspace/mindspore_dataset/mnist',
                           num_parallel_workers=1):
      """Create a preprocessed, batched MNIST dataset for train or test.

      The pipeline reads MNIST without shuffling, casts the "label" column to
      int32, and transforms the "image" column by resizing to 32x32 with
      linear interpolation, rescaling by 1/255, rescaling again with the
      constants 0.1307 and 0.3081, and converting HWC layout to CHW. The
      result is batched and any incomplete final batch is dropped.

      Args:
          mode: Name of the sub-directory under ``mnist_path`` to read from;
              it is joined onto ``mnist_path`` as-is (e.g. 'train' or 'test').
          num_samples: Number of samples passed to ``ds.MnistDataset``.
          batch_size: Number of samples per batch.
          mnist_path: Root directory of the MNIST data. Keyword-only; the
              default is the location that used to be hard-coded here.
          num_parallel_workers: Worker count handed to every ``map`` call.
              Keyword-only; the default matches the previous fixed value of 1.

      Returns:
          The batched dataset object produced by ``mnist_ds.batch(...)``.
      """
      # define dataset
      # shuffle=False keeps the sample order fixed from run to run.
      data_dir = os.path.join(mnist_path, mode)
      mnist_ds = ds.MnistDataset(data_dir, num_samples=num_samples, shuffle=False)

      # define map operations
      resize_height, resize_width = 32, 32
      resize_op = CV.Resize((resize_height, resize_width), interpolation=Inter.LINEAR)  # Bilinear mode
      rescale_op = CV.Rescale(1.0 / 255.0, shift=0.0)
      # Assuming Rescale computes x * rescale + shift, this is (x - 0.1307) / 0.3081
      # (presumably the usual MNIST mean/std normalisation -- confirm against the API docs).
      rescale_nml_op = CV.Rescale(1 / 0.3081, -1 * 0.1307 / 0.3081)
      hwc2chw_op = CV.HWC2CHW()
      type_cast_op = C.TypeCast(mstype.int32)

      # apply map operations on labels
      mnist_ds = mnist_ds.map(operations=type_cast_op, input_columns="label",
                              num_parallel_workers=num_parallel_workers)

      # apply map operations on images, one map node per operation and in this
      # exact order: resize -> rescale to [0, 1] -> normalise -> HWC to CHW
      image_ops = (resize_op, rescale_op, rescale_nml_op, hwc2chw_op)
      for image_op in image_ops:
          mnist_ds = mnist_ds.map(operations=image_op, input_columns="image",
                                  num_parallel_workers=num_parallel_workers)

      # apply DatasetOps
      # drop_remainder=True discards a trailing batch smaller than batch_size.
      mnist_ds = mnist_ds.batch(batch_size=batch_size, drop_remainder=True)
      return mnist_ds