You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

test_onehot_op.py 2.1 kB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""
Testing the one_hot op in DE
"""
  18. import mindspore.dataset.transforms.vision.c_transforms as vision
  19. import mindspore.dataset.transforms.c_transforms as data_trans
  20. import numpy as np
  21. import mindspore.dataset as ds
  22. from mindspore import log as logger
  23. DATA_DIR = ["../data/dataset/test_tf_file_3_images/train-0000-of-0001.data"]
  24. SCHEMA_DIR = "../data/dataset/test_tf_file_3_images/datasetSchema.json"
  25. def one_hot(index, depth):
  26. """
  27. Apply the one_hot
  28. """
  29. arr = np.zeros([1, depth], dtype=np.int32)
  30. arr[0, index] = 1
  31. return arr
  32. def test_one_hot():
  33. """
  34. Test one_hot
  35. """
  36. logger.info("Test one_hot")
  37. depth = 10
  38. # First dataset
  39. data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
  40. one_hot_op = data_trans.OneHot(depth)
  41. data1 = data1.map(input_columns=["label"], operations=one_hot_op, columns_order=["label"])
  42. # Second dataset
  43. data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["label"], shuffle=False)
  44. num_iter = 0
  45. for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()):
  46. assert len(item1) == len(item2)
  47. label1 = item1["label"]
  48. label2 = one_hot(item2["label"][0], depth)
  49. mse = np.sum(label1 - label2)
  50. logger.info("DE one_hot: {}, Numpy one_hot: {}, diff: {}".format(label1, label2, mse))
  51. num_iter += 1
  52. if __name__ == "__main__":
  53. test_one_hot()