You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

test_rename.py 2.0 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849
  1. # Copyright 2019 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ==============================================================================
  15. import mindspore.dataset as ds
  16. from mindspore import log as logger
  17. DATA_DIR = ["../data/dataset/testTFBert5Rows1/5TFDatas.data"]
  18. DATA_DIR_2 = ["../data/dataset/testTFBert5Rows2/5TFDatas.data"]
  19. SCHEMA_DIR = "../data/dataset/testTFBert5Rows1/datasetSchema.json"
  20. SCHEMA_DIR_2 = "../data/dataset/testTFBert5Rows2/datasetSchema.json"
  21. def test_rename():
  22. data1 = ds.TFRecordDataset(DATA_DIR_2, SCHEMA_DIR_2, shuffle=False)
  23. data2 = ds.TFRecordDataset(DATA_DIR_2, SCHEMA_DIR_2, shuffle=False)
  24. data2 = data2.rename(input_columns=["input_ids", "segment_ids"], output_columns=["masks", "seg_ids"])
  25. data = ds.zip((data1, data2))
  26. data = data.repeat(3)
  27. num_iter = 0
  28. for i, item in enumerate(data.create_dict_iterator()):
  29. logger.info("item[mask] is {}".format(item["masks"]))
  30. assert item["masks"].all() == item["input_ids"].all()
  31. logger.info("item[seg_ids] is {}".format(item["seg_ids"]))
  32. assert item["segment_ids"].all() == item["seg_ids"].all()
  33. # need to consume the data in the buffer
  34. num_iter += 1
  35. logger.info("Number of data in data: {}".format(num_iter))
  36. assert num_iter == 15
  37. if __name__ == '__main__':
  38. logger.info('===========test Rename Repeat===========')
  39. test_rename()
  40. logger.info('\n')