You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

utils.py 1.6 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849
  1. from typing import List
  2. def iob2(tags:List[str])->List[str]:
  3. """
  4. 检查数据是否是合法的IOB数据,如果是IOB1会被自动转换为IOB2。
  5. :param tags: 需要转换的tags
  6. """
  7. for i, tag in enumerate(tags):
  8. if tag == "O":
  9. continue
  10. split = tag.split("-")
  11. if len(split) != 2 or split[0] not in ["I", "B"]:
  12. raise TypeError("The encoding schema is not a valid IOB type.")
  13. if split[0] == "B":
  14. continue
  15. elif i == 0 or tags[i - 1] == "O": # conversion IOB1 to IOB2
  16. tags[i] = "B" + tag[1:]
  17. elif tags[i - 1][1:] == tag[1:]:
  18. continue
  19. else: # conversion IOB1 to IOB2
  20. tags[i] = "B" + tag[1:]
  21. return tags
  22. def iob2bioes(tags:List[str])->List[str]:
  23. """
  24. 将iob的tag转换为bmeso编码
  25. :param tags:
  26. :return:
  27. """
  28. new_tags = []
  29. for i, tag in enumerate(tags):
  30. if tag == 'O':
  31. new_tags.append(tag)
  32. else:
  33. split = tag.split('-')[0]
  34. if split == 'B':
  35. if i+1!=len(tags) and tags[i+1].split('-')[0] == 'I':
  36. new_tags.append(tag)
  37. else:
  38. new_tags.append(tag.replace('B-', 'S-'))
  39. elif split == 'I':
  40. if i + 1<len(tags) and tags[i+1].split('-')[0] == 'I':
  41. new_tags.append(tag)
  42. else:
  43. new_tags.append(tag.replace('I-', 'E-'))
  44. else:
  45. raise TypeError("Invalid IOB format.")
  46. return new_tags