You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

trie_tree_match.py 2.4 kB

4 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091
  1. import collections
  2. class TrieNode:
  3. def __init__(self):
  4. self.children = collections.defaultdict(TrieNode)
  5. self.is_word = False
  6. def __repr__(self):
  7. s = ''
  8. first = True
  9. for k, v in self.children.items():
  10. if first:
  11. if v.is_word:
  12. s += '{} -> {}\n'.format(k, v)
  13. else:
  14. s += '{} -> {}'.format(k, v)
  15. first = False
  16. continue
  17. if v.is_word:
  18. s += '{}\n'.format(k)
  19. else:
  20. s += '{} -> {}'.format(k, v)
  21. return s
  22. class Trie_tree:
  23. def __init__(self):
  24. self.root = TrieNode()
  25. def insert(self, word):
  26. current = self.root
  27. for letter in word:
  28. current = current.children[letter]
  29. current.is_word = True
  30. def load_vocab(self,vocabs):
  31. for v in vocabs:
  32. self.insert(v)
  33. def search(self, word):
  34. current = self.root
  35. for letter in word:
  36. current = current.children.get(letter)
  37. if current is None:
  38. return False
  39. return current.is_word
  40. def starts_with(self, prefix):
  41. current = self.root
  42. for letter in prefix:
  43. current = current.children.get(letter)
  44. if current is None:
  45. return False
  46. return True
  47. def __repr__(self):
  48. return repr(self.root).replace('\n\n', '\n').replace('\n\n', '\n')
  49. def find_one(self, word):
  50. '''找到第一个匹配的词
  51. :param word: str
  52. :return: 第一个匹配的词 or None
  53. >>> a = Trie()
  54. >>> a.insert('感冒')
  55. >>> a.find_one('我感冒了好难受怎么办')
  56. '感冒'
  57. '''
  58. res = []
  59. for i in range(len(word)):
  60. c = word[i]
  61. node = self.root.children.get(c)
  62. if node:
  63. for j in range(i + 1, len(word)):
  64. _c = word[j]
  65. node = node.children.get(_c)
  66. if node:
  67. if node.is_word:
  68. res.append(word[i:j + 1])
  69. else:
  70. break
  71. return res
  72. if __name__ == '__main__':
  73. a = Trie_tree()
  74. a.load_vocab(["免费","动作","动作电影"])
  75. print(a)
  76. print(a.find_one('免费看动作电影'))

No Description