|
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091 |
- import collections
-
-
class TrieNode:
    """A single node of a character trie.

    ``children`` maps one character to the next ``TrieNode``; it is a
    ``defaultdict``, so indexing a missing character creates the child
    (use ``.get`` for read-only lookups).  ``is_word`` is True when the
    path from the root to this node spells a complete inserted word.
    """

    def __init__(self):
        self.children = collections.defaultdict(TrieNode)
        self.is_word = False

    def __repr__(self):
        """Render the subtree below this node as ``char -> char -> ...`` chains.

        A newline is emitted after every child that ends a word.  A child
        that is not the first one, ends a word and has no descendants is
        rendered as just its character.

        :return: textual rendering of all children (empty string for a leaf)
        """
        pieces = []
        for position, (char, child) in enumerate(self.children.items()):
            if position > 0 and child.is_word and not child.children:
                # Compact form for a trailing leaf word.
                pieces.append('{}\n'.format(char))
                continue
            # Previously a non-first word child was always printed without
            # its subtree, which hid every longer word sharing that prefix.
            terminator = '\n' if child.is_word else ''
            pieces.append('{} -> {}{}'.format(char, child, terminator))
        return ''.join(pieces)
-
-
class Trie_tree:
    """Character trie used to store a vocabulary and scan text for its words."""

    def __init__(self):
        self.root = TrieNode()

    def insert(self, word):
        """Add ``word`` to the trie.

        :param word: str, the word to store
        """
        current = self.root
        for letter in word:
            # ``children`` is a defaultdict, so a missing child is created here.
            current = current.children[letter]
        current.is_word = True

    def load_vocab(self, vocabs):
        """Insert every word of the iterable ``vocabs``.

        :param vocabs: iterable of str
        """
        for v in vocabs:
            self.insert(v)

    def search(self, word):
        """Return True if ``word`` was inserted as a complete word.

        :param word: str
        :return: bool
        """
        current = self.root
        for letter in word:
            # ``.get`` avoids creating nodes during a read-only lookup.
            current = current.children.get(letter)
            if current is None:
                return False
        return current.is_word

    def starts_with(self, prefix):
        """Return True if some path in the trie starts with ``prefix``.

        :param prefix: str
        :return: bool
        """
        current = self.root
        for letter in prefix:
            current = current.children.get(letter)
            if current is None:
                return False
        return True

    def __repr__(self):
        """Return the root's rendering with runs of newlines collapsed to one."""
        text = repr(self.root)
        # Loop until stable: a fixed number of replace passes leaves blank
        # lines behind when many newlines are adjacent.
        while '\n\n' in text:
            text = text.replace('\n\n', '\n')
        return text

    def find_one(self, word):
        """Find every vocabulary word occurring in ``word``.

        Despite its name, this method returns all matches, not only the
        first one: for each start position, every stored word that begins
        there is reported, in order of start position and then length.

        :param word: str, the text to scan
        :return: list of matched substrings (empty list when nothing matches)

        >>> a = Trie_tree()
        >>> a.load_vocab(['免费', '动作', '动作电影'])
        >>> a.find_one('免费看动作电影')
        ['免费', '动作', '动作电影']
        """
        matches = []
        for start in range(len(word)):
            node = self.root
            # Walk from ``start`` itself so one-character words are found too.
            for end in range(start, len(word)):
                node = node.children.get(word[end])
                if node is None:
                    break
                if node.is_word:
                    matches.append(word[start:end + 1])
        return matches
-
-
if __name__ == '__main__':
    # Small demo: build a trie from three words, print its rendering,
    # then print what find_one returns for a sample sentence.
    demo_trie = Trie_tree()
    demo_trie.load_vocab(["免费", "动作", "动作电影"])
    print(demo_trie)
    found = demo_trie.find_one('免费看动作电影')
    print(found)
|