| @@ -19,7 +19,7 @@ utils provides some general methods for NLP text processing. | |||||
| """ | """ | ||||
| import platform | import platform | ||||
| from .transforms import Lookup, JiebaTokenizer, UnicodeCharTokenizer, Ngram, WordpieceTokenizer, TruncateSequencePair, \ | from .transforms import Lookup, JiebaTokenizer, UnicodeCharTokenizer, Ngram, WordpieceTokenizer, TruncateSequencePair, \ | ||||
| ToNumber, SlidingWindow, SentencePieceTokenizer | |||||
| ToNumber, SlidingWindow, SentencePieceTokenizer, PythonTokenizer | |||||
| from .utils import to_str, to_bytes, JiebaMode, Vocab, NormalizeForm, SentencePieceVocab, SentencePieceModel, \ | from .utils import to_str, to_bytes, JiebaMode, Vocab, NormalizeForm, SentencePieceVocab, SentencePieceModel, \ | ||||
| SPieceTokenizerOutType, SPieceTokenizerLoadType | SPieceTokenizerOutType, SPieceTokenizerLoadType | ||||
| @@ -33,7 +33,7 @@ __all__ = [ | |||||
| if platform.system().lower() != 'windows': | if platform.system().lower() != 'windows': | ||||
| from .transforms import UnicodeScriptTokenizer, WhitespaceTokenizer, CaseFold, NormalizeUTF8, \ | from .transforms import UnicodeScriptTokenizer, WhitespaceTokenizer, CaseFold, NormalizeUTF8, \ | ||||
| RegexReplace, RegexTokenizer, BasicTokenizer, BertTokenizer, PythonTokenizer | |||||
| RegexReplace, RegexTokenizer, BasicTokenizer, BertTokenizer | |||||
| __all__.append(["UnicodeScriptTokenizer", "WhitespaceTokenizer", "CaseFold", "NormalizeUTF8", | __all__.append(["UnicodeScriptTokenizer", "WhitespaceTokenizer", "CaseFold", "NormalizeUTF8", | ||||
| "RegexReplace", "RegexTokenizer", "BasicTokenizer", "BertTokenizer"]) | "RegexReplace", "RegexTokenizer", "BasicTokenizer", "BertTokenizer"]) | ||||