|
|
|
@@ -4893,15 +4893,15 @@ class BuildVocabDataset(DatasetOp): |
|
|
|
text.Vocab.from_dataset() |
|
|
|
|
|
|
|
Args: |
|
|
|
vocab(Vocab): vocab object |
|
|
|
columns(str or list, optional): column names to get words from. It can be a list of column names. |
|
|
|
(Default is None where all columns will be used. If any column isn't string type, will return error) |
|
|
|
vocab(Vocab): vocab object. |
|
|
|
columns(str or list, optional): column names to get words from. It can be a list of column names (Default is |
|
|
|
None, all columns are used; an error is returned if any column isn't of string type). |
|
|
|
freq_range(tuple, optional): A tuple of integers (min_frequency, max_frequency). Words within the frequency |
|
|
|
range would be kept. 0 <= min_frequency <= max_frequency <= total_words. min_frequency/max_frequency |
|
|
|
can be None, which corresponds to 0/total_words separately (default is None, all words are included) |
|
|
|
can be None, which corresponds to 0/total_words respectively (default is None, all words are included). |
|
|
|
top_k(int, optional): top_k > 0. Number of words to be built into vocab. top_k most frequent words are |
|
|
|
taken. top_k is taken after freq_range. If not enough top_k, all words will be taken. (default is None |
|
|
|
all words are included) |
|
|
|
taken. The top_k is taken after freq_range. If fewer than top_k words remain, all of them are taken (default is None, |
|
|
|
all words are included). |
|
|
|
|
|
|
|
Returns: |
|
|
|
BuildVocabDataset |
|
|
|
|