diff --git a/src/TensorFlowNET.Keras/Preprocessings/Tokenizer.cs b/src/TensorFlowNET.Keras/Preprocessings/Tokenizer.cs
index 3bf14ce5..29cbec8e 100644
--- a/src/TensorFlowNET.Keras/Preprocessings/Tokenizer.cs
+++ b/src/TensorFlowNET.Keras/Preprocessings/Tokenizer.cs
@@ -16,6 +16,10 @@ namespace Tensorflow.Keras.Text
/// (each integer being the index of a token in a dictionary) or into a vector where the coefficient for
/// each token could be binary, based on word count, based on tf-idf...
///
+ /// <remarks>
+ /// This code is a fairly direct port of the Python Keras text-preprocessing code found at:
+ /// https://github.com/keras-team/keras-preprocessing/blob/master/keras_preprocessing/text.py
+ /// </remarks>
public class Tokenizer
{
private readonly int num_words;
@@ -51,7 +55,7 @@ namespace Tensorflow.Keras.Text
this.split = split;
this.char_level = char_level;
this.oov_token = oov_token;
- this.analyzer = analyzer;
+ this.analyzer = analyzer != null ? analyzer : (text) => TextApi.text_to_word_sequence(text, filters, lower, split);
}
///
diff --git a/src/TensorFlowNET.Keras/TextApi.cs b/src/TensorFlowNET.Keras/TextApi.cs
index 2e62e25b..8ce8d685 100644
--- a/src/TensorFlowNET.Keras/TextApi.cs
+++ b/src/TensorFlowNET.Keras/TextApi.cs
@@ -17,14 +17,7 @@ namespace Tensorflow.Keras
string oov_token = null,
Func> analyzer = null)
{
- if (analyzer != null)
- {
- return new Keras.Text.Tokenizer(num_words, filters, lower, split, char_level, oov_token, analyzer);
- }
- else
- {
- return new Keras.Text.Tokenizer(num_words, filters, lower, split, char_level, oov_token, (text) => text_to_word_sequence(text, filters, lower, split));
- }
+ return new Keras.Text.Tokenizer(num_words, filters, lower, split, char_level, oov_token, analyzer);
}
public static IEnumerable text_to_word_sequence(string text, string filters = DefaultFilter, bool lower = true, char split = ' ')