From 13be215f7307e38a3405da4d2672a02fd356316f Mon Sep 17 00:00:00 2001
From: Niklas Gustafsson <niklasg@microsoft.com>
Date: Fri, 29 Jan 2021 16:05:10 -0800
Subject: [PATCH] Cleaned up defaulting the string analyzer in Tokenizer.

---
 src/TensorFlowNET.Keras/Preprocessings/Tokenizer.cs | 6 +++++-
 src/TensorFlowNET.Keras/TextApi.cs                  | 9 +--------
 2 files changed, 6 insertions(+), 9 deletions(-)
diff --git a/src/TensorFlowNET.Keras/Preprocessings/Tokenizer.cs b/src/TensorFlowNET.Keras/Preprocessings/Tokenizer.cs
index 3bf14ce5..29cbec8e 100644
--- a/src/TensorFlowNET.Keras/Preprocessings/Tokenizer.cs
+++ b/src/TensorFlowNET.Keras/Preprocessings/Tokenizer.cs
@@ -16,6 +16,10 @@ namespace Tensorflow.Keras.Text
     /// (each integer being the index of a token in a dictionary) or into a vector where the coefficient for 
     /// each token could be binary, based on word count, based on tf-idf...
     /// </summary>
+    /// <remarks>
+    /// This code is a fairly direct port of the Python Keras text-preprocessing code found at:
+    /// https://github.com/keras-team/keras-preprocessing/blob/master/keras_preprocessing/text.py
+    /// </remarks>
     public class Tokenizer
     {
         private readonly int num_words;
@@ -51,7 +55,7 @@ namespace Tensorflow.Keras.Text
             this.split = split;
             this.char_level = char_level;
             this.oov_token = oov_token;
-            this.analyzer = analyzer;
+            this.analyzer = analyzer != null ? analyzer : (text) => TextApi.text_to_word_sequence(text, filters, lower, split);
         }
 
         /// <summary>
diff --git a/src/TensorFlowNET.Keras/TextApi.cs b/src/TensorFlowNET.Keras/TextApi.cs
index 2e62e25b..8ce8d685 100644
--- a/src/TensorFlowNET.Keras/TextApi.cs
+++ b/src/TensorFlowNET.Keras/TextApi.cs
@@ -17,14 +17,7 @@ namespace Tensorflow.Keras
                 string oov_token = null,
                 Func<string, IEnumerable<string>> analyzer = null)
         {
-            if (analyzer != null)
-            {
-                return new Keras.Text.Tokenizer(num_words, filters, lower, split, char_level, oov_token, analyzer);
-            }
-            else
-            {
-                return new Keras.Text.Tokenizer(num_words, filters, lower, split, char_level, oov_token, (text) => text_to_word_sequence(text, filters, lower, split));
-            }
+            return new Keras.Text.Tokenizer(num_words, filters, lower, split, char_level, oov_token, analyzer);
         }
 
         public static IEnumerable<string> text_to_word_sequence(string text, string filters = DefaultFilter, bool lower = true, char split = ' ')