From 566a092a0e47f22146b1a94cbe994043ab99fe89 Mon Sep 17 00:00:00 2001 From: Oceania2018 Date: Sat, 19 Dec 2020 09:31:46 -0600 Subject: [PATCH 1/2] Add Tensorflow.Text project. --- TensorFlow.NET.sln | 28 +++++++++++++- src/TensorFlowNET.Console/Exploring.cs | 20 ++++++++++ src/TensorFlowNET.Console/Program.cs | 4 ++ ...nsole.csproj => Tensorflow.Console.csproj} | 3 +- .../Tensorflow.Binding.csproj | 1 - src/TensorFlowNET.Text/Enums/Reduction.cs | 12 ++++++ src/TensorFlowNET.Text/Enums/WordShape.cs | 14 +++++++ .../Operations/TextOps.ngrams.cs | 16 ++++++++ .../Operations/TextOps.wordshape.cs | 12 ++++++ src/TensorFlowNET.Text/Tensorflow.Text.csproj | 19 ++++++++++ src/TensorFlowNET.Text/TextApi.cs | 12 ++++++ src/TensorFlowNET.Text/TextInterface.cs | 37 +++++++++++++++++++ .../Tokenizers/ITokenizer.cs | 11 ++++++ .../Tokenizers/UnicodeScriptTokenizer.cs | 14 +++++++ .../Tokenizers/WhitespaceTokenizer.cs | 19 ++++++++++ .../Tensorflow.UnitTest.csproj | 5 ++- .../Text/TokenizerTest.cs | 19 ++++++++++ 17 files changed, 241 insertions(+), 5 deletions(-) create mode 100644 src/TensorFlowNET.Console/Exploring.cs rename src/TensorFlowNET.Console/{TensorFlowNET.Console.csproj => Tensorflow.Console.csproj} (83%) create mode 100644 src/TensorFlowNET.Text/Enums/Reduction.cs create mode 100644 src/TensorFlowNET.Text/Enums/WordShape.cs create mode 100644 src/TensorFlowNET.Text/Operations/TextOps.ngrams.cs create mode 100644 src/TensorFlowNET.Text/Operations/TextOps.wordshape.cs create mode 100644 src/TensorFlowNET.Text/Tensorflow.Text.csproj create mode 100644 src/TensorFlowNET.Text/TextApi.cs create mode 100644 src/TensorFlowNET.Text/TextInterface.cs create mode 100644 src/TensorFlowNET.Text/Tokenizers/ITokenizer.cs create mode 100644 src/TensorFlowNET.Text/Tokenizers/UnicodeScriptTokenizer.cs create mode 100644 src/TensorFlowNET.Text/Tokenizers/WhitespaceTokenizer.cs create mode 100644 test/TensorFlowNET.UnitTest/Text/TokenizerTest.cs diff --git a/TensorFlow.NET.sln 
b/TensorFlow.NET.sln index c5e28fd3..6d618d8d 100644 --- a/TensorFlow.NET.sln +++ b/TensorFlow.NET.sln @@ -9,10 +9,12 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Tensorflow.Benchmark", "src EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Tensorflow.UnitTest", "test\TensorFlowNET.UnitTest\Tensorflow.UnitTest.csproj", "{23C28035-2FCE-41F3-9A12-E73CE8A5AE32}" EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "TensorFlowNET.Console", "src\TensorFlowNET.Console\TensorFlowNET.Console.csproj", "{03F06299-3F4B-4449-A709-3A647657BC0C}" +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Tensorflow.Console", "src\TensorFlowNET.Console\Tensorflow.Console.csproj", "{03F06299-3F4B-4449-A709-3A647657BC0C}" EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Tensorflow.Keras", "src\TensorFlowNET.Keras\Tensorflow.Keras.csproj", "{49D71826-C03D-4FA7-9BAC-22C1327E65CF}" EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Tensorflow.Text", "src\TensorFlowNET.Text\Tensorflow.Text.csproj", "{1AB8108D-4FFE-4A16-88E7-328EAF686370}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -149,6 +151,30 @@ Global {49D71826-C03D-4FA7-9BAC-22C1327E65CF}.Release|x64.Build.0 = Release|x64 {49D71826-C03D-4FA7-9BAC-22C1327E65CF}.Release|x86.ActiveCfg = Release|Any CPU {49D71826-C03D-4FA7-9BAC-22C1327E65CF}.Release|x86.Build.0 = Release|Any CPU + {1AB8108D-4FFE-4A16-88E7-328EAF686370}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {1AB8108D-4FFE-4A16-88E7-328EAF686370}.Debug|Any CPU.Build.0 = Debug|Any CPU + {1AB8108D-4FFE-4A16-88E7-328EAF686370}.Debug|x64.ActiveCfg = Debug|Any CPU + {1AB8108D-4FFE-4A16-88E7-328EAF686370}.Debug|x64.Build.0 = Debug|Any CPU + {1AB8108D-4FFE-4A16-88E7-328EAF686370}.Debug|x86.ActiveCfg = Debug|Any CPU + {1AB8108D-4FFE-4A16-88E7-328EAF686370}.Debug|x86.Build.0 = Debug|Any CPU + {1AB8108D-4FFE-4A16-88E7-328EAF686370}.Debug-Minimal|Any CPU.ActiveCfg = 
Debug|Any CPU + {1AB8108D-4FFE-4A16-88E7-328EAF686370}.Debug-Minimal|Any CPU.Build.0 = Debug|Any CPU + {1AB8108D-4FFE-4A16-88E7-328EAF686370}.Debug-Minimal|x64.ActiveCfg = Debug|Any CPU + {1AB8108D-4FFE-4A16-88E7-328EAF686370}.Debug-Minimal|x64.Build.0 = Debug|Any CPU + {1AB8108D-4FFE-4A16-88E7-328EAF686370}.Debug-Minimal|x86.ActiveCfg = Debug|Any CPU + {1AB8108D-4FFE-4A16-88E7-328EAF686370}.Debug-Minimal|x86.Build.0 = Debug|Any CPU + {1AB8108D-4FFE-4A16-88E7-328EAF686370}.Publish|Any CPU.ActiveCfg = Debug|Any CPU + {1AB8108D-4FFE-4A16-88E7-328EAF686370}.Publish|Any CPU.Build.0 = Debug|Any CPU + {1AB8108D-4FFE-4A16-88E7-328EAF686370}.Publish|x64.ActiveCfg = Debug|Any CPU + {1AB8108D-4FFE-4A16-88E7-328EAF686370}.Publish|x64.Build.0 = Debug|Any CPU + {1AB8108D-4FFE-4A16-88E7-328EAF686370}.Publish|x86.ActiveCfg = Debug|Any CPU + {1AB8108D-4FFE-4A16-88E7-328EAF686370}.Publish|x86.Build.0 = Debug|Any CPU + {1AB8108D-4FFE-4A16-88E7-328EAF686370}.Release|Any CPU.ActiveCfg = Release|Any CPU + {1AB8108D-4FFE-4A16-88E7-328EAF686370}.Release|Any CPU.Build.0 = Release|Any CPU + {1AB8108D-4FFE-4A16-88E7-328EAF686370}.Release|x64.ActiveCfg = Release|Any CPU + {1AB8108D-4FFE-4A16-88E7-328EAF686370}.Release|x64.Build.0 = Release|Any CPU + {1AB8108D-4FFE-4A16-88E7-328EAF686370}.Release|x86.ActiveCfg = Release|Any CPU + {1AB8108D-4FFE-4A16-88E7-328EAF686370}.Release|x86.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/src/TensorFlowNET.Console/Exploring.cs b/src/TensorFlowNET.Console/Exploring.cs new file mode 100644 index 00000000..4241c9bf --- /dev/null +++ b/src/TensorFlowNET.Console/Exploring.cs @@ -0,0 +1,20 @@ +using System; +using System.Collections.Generic; +using System.Text; +using static Tensorflow.Binding; +using static Tensorflow.TextApi; + +namespace Tensorflow +{ + public class Exploring + { + public void Run() + { + var docs = tf.constant(new[] { "Everything not saved will be lost." 
}); + var tokenizer = text.WhitespaceTokenizer(); + text.wordshape(docs, Text.WordShape.HAS_TITLE_CASE); + + throw new NotImplementedException(""); + } + } +} diff --git a/src/TensorFlowNET.Console/Program.cs b/src/TensorFlowNET.Console/Program.cs index 7b91e6ad..23647749 100644 --- a/src/TensorFlowNET.Console/Program.cs +++ b/src/TensorFlowNET.Console/Program.cs @@ -6,6 +6,10 @@ namespace Tensorflow { static void Main(string[] args) { + // this class is used to explore new features. + var exploring = new Exploring(); + // exploring.Run(); + // boot .net core 10.5M. var mm = new MemoryMonitor(); // warm up tensorflow.net 28.5M. diff --git a/src/TensorFlowNET.Console/TensorFlowNET.Console.csproj b/src/TensorFlowNET.Console/Tensorflow.Console.csproj similarity index 83% rename from src/TensorFlowNET.Console/TensorFlowNET.Console.csproj rename to src/TensorFlowNET.Console/Tensorflow.Console.csproj index 6cc631f4..8d7a66d1 100644 --- a/src/TensorFlowNET.Console/TensorFlowNET.Console.csproj +++ b/src/TensorFlowNET.Console/Tensorflow.Console.csproj @@ -8,11 +8,12 @@ - + + diff --git a/src/TensorFlowNET.Core/Tensorflow.Binding.csproj b/src/TensorFlowNET.Core/Tensorflow.Binding.csproj index 4ea95719..fa292679 100644 --- a/src/TensorFlowNET.Core/Tensorflow.Binding.csproj +++ b/src/TensorFlowNET.Core/Tensorflow.Binding.csproj @@ -82,7 +82,6 @@ TensorFlow .NET v0.30 is focused on making more Keras API work including: - diff --git a/src/TensorFlowNET.Text/Enums/Reduction.cs b/src/TensorFlowNET.Text/Enums/Reduction.cs new file mode 100644 index 00000000..aa725229 --- /dev/null +++ b/src/TensorFlowNET.Text/Enums/Reduction.cs @@ -0,0 +1,12 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.Text +{ + public enum Reduction + { + None, + STRING_JOIN + } +} diff --git a/src/TensorFlowNET.Text/Enums/WordShape.cs b/src/TensorFlowNET.Text/Enums/WordShape.cs new file mode 100644 index 00000000..c1117312 --- /dev/null +++ 
b/src/TensorFlowNET.Text/Enums/WordShape.cs @@ -0,0 +1,14 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.Text +{ + public enum WordShape + { + HAS_TITLE_CASE, + IS_UPPERCASE, + HAS_SOME_PUNCT_OR_SYMBOL, + IS_NUMERIC_VALUE + } +} diff --git a/src/TensorFlowNET.Text/Operations/TextOps.ngrams.cs b/src/TensorFlowNET.Text/Operations/TextOps.ngrams.cs new file mode 100644 index 00000000..0ea953dd --- /dev/null +++ b/src/TensorFlowNET.Text/Operations/TextOps.ngrams.cs @@ -0,0 +1,16 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.Text +{ + public partial class TextOps + { + public static Tensor ngrams(Tensor input, int width, + int axis = -1, + Reduction reduction_type = Reduction.None, + string string_separator = " ", + string name = null) + => throw new NotImplementedException(""); + } +} diff --git a/src/TensorFlowNET.Text/Operations/TextOps.wordshape.cs b/src/TensorFlowNET.Text/Operations/TextOps.wordshape.cs new file mode 100644 index 00000000..b0b2bf4f --- /dev/null +++ b/src/TensorFlowNET.Text/Operations/TextOps.wordshape.cs @@ -0,0 +1,12 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.Text +{ + public partial class TextOps + { + public static Tensor wordshape(Tensor input, WordShape pattern, string name = null) + => throw new NotImplementedException(""); + } +} diff --git a/src/TensorFlowNET.Text/Tensorflow.Text.csproj b/src/TensorFlowNET.Text/Tensorflow.Text.csproj new file mode 100644 index 00000000..9a56bea2 --- /dev/null +++ b/src/TensorFlowNET.Text/Tensorflow.Text.csproj @@ -0,0 +1,19 @@ + + + + netstandard2.0 + Tensorflow.Text + Tensorflow.Text + true + 0.0.1 + + + + DEBUG;TRACE + + + + + + + diff --git a/src/TensorFlowNET.Text/TextApi.cs b/src/TensorFlowNET.Text/TextApi.cs new file mode 100644 index 00000000..68a9c740 --- /dev/null +++ b/src/TensorFlowNET.Text/TextApi.cs @@ -0,0 +1,12 @@ +using System; +using 
System.Collections.Generic; +using System.Text; +using Tensorflow.Text; + +namespace Tensorflow +{ + public class TextApi + { + public static TextInterface text { get; } = new TextInterface(); + } +} diff --git a/src/TensorFlowNET.Text/TextInterface.cs b/src/TensorFlowNET.Text/TextInterface.cs new file mode 100644 index 00000000..a631bd57 --- /dev/null +++ b/src/TensorFlowNET.Text/TextInterface.cs @@ -0,0 +1,37 @@ +using System; +using System.Collections.Generic; +using System.Text; +using Tensorflow.Text.Tokenizers; + +namespace Tensorflow.Text +{ + public class TextInterface + { + public ITokenizer WhitespaceTokenizer() + => new WhitespaceTokenizer(); + + public Tensor wordshape(Tensor input, WordShape pattern, string name = null) + => TextOps.wordshape(input, pattern, name: name); + + /// <summary> + /// Create a tensor of n-grams based on the input data `data`. + /// </summary> + /// <param name="input"></param> + /// <param name="width"></param> + /// <param name="axis"></param> + /// <param name="reduction_type"></param> + /// <param name="string_separator"></param> + /// <param name="name"></param> + /// <returns></returns> + public static Tensor ngrams(Tensor input, int width, + int axis = -1, + Reduction reduction_type = Reduction.None, + string string_separator = " ", + string name = null) + => TextOps.ngrams(input, width, + axis: axis, + reduction_type: reduction_type, + string_separator: string_separator, + name: name); + } +} diff --git a/src/TensorFlowNET.Text/Tokenizers/ITokenizer.cs b/src/TensorFlowNET.Text/Tokenizers/ITokenizer.cs new file mode 100644 index 00000000..8b585d4d --- /dev/null +++ b/src/TensorFlowNET.Text/Tokenizers/ITokenizer.cs @@ -0,0 +1,11 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.Text.Tokenizers +{ + public interface ITokenizer + { + Tensor tokenize(Tensor input); + } +} diff --git a/src/TensorFlowNET.Text/Tokenizers/UnicodeScriptTokenizer.cs b/src/TensorFlowNET.Text/Tokenizers/UnicodeScriptTokenizer.cs new file mode 100644 index 00000000..c9c84525 --- /dev/null +++ b/src/TensorFlowNET.Text/Tokenizers/UnicodeScriptTokenizer.cs @@ -0,0 +1,14 @@ +using System; +using 
System.Text; + +namespace Tensorflow.Text.Tokenizers +{ + public class UnicodeScriptTokenizer : ITokenizer + { + public Tensor tokenize(Tensor input) + { + throw new NotImplementedException(); + } + } +} diff --git a/src/TensorFlowNET.Text/Tokenizers/WhitespaceTokenizer.cs b/src/TensorFlowNET.Text/Tokenizers/WhitespaceTokenizer.cs new file mode 100644 index 00000000..a0bbe473 --- /dev/null +++ b/src/TensorFlowNET.Text/Tokenizers/WhitespaceTokenizer.cs @@ -0,0 +1,19 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.Text.Tokenizers +{ + public class WhitespaceTokenizer : ITokenizer + { + /// <summary> + /// Tokenizes a tensor of UTF-8 strings on whitespaces. + /// </summary> + /// <param name="input"></param> + /// <returns></returns> + public Tensor tokenize(Tensor input) + { + throw new NotImplementedException(""); + } + } +} diff --git a/test/TensorFlowNET.UnitTest/Tensorflow.UnitTest.csproj b/test/TensorFlowNET.UnitTest/Tensorflow.UnitTest.csproj index 68b70eb4..f5792971 100644 --- a/test/TensorFlowNET.UnitTest/Tensorflow.UnitTest.csproj +++ b/test/TensorFlowNET.UnitTest/Tensorflow.UnitTest.csproj @@ -48,14 +48,15 @@ - + - + + diff --git a/test/TensorFlowNET.UnitTest/Text/TokenizerTest.cs b/test/TensorFlowNET.UnitTest/Text/TokenizerTest.cs new file mode 100644 index 00000000..3b8237b9 --- /dev/null +++ b/test/TensorFlowNET.UnitTest/Text/TokenizerTest.cs @@ -0,0 +1,19 @@ +using Microsoft.VisualStudio.TestTools.UnitTesting; +using System; +using System.Collections.Generic; +using System.Text; +using static Tensorflow.Binding; +using static Tensorflow.TextApi; + +namespace TensorFlowNET.UnitTest.Text +{ + [TestClass] + public class TokenizerTest + { + [TestMethod] + public void Tokenize() + { + var docs = tf.constant(new[] { "Everything not saved will be lost." }); + } + } +} From 405c4a441c023798aa0113dafbdd7316040fef7f Mon Sep 17 00:00:00 2001 From: Oceania2018 Date: Sat, 19 Dec 2020 10:03:44 -0600 Subject: [PATCH 2/2] Make RuntimeHelpers an internal class. 
--- src/TensorFlowNET.Core/Util/RuntimeHelpers.cs | 2 +- src/TensorFlowNET.Keras/Utils/RuntimeHelpers.cs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/TensorFlowNET.Core/Util/RuntimeHelpers.cs b/src/TensorFlowNET.Core/Util/RuntimeHelpers.cs index 4e0c28b1..22f158c4 100644 --- a/src/TensorFlowNET.Core/Util/RuntimeHelpers.cs +++ b/src/TensorFlowNET.Core/Util/RuntimeHelpers.cs @@ -1,6 +1,6 @@ namespace System.Runtime.CompilerServices { - public static class RuntimeHelpers + internal static class RuntimeHelpers { /// <summary> /// Slices the specified array using the specified range. diff --git a/src/TensorFlowNET.Keras/Utils/RuntimeHelpers.cs b/src/TensorFlowNET.Keras/Utils/RuntimeHelpers.cs index 4e0c28b1..22f158c4 100644 --- a/src/TensorFlowNET.Keras/Utils/RuntimeHelpers.cs +++ b/src/TensorFlowNET.Keras/Utils/RuntimeHelpers.cs @@ -1,6 +1,6 @@ namespace System.Runtime.CompilerServices { - public static class RuntimeHelpers + internal static class RuntimeHelpers { /// <summary> /// Slices the specified array using the specified range.