From 30dde0fde957163fe2a53fdcdf49755d4aef49ac Mon Sep 17 00:00:00 2001
From: Meinrad Recheis
Date: Thu, 11 Apr 2019 15:54:31 +0200
Subject: [PATCH] TestSuite: added all examples with very small training sets
 (runs through within seconds)

---
 .../KMeansClustering.cs                       |  5 ++++-
 .../LogisticRegression.cs                     | 14 ++++++++------
 .../TensorFlowNET.Examples/NearestNeighbor.cs |  8 +++++---
 .../TextClassification/DataHelpers.cs         | 11 ++++++-----
 .../TextClassificationTrain.cs                |  3 ++-
 .../Utility/MnistDataSet.cs                   | 19 ++++++++++++-------
 .../ExamplesTests/ExamplesTest.cs             |  6 +++---
 7 files changed, 40 insertions(+), 26 deletions(-)

diff --git a/test/TensorFlowNET.Examples/KMeansClustering.cs b/test/TensorFlowNET.Examples/KMeansClustering.cs
index bbbaad40..b7c80b89 100644
--- a/test/TensorFlowNET.Examples/KMeansClustering.cs
+++ b/test/TensorFlowNET.Examples/KMeansClustering.cs
@@ -18,6 +18,9 @@ namespace TensorFlowNET.Examples
         public int Priority => 8;
         public bool Enabled { get; set; } = true;
         public string Name => "K-means Clustering";
+        public int DataSize = 5000;
+        public int TestSize = 5000;
+        public int BatchSize = 100;
 
         Datasets mnist;
         NDArray full_data_x;
@@ -45,7 +48,7 @@ namespace TensorFlowNET.Examples
 
         public void PrepareData()
         {
-            mnist = MnistDataSet.read_data_sets("mnist", one_hot: true);
+            mnist = MnistDataSet.read_data_sets("mnist", one_hot: true, validation_size: DataSize, test_size:TestSize);
             full_data_x = mnist.train.images;
         }
     }
diff --git a/test/TensorFlowNET.Examples/LogisticRegression.cs b/test/TensorFlowNET.Examples/LogisticRegression.cs
index 32133ce2..89aaa27a 100644
--- a/test/TensorFlowNET.Examples/LogisticRegression.cs
+++ b/test/TensorFlowNET.Examples/LogisticRegression.cs
@@ -21,8 +21,10 @@ namespace TensorFlowNET.Examples
         public string Name => "Logistic Regression";
 
         private float learning_rate = 0.01f;
-        private int training_epochs = 10;
-        private int batch_size = 100;
+        public int TrainingEpochs = 10;
+        public int DataSize = 5000;
+        public int TestSize = 5000;
+        public int BatchSize = 100;
         private int display_step = 1;
 
         Datasets mnist;
@@ -57,14 +59,14 @@ namespace TensorFlowNET.Examples
                 sess.run(init);
 
                 // Training cycle
-                foreach (var epoch in range(training_epochs))
+                foreach (var epoch in range(TrainingEpochs))
                 {
                     var avg_cost = 0.0f;
-                    var total_batch = mnist.train.num_examples / batch_size;
+                    var total_batch = mnist.train.num_examples / BatchSize;
                     // Loop over all batches
                     foreach (var i in range(total_batch))
                     {
-                        var (batch_xs, batch_ys) = mnist.train.next_batch(batch_size);
+                        var (batch_xs, batch_ys) = mnist.train.next_batch(BatchSize);
                         // Run optimization op (backprop) and cost op (to get loss value)
                         var result = sess.run(new object[] { optimizer, cost },
                             new FeedItem(x, batch_xs),
@@ -96,7 +98,7 @@ namespace TensorFlowNET.Examples
 
         public void PrepareData()
         {
-            mnist = MnistDataSet.read_data_sets("mnist", one_hot: true);
+            mnist = MnistDataSet.read_data_sets("mnist", one_hot: true, validation_size: DataSize, test_size: TestSize);
         }
 
         public void SaveModel(Session sess)
diff --git a/test/TensorFlowNET.Examples/NearestNeighbor.cs b/test/TensorFlowNET.Examples/NearestNeighbor.cs
index 4b236558..6894009a 100644
--- a/test/TensorFlowNET.Examples/NearestNeighbor.cs
+++ b/test/TensorFlowNET.Examples/NearestNeighbor.cs
@@ -19,6 +19,8 @@ namespace TensorFlowNET.Examples
         public string Name => "Nearest Neighbor";
         Datasets mnist;
         NDArray Xtr, Ytr, Xte, Yte;
+        public int DataSize = 5000;
+        public int TestBatchSize = 200;
 
         public bool Run()
         {
@@ -62,10 +64,10 @@ namespace TensorFlowNET.Examples
 
         public void PrepareData()
        {
-            mnist = MnistDataSet.read_data_sets("mnist", one_hot: true);
+            mnist = MnistDataSet.read_data_sets("mnist", one_hot: true, validation_size: DataSize);
             // In this example, we limit mnist data
-            (Xtr, Ytr) = mnist.train.next_batch(5000); // 5000 for training (nn candidates)
-            (Xte, Yte) = mnist.test.next_batch(200); // 200 for testing
+            (Xtr, Ytr) = mnist.train.next_batch(DataSize); // 5000 for training (nn candidates)
+            (Xte, Yte) = mnist.test.next_batch(TestBatchSize); // 200 for testing
         }
     }
 }
diff --git a/test/TensorFlowNET.Examples/TextClassification/DataHelpers.cs b/test/TensorFlowNET.Examples/TextClassification/DataHelpers.cs
index 92333c61..43137b02 100644
--- a/test/TensorFlowNET.Examples/TextClassification/DataHelpers.cs
+++ b/test/TensorFlowNET.Examples/TextClassification/DataHelpers.cs
@@ -13,7 +13,7 @@ namespace TensorFlowNET.Examples.CnnTextClassification
         private const string TRAIN_PATH = "text_classification/dbpedia_csv/train.csv";
         private const string TEST_PATH = "text_classification/dbpedia_csv/test.csv";
 
-        public static (int[][], int[], int) build_char_dataset(string step, string model, int document_max_len)
+        public static (int[][], int[], int) build_char_dataset(string step, string model, int document_max_len, int? limit=null)
         {
             string alphabet = "abcdefghijklmnopqrstuvwxyz0123456789-,;.!?:’'\"/|_#$%ˆ&*˜‘+=<>()[]{} ";
             /*if (step == "train")
@@ -25,10 +25,11 @@ namespace TensorFlowNET.Examples.CnnTextClassification
                 char_dict[c.ToString()] = char_dict.Count;
 
             var contents = File.ReadAllLines(TRAIN_PATH);
-
-            var x = new int[contents.Length][];
-            var y = new int[contents.Length];
-            for (int i = 0; i < contents.Length; i++)
+            var size = limit == null ? contents.Length : limit.Value;
+
+            var x = new int[size][];
+            var y = new int[size];
+            for (int i = 0; i < size; i++)
             {
                 string[] parts = contents[i].ToLower().Split(",\"").ToArray();
                 string content = parts[2];
diff --git a/test/TensorFlowNET.Examples/TextClassification/TextClassificationTrain.cs b/test/TensorFlowNET.Examples/TextClassification/TextClassificationTrain.cs
index e08882f1..f7e69fc6 100644
--- a/test/TensorFlowNET.Examples/TextClassification/TextClassificationTrain.cs
+++ b/test/TensorFlowNET.Examples/TextClassification/TextClassificationTrain.cs
@@ -17,6 +17,7 @@ namespace TensorFlowNET.Examples.CnnTextClassification
         public int Priority => 100;
         public bool Enabled { get; set; }= false;
         public string Name => "Text Classification";
+        public int? DataLimit = null;
 
         private string dataDir = "text_classification";
         private string dataFileName = "dbpedia_csv.tar.gz";
@@ -28,7 +29,7 @@ namespace TensorFlowNET.Examples.CnnTextClassification
         {
             PrepareData();
             Console.WriteLine("Building dataset...");
-            var (x, y, alphabet_size) = DataHelpers.build_char_dataset("train", "vdcnn", CHAR_MAX_LEN);
+            var (x, y, alphabet_size) = DataHelpers.build_char_dataset("train", "vdcnn", CHAR_MAX_LEN, DataLimit);
 
             var (train_x, valid_x, train_y, valid_y) = train_test_split(x, y, test_size: 0.15f);
 
diff --git a/test/TensorFlowNET.Examples/Utility/MnistDataSet.cs b/test/TensorFlowNET.Examples/Utility/MnistDataSet.cs
index e0e7c0ee..7616449c 100644
--- a/test/TensorFlowNET.Examples/Utility/MnistDataSet.cs
+++ b/test/TensorFlowNET.Examples/Utility/MnistDataSet.cs
@@ -21,23 +21,26 @@ namespace TensorFlowNET.Examples.Utility
             TF_DataType dtype = TF_DataType.TF_FLOAT,
             bool reshape = true,
             int validation_size = 5000,
+            int test_size = 5000,
             string source_url = DEFAULT_SOURCE_URL)
         {
+            var train_size = validation_size * 2;
+
             Web.Download(source_url + TRAIN_IMAGES, train_dir, TRAIN_IMAGES);
             Compress.ExtractGZip(Path.Join(train_dir, TRAIN_IMAGES), train_dir);
-            var train_images = extract_images(Path.Join(train_dir, TRAIN_IMAGES.Split('.')[0]));
+            var train_images = extract_images(Path.Join(train_dir, TRAIN_IMAGES.Split('.')[0]), limit: train_size);
 
             Web.Download(source_url + TRAIN_LABELS, train_dir, TRAIN_LABELS);
             Compress.ExtractGZip(Path.Join(train_dir, TRAIN_LABELS), train_dir);
-            var train_labels = extract_labels(Path.Join(train_dir, TRAIN_LABELS.Split('.')[0]), one_hot: one_hot);
+            var train_labels = extract_labels(Path.Join(train_dir, TRAIN_LABELS.Split('.')[0]), one_hot: one_hot, limit: train_size);
 
             Web.Download(source_url + TEST_IMAGES, train_dir, TEST_IMAGES);
             Compress.ExtractGZip(Path.Join(train_dir, TEST_IMAGES), train_dir);
-            var test_images = extract_images(Path.Join(train_dir, TEST_IMAGES.Split('.')[0]));
+            var test_images = extract_images(Path.Join(train_dir, TEST_IMAGES.Split('.')[0]), limit: test_size);
 
             Web.Download(source_url + TEST_LABELS, train_dir, TEST_LABELS);
             Compress.ExtractGZip(Path.Join(train_dir, TEST_LABELS), train_dir);
-            var test_labels = extract_labels(Path.Join(train_dir, TEST_LABELS.Split('.')[0]), one_hot: one_hot);
+            var test_labels = extract_labels(Path.Join(train_dir, TEST_LABELS.Split('.')[0]), one_hot: one_hot, limit:test_size);
 
             int end = train_images.shape[0];
             var validation_images = train_images[np.arange(validation_size)];
@@ -52,14 +55,15 @@ namespace TensorFlowNET.Examples.Utility
             return new Datasets(train, validation, test);
         }
 
-        public static NDArray extract_images(string file)
+        public static NDArray extract_images(string file, int? limit=null)
        {
             using (var bytestream = new FileStream(file, FileMode.Open))
             {
                 var magic = _read32(bytestream);
                 if (magic != 2051)
                     throw new ValueError($"Invalid magic number {magic} in MNIST image file: {file}");
-                var num_images = _read32(bytestream);
+                var num_images = _read32(bytestream);
+                num_images = limit == null ? num_images : Math.Min(num_images, (uint)limit);
                 var rows = _read32(bytestream);
                 var cols = _read32(bytestream);
                 var buf = new byte[rows * cols * num_images];
@@ -70,7 +74,7 @@ namespace TensorFlowNET.Examples.Utility
             }
         }
 
-        public static NDArray extract_labels(string file, bool one_hot = false, int num_classes = 10)
+        public static NDArray extract_labels(string file, bool one_hot = false, int num_classes = 10, int? limit = null)
         {
             using (var bytestream = new FileStream(file, FileMode.Open))
             {
@@ -78,6 +82,7 @@ namespace TensorFlowNET.Examples.Utility
                 if (magic != 2049)
                     throw new ValueError($"Invalid magic number {magic} in MNIST label file: {file}");
                 var num_items = _read32(bytestream);
+                num_items = limit == null ? num_items : Math.Min(num_items,(uint) limit);
                 var buf = new byte[num_items];
                 bytestream.Read(buf, 0, buf.Length);
                 var labels = np.frombuffer(buf, np.uint8);
diff --git a/test/TensorFlowNET.UnitTest/ExamplesTests/ExamplesTest.cs b/test/TensorFlowNET.UnitTest/ExamplesTests/ExamplesTest.cs
index beb97d48..e884708c 100644
--- a/test/TensorFlowNET.UnitTest/ExamplesTests/ExamplesTest.cs
+++ b/test/TensorFlowNET.UnitTest/ExamplesTests/ExamplesTest.cs
@@ -51,7 +51,7 @@ namespace TensorFlowNET.UnitTest.ExamplesTests
         [TestMethod]
         public void LogisticRegression()
         {
-            new LogisticRegression() { Enabled = true }.Run();
+            new LogisticRegression() { Enabled = true, TrainingEpochs=10, DataSize = 500, TestSize = 500 }.Run();
         }
 
         [Ignore]
@@ -78,14 +78,14 @@ namespace TensorFlowNET.UnitTest.ExamplesTests
         [TestMethod]
         public void NearestNeighbor()
         {
-            new NearestNeighbor() { Enabled = true }.Run();
+            new NearestNeighbor() { Enabled = true, DataSize = 500, TestBatchSize = 100 }.Run();
         }
 
         [Ignore]
         [TestMethod]
         public void TextClassificationTrain()
         {
-            new TextClassificationTrain() { Enabled = true }.Run();
+            new TextClassificationTrain() { Enabled = true, DataLimit=100 }.Run();
         }
 
         [Ignore]
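
Note: below is a minimal usage sketch of the new size limits (not part of the patch), based only on members visible in the hunks above (read_data_sets with validation_size/test_size, Datasets.train/test with next_batch, NDArray.shape). Because read_data_sets caps the training slice at validation_size * 2, small values keep a full run to a few seconds. The class and method names in the sketch are hypothetical.

    // Hypothetical smoke test, assuming a project that references the
    // TensorFlowNET.Examples.Utility namespace shown in MnistDataSet.cs above.
    using System;
    using TensorFlowNET.Examples.Utility;

    public class SmallMnistSmokeTest
    {
        public static void Main()
        {
            // Keep only 500 validation/test samples; the training set is then
            // limited to validation_size * 2 = 1000 images/labels.
            var mnist = MnistDataSet.read_data_sets("mnist", one_hot: true,
                                                    validation_size: 500, test_size: 500);

            var (xTrain, yTrain) = mnist.train.next_batch(100); // small training batch
            var (xTest, yTest) = mnist.test.next_batch(50);     // small test batch
            Console.WriteLine($"train batch: {xTrain.shape[0]}, test batch: {xTest.shape[0]}");
        }
    }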