| @@ -0,0 +1,40 @@ | |||||
| using System; | |||||
| using System.Collections.Generic; | |||||
| using System.Diagnostics.CodeAnalysis; | |||||
| using System.Text; | |||||
| using Razorvine.Pickle; | |||||
| namespace Tensorflow.NumPy | |||||
| { | |||||
/// <summary>
/// Pickle object constructor used for <c>numpy.dtype</c> entries. It does not build a
/// real dtype: it echoes the constructor arguments to the console and returns a
/// <see cref="demo"/> placeholder instance.
/// </summary>
[SuppressMessage("ReSharper", "InconsistentNaming")]
[SuppressMessage("ReSharper", "MemberCanBePrivate.Global")]
[SuppressMessage("ReSharper", "MemberCanBeMadeStatic.Global")]
class DtypeConstructor : IObjectConstructor
{
    /// <summary>
    /// Writes a banner line, the argument count and then every argument (one per line)
    /// to standard output.
    /// </summary>
    /// <param name="args">Arguments handed over by the unpickler.</param>
    /// <returns>A new <see cref="demo"/> placeholder object.</returns>
    public object construct(object[] args)
    {
        Console.WriteLine("DtypeConstructor");

        int argumentCount = args.Length;
        Console.WriteLine(argumentCount);

        foreach (object argument in args)
        {
            Console.WriteLine(argument);
        }

        return new demo();
    }
}
/// <summary>
/// Placeholder object whose <c>__setstate__</c> only reports the state it is given.
/// </summary>
class demo
{
    /// <summary>
    /// Writes a banner line, the number of state entries and then each entry on its own
    /// line to standard output. Nothing is stored on the instance.
    /// </summary>
    /// <param name="args">State entries to print.</param>
    public void __setstate__(object[] args)
    {
        Console.WriteLine("demo __setstate__");
        Console.WriteLine(args.Length);
        Array.ForEach(args, entry => Console.WriteLine(entry));
    }
}
| } | |||||
| @@ -4,6 +4,7 @@ using System.IO; | |||||
| using System.Linq; | using System.Linq; | ||||
| using System.Text; | using System.Text; | ||||
| using Tensorflow.Util; | using Tensorflow.Util; | ||||
| using Razorvine.Pickle; | |||||
| using static Tensorflow.Binding; | using static Tensorflow.Binding; | ||||
| namespace Tensorflow.NumPy | namespace Tensorflow.NumPy | ||||
| @@ -93,10 +94,25 @@ namespace Tensorflow.NumPy | |||||
| var buffer = reader.ReadBytes(bytes * total); | var buffer = reader.ReadBytes(bytes * total); | ||||
| System.Buffer.BlockCopy(buffer, 0, matrix, 0, buffer.Length); | System.Buffer.BlockCopy(buffer, 0, matrix, 0, buffer.Length); | ||||
| return matrix; | return matrix; | ||||
| } | } | ||||
/// <summary>
/// Unpickles an <see cref="NDArray"/> from the stream underlying <paramref name="reader"/>.
/// </summary>
/// <param name="reader">
/// Reader whose <see cref="BinaryReader.BaseStream"/> is handed to the unpickler
/// (presumably positioned at the start of the pickled payload; confirm against the caller).
/// </param>
/// <param name="matrix">Not used by this method.</param>
/// <param name="shape">Not used by this method.</param>
/// <returns>The unpickled object, cast to <see cref="NDArray"/>.</returns>
/// <exception cref="InvalidCastException">The unpickled root object is not an <see cref="NDArray"/>.</exception>
NDArray ReadObjectMatrix(BinaryReader reader, Array matrix, int[] shape)
{
    // SECURITY NOTE(review): this unpickles whatever bytes the stream contains. Only the
    // constructors registered below are added here, but the input should still be treated
    // as untrusted; confirm that the unpickler cannot be steered into unexpected types.
    Unpickler.registerConstructor("numpy.core.multiarray", "_reconstruct", new MultiArrayConstructor());
    Unpickler.registerConstructor("numpy", "dtype", new DtypeConstructor());

    // NOTE(review): the unpickler is intentionally not wrapped in a using block; verify
    // whether disposing it would also close the caller-owned stream before adding one.
    var unpickler = new Unpickler();
    return (NDArray)unpickler.load(reader.BaseStream);
}
| public (NDArray, NDArray) meshgrid<T>(T[] array, bool copy = true, bool sparse = false) | public (NDArray, NDArray) meshgrid<T>(T[] array, bool copy = true, bool sparse = false) | ||||
| { | { | ||||
| var tensors = array_ops.meshgrid(array, copy: copy, sparse: sparse); | var tensors = array_ops.meshgrid(array, copy: copy, sparse: sparse); | ||||
| @@ -27,9 +27,20 @@ namespace Tensorflow.NumPy | |||||
| Array matrix = Array.CreateInstance(type, shape); | Array matrix = Array.CreateInstance(type, shape); | ||||
| //if (type == typeof(String)) | //if (type == typeof(String)) | ||||
| //return ReadStringMatrix(reader, matrix, bytes, type, shape); | |||||
| //return ReadStringMatrix(reader, matrix, bytes, type, shape); | |||||
| NDArray res = ReadObjectMatrix(reader, matrix, shape); | |||||
| Console.WriteLine("LoadMatrix"); | |||||
| Console.WriteLine(res.dims[0]); | |||||
| Console.WriteLine((int)res[0][0]); | |||||
| Console.WriteLine(res.dims[1]); | |||||
| //if (type == typeof(Object)) | |||||
| //{ | |||||
| //} | |||||
| //else | |||||
| return ReadValueMatrix(reader, matrix, bytes, type, shape); | return ReadValueMatrix(reader, matrix, bytes, type, shape); | ||||
| } | } | ||||
| } | } | ||||
| public T Load<T>(Stream stream) | public T Load<T>(Stream stream) | ||||
| @@ -37,7 +48,7 @@ namespace Tensorflow.NumPy | |||||
| ICloneable, IList, ICollection, IEnumerable, IStructuralComparable, IStructuralEquatable | ICloneable, IList, ICollection, IEnumerable, IStructuralComparable, IStructuralEquatable | ||||
| { | { | ||||
| // if (typeof(T).IsArray && (typeof(T).GetElementType().IsArray || typeof(T).GetElementType() == typeof(string))) | // if (typeof(T).IsArray && (typeof(T).GetElementType().IsArray || typeof(T).GetElementType() == typeof(string))) | ||||
| // return LoadJagged(stream) as T; | |||||
| // return LoadJagged(stream) as T; | |||||
| return LoadMatrix(stream) as T; | return LoadMatrix(stream) as T; | ||||
| } | } | ||||
| @@ -48,7 +59,7 @@ namespace Tensorflow.NumPy | |||||
| shape = null; | shape = null; | ||||
| // The first 6 bytes are a magic string: exactly "x93NUMPY" | // The first 6 bytes are a magic string: exactly "x93NUMPY" | ||||
| if (reader.ReadChar() != 63) return false; | |||||
| if (reader.ReadByte() != 0x93) return false; | |||||
| if (reader.ReadChar() != 'N') return false; | if (reader.ReadChar() != 'N') return false; | ||||
| if (reader.ReadChar() != 'U') return false; | if (reader.ReadChar() != 'U') return false; | ||||
| if (reader.ReadChar() != 'M') return false; | if (reader.ReadChar() != 'M') return false; | ||||
| @@ -64,6 +75,7 @@ namespace Tensorflow.NumPy | |||||
| ushort len = reader.ReadUInt16(); | ushort len = reader.ReadUInt16(); | ||||
| string header = new String(reader.ReadChars(len)); | string header = new String(reader.ReadChars(len)); | ||||
| Console.WriteLine(header); | |||||
| string mark = "'descr': '"; | string mark = "'descr': '"; | ||||
| int s = header.IndexOf(mark) + mark.Length; | int s = header.IndexOf(mark) + mark.Length; | ||||
| int e = header.IndexOf("'", s + 1); | int e = header.IndexOf("'", s + 1); | ||||
| @@ -93,7 +105,7 @@ namespace Tensorflow.NumPy | |||||
| Type GetType(string dtype, out int bytes, out bool? isLittleEndian) | Type GetType(string dtype, out int bytes, out bool? isLittleEndian) | ||||
| { | { | ||||
| isLittleEndian = IsLittleEndian(dtype); | isLittleEndian = IsLittleEndian(dtype); | ||||
| bytes = Int32.Parse(dtype.Substring(2)); | |||||
| bytes = dtype.Length > 2 ? Int32.Parse(dtype.Substring(2)) : 0; | |||||
| string typeCode = dtype.Substring(1); | string typeCode = dtype.Substring(1); | ||||
| @@ -121,6 +133,8 @@ namespace Tensorflow.NumPy | |||||
| return typeof(Double); | return typeof(Double); | ||||
| if (typeCode.StartsWith("S")) | if (typeCode.StartsWith("S")) | ||||
| return typeof(String); | return typeof(String); | ||||
| if (typeCode == "O") | |||||
| return typeof(Object); | |||||
| throw new NotSupportedException(); | throw new NotSupportedException(); | ||||
| } | } | ||||
| @@ -0,0 +1,44 @@ | |||||
| using System; | |||||
| using System.Collections.Generic; | |||||
| using System.Diagnostics.CodeAnalysis; | |||||
| using System.Text; | |||||
| using Razorvine.Pickle; | |||||
| namespace Tensorflow.NumPy | |||||
| { | |||||
/// <summary>
/// Pickle object constructor used for <c>numpy.core.multiarray._reconstruct</c>.
/// Creates an <see cref="NDArray"/> from the pickled shape and type code.
/// </summary>
[SuppressMessage("ReSharper", "InconsistentNaming")]
[SuppressMessage("ReSharper", "MemberCanBePrivate.Global")]
[SuppressMessage("ReSharper", "MemberCanBeMadeStatic.Global")]
public class MultiArrayConstructor : IObjectConstructor
{
    /// <summary>
    /// Builds the array described by the reconstruct arguments.
    /// </summary>
    /// <param name="args">
    /// Reconstruct arguments: <c>args[1]</c> is the list of dimensions and <c>args[2]</c>
    /// is the type code, either as a string or as raw bytes. <c>args[0]</c> is not inspected.
    /// </param>
    /// <returns>A new <see cref="NDArray"/> with the requested shape and data type.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="args"/> is null.</exception>
    /// <exception cref="ArgumentException">Fewer than three arguments were supplied, or a dimension is null.</exception>
    /// <exception cref="NotImplementedException">The type code is not supported.</exception>
    public object construct(object[] args)
    {
        if (args == null)
            throw new ArgumentNullException(nameof(args));
        if (args.Length < 3)
            throw new ArgumentException($"Expected at least 3 arguments but received {args.Length}.", nameof(args));

        var rawDims = (object[])args[1];
        var dims = new int[rawDims.Length];
        for (var i = 0; i < rawDims.Length; i++)
        {
            // Convert rather than unbox: a dimension that arrives boxed as long (or another
            // integral type) would make a plain (int) cast throw InvalidCastException.
            var dim = rawDims[i] ?? throw new ArgumentException("Shape entries must not be null.", nameof(args));
            dims[i] = Convert.ToInt32(dim);
        }

        // The type code may be delivered as text or as raw bytes; normalise it to a string.
        var typeCode = args[2] is byte[] rawCode
            ? Encoding.ASCII.GetString(rawCode)
            : args[2] as string;

        TF_DataType dtype;
        switch (typeCode)
        {
            case "b":
                // NOTE(review): a *Ref data type looks unusual for a freshly created array;
                // confirm that DtUint8Ref is really the intended mapping for "b".
                dtype = TF_DataType.DtUint8Ref;
                break;
            default:
                throw new NotImplementedException("cannot parse " + (typeCode ?? args[2]));
        }

        return new NDArray(new Shape(dims), dtype);
    }
}
| } | |||||
| @@ -0,0 +1,19 @@ | |||||
| using System; | |||||
| using System.Collections.Generic; | |||||
| using System.Text; | |||||
| namespace Tensorflow.NumPy | |||||
| { | |||||
public partial class NDArray
{
    /// <summary>
    /// Pickle-style state hook. It writes a banner line, the number of state entries and
    /// then each entry on its own line to standard output; no state is applied to the array.
    /// </summary>
    /// <param name="args">State entries to print.</param>
    public void __setstate__(object[] args)
    {
        Console.WriteLine("NDArray __setstate__");

        var entryCount = args.Length;
        Console.WriteLine(entryCount);

        var position = 0;
        while (position < entryCount)
        {
            Console.WriteLine(args[position]);
            position++;
        }
    }
}
| } | |||||
| @@ -112,6 +112,7 @@ https://tensorflownet.readthedocs.io</Description> | |||||
| <PackageReference Include="Newtonsoft.Json" Version="13.0.3" /> | <PackageReference Include="Newtonsoft.Json" Version="13.0.3" /> | ||||
| <PackageReference Include="OneOf" Version="3.0.223" /> | <PackageReference Include="OneOf" Version="3.0.223" /> | ||||
| <PackageReference Include="Protobuf.Text" Version="0.7.0" /> | <PackageReference Include="Protobuf.Text" Version="0.7.0" /> | ||||
| <PackageReference Include="Razorvine.Pickle" Version="1.4.0" /> | |||||
| <PackageReference Include="Serilog.Sinks.Console" Version="4.1.0" /> | <PackageReference Include="Serilog.Sinks.Console" Version="4.1.0" /> | ||||
| </ItemGroup> | </ItemGroup> | ||||
| </Project> | </Project> | ||||
| @@ -5,6 +5,13 @@ using System.Text; | |||||
| using Tensorflow.Keras.Utils; | using Tensorflow.Keras.Utils; | ||||
| using Tensorflow.NumPy; | using Tensorflow.NumPy; | ||||
| using System.Linq; | using System.Linq; | ||||
| using Google.Protobuf.Collections; | |||||
| using Microsoft.VisualBasic; | |||||
| using OneOf.Types; | |||||
| using static HDF.PInvoke.H5; | |||||
| using System.Data; | |||||
| using System.Reflection.Emit; | |||||
| using System.Xml.Linq; | |||||
| namespace Tensorflow.Keras.Datasets | namespace Tensorflow.Keras.Datasets | ||||
| { | { | ||||
| @@ -12,13 +19,59 @@ namespace Tensorflow.Keras.Datasets | |||||
| /// This is a dataset of 25,000 movies reviews from IMDB, labeled by sentiment | /// This is a dataset of 25,000 movies reviews from IMDB, labeled by sentiment | ||||
| /// (positive/negative). Reviews have been preprocessed, and each review is | /// (positive/negative). Reviews have been preprocessed, and each review is | ||||
| /// encoded as a list of word indexes(integers). | /// encoded as a list of word indexes(integers). | ||||
| /// For convenience, words are indexed by overall frequency in the dataset, | |||||
| /// so that for instance the integer "3" encodes the 3rd most frequent word in | |||||
| /// the data. This allows for quick filtering operations such as: | |||||
| /// "only consider the top 10,000 most | |||||
| /// common words, but eliminate the top 20 most common words". | |||||
| /// As a convention, "0" does not stand for a specific word, but instead is used | |||||
| /// to encode the pad token. | |||||
| /// Args: | |||||
| /// path: where to cache the data (relative to %TEMP%/imdb/imdb.npz). | |||||
| /// num_words: integer or None. Words are | |||||
| /// ranked by how often they occur (in the training set) and only | |||||
| /// the `num_words` most frequent words are kept. Any less frequent word | |||||
| /// will appear as `oov_char` value in the sequence data. If None, | |||||
| /// all words are kept. Defaults to `None`. | |||||
| /// skip_top: skip the top N most frequently occurring words | |||||
| /// (which may not be informative). These words will appear as | |||||
| /// `oov_char` value in the dataset. When 0, no words are | |||||
| /// skipped. Defaults to `0`. | |||||
| /// maxlen: int or None. Maximum sequence length. | |||||
| /// Any longer sequence will be truncated. None means no truncation. | |||||
| /// Defaults to `None`. | |||||
| /// seed: int. Seed for reproducible data shuffling. | |||||
| /// start_char: int. The start of a sequence will be marked with this | |||||
| /// character. 0 is usually the padding character. Defaults to `1`. | |||||
| /// oov_char: int. The out-of-vocabulary character. | |||||
| /// Words that were cut out because of the `num_words` or | |||||
| /// `skip_top` limits will be replaced with this character. | |||||
| /// index_from: int. Index actual words with this index and higher. | |||||
| /// Returns: | |||||
| /// Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`. | |||||
| /// | |||||
| /// **x_train, x_test**: lists of sequences, which are lists of indexes | |||||
| /// (integers). If the `num_words` argument was specified, the maximum | |||||
| /// possible index value is `num_words - 1`. If the `maxlen` argument was | |||||
| /// specified, the largest possible sequence length is `maxlen`. | |||||
| /// | |||||
| /// **y_train, y_test**: lists of integer labels (1 or 0). | |||||
| /// | |||||
| /// Raises: | |||||
| /// ValueError: in case `maxlen` is so low | |||||
| /// that no input sequence could be kept. | |||||
| /// Note that the 'out of vocabulary' character is only used for | |||||
| /// words that were present in the training set but are not included | |||||
| /// because they're not making the `num_words` cut here. | |||||
| /// Words that were not seen in the training set but are in the test set | |||||
| /// have simply been skipped. | |||||
| /// </summary> | /// </summary> | ||||
| /// Loads the [IMDB dataset](https://ai.stanford.edu/~amaas/data/sentiment/). | |||||
| public class Imdb | public class Imdb | ||||
| { | { | ||||
| string origin_folder = "https://storage.googleapis.com/tensorflow/tf-keras-datasets/"; | string origin_folder = "https://storage.googleapis.com/tensorflow/tf-keras-datasets/"; | ||||
| string file_name = "imdb.npz"; | string file_name = "imdb.npz"; | ||||
| string dest_folder = "imdb"; | string dest_folder = "imdb"; | ||||
| /// <summary> | /// <summary> | ||||
| /// Loads the [IMDB dataset](https://ai.stanford.edu/~amaas/data/sentiment/). | /// Loads the [IMDB dataset](https://ai.stanford.edu/~amaas/data/sentiment/). | ||||
| /// </summary> | /// </summary> | ||||
| @@ -41,8 +94,10 @@ namespace Tensorflow.Keras.Datasets | |||||
| int index_from = 3) | int index_from = 3) | ||||
| { | { | ||||
| var dst = Download(); | var dst = Download(); | ||||
| var lines = File.ReadAllLines(Path.Combine(dst, "imdb_train.txt")); | |||||
| var fileBytes = File.ReadAllBytes(Path.Combine(dst, file_name)); | |||||
| var (x_train, x_test) = LoadX(fileBytes); | |||||
| var (y_train, y_test) = LoadY(fileBytes); | |||||
| /*var lines = File.ReadAllLines(Path.Combine(dst, "imdb_train.txt")); | |||||
| var x_train_string = new string[lines.Length]; | var x_train_string = new string[lines.Length]; | ||||
| var y_train = np.zeros(new int[] { lines.Length }, np.int64); | var y_train = np.zeros(new int[] { lines.Length }, np.int64); | ||||
| for (int i = 0; i < lines.Length; i++) | for (int i = 0; i < lines.Length; i++) | ||||
| @@ -62,7 +117,7 @@ namespace Tensorflow.Keras.Datasets | |||||
| x_test_string[i] = lines[i].Substring(2); | x_test_string[i] = lines[i].Substring(2); | ||||
| } | } | ||||
| var x_test = np.array(x_test_string); | |||||
| var x_test = np.array(x_test_string);*/ | |||||
| return new DatasetPass | return new DatasetPass | ||||
| { | { | ||||
| @@ -1,7 +1,9 @@ | |||||
| using Microsoft.VisualStudio.TestTools.UnitTesting; | using Microsoft.VisualStudio.TestTools.UnitTesting; | ||||
| using System; | using System; | ||||
| using System.Collections.Generic; | |||||
| using System.Linq; | using System.Linq; | ||||
| using static Tensorflow.Binding; | using static Tensorflow.Binding; | ||||
| using static Tensorflow.KerasApi; | |||||
| namespace TensorFlowNET.UnitTest.Dataset | namespace TensorFlowNET.UnitTest.Dataset | ||||
| { | { | ||||
| @@ -195,5 +197,20 @@ namespace TensorFlowNET.UnitTest.Dataset | |||||
| Assert.IsFalse(allEqual); | Assert.IsFalse(allEqual); | ||||
| } | } | ||||
/// <summary>
/// Smoke test for the IMDB dataset loader: loads the data restricted to the most
/// frequent words, prints the number of training and validation sequences, and pads
/// both feature sets to a fixed length. It makes no assertions beyond "does not throw".
/// </summary>
[TestMethod]
public void GetData()
{
    var vocab_size = 20000; // Only consider the top 20k words
    var maxlen = 200; // Only consider the first 200 words of each movie review

    var dataset = keras.datasets.imdb.load_data(num_words: vocab_size);
    var x_train = dataset.Train.Item1;
    var x_val = dataset.Test.Item1;

    // A separating space keeps the count and the label from running together in the output.
    print(len(x_train) + " Training sequences");
    print(len(x_val) + " Validation sequences");

    x_train = keras.preprocessing.sequence.pad_sequences((IEnumerable<int[]>)x_train, maxlen: maxlen);
    x_val = keras.preprocessing.sequence.pad_sequences((IEnumerable<int[]>)x_val, maxlen: maxlen);
}
| } | } | ||||
| } | } | ||||