Browse Source

Refactor string tensor.

tags/yolov3
Oceania2018 4 years ago
parent
commit
1f6748864a
11 changed files with 150 additions and 211 deletions
  1. +1
    -1
      src/TensorFlowNET.Console/Program.cs
  2. +0
    -60
      src/TensorFlowNET.Core/Tensors/Tensor.Conversions.cs
  3. +13
    -68
      src/TensorFlowNET.Core/Tensors/Tensor.Creation.cs
  4. +107
    -0
      src/TensorFlowNET.Core/Tensors/Tensor.String.cs
  5. +2
    -69
      src/TensorFlowNET.Core/Tensors/Tensor.Value.cs
  6. +3
    -0
      src/TensorFlowNET.Core/Tensors/TensorShape.cs
  7. +7
    -2
      src/TensorFlowNET.Core/Tensors/tensor_util.cs
  8. +2
    -2
      src/TensorFlowNET.Core/Training/Saving/Saver.cs
  9. +7
    -4
      test/TensorFlowNET.Keras.UnitTest/Tensorflow.Keras.UnitTest.csproj
  10. +4
    -1
      test/TensorFlowNET.Native.UnitTest/Tensorflow.Native.UnitTest.csproj
  11. +4
    -4
      test/TensorFlowNET.UnitTest/ManagedAPI/StringsApiTest.cs

+ 1
- 1
src/TensorFlowNET.Console/Program.cs View File

@@ -25,7 +25,7 @@ namespace Tensorflow

FuncGraph(mm);

// 85M
// 65M
Console.WriteLine("Finished.");
Console.ReadLine();
}


+ 0
- 60
src/TensorFlowNET.Core/Tensors/Tensor.Conversions.cs View File

@@ -29,66 +29,6 @@ namespace Tensorflow
[SuppressMessage("ReSharper", "InvokeAsExtensionMethod")]
public partial class Tensor
{
/// <summary>
/// Reads the value at the start of this tensor's buffer and returns it as <typeparamref name="T"/>.
/// </summary>
/// <remarks>
/// If <typeparamref name="T"/> maps to this tensor's dtype (and that dtype is not TF_STRING) the value is
/// read straight from the buffer. Otherwise the value is read as the tensor's own dtype and converted
/// with <c>Converts.ChangeType</c>. Only the TF_STRING branch checks <c>NDims</c>.
/// </remarks>
/// <typeparam name="T">Requested result type.</typeparam>
/// <returns>The value found at the start of the buffer, as <typeparamref name="T"/>.</returns>
/// <exception cref="ArgumentException">The dtype is TF_STRING and <c>NDims</c> is not 0.</exception>
/// <exception cref="NotSupportedException">The dtype has no case in the switch (TF_COMPLEX64 and TF_COMPLEX128 included).</exception>
public T ToScalar<T>()
{
unsafe
{
// Fast path: requested type already matches the stored dtype, no conversion needed.
if (typeof(T).as_dtype() == this.dtype && this.dtype != TF_DataType.TF_STRING)
return Unsafe.Read<T>(this.buffer.ToPointer());

// Slow path: read the value as the tensor's own dtype, then convert to T.
switch (this.dtype)
{
#if _REGEN
%foreach supported_numericals_TF_DataType,supported_numericals,supported_numericals_lowercase%
case TF_DataType.#1:
return Converts.ChangeType<T>(*(#3*) this.buffer);
%
#else

case TF_DataType.TF_UINT8:
return Converts.ChangeType<T>(*(byte*)this.buffer);
case TF_DataType.TF_INT16:
return Converts.ChangeType<T>(*(short*)this.buffer);
case TF_DataType.TF_UINT16:
return Converts.ChangeType<T>(*(ushort*)this.buffer);
case TF_DataType.TF_INT32:
return Converts.ChangeType<T>(*(int*)this.buffer);
case TF_DataType.TF_UINT32:
return Converts.ChangeType<T>(*(uint*)this.buffer);
case TF_DataType.TF_INT64:
return Converts.ChangeType<T>(*(long*)this.buffer);
case TF_DataType.TF_UINT64:
return Converts.ChangeType<T>(*(ulong*)this.buffer);
case TF_DataType.TF_DOUBLE:
return Converts.ChangeType<T>(*(double*)this.buffer);
case TF_DataType.TF_FLOAT:
return Converts.ChangeType<T>(*(float*)this.buffer);
#endif
case TF_DataType.TF_STRING:
if (this.NDims != 0)
throw new ArgumentException($"{nameof(Tensor)} can only be scalar.");

IntPtr stringStartAddress = IntPtr.Zero;
ulong dstLen = 0;

// Decoding starts 8 bytes into the buffer.
// NOTE(review): presumably this skips the single 8-byte offset-table entry of a scalar string tensor — confirm.
// NOTE(review): the full bytesize is passed as the source length although 8 bytes were skipped — confirm this is intended.
c_api.TF_StringDecode((byte*)this.buffer + 8, this.bytesize, (byte**)&stringStartAddress, ref dstLen, tf.Status.Handle);
tf.Status.Check(true);

// checked(): fail loudly instead of silently truncating a length that does not fit in an int.
var dstLenInt = checked((int)dstLen);
var value = Encoding.UTF8.GetString((byte*)stringStartAddress, dstLenInt);
// A string result is returned as-is; any other T goes through the generic converter.
if (typeof(T) == typeof(string))
return (T)(object)value;
else
return Converts.ChangeType<T>(value);

case TF_DataType.TF_COMPLEX64:
case TF_DataType.TF_COMPLEX128:
default:
throw new NotSupportedException();
}
}
}

public unsafe void CopyTo(NDArray nd)
{
if (!nd.Shape.IsContiguous)


+ 13
- 68
src/TensorFlowNET.Core/Tensors/Tensor.Creation.cs View File

@@ -457,53 +457,15 @@ namespace Tensorflow
/// </summary>
public unsafe Tensor(string str)
{
var buffer = Encoding.UTF8.GetBytes(str);
var size = c_api.TF_StringEncodedSize((ulong)buffer.Length);
var handle = TF_AllocateTensor(TF_DataType.TF_STRING, null, 0, size + sizeof(ulong));
AllocationType = AllocationType.Tensorflow;

IntPtr tensor = c_api.TF_TensorData(handle);
Marshal.WriteInt64(tensor, 0);
fixed (byte* src = buffer)
c_api.TF_StringEncode(src, (ulong)buffer.Length, (byte*)(tensor + sizeof(long)), size, tf.Status.Handle);
_handle = handle;
tf.Status.Check(true);
_handle = StringTensor(new string[] { str }, TensorShape.Scalar);
#if TRACK_TENSOR_LIFE
print($"New Tensor 0x{_handle.ToString("x16")} {AllocationType} String Data: 0x{TensorDataPointer.ToString("x16")}");
#endif
}

public unsafe Tensor(string[] strings)
{
// convert string array to byte[][]
var buffer = new byte[strings.Length][];
for (var i = 0; i < strings.Length; i++)
buffer[i] = Encoding.UTF8.GetBytes(strings[i]);
long[] shape = new long[] { strings.Length };

ulong size = 0;
foreach (var b in buffer)
size += TF_StringEncodedSize((ulong)b.Length);

ulong src_size = size + (ulong)buffer.Length * sizeof(ulong);
_handle = TF_AllocateTensor(TF_DataType.TF_STRING, shape, shape.Length, src_size);
AllocationType = AllocationType.Tensorflow;

// Clear offset table
IntPtr input = TensorDataPointer;
IntPtr data_start = input + buffer.Length * sizeof(ulong);
IntPtr limit = input + (int)src_size;
ulong offset = 0;
for (int i = 0; i < buffer.Length; i++)
{
Marshal.WriteInt64(input, i * sizeof(ulong), (long)offset);
fixed (byte* src = &buffer[i][0])
{
var written = TF_StringEncode(src, (ulong)buffer[i].Length, (byte*)data_start, (ulong)(limit.ToInt64() - data_start.ToInt64()), tf.Status.Handle);
tf.Status.Check(true);
//input += 8;
data_start += (int)written;
offset += written;
}
}

_handle = StringTensor(strings, new TensorShape(strings.Length));
#if TRACK_TENSOR_LIFE
print($"New Tensor 0x{_handle.ToString("x16")} {AllocationType} String Data: 0x{TensorDataPointer.ToString("x16")}");
#endif
@@ -515,12 +477,12 @@ namespace Tensorflow
tensorDType = nd.dtype.as_dtype();

// todo: handle nd of type "String" here too
if (tensorDType == TF_DataType.TF_STRING && nd.typecode == NPTypeCode.Byte)
/*if (tensorDType == TF_DataType.TF_STRING && nd.typecode == NPTypeCode.Byte)
{
if (nd.Unsafe.Storage.Shape.IsContiguous)
{
var bytesLength = (ulong)nd.size;
var size = c_api.TF_StringEncodedSize(bytesLength);
var size = bytesLength + 1;
var handle = TF_AllocateTensor(TF_DataType.TF_STRING, null, 0, size + 8);
AllocationType = AllocationType.Tensorflow;

@@ -534,7 +496,7 @@ namespace Tensorflow
else
{
var buffer = nd.ToArray<byte>();
var size = c_api.TF_StringEncodedSize((ulong)buffer.Length);
var size = (ulong)buffer.Length + 1;
var handle = TF_AllocateTensor(TF_DataType.TF_STRING, null, 0, size + 8);
AllocationType = AllocationType.Tensorflow;

@@ -549,9 +511,12 @@ namespace Tensorflow
}

return;
}
}*/

CreateTensorFromNDArray(nd, tensorDType);
#if TRACK_TENSOR_LIFE
print($"New Tensor 0x{_handle.ToString("x16")} {AllocationType} Data: 0x{TensorDataPointer.ToString("x16")}");
#endif
}

private unsafe void CreateTensorFromNDArray(NDArray nd, TF_DataType? given_dtype)
@@ -576,10 +541,6 @@ namespace Tensorflow
}
else
AllocationType = AllocationType.Tensorflow;

#if TRACK_TENSOR_LIFE
print($"New Tensor 0x{_handle.ToString("x16")} {AllocationType} Data: 0x{TensorDataPointer.ToString("x16")}");
#endif
}

public Tensor(Operation op, int value_index, TF_DataType dtype)
@@ -608,26 +569,10 @@ namespace Tensorflow
protected IntPtr CreateTensorFromArray(TF_DataType dt, long[] shape, Array data, int element_size)
{
if (dt == TF_DataType.TF_STRING && data is byte[] buffer)
return CreateStringTensorFromBytes(buffer, shape);
return StringTensor(new byte[][] { buffer }, TensorShape.Scalar);
return CreateTensorFromArray(dt, shape, data, 0, data.Length, element_size);
}

protected unsafe IntPtr CreateStringTensorFromBytes(byte[] buffer, long[] shape)
{
var size = c_api.TF_StringEncodedSize((ulong)buffer.Length);
var handle = TF_AllocateTensor(TF_DataType.TF_STRING, shape, 0, size + sizeof(long));
AllocationType = AllocationType.Tensorflow;

IntPtr tensor = c_api.TF_TensorData(handle);
Marshal.WriteInt64(tensor, 0);

fixed (byte* src = buffer)
c_api.TF_StringEncode(src, (ulong)buffer.Length, (byte*)(tensor + sizeof(long)), size, tf.Status.Handle);

tf.Status.Check(true);
return handle;
}

/// <summary>
/// Creates a new tensor from a subsection of the given array without copying memory. The array is pinned down and the pointer passed on.
/// </summary>


+ 107
- 0
src/TensorFlowNET.Core/Tensors/Tensor.String.cs View File

@@ -0,0 +1,107 @@
using System;
using System.Linq;
using System.Runtime.InteropServices;
using System.Text;
using static Tensorflow.Binding;

namespace Tensorflow
{
public partial class Tensor
{
/// <summary>
/// Builds a TF_STRING tensor from managed strings by UTF-8 encoding each one
/// and handing the result to the <c>byte[][]</c> overload.
/// </summary>
/// <param name="strings">Strings to store; each becomes one encoded byte array.</param>
/// <param name="shape">Shape forwarded unchanged to the <c>byte[][]</c> overload.</param>
/// <returns>The handle returned by the <c>byte[][]</c> overload.</returns>
public unsafe IntPtr StringTensor(string[] strings, TensorShape shape)
{
    var encoded = new byte[strings.Length][];
    var index = 0;
    foreach (var text in strings)
    {
        encoded[index] = Encoding.UTF8.GetBytes(text);
        index++;
    }

    return StringTensor(encoded, shape);
}

/// <summary>
/// Allocates a TF_STRING tensor of the given shape and fills it with the given byte strings.
/// </summary>
/// <remarks>
/// The tensor data starts with one 8-byte offset per element, followed by the encoded form of every
/// element in order. As a side effect, <c>AllocationType</c> is set to <c>AllocationType.Tensorflow</c>.
/// </remarks>
/// <param name="buffer">Raw bytes of each element, in storage order.</param>
/// <param name="shape">Shape of the tensor to allocate.</param>
/// <returns>The handle returned by <c>c_api.TF_AllocateTensor</c>.</returns>
public unsafe IntPtr StringTensor(byte[][] buffer, TensorShape shape)
{
    // Total bytes needed for the encoded strings (length prefix + payload each).
    ulong size = 0;
    foreach (var b in buffer)
        size += c_api.TF_StringEncodedSize((ulong)b.Length);

    // Plus one 8-byte offset-table entry per element.
    var src_size = size + (ulong)buffer.Length * sizeof(ulong);
    var handle = c_api.TF_AllocateTensor(TF_DataType.TF_STRING,
        shape.dims.Select(x => (long)x).ToArray(),
        shape.ndim,
        src_size);
    AllocationType = AllocationType.Tensorflow;

    IntPtr data_start = c_api.TF_TensorData(handle);
    IntPtr string_start = data_start + buffer.Length * sizeof(ulong);
    // 64-bit arithmetic: casting the byte count to int would overflow for large tensors.
    IntPtr limit = new IntPtr(data_start.ToInt64() + (long)src_size);
    ulong offset = 0;
    for (int i = 0; i < buffer.Length; i++)
    {
        // Offset of element i, relative to the end of the offset table.
        Marshal.WriteInt64(data_start, i * sizeof(ulong), (long)offset);

        if (buffer[i].Length == 0)
        {
            // An empty string is just a single zero length byte. It still occupies one byte,
            // so advance past it and keep encoding the remaining elements; stopping here would
            // leave every later element (and its offset entry) unwritten.
            Marshal.WriteByte(string_start, 0);
            string_start += 1;
            offset += 1;
            continue;
        }

        fixed (byte* src = &buffer[i][0])
        {
            var written = c_api.TF_StringEncode(src, (ulong)buffer[i].Length, (byte*)string_start, (ulong)(limit.ToInt64() - string_start.ToInt64()), tf.Status.Handle);
            tf.Status.Check(true);
            string_start += (int)written;
            offset += written;
        }
    }

    return handle;
}

/// <summary>
/// Decodes every element of this string tensor as UTF-8 text.
/// </summary>
/// <returns>One string per byte array returned by <see cref="StringBytes"/>, in the same order.</returns>
/// <exception cref="InvalidOperationException">When <see cref="dtype"/> != TF_DataType.TF_STRING</exception>
public unsafe string[] StringData()
    => Array.ConvertAll(StringBytes(), bytes => Encoding.UTF8.GetString(bytes));

/// <summary>
/// Extracts the raw bytes of every element of this string tensor.
/// </summary>
/// <returns>One byte array per element, in storage order.</returns>
/// <exception cref="InvalidOperationException">When <see cref="dtype"/> != TF_DataType.TF_STRING</exception>
/// <exception cref="OverflowException">When an element's decoded length does not fit in an <see cref="int"/>.</exception>
public unsafe byte[][] StringBytes()
{
    if (dtype != TF_DataType.TF_STRING)
        throw new InvalidOperationException($"Unable to call StringData when dtype != TF_DataType.TF_STRING (dtype is {dtype})");

    //
    // TF_STRING tensors are encoded with a table of 8-byte offsets followed by TF_StringEncode-encoded bytes.
    // [offset1, offset2,...,offsetn, s1size, s1bytes, s2size, s2bytes,...,snsize,snbytes]
    //
    long size = 1;
    foreach (var s in TensorShape.dims)
        size *= s;

    var buffer = new byte[size][];
    var data_start = c_api.TF_TensorData(_handle);

    // Skip the offset table; pointer arithmetic stays 64-bit so large tensors are not truncated.
    var cursor = (byte*)data_start + size * sizeof(ulong);
    for (int i = 0; i < buffer.Length; i++)
    {
        // The size prefix is a base-128 varint: 7 payload bits per byte, least-significant
        // group first, high bit set on every byte except the last. Reading only the first
        // byte is wrong for any length that needs more than one byte.
        ulong len = 0;
        int shift = 0;
        byte chunk;
        do
        {
            chunk = *cursor++;
            len |= (ulong)(chunk & 0x7F) << shift;
            shift += 7;
        } while ((chunk & 0x80) != 0 && shift < 64); // shift bound stops a runaway read on corrupt data

        buffer[i] = new byte[checked((int)len)];
        Marshal.Copy(new IntPtr(cursor), buffer[i], 0, buffer[i].Length);
        cursor += buffer[i].Length;
    }

    return buffer;
}
}
}

+ 2
- 69
src/TensorFlowNET.Core/Tensors/Tensor.Value.cs View File

@@ -162,7 +162,8 @@ namespace Tensorflow
storage = new UnmanagedStorage(NPTypeCode.Boolean);
break;
case TF_DataType.TF_STRING:
return np.array(StringBytes()[0]);
var nd = np.array(StringData());
return nd;
case TF_DataType.TF_UINT8:
storage = new UnmanagedStorage(NPTypeCode.Byte);
break;
@@ -202,73 +203,5 @@ namespace Tensorflow

return data;
}

/// <summary>
/// Extracts string array from current Tensor.
/// </summary>
/// <returns>Every element decoded as UTF-8 text, in the order returned by <see cref="StringBytes"/>.</returns>
/// <exception cref="InvalidOperationException">When <see cref="dtype"/> != TF_DataType.TF_STRING</exception>
public unsafe string[] StringData()
{
    // StringBytes performs the same dtype check (with the same message) and the same
    // decode walk this method used to repeat line for line, so reuse it.
    var buffer = StringBytes();

    var _str = new string[buffer.Length];
    for (int i = 0; i < _str.Length; i++)
        _str[i] = Encoding.UTF8.GetString(buffer[i]);

    return _str;
}

/// <summary>
/// Extracts the raw bytes of every element of this string tensor.
/// </summary>
/// <returns>One byte array per element, in storage order.</returns>
/// <exception cref="InvalidOperationException">When <see cref="dtype"/> != TF_DataType.TF_STRING</exception>
public unsafe byte[][] StringBytes()
{
if (dtype != TF_DataType.TF_STRING)
throw new InvalidOperationException($"Unable to call StringData when dtype != TF_DataType.TF_STRING (dtype is {dtype})");

//
// TF_STRING tensors are encoded with a table of 8-byte offsets followed by TF_StringEncode-encoded bytes.
// [offset1, offset2,...,offsetn, s1size, s1bytes, s2size, s2bytes,...,snsize,snbytes]
//
// Element count is the product of all dimensions (1 when dims is empty).
long size = 1;
foreach (var s in TensorShape.dims)
size *= s;

var buffer = new byte[size][];
var src = c_api.TF_TensorData(_handle);
// Skip the offset table: one 8-byte entry per element.
src += (int)(size * 8);
for (int i = 0; i < buffer.Length; i++)
{
IntPtr dst = IntPtr.Zero;
ulong dstLen = 0;
// NOTE(review): bytesize is passed as the source length on every iteration even though src has advanced — confirm the native bounds check still behaves as intended.
var read = c_api.TF_StringDecode((byte*)src, bytesize, (byte**)&dst, ref dstLen, tf.Status.Handle);
tf.Status.Check(true);
// Copy the decoded payload out of native memory into a managed array.
buffer[i] = new byte[(int)dstLen];
Marshal.Copy(dst, buffer[i], 0, buffer[i].Length);
// Advance by the value TF_StringDecode returned to reach the next element.
src += (int)read;
}

return buffer;
}
}
}

+ 3
- 0
src/TensorFlowNET.Core/Tensors/TensorShape.cs View File

@@ -59,6 +59,9 @@ namespace Tensorflow
shape = new Shape();
}

/// <summary>
/// Gets a <see cref="TensorShape"/> constructed from an empty dimension array.
/// A new instance is created on every access.
/// </summary>
public static TensorShape Scalar
{
    get { return new TensorShape(new int[0]); }
}

public TensorShape(TensorShapeProto proto)
{
if (proto.UnknownRank) return;


+ 7
- 2
src/TensorFlowNET.Core/Tensors/tensor_util.cs View File

@@ -576,9 +576,14 @@ would not be rank 1.", tensor.op.get_attr("axis")));
{
var dtype = tensor.dtype;

if (dtype == TF_DataType.TF_STRING && tensor.NDims > 0)
if (dtype == TF_DataType.TF_STRING)
{
return $"['{string.Join("', '", tensor.StringData())}']";
if (tensor.rank == 0)
return "'" + string.Join(string.Empty, tensor.StringBytes()[0]
.Take(25)
.Select(x => x < 32 || x > 127 ? "\\x" + x.ToString("x") : Convert.ToChar(x).ToString())) + "'";
else
return $"['{string.Join("', '", tensor.StringData().Take(25))}']";
}

var nd = tensor.numpy();


+ 2
- 2
src/TensorFlowNET.Core/Training/Saving/Saver.cs View File

@@ -193,7 +193,7 @@ namespace Tensorflow

if (write_state)
{
var path = UTF8Encoding.UTF8.GetString((byte[])model_checkpoint_path[0]);
var path = NDArray.AsStringArray(model_checkpoint_path[0])[0];
_RecordLastCheckpoint(path);
checkpoint_management.update_checkpoint_state_internal(
save_dir: save_path_parent,
@@ -211,7 +211,7 @@ namespace Tensorflow
export_meta_graph(meta_graph_filename, strip_default_attrs: strip_default_attrs, save_debug_info: save_debug_info);
}

return _is_empty ? string.Empty : UTF8Encoding.UTF8.GetString((byte[])model_checkpoint_path[0]);
return _is_empty ? string.Empty : NDArray.AsStringArray(model_checkpoint_path[0])[0];
}

public (Saver, object) import_meta_graph(string meta_graph_or_file,


+ 7
- 4
test/TensorFlowNET.Keras.UnitTest/Tensorflow.Keras.UnitTest.csproj View File

@@ -14,10 +14,13 @@
</PropertyGroup>

<ItemGroup>
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="16.7.1" />
<PackageReference Include="MSTest.TestAdapter" Version="2.1.1" />
<PackageReference Include="MSTest.TestFramework" Version="2.1.1" />
<PackageReference Include="coverlet.collector" Version="1.3.0" />
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="16.8.3" />
<PackageReference Include="MSTest.TestAdapter" Version="2.1.2" />
<PackageReference Include="MSTest.TestFramework" Version="2.1.2" />
<PackageReference Include="coverlet.collector" Version="3.0.2">
<PrivateAssets>all</PrivateAssets>
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
</PackageReference>
<PackageReference Include="SciSharp.TensorFlow.Redist" Version="2.3.1" />
</ItemGroup>



+ 4
- 1
test/TensorFlowNET.Native.UnitTest/Tensorflow.Native.UnitTest.csproj View File

@@ -29,7 +29,10 @@
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="16.8.3" />
<PackageReference Include="MSTest.TestAdapter" Version="2.1.2" />
<PackageReference Include="MSTest.TestFramework" Version="2.1.2" />
<PackageReference Include="coverlet.collector" Version="1.3.0" />
<PackageReference Include="coverlet.collector" Version="3.0.2">
<PrivateAssets>all</PrivateAssets>
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
</PackageReference>
<PackageReference Include="SciSharp.TensorFlow.Redist" Version="2.3.1" />
</ItemGroup>



+ 4
- 4
test/TensorFlowNET.UnitTest/ManagedAPI/StringsApiTest.cs View File

@@ -12,7 +12,7 @@ namespace TensorFlowNET.UnitTest.ManagedAPI
{
var jpg = tf.constant(new byte[] { 0x41, 0xff, 0xd8, 0xff }, tf.@string);
var strings = jpg.ToString();
Assert.AreEqual(strings, @"tf.Tensor: shape=(), dtype=string, numpy=A\xff\xd8\xff");
Assert.AreEqual(strings, @"tf.Tensor: shape=(), dtype=string, numpy='A\xff\xd8\xff'");
}

[TestMethod]
@@ -21,16 +21,16 @@ namespace TensorFlowNET.UnitTest.ManagedAPI
var str1 = tf.constant("Hello1");
var str2 = tf.constant("Hello2");
var result = tf.equal(str1, str2);
Assert.IsFalse(result.ToScalar<bool>());
Assert.IsFalse(result.numpy());

var str3 = tf.constant("Hello1");
result = tf.equal(str1, str3);
Assert.IsTrue(result.ToScalar<bool>());
Assert.IsTrue(result.numpy());

var str4 = tf.strings.substr(str1, 0, 5);
var str5 = tf.strings.substr(str2, 0, 5);
result = tf.equal(str4, str5);
Assert.IsTrue(result.ToScalar<bool>());
Assert.IsTrue(result.numpy());
}

[TestMethod]


Loading…
Cancel
Save