diff --git a/TensorFlow.NET.sln b/TensorFlow.NET.sln index 96a8af5c..16f524a4 100644 --- a/TensorFlow.NET.sln +++ b/TensorFlow.NET.sln @@ -15,36 +15,102 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "TensorFlowNET.Hub", "src\Te EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "TensorFlowNET.Datasets", "src\TensorFlowNET.Datasets\TensorFlowNET.Datasets.csproj", "{494D6CAD-2C0D-4C0B-90E2-B097DB039383}" EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "NumSharp.Core", "..\NumSharp\src\NumSharp.Core\NumSharp.Core.csproj", "{9249BCC4-3FEB-4EF5-8AB9-789FFE4040B4}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU + Debug|x64 = Debug|x64 + Publish|Any CPU = Publish|Any CPU + Publish|x64 = Publish|x64 Release|Any CPU = Release|Any CPU + Release|x64 = Release|x64 EndGlobalSection GlobalSection(ProjectConfigurationPlatforms) = postSolution {029A8CF1-CF95-4DCB-98AA-9D3D96A83B3E}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {029A8CF1-CF95-4DCB-98AA-9D3D96A83B3E}.Debug|Any CPU.Build.0 = Debug|Any CPU + {029A8CF1-CF95-4DCB-98AA-9D3D96A83B3E}.Debug|x64.ActiveCfg = Debug|Any CPU + {029A8CF1-CF95-4DCB-98AA-9D3D96A83B3E}.Debug|x64.Build.0 = Debug|Any CPU + {029A8CF1-CF95-4DCB-98AA-9D3D96A83B3E}.Publish|Any CPU.ActiveCfg = Release|Any CPU + {029A8CF1-CF95-4DCB-98AA-9D3D96A83B3E}.Publish|Any CPU.Build.0 = Release|Any CPU + {029A8CF1-CF95-4DCB-98AA-9D3D96A83B3E}.Publish|x64.ActiveCfg = Release|Any CPU + {029A8CF1-CF95-4DCB-98AA-9D3D96A83B3E}.Publish|x64.Build.0 = Release|Any CPU {029A8CF1-CF95-4DCB-98AA-9D3D96A83B3E}.Release|Any CPU.ActiveCfg = Release|Any CPU {029A8CF1-CF95-4DCB-98AA-9D3D96A83B3E}.Release|Any CPU.Build.0 = Release|Any CPU + {029A8CF1-CF95-4DCB-98AA-9D3D96A83B3E}.Release|x64.ActiveCfg = Release|Any CPU + {029A8CF1-CF95-4DCB-98AA-9D3D96A83B3E}.Release|x64.Build.0 = Release|Any CPU {FD682AC0-7B2D-45D3-8B0D-C6D678B04144}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {FD682AC0-7B2D-45D3-8B0D-C6D678B04144}.Debug|Any CPU.Build.0 = Debug|Any CPU + {FD682AC0-7B2D-45D3-8B0D-C6D678B04144}.Debug|x64.ActiveCfg = Debug|Any CPU + {FD682AC0-7B2D-45D3-8B0D-C6D678B04144}.Debug|x64.Build.0 = Debug|Any CPU + {FD682AC0-7B2D-45D3-8B0D-C6D678B04144}.Publish|Any CPU.ActiveCfg = Release|Any CPU + {FD682AC0-7B2D-45D3-8B0D-C6D678B04144}.Publish|Any CPU.Build.0 = Release|Any CPU + {FD682AC0-7B2D-45D3-8B0D-C6D678B04144}.Publish|x64.ActiveCfg = Release|Any CPU + {FD682AC0-7B2D-45D3-8B0D-C6D678B04144}.Publish|x64.Build.0 = Release|Any CPU {FD682AC0-7B2D-45D3-8B0D-C6D678B04144}.Release|Any CPU.ActiveCfg = Release|Any CPU {FD682AC0-7B2D-45D3-8B0D-C6D678B04144}.Release|Any CPU.Build.0 = Release|Any CPU + {FD682AC0-7B2D-45D3-8B0D-C6D678B04144}.Release|x64.ActiveCfg = Release|Any CPU + {FD682AC0-7B2D-45D3-8B0D-C6D678B04144}.Release|x64.Build.0 = Release|Any CPU {D03F94CF-B283-4730-B177-21A57641061F}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {D03F94CF-B283-4730-B177-21A57641061F}.Debug|Any CPU.Build.0 = Debug|Any CPU + {D03F94CF-B283-4730-B177-21A57641061F}.Debug|x64.ActiveCfg = Debug|Any CPU + {D03F94CF-B283-4730-B177-21A57641061F}.Debug|x64.Build.0 = Debug|Any CPU + {D03F94CF-B283-4730-B177-21A57641061F}.Publish|Any CPU.ActiveCfg = Release|Any CPU + {D03F94CF-B283-4730-B177-21A57641061F}.Publish|Any CPU.Build.0 = Release|Any CPU + {D03F94CF-B283-4730-B177-21A57641061F}.Publish|x64.ActiveCfg = Release|Any CPU + {D03F94CF-B283-4730-B177-21A57641061F}.Publish|x64.Build.0 = Release|Any CPU {D03F94CF-B283-4730-B177-21A57641061F}.Release|Any 
CPU.ActiveCfg = Release|Any CPU {D03F94CF-B283-4730-B177-21A57641061F}.Release|Any CPU.Build.0 = Release|Any CPU + {D03F94CF-B283-4730-B177-21A57641061F}.Release|x64.ActiveCfg = Release|Any CPU + {D03F94CF-B283-4730-B177-21A57641061F}.Release|x64.Build.0 = Release|Any CPU {904472F8-40E1-4650-AA6F-C7F209B3691B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {904472F8-40E1-4650-AA6F-C7F209B3691B}.Debug|Any CPU.Build.0 = Debug|Any CPU + {904472F8-40E1-4650-AA6F-C7F209B3691B}.Debug|x64.ActiveCfg = Debug|Any CPU + {904472F8-40E1-4650-AA6F-C7F209B3691B}.Debug|x64.Build.0 = Debug|Any CPU + {904472F8-40E1-4650-AA6F-C7F209B3691B}.Publish|Any CPU.ActiveCfg = Release|Any CPU + {904472F8-40E1-4650-AA6F-C7F209B3691B}.Publish|Any CPU.Build.0 = Release|Any CPU + {904472F8-40E1-4650-AA6F-C7F209B3691B}.Publish|x64.ActiveCfg = Release|Any CPU + {904472F8-40E1-4650-AA6F-C7F209B3691B}.Publish|x64.Build.0 = Release|Any CPU {904472F8-40E1-4650-AA6F-C7F209B3691B}.Release|Any CPU.ActiveCfg = Release|Any CPU {904472F8-40E1-4650-AA6F-C7F209B3691B}.Release|Any CPU.Build.0 = Release|Any CPU + {904472F8-40E1-4650-AA6F-C7F209B3691B}.Release|x64.ActiveCfg = Release|Any CPU + {904472F8-40E1-4650-AA6F-C7F209B3691B}.Release|x64.Build.0 = Release|Any CPU {4EAFAE19-C832-47C6-B01E-0F4268C9072C}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {4EAFAE19-C832-47C6-B01E-0F4268C9072C}.Debug|Any CPU.Build.0 = Debug|Any CPU + {4EAFAE19-C832-47C6-B01E-0F4268C9072C}.Debug|x64.ActiveCfg = Debug|Any CPU + {4EAFAE19-C832-47C6-B01E-0F4268C9072C}.Debug|x64.Build.0 = Debug|Any CPU + {4EAFAE19-C832-47C6-B01E-0F4268C9072C}.Publish|Any CPU.ActiveCfg = Release|Any CPU + {4EAFAE19-C832-47C6-B01E-0F4268C9072C}.Publish|Any CPU.Build.0 = Release|Any CPU + {4EAFAE19-C832-47C6-B01E-0F4268C9072C}.Publish|x64.ActiveCfg = Release|Any CPU + {4EAFAE19-C832-47C6-B01E-0F4268C9072C}.Publish|x64.Build.0 = Release|Any CPU {4EAFAE19-C832-47C6-B01E-0F4268C9072C}.Release|Any CPU.ActiveCfg = Release|Any CPU {4EAFAE19-C832-47C6-B01E-0F4268C9072C}.Release|Any CPU.Build.0 = Release|Any CPU + {4EAFAE19-C832-47C6-B01E-0F4268C9072C}.Release|x64.ActiveCfg = Release|Any CPU + {4EAFAE19-C832-47C6-B01E-0F4268C9072C}.Release|x64.Build.0 = Release|Any CPU {494D6CAD-2C0D-4C0B-90E2-B097DB039383}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {494D6CAD-2C0D-4C0B-90E2-B097DB039383}.Debug|Any CPU.Build.0 = Debug|Any CPU + {494D6CAD-2C0D-4C0B-90E2-B097DB039383}.Debug|x64.ActiveCfg = Debug|Any CPU + {494D6CAD-2C0D-4C0B-90E2-B097DB039383}.Debug|x64.Build.0 = Debug|Any CPU + {494D6CAD-2C0D-4C0B-90E2-B097DB039383}.Publish|Any CPU.ActiveCfg = Release|Any CPU + {494D6CAD-2C0D-4C0B-90E2-B097DB039383}.Publish|Any CPU.Build.0 = Release|Any CPU + {494D6CAD-2C0D-4C0B-90E2-B097DB039383}.Publish|x64.ActiveCfg = Release|Any CPU + {494D6CAD-2C0D-4C0B-90E2-B097DB039383}.Publish|x64.Build.0 = Release|Any CPU {494D6CAD-2C0D-4C0B-90E2-B097DB039383}.Release|Any CPU.ActiveCfg = Release|Any CPU {494D6CAD-2C0D-4C0B-90E2-B097DB039383}.Release|Any CPU.Build.0 = Release|Any CPU + {494D6CAD-2C0D-4C0B-90E2-B097DB039383}.Release|x64.ActiveCfg = Release|Any CPU + {494D6CAD-2C0D-4C0B-90E2-B097DB039383}.Release|x64.Build.0 = Release|Any CPU + {9249BCC4-3FEB-4EF5-8AB9-789FFE4040B4}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {9249BCC4-3FEB-4EF5-8AB9-789FFE4040B4}.Debug|Any CPU.Build.0 = Debug|Any CPU + {9249BCC4-3FEB-4EF5-8AB9-789FFE4040B4}.Debug|x64.ActiveCfg = Debug|x64 + {9249BCC4-3FEB-4EF5-8AB9-789FFE4040B4}.Debug|x64.Build.0 = Debug|x64 + {9249BCC4-3FEB-4EF5-8AB9-789FFE4040B4}.Publish|Any CPU.ActiveCfg = Publish|Any CPU + 
{9249BCC4-3FEB-4EF5-8AB9-789FFE4040B4}.Publish|Any CPU.Build.0 = Publish|Any CPU + {9249BCC4-3FEB-4EF5-8AB9-789FFE4040B4}.Publish|x64.ActiveCfg = Publish|x64 + {9249BCC4-3FEB-4EF5-8AB9-789FFE4040B4}.Publish|x64.Build.0 = Publish|x64 + {9249BCC4-3FEB-4EF5-8AB9-789FFE4040B4}.Release|Any CPU.ActiveCfg = Release|Any CPU + {9249BCC4-3FEB-4EF5-8AB9-789FFE4040B4}.Release|Any CPU.Build.0 = Release|Any CPU + {9249BCC4-3FEB-4EF5-8AB9-789FFE4040B4}.Release|x64.ActiveCfg = Release|x64 + {9249BCC4-3FEB-4EF5-8AB9-789FFE4040B4}.Release|x64.Build.0 = Release|x64 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/src/TensorFlowNET.Core/APIs/keras.layers.cs b/src/TensorFlowNET.Core/APIs/keras.layers.cs index aba24115..92900e76 100644 --- a/src/TensorFlowNET.Core/APIs/keras.layers.cs +++ b/src/TensorFlowNET.Core/APIs/keras.layers.cs @@ -38,13 +38,22 @@ namespace Tensorflow var batch_size = batch_shape[0]; var shape = batch_shape.Skip(1).ToArray(); - var input_layer = new InputLayer( - input_shape: shape, - batch_size: batch_size, - name: name, - dtype: dtype, - sparse: sparse, - input_tensor: tensor); + InputLayer input_layer = null; + if (batch_shape != null) + input_layer = new InputLayer( + batch_input_shape: batch_shape, + name: name, + dtype: dtype, + sparse: sparse, + input_tensor: tensor); + else + input_layer = new InputLayer( + input_shape: shape, + batch_size: batch_size, + name: name, + dtype: dtype, + sparse: sparse, + input_tensor: tensor); var outputs = input_layer.inbound_nodes[0].output_tensors; diff --git a/src/TensorFlowNET.Core/APIs/tf.array.cs b/src/TensorFlowNET.Core/APIs/tf.array.cs index 3a674e83..34303bf9 100644 --- a/src/TensorFlowNET.Core/APIs/tf.array.cs +++ b/src/TensorFlowNET.Core/APIs/tf.array.cs @@ -53,6 +53,16 @@ namespace Tensorflow public Tensor boolean_mask(T1 tensor, T2 mask, string name = "boolean_mask", int axis = 0) => array_ops.boolean_mask(tensor, mask, name: name, axis: axis); + /// + /// Broadcast an array for a compatible shape. 
+ /// + /// + /// + /// + /// + public Tensor broadcast_to(Tensor input, TensorShape shape, string name = null) + => gen_array_ops.broadcast_to(input, shape, name: name); + public Tensor check_numerics(Tensor tensor, string message, string name = null) => gen_array_ops.check_numerics(tensor, message, name: name); diff --git a/src/TensorFlowNET.Core/APIs/tf.nn.cs b/src/TensorFlowNET.Core/APIs/tf.nn.cs index ea52ab57..e9805010 100644 --- a/src/TensorFlowNET.Core/APIs/tf.nn.cs +++ b/src/TensorFlowNET.Core/APIs/tf.nn.cs @@ -115,8 +115,8 @@ namespace Tensorflow public Tensor relu(Tensor features, string name = null) => gen_nn_ops.relu(features, name); public Tensor[] fused_batch_norm(Tensor x, - RefVariable scale, - RefVariable offset, + VariableV1 scale, + VariableV1 offset, Tensor mean = null, Tensor variance = null, float epsilon = 0.001f, diff --git a/src/TensorFlowNET.Core/Binding.Util.cs b/src/TensorFlowNET.Core/Binding.Util.cs index dcf191ed..334f4f74 100644 --- a/src/TensorFlowNET.Core/Binding.Util.cs +++ b/src/TensorFlowNET.Core/Binding.Util.cs @@ -113,6 +113,7 @@ namespace Tensorflow } } + [DebuggerStepThrough] [DebuggerNonUserCode()] // with "Just My Code" enabled this lets the debugger break at the origin of the exception public static void tf_with(T py, Action action) where T : IObjectLife { diff --git a/src/TensorFlowNET.Core/Gradients/array_grad.cs b/src/TensorFlowNET.Core/Gradients/array_grad.cs index f07d2825..74e9ef10 100644 --- a/src/TensorFlowNET.Core/Gradients/array_grad.cs +++ b/src/TensorFlowNET.Core/Gradients/array_grad.cs @@ -27,6 +27,27 @@ namespace Tensorflow.Gradients [RegisterGradient("array_grad")] public class array_grad { + [RegisterGradient("BroadcastTo")] + public static Tensor[] _BroadcastToGrad(Operation op, Tensor[] grads) + { + var grad = grads[0]; + var input_value = op.inputs[0]; + var broadcast_shape = op.inputs[1]; + var input_value_shape = array_ops.shape(input_value); + var (_, reduction_axes) = gen_array_ops.broadcast_gradient_args(broadcast_shape, + input_value_shape); + var updates_grad_reshaped = math_ops.reduce_sum(grad, + axis: reduction_axes, + keepdims: true); + var updates_grad = array_ops.reshape(updates_grad_reshaped, input_value_shape); + + return new Tensor[] + { + updates_grad, + null + }; + } + [RegisterGradient("ConcatV2")] public static Tensor[] _ConcatGradV2(Operation op, Tensor[] grads) { diff --git a/src/TensorFlowNET.Core/Gradients/math_grad.cs b/src/TensorFlowNET.Core/Gradients/math_grad.cs index 49dcbc45..b3c620c0 100644 --- a/src/TensorFlowNET.Core/Gradients/math_grad.cs +++ b/src/TensorFlowNET.Core/Gradients/math_grad.cs @@ -58,6 +58,20 @@ namespace Tensorflow.Gradients return new Tensor[] { r1, r2 }; } + [RegisterGradient("Cumsum")] + public static Tensor[] _CumsumGrad(Operation op, Tensor[] grads) + { + var grad = grads[0]; + var axis = op.inputs[1]; + var exclusive = op.get_attr("exclusive"); + var reverse = op.get_attr("reverse"); + return new Tensor[] + { + math_ops.cumsum(grad, axis, exclusive: exclusive, reverse: !reverse), + null + }; + } + [RegisterGradient("DivNoNan")] public static Tensor[] _DivNoNanGrad(Operation op, Tensor[] grads) { diff --git a/src/TensorFlowNET.Core/Graphs/Graph.cs b/src/TensorFlowNET.Core/Graphs/Graph.cs index 7119a4ad..0c43582d 100644 --- a/src/TensorFlowNET.Core/Graphs/Graph.cs +++ b/src/TensorFlowNET.Core/Graphs/Graph.cs @@ -266,7 +266,7 @@ namespace Tensorflow name = op_type; // If a names ends with a '/' it is a "name scope" and we use it as-is, // after removing the trailing '/'. 
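A quick numeric sanity check for the _BroadcastToGrad registration above (editorial sketch, not part of the patch; it mirrors the BroadcastToGrad unit test added at the end of this diff and assumes the usual `using static Tensorflow.Binding;` plus MSTest scaffolding). Broadcasting a scalar to shape (2, 4, 3) copies it 24 times, so the gradient reduced back onto the scalar must be 24:

    var graph = tf.Graph().as_default();
    var x = tf.constant(2, dtype: dtypes.float32);   // scalar input
    var y = tf.broadcast_to(x, (2, 4, 3));           // 24 copies of x
    var grad = tf.gradients(y, x);                   // dispatches to _BroadcastToGrad above
    using (var sess = tf.Session(graph))
    {
        float result = sess.run(grad[0]);
        Assert.AreEqual(result, 24.0f);              // sum over the broadcast axes
    }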
- name = name.EndsWith("/") ? ops._name_from_scope_name(name) : unique_name(name); + name = name.EndsWith("/") ? ops.name_from_scope_name(name) : unique_name(name); var node_def = ops._NodeDef(op_type, name, device: "", attrs: attrs); var input_ops = inputs.Select(x => x.op).ToArray(); @@ -341,7 +341,7 @@ namespace Tensorflow if (string.IsNullOrEmpty(name)) new_stack = ""; else if (name.EndsWith("/")) - new_stack = ops._name_from_scope_name(name); + new_stack = ops.name_from_scope_name(name); else new_stack = unique_name(name); diff --git a/src/TensorFlowNET.Core/Keras/Engine/Model.cs b/src/TensorFlowNET.Core/Keras/Engine/Model.cs index d56c49bc..d4cde39e 100644 --- a/src/TensorFlowNET.Core/Keras/Engine/Model.cs +++ b/src/TensorFlowNET.Core/Keras/Engine/Model.cs @@ -1,11 +1,33 @@ -namespace Tensorflow.Keras.Engine +using Tensorflow.Keras.Optimizers; + +namespace Tensorflow.Keras.Engine { public class Model : Network { + bool _cloning; + bool _is_compiled; + string loss; + IOptimizer optimizer; + public Model(string name = null) : base(name: name) { } + + public void compile(string optimizerName, string lossName) + { + switch (optimizerName) + { + case "rmsprop": + optimizer = new RMSprop(); + break; + } + + loss = lossName; + _is_compiled = true; + + // Prepare list of loss functions, same size of model outputs. + } } } diff --git a/src/TensorFlowNET.Core/Keras/Engine/Sequential.cs b/src/TensorFlowNET.Core/Keras/Engine/Sequential.cs index e18b401c..e9f85530 100644 --- a/src/TensorFlowNET.Core/Keras/Engine/Sequential.cs +++ b/src/TensorFlowNET.Core/Keras/Engine/Sequential.cs @@ -20,6 +20,9 @@ namespace Tensorflow.Keras.Engine { public class Sequential : Model, IObjectLife { + bool _is_graph_network; + Tensor[] outputs; + public Sequential(string name = null) : base(name: name) { @@ -42,21 +45,40 @@ namespace Tensorflow.Keras.Engine var set_inputs = false; if(_layers.Count == 0) { - var (batch_shape, dtype) = (layer._batch_input_shape, layer._dtype); - if(batch_shape != null) + if(layer is InputLayer) { - // Instantiate an input layer. - var x = keras.layers.Input( - batch_shape: batch_shape, - dtype: dtype, - name: layer.name + "_input"); - - // This will build the current layer - // and create the node connecting the current layer - // to the input layer we just created. - layer.__call__(x); - set_inputs = true; + } + else + { + var (batch_shape, dtype) = (layer._batch_input_shape, layer._dtype); + if (batch_shape != null) + { + // Instantiate an input layer. + var x = keras.layers.Input( + batch_shape: batch_shape, + dtype: dtype, + name: layer.name + "_input"); + + // This will build the current layer + // and create the node connecting the current layer + // to the input layer we just created. + layer.__call__(x); + set_inputs = true; + } + } + + if (set_inputs) + { + // If an input layer (placeholder) is available. 
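For context on the new Model.compile path above, a minimal end-to-end use (editorial sketch; it mirrors the Keras EmbeddingTest added at the bottom of this diff). At this stage compile() only maps the string "rmsprop" to the new RMSprop stub and records the loss name; loss-function preparation is still a TODO:

    var model = new Sequential();
    model.add(new Embedding(1000, 64, input_length: 10));
    model.compile("rmsprop", "mse");   // optimizer = new RMSprop(), loss = "mse"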
+ // outputs = layer._inbound_nodes; + } + + } + + if (set_inputs || _is_graph_network) + { + } } diff --git a/src/TensorFlowNET.Core/Keras/Layers/BatchNormalization.cs b/src/TensorFlowNET.Core/Keras/Layers/BatchNormalization.cs index 929b3a3f..0428b2ad 100644 --- a/src/TensorFlowNET.Core/Keras/Layers/BatchNormalization.cs +++ b/src/TensorFlowNET.Core/Keras/Layers/BatchNormalization.cs @@ -37,8 +37,8 @@ namespace Tensorflow.Keras.Layers private IInitializer gamma_initializer; private IInitializer moving_mean_initializer; private IInitializer moving_variance_initializer; - private RefVariable gamma; - private RefVariable beta; + private VariableV1 gamma; + private VariableV1 beta; private RefVariable moving_mean; private RefVariable moving_variance; @@ -117,7 +117,7 @@ namespace Tensorflow.Keras.Layers } - moving_mean = add_weight("moving_mean", + moving_mean = (RefVariable)add_weight("moving_mean", param_shape, dtype: param_dtype, initializer: moving_mean_initializer, @@ -125,7 +125,7 @@ namespace Tensorflow.Keras.Layers trainable: false, aggregation: VariableAggregation.Mean); - moving_variance = add_weight("moving_variance", + moving_variance = (RefVariable)add_weight("moving_variance", shape: param_shape, dtype: param_dtype, initializer: moving_variance_initializer, diff --git a/src/TensorFlowNET.Core/Keras/Layers/Conv.cs b/src/TensorFlowNET.Core/Keras/Layers/Conv.cs index 8319041f..dc40ae8c 100644 --- a/src/TensorFlowNET.Core/Keras/Layers/Conv.cs +++ b/src/TensorFlowNET.Core/Keras/Layers/Conv.cs @@ -75,13 +75,13 @@ namespace Tensorflow.Keras.Layers input_shape.dims[input_shape.ndim + channel_axis] : input_shape.dims[channel_axis]; var kernel_shape = new int[] { kernel_size[0], kernel_size[1], input_dim, filters }; - kernel = add_weight(name: "kernel", + kernel = (RefVariable)add_weight(name: "kernel", shape: kernel_shape, initializer: kernel_initializer, trainable: true, dtype: _dtype); if (use_bias) - bias = add_weight(name: "bias", + bias = (RefVariable)add_weight(name: "bias", shape: new int[] { filters }, initializer: bias_initializer, trainable: true, diff --git a/src/TensorFlowNET.Core/Keras/Layers/Dense.cs b/src/TensorFlowNET.Core/Keras/Layers/Dense.cs index adfae5d1..2564da6d 100644 --- a/src/TensorFlowNET.Core/Keras/Layers/Dense.cs +++ b/src/TensorFlowNET.Core/Keras/Layers/Dense.cs @@ -55,14 +55,14 @@ namespace Tensorflow.Keras.Layers var axes = new Dictionary(); axes[-1] = last_dim; input_spec = new InputSpec(min_ndim: 2, axes: axes); - kernel = add_weight( + kernel = (RefVariable)add_weight( "kernel", shape: new int[] { last_dim, units }, initializer: kernel_initializer, dtype: _dtype, trainable: true); if (use_bias) - bias = add_weight( + bias = (RefVariable)add_weight( "bias", shape: new int[] { units }, initializer: bias_initializer, diff --git a/src/TensorFlowNET.Core/Keras/Layers/Embedding.cs b/src/TensorFlowNET.Core/Keras/Layers/Embedding.cs index 37f15baf..f10499c4 100644 --- a/src/TensorFlowNET.Core/Keras/Layers/Embedding.cs +++ b/src/TensorFlowNET.Core/Keras/Layers/Embedding.cs @@ -23,20 +23,23 @@ namespace Tensorflow.Keras.Layers private int input_dim; private int output_dim; private bool mask_zero; - public RefVariable embeddings; + public VariableV1 embeddings; public IInitializer embeddings_initializer; + int input_length; public Embedding(int input_dim, int output_dim, IInitializer embeddings_initializer = null, bool mask_zero = false, TF_DataType dtype = TF_DataType.TF_FLOAT, - int[] input_shape = null) : base(dtype: dtype, input_shape: input_shape) + int[] 
input_shape = null, + int input_length = -1) : base(dtype: dtype, input_shape: input_shape ?? new[] { input_length }) { this.input_dim = input_dim; this.output_dim = output_dim; this.embeddings_initializer = embeddings_initializer == null ? tf.uniform_initializer : embeddings_initializer; this.mask_zero = mask_zero; supports_masking = mask_zero; + this.input_length = input_length; } protected override void build(TensorShape input_shape) @@ -46,5 +49,15 @@ namespace Tensorflow.Keras.Layers name: "embeddings"); built = true; } + + protected override Tensor call(Tensor inputs, Tensor training = null) + { + var dtype = inputs.dtype; + if (dtype != tf.int32 && dtype != tf.int64) + inputs = math_ops.cast(inputs, tf.int32); + + var @out = embedding_ops.embedding_lookup(embeddings, inputs); + return @out; + } } } diff --git a/src/TensorFlowNET.Core/Keras/Layers/InputLayer.cs b/src/TensorFlowNET.Core/Keras/Layers/InputLayer.cs index ce029fa7..be5515ec 100644 --- a/src/TensorFlowNET.Core/Keras/Layers/InputLayer.cs +++ b/src/TensorFlowNET.Core/Keras/Layers/InputLayer.cs @@ -15,6 +15,8 @@ ******************************************************************************/ using System; +using System.Collections.Generic; +using System.Linq; namespace Tensorflow.Keras.Layers { @@ -28,21 +30,47 @@ namespace Tensorflow.Keras.Layers public bool is_placeholder; public InputLayer(int[] input_shape = null, + int[] batch_input_shape = null, int? batch_size = null, TF_DataType dtype = TF_DataType.DtInvalid, string name = null, bool sparse = false, - Tensor input_tensor = null) + Tensor input_tensor = null) : base(dtype: dtype, name: name) { built = true; this.sparse = sparse; this.batch_size = batch_size; this.supports_masking = true; + if(batch_input_shape != null) + { + batch_size = batch_input_shape[0]; + input_shape = batch_input_shape.Skip(1).ToArray(); + } + + // moved to base class + if (string.IsNullOrEmpty(name)) + { + var prefix = "input"; + name = prefix + '_' + backend.get_uid(prefix); + } + if (input_tensor == null) { - var batch_input_shape = new int[] { batch_size.HasValue ? batch_size.Value : -1, -1 }; + if(input_shape != null) + { + var dims = new List { batch_size.HasValue ? batch_size.Value : -1 }; + dims.AddRange(input_shape); + batch_input_shape = dims.ToArray(); + } + else + { + batch_input_shape = null; + } + + var graph = backend.get_graph().as_default(); + // In graph mode, create a graph placeholder to call the layer on. if (sparse) { throw new NotImplementedException("InputLayer sparse is true"); @@ -59,6 +87,10 @@ namespace Tensorflow.Keras.Layers _batch_input_shape = batch_input_shape; } + // Create an input node to add to self.outbound_node + // and set output_tensors' _keras_history. 
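To make the InputLayer shape bookkeeping above concrete (editorial sketch in plain C#, assuming System.Linq and System.Collections.Generic are in scope): when only input_shape is supplied the batch dimension is prepended as -1 (unknown), and when batch_input_shape is supplied it is split back into batch_size and input_shape:

    int[] input_shape = { 10 };                  // e.g. sequences of length 10
    int? batch_size = null;                      // unknown batch size
    var dims = new List<int> { batch_size ?? -1 };
    dims.AddRange(input_shape);
    int[] batch_input_shape = dims.ToArray();    // { -1, 10 }

    // Going the other way, batch_input_shape = { 32, 10 } yields:
    int[] given = { 32, 10 };
    int batch = given[0];                        // 32
    int[] shape = given.Skip(1).ToArray();       // { 10 }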
+ // input_tensor._keras_history = base_layer.KerasHistory(self, 0, 0) + // input_tensor._keras_mask = None new Node(this, inbound_layers: new Layer[0], node_indices: new int[0], diff --git a/src/TensorFlowNET.Core/Keras/Layers/Layer.cs b/src/TensorFlowNET.Core/Keras/Layers/Layer.cs index 6681ec56..22cef8e1 100644 --- a/src/TensorFlowNET.Core/Keras/Layers/Layer.cs +++ b/src/TensorFlowNET.Core/Keras/Layers/Layer.cs @@ -51,7 +51,7 @@ namespace Tensorflow.Keras.Layers /// protected InputSpec input_spec; protected bool supports_masking; - protected List _trainable_weights; + protected List _trainable_weights; private string _name; public string name => _name; protected string _base_name; @@ -65,6 +65,8 @@ namespace Tensorflow.Keras.Layers private List _outbound_nodes; public List outbound_nodes => _outbound_nodes; + float _initial_weights; + public Layer(bool trainable = true, string name = null, TF_DataType dtype = TF_DataType.DtInvalid, @@ -81,13 +83,18 @@ namespace Tensorflow.Keras.Layers this.supports_masking = false; _init_set_name(name); - _trainable_weights = new List(); + _trainable_weights = new List(); _compute_previous_mask = false; _updates = new List(); // Manage input shape information if passed. - - _batch_input_shape = new int[] { -1, -1 }; + if(input_shape != null) + { + var shapes = new List { -1 }; + shapes.AddRange(input_shape); + _batch_input_shape = shapes.ToArray(); + } + _dtype = dtype; @@ -186,12 +193,12 @@ namespace Tensorflow.Keras.Layers built = true; } - protected virtual RefVariable add_weight(string name, + protected virtual VariableV1 add_weight(string name, int[] shape, TF_DataType dtype = TF_DataType.DtInvalid, IInitializer initializer = null, bool? trainable = null, - Func getter = null) + Func getter = null) { if (dtype == TF_DataType.DtInvalid) dtype = TF_DataType.TF_FLOAT; diff --git a/src/TensorFlowNET.Core/Keras/Optimizers/IOptimizer.cs b/src/TensorFlowNET.Core/Keras/Optimizers/IOptimizer.cs new file mode 100644 index 00000000..0c1d411e --- /dev/null +++ b/src/TensorFlowNET.Core/Keras/Optimizers/IOptimizer.cs @@ -0,0 +1,10 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.Keras.Optimizers +{ + public interface IOptimizer + { + } +} diff --git a/src/TensorFlowNET.Core/Keras/Optimizers/OptimizerV2.cs b/src/TensorFlowNET.Core/Keras/Optimizers/OptimizerV2.cs new file mode 100644 index 00000000..2f22a721 --- /dev/null +++ b/src/TensorFlowNET.Core/Keras/Optimizers/OptimizerV2.cs @@ -0,0 +1,14 @@ +using System; +using System.Collections.Generic; +using System.Text; +using Tensorflow.Train; + +namespace Tensorflow.Keras.Optimizers +{ + /// + /// Updated base class for optimizers. + /// + public class OptimizerV2 : Trackable, IOptimizer + { + } +} diff --git a/src/TensorFlowNET.Core/Keras/Optimizers/RMSprop.cs b/src/TensorFlowNET.Core/Keras/Optimizers/RMSprop.cs new file mode 100644 index 00000000..51b65b57 --- /dev/null +++ b/src/TensorFlowNET.Core/Keras/Optimizers/RMSprop.cs @@ -0,0 +1,14 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.Keras.Optimizers +{ + /// + /// Optimizer that implements the RMSprop algorithm. 
+ /// + public class RMSprop : OptimizerV2 + { + + } +} diff --git a/src/TensorFlowNET.Core/Keras/Utils/base_layer_utils.cs b/src/TensorFlowNET.Core/Keras/Utils/base_layer_utils.cs index d4feebc2..6e2dc745 100644 --- a/src/TensorFlowNET.Core/Keras/Utils/base_layer_utils.cs +++ b/src/TensorFlowNET.Core/Keras/Utils/base_layer_utils.cs @@ -32,36 +32,25 @@ namespace Tensorflow.Keras.Utils /// /// /// - public static RefVariable make_variable(string name, + public static VariableV1 make_variable(string name, int[] shape, TF_DataType dtype = TF_DataType.TF_FLOAT, IInitializer initializer = null, - bool trainable = true) => make_variable(name, shape, dtype, initializer, trainable, true); - - /// - /// Adds a new variable to the layer. - /// - /// - /// - /// - /// - /// - /// - public static RefVariable make_variable(string name, - int[] shape, - TF_DataType dtype = TF_DataType.TF_FLOAT, - IInitializer initializer = null, - bool trainable = true, - bool use_resource = true) + bool trainable = true) { var initializing_from_value = false; + bool use_resource = true; ops.init_scope(); Func init_val = () => initializer.call(new TensorShape(shape), dtype: dtype); var variable_dtype = dtype.as_base_dtype(); - var v = tf.Variable(init_val); + var v = tf.VariableV1(init_val, + use_resource: use_resource, + dtype: dtype, + shape: shape, + name: name); return v; } diff --git a/src/TensorFlowNET.Core/Keras/backend.cs b/src/TensorFlowNET.Core/Keras/backend.cs index 46769bd8..73d7d335 100644 --- a/src/TensorFlowNET.Core/Keras/backend.cs +++ b/src/TensorFlowNET.Core/Keras/backend.cs @@ -42,12 +42,12 @@ namespace Tensorflow.Keras /// Allows to give unique autogenerated names to layers, in a graph-specific way. /// public static Dictionary> PER_GRAPH_LAYER_NAME_UIDS = new Dictionary>(); - public static Dictionary _GRAPH_VARIABLES = new Dictionary(); + public static Dictionary _GRAPH_VARIABLES = new Dictionary(); public static Dictionary _GRAPH_TF_OPTIMIZERS = new Dictionary(); public static _DummyEagerGraph _DUMMY_EAGER_GRAPH = new _DummyEagerGraph(); - public static void track_variable(RefVariable v) + public static void track_variable(VariableV1 v) { var graph = v.graph; _GRAPH_VARIABLES[graph.graph_key] = v; diff --git a/src/TensorFlowNET.Core/Layers/Layer.cs b/src/TensorFlowNET.Core/Layers/Layer.cs index 444c2dd4..138f0fc7 100644 --- a/src/TensorFlowNET.Core/Layers/Layer.cs +++ b/src/TensorFlowNET.Core/Layers/Layer.cs @@ -42,7 +42,7 @@ namespace Tensorflow.Layers this._reuse = _reuse; // Avoid an incorrect lint error - _trainable_weights = new List(); + _trainable_weights = new List(); this.built = false; _keras_style = false; } @@ -109,7 +109,7 @@ namespace Tensorflow.Layers /// /// /// - protected virtual RefVariable add_weight(string name, + protected virtual VariableV1 add_weight(string name, int[] shape, TF_DataType dtype = TF_DataType.DtInvalid, IInitializer initializer = null, diff --git a/src/TensorFlowNET.Core/Operations/Initializers/VarianceScaling.cs b/src/TensorFlowNET.Core/Operations/Initializers/VarianceScaling.cs index e2b2a0d6..e1ac0204 100644 --- a/src/TensorFlowNET.Core/Operations/Initializers/VarianceScaling.cs +++ b/src/TensorFlowNET.Core/Operations/Initializers/VarianceScaling.cs @@ -84,7 +84,7 @@ namespace Tensorflow.Operations.Initializers // Assuming convolution kernels (2D, 3D, or more). 
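The RMSprop class introduced above is still an empty stub. For reference only, a minimal sketch of the update rule it is expected to implement eventually (plain arrays rather than TF ops; the names lr, rho, eps and the ms state buffer are illustrative and not part of the patch):

    static void RmspropStep(float[] w, float[] g, float[] ms,
                            float lr = 0.001f, float rho = 0.9f, float eps = 1e-7f)
    {
        for (int i = 0; i < w.Length; i++)
        {
            // Exponential moving average of squared gradients.
            ms[i] = rho * ms[i] + (1f - rho) * g[i] * g[i];
            // Scale the raw gradient step by the root of that average.
            w[i] -= lr * g[i] / ((float)System.Math.Sqrt(ms[i]) + eps);
        }
    }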
// kernel shape: (..., input_depth, depth) int receptive_field_size = 1; - foreach (var dim in shape.Take(2)) + foreach (var dim in shape.Take(shape.Length - 2)) receptive_field_size *= dim; var fan_in = shape[shape.Length - 2] * receptive_field_size; var fan_out = shape[shape.Length - 1] * receptive_field_size; diff --git a/src/TensorFlowNET.Core/Operations/array_ops.py.cs b/src/TensorFlowNET.Core/Operations/array_ops.py.cs index 12094e41..b7ef6440 100644 --- a/src/TensorFlowNET.Core/Operations/array_ops.py.cs +++ b/src/TensorFlowNET.Core/Operations/array_ops.py.cs @@ -600,7 +600,7 @@ namespace Tensorflow return gen_array_ops.concat_v2(values, axis, name: name); } - public static Tensor gather(Tensor @params, Tensor indices, string name = null, int axis = 0) + public static Tensor gather(T1 @params, T2 indices, string name = null, int axis = 0) => gen_array_ops.gather_v2(@params, indices, axis, name: name); public static Tensor transpose(T1 a, T2 perm, string name = "transpose", bool conjugate = false) diff --git a/src/TensorFlowNET.Core/Operations/embedding_ops.cs b/src/TensorFlowNET.Core/Operations/embedding_ops.cs index 3c02e825..1b23fab3 100644 --- a/src/TensorFlowNET.Core/Operations/embedding_ops.cs +++ b/src/TensorFlowNET.Core/Operations/embedding_ops.cs @@ -52,6 +52,38 @@ namespace Tensorflow }); } + /// + /// Helper function for embedding_lookup and _compute_sampled_logits. + /// + /// + /// + /// + /// + /// + /// + public static Tensor _embedding_lookup_and_transform(VariableV1 @params, + Tensor ids, + string partition_strategy = "mod", + string name = null, + string max_norm = null) + { + return tf_with(ops.name_scope(name, "embedding_lookup", new { @params, ids }), scope => + { + name = scope; + int np = 1; + ids = ops.convert_to_tensor(ids, name: "ids"); + if (np == 1) + { + var gather = array_ops.gather(@params, ids, name: name); + var result = _clip(gather, ids, max_norm); + + return array_ops.identity(result); + } + + throw new NotImplementedException("_embedding_lookup_and_transform"); + }); + } + public static Tensor _embedding_lookup_and_transform(Tensor[] @params, Tensor ids, string partition_strategy = "mod", @@ -98,5 +130,18 @@ namespace Tensorflow name: name, max_norm: max_norm); } + + public static Tensor embedding_lookup(VariableV1 @params, Tensor ids, + string partition_strategy = "mod", + string name = null, + bool validate_indices = true, + string max_norm = null) + { + return _embedding_lookup_and_transform(@params: @params, + ids: ids, + partition_strategy: partition_strategy, + name: name, + max_norm: max_norm); + } } } diff --git a/src/TensorFlowNET.Core/Operations/gen_array_ops.cs b/src/TensorFlowNET.Core/Operations/gen_array_ops.cs index 36837477..01231035 100644 --- a/src/TensorFlowNET.Core/Operations/gen_array_ops.cs +++ b/src/TensorFlowNET.Core/Operations/gen_array_ops.cs @@ -106,7 +106,7 @@ namespace Tensorflow return _op.outputs[0]; } - public static Tensor gather_v2(Tensor @params, Tensor indices, int axis, string name = null) + public static Tensor gather_v2(T1 @params, T2 indices, int axis, string name = null) { var _op = _op_def_lib._apply_op_helper("GatherV2", name: name, new { @params, indices, axis }); @@ -515,5 +515,19 @@ namespace Tensorflow return _op.outputs[0]; } + + /// + /// Broadcast an array for a compatible shape. 
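The VarianceScaling fix above matters because the receptive field must be the product of every kernel dimension except the last two (input depth and output depth). A worked example of the corrected arithmetic (editorial sketch, System.Linq assumed):

    int[] shape = { 3, 3, 64, 128 };                 // 2-D conv kernel: (H, W, in, out)
    int receptive_field_size = 1;
    foreach (var dim in shape.Take(shape.Length - 2))
        receptive_field_size *= dim;                 // 3 * 3 = 9
    var fan_in = shape[shape.Length - 2] * receptive_field_size;    // 64 * 9  = 576
    var fan_out = shape[shape.Length - 1] * receptive_field_size;   // 128 * 9 = 1152

    // For a dense kernel { 784, 10 } the loop now runs zero times, so
    // receptive_field_size = 1, fan_in = 784, fan_out = 10; the old
    // shape.Take(2) version wrongly multiplied 784 * 10 into the receptive field.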
+ /// + /// + /// + /// + /// + public static Tensor broadcast_to(Tensor input, int[] shape, string name = null) + { + var _op = _op_def_lib._apply_op_helper("BroadcastTo", name, args: new { input, shape, name }); + + return _op.outputs[0]; + } } } diff --git a/src/TensorFlowNET.Core/Operations/gen_math_ops.cs b/src/TensorFlowNET.Core/Operations/gen_math_ops.cs index 81870e5b..7192dc57 100644 --- a/src/TensorFlowNET.Core/Operations/gen_math_ops.cs +++ b/src/TensorFlowNET.Core/Operations/gen_math_ops.cs @@ -238,7 +238,7 @@ namespace Tensorflow return _op.outputs[0]; } - public static Tensor cumsum(Tensor x, int axis = 0, bool exclusive = false, bool reverse = false, string name = null) + public static Tensor cumsum(Tensor x, T axis, bool exclusive = false, bool reverse = false, string name = null) { var _op = _op_def_lib._apply_op_helper("Cumsum", name, args: new { x, axis, exclusive, reverse }); diff --git a/src/TensorFlowNET.Core/Operations/gen_resource_variable_ops.cs b/src/TensorFlowNET.Core/Operations/gen_resource_variable_ops.cs index 304a5b55..7b00b604 100644 --- a/src/TensorFlowNET.Core/Operations/gen_resource_variable_ops.cs +++ b/src/TensorFlowNET.Core/Operations/gen_resource_variable_ops.cs @@ -26,5 +26,12 @@ namespace Tensorflow return _op; } + + public static Tensor var_is_initialized_op(Tensor resource, string name = null) + { + var _op = _op_def_lib._apply_op_helper("VarIsInitializedOp", name, new { resource }); + + return _op; + } } } diff --git a/src/TensorFlowNET.Core/Operations/math_ops.cs b/src/TensorFlowNET.Core/Operations/math_ops.cs index 94c42ba2..d4dfc12b 100644 --- a/src/TensorFlowNET.Core/Operations/math_ops.cs +++ b/src/TensorFlowNET.Core/Operations/math_ops.cs @@ -80,7 +80,7 @@ namespace Tensorflow }); } - public static Tensor cumsum(Tensor x, int axis = 0, bool exclusive = false, bool reverse = false, string name = null) + public static Tensor cumsum(Tensor x, T axis = default, bool exclusive = false, bool reverse = false, string name = null) { return tf_with(ops.name_scope(name, "Cumsum", new {x}), scope => { diff --git a/src/TensorFlowNET.Core/Operations/nn_impl.py.cs b/src/TensorFlowNET.Core/Operations/nn_impl.py.cs index bced0047..42103b00 100644 --- a/src/TensorFlowNET.Core/Operations/nn_impl.py.cs +++ b/src/TensorFlowNET.Core/Operations/nn_impl.py.cs @@ -97,9 +97,9 @@ namespace Tensorflow /// /// /// - public static Tensor[] fused_batch_norm(Tensor x, - RefVariable scale, - RefVariable offset, + public static Tensor[] fused_batch_norm(Tensor x, + VariableV1 scale, + VariableV1 offset, Tensor mean, Tensor variance, float epsilon = 0.001f, diff --git a/src/TensorFlowNET.Core/Operations/resource_variable_ops.cs b/src/TensorFlowNET.Core/Operations/resource_variable_ops.cs index 41bd0ddf..b301063c 100644 --- a/src/TensorFlowNET.Core/Operations/resource_variable_ops.cs +++ b/src/TensorFlowNET.Core/Operations/resource_variable_ops.cs @@ -74,6 +74,29 @@ namespace Tensorflow return var is ResourceVariable; } + /// + /// Creates a variable handle with information to do shape inference. 
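Since cumsum now accepts a generic axis and gains a registered gradient, a short note on why _CumsumGrad passes reverse: !reverse (editorial sketch): for y = cumsum(x), output j contains inputs 0..j, so the gradient of x[i] accumulates the upstream gradients from position i onward, which is a cumulative sum of the upstream gradient running in the opposite direction:

    // Plain-array check: d(sum(cumsum(x)))/dx[i] = n - i.
    float[] x = { 1f, 2f, 3f, 4f };
    int n = x.Length;
    var upstream = Enumerable.Repeat(1f, n).ToArray();   // d(sum(y))/dy[j] = 1
    var dx = new float[n];
    float acc = 0f;
    for (int i = n - 1; i >= 0; i--)                      // reverse cumulative sum
    {
        acc += upstream[i];
        dx[i] = acc;                                      // {4, 3, 2, 1}
    }
    // Matches the CumsumGrad unit test at the end of this diff: a scalar
    // broadcast to (2, 4, 3) and cumsum'ed over the length-4 axis gets
    // gradient (4 + 3 + 2 + 1) * 6 = 60.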
+ /// + /// + /// + /// + /// + /// + /// + public static Tensor eager_safe_variable_handle(Tensor initial_value, TensorShape shape, + string shared_name, string name, bool graph_mode) + { + var dtype = initial_value.dtype.as_base_dtype(); + return variable_handle_from_shape_and_dtype( + shape, dtype, shared_name, name, graph_mode, initial_value); + } + + public static Tensor variable_handle_from_shape_and_dtype(TensorShape shape, TF_DataType dtype, + string shared_name, string name, bool graph_mode, Tensor extra_handle_data = null) + { + throw new NotImplementedException(""); + } + /// /// Represents a future for a read of a variable. /// Pretends to be the tensor if anyone looks. diff --git a/src/TensorFlowNET.Core/TensorFlowNET.Core.csproj b/src/TensorFlowNET.Core/TensorFlowNET.Core.csproj index 12a4c5f3..33914c3a 100644 --- a/src/TensorFlowNET.Core/TensorFlowNET.Core.csproj +++ b/src/TensorFlowNET.Core/TensorFlowNET.Core.csproj @@ -5,7 +5,7 @@ TensorFlow.NET Tensorflow 1.14.0 - 0.11.5 + 0.11.6 Haiping Chen, Meinrad Recheis, Eli Belash SciSharp STACK true @@ -17,7 +17,7 @@ TensorFlow, NumSharp, SciSharp, MachineLearning, TensorFlow.NET, C# Google's TensorFlow full binding in .NET Standard. Docs: https://tensorflownet.readthedocs.io - 0.11.5.0 + 0.11.6.0 Changes since v0.10.0: 1. Upgrade NumSharp to v0.20.3. 2. Add DisposableObject class to manage object lifetime. @@ -29,9 +29,11 @@ Docs: https://tensorflownet.readthedocs.io 8. Add tf.random_normal, tf.constant, tf.pad, tf.shape, tf.image.resize_nearest_neighbor. 9. MultiThread is safe. 10. Support n-dim indexing for tensor. -11. Add RegisterNoGradient +11. Add RegisterNoGradients +12. Add CumsumGrad, BroadcastToGrad. +13. Return VariableV1 instead of RefVariable. 7.3 - 0.11.5.0 + 0.11.6.0 LICENSE true true @@ -63,7 +65,6 @@ Docs: https://tensorflownet.readthedocs.io - @@ -71,4 +72,8 @@ Docs: https://tensorflownet.readthedocs.io + + + + diff --git a/src/TensorFlowNET.Core/Train/AdamOptimizer.cs b/src/TensorFlowNET.Core/Train/AdamOptimizer.cs index faf6fec2..39228691 100644 --- a/src/TensorFlowNET.Core/Train/AdamOptimizer.cs +++ b/src/TensorFlowNET.Core/Train/AdamOptimizer.cs @@ -143,8 +143,8 @@ namespace Tensorflow.Train { ops.init_scope(); var graph = ops.get_default_graph(); - return (_get_non_slot_variable("beta1_power", graph: graph), - _get_non_slot_variable("beta2_power", graph: graph)); + return (_get_non_slot_variable("beta1_power", graph: graph) as RefVariable, + _get_non_slot_variable("beta2_power", graph: graph) as RefVariable); } public override void _prepare() diff --git a/src/TensorFlowNET.Core/Train/Optimizer.cs b/src/TensorFlowNET.Core/Train/Optimizer.cs index e0040ecf..524a0e34 100644 --- a/src/TensorFlowNET.Core/Train/Optimizer.cs +++ b/src/TensorFlowNET.Core/Train/Optimizer.cs @@ -44,7 +44,7 @@ namespace Tensorflow public Tensor LearningRateTensor => _lr_t; public bool _use_locking; public Dictionary> _slots; - public Dictionary _non_slot_dict; + public Dictionary _non_slot_dict; public Dictionary _deferred_slot_restorations; SlotCreator slot_creator = new SlotCreator(); @@ -58,7 +58,7 @@ namespace Tensorflow _lr = learning_rate; // Dictionary of slots. _slots = new Dictionary>(); - _non_slot_dict = new Dictionary(); + _non_slot_dict = new Dictionary(); _deferred_slot_restorations = new Dictionary(); } @@ -72,7 +72,7 @@ namespace Tensorflow _lr_t = learning_rate; // Dictionary of slots. 
_slots = new Dictionary>(); - _non_slot_dict = new Dictionary(); + _non_slot_dict = new Dictionary(); _deferred_slot_restorations = new Dictionary(); } @@ -239,7 +239,7 @@ namespace Tensorflow /// /// /// - protected RefVariable _create_non_slot_variable(float initial_value, string name, RefVariable colocate_with) + protected VariableV1 _create_non_slot_variable(float initial_value, string name, RefVariable colocate_with) { // Recommendation: Use OptimizerV2 if your optimizer uses non-slot variables. var graph = colocate_with.graph; @@ -333,7 +333,7 @@ namespace Tensorflow return $"{var.op.graph.graph_key}.{var.op.name}"; } - protected RefVariable _get_non_slot_variable(string name, Graph graph = null) + protected VariableV1 _get_non_slot_variable(string name, Graph graph = null) { var key = $"{name}.{graph.graph_key}"; var non_slot = _non_slot_dict.ContainsKey(key) ? _non_slot_dict[key] : null; diff --git a/src/TensorFlowNET.Core/Train/Trackable.cs b/src/TensorFlowNET.Core/Train/Trackable.cs index a718c869..36083d84 100644 --- a/src/TensorFlowNET.Core/Train/Trackable.cs +++ b/src/TensorFlowNET.Core/Train/Trackable.cs @@ -26,11 +26,11 @@ namespace Tensorflow.Train /// Restore-on-create for a variable be saved with this `Checkpointable`. /// /// - protected virtual RefVariable _add_variable_with_custom_getter(string name, + protected virtual VariableV1 _add_variable_with_custom_getter(string name, int[] shape, TF_DataType dtype = TF_DataType.TF_FLOAT, IInitializer initializer = null, - Func getter = null, + Func getter = null, bool overwrite = false, bool trainable = false) { @@ -53,13 +53,13 @@ namespace Tensorflow.Train /// /// /// - protected void _handle_deferred_dependencies(string name, RefVariable trackable) + protected void _handle_deferred_dependencies(string name, VariableV1 trackable) { _maybe_initialize_trackable(); // TODO } - protected RefVariable _track_checkpointable(RefVariable checkpointable, string name, bool overwrite = false) + protected VariableV1 _track_checkpointable(VariableV1 checkpointable, string name, bool overwrite = false) { return checkpointable; } diff --git a/src/TensorFlowNET.Core/Variables/RefVariable.cs b/src/TensorFlowNET.Core/Variables/RefVariable.cs index 97e1d0f4..4b0a35fb 100644 --- a/src/TensorFlowNET.Core/Variables/RefVariable.cs +++ b/src/TensorFlowNET.Core/Variables/RefVariable.cs @@ -28,14 +28,14 @@ namespace Tensorflow public Tensor _initial_value; public string _graph_key; public bool _trainable; - public Tensor _variable; + public Tensor _snapshot; public bool _save_slice_info; private Operation _initializer_op; public override Operation initializer => _initializer_op; public override Operation op => _variable.op; - public Graph graph => _variable.graph; + public TF_DataType dtype => _variable.dtype; public TensorShape shape => tensor_util.to_shape(_variable.shape); @@ -143,7 +143,7 @@ namespace Tensorflow // Use attr_scope and device(None) to simulate the behavior of // colocate_with when the variable we want to colocate with doesn't // yet exist. - string true_name = ops._name_from_scope_name(name); + string true_name = ops.name_from_scope_name(name); var attr = new AttrValue { List = new AttrValue.Types.ListValue() diff --git a/src/TensorFlowNET.Core/Variables/ResourceVariable.cs b/src/TensorFlowNET.Core/Variables/ResourceVariable.cs index 85d2ca56..83774734 100644 --- a/src/TensorFlowNET.Core/Variables/ResourceVariable.cs +++ b/src/TensorFlowNET.Core/Variables/ResourceVariable.cs @@ -14,8 +14,10 @@ limitations under the License. 
******************************************************************************/ +using Google.Protobuf; using System; using System.Collections.Generic; +using static Tensorflow.Binding; namespace Tensorflow { @@ -53,7 +55,8 @@ namespace Tensorflow string name = null, VariableDef variable_def = null, TF_DataType dtype = TF_DataType.DtInvalid, - string import_scope = "") : base(initial_value, + string import_scope = "", + TensorShape shape = null) : base(initial_value, trainable, collections, validate_shape, @@ -69,11 +72,83 @@ namespace Tensorflow } else { - throw new NotImplementedException("ResourceVariable _init_from_args"); - //_init_from_args(initial_value, trainable, collections, validate_shape, caching_device, name, dtype); + _init_from_args(initial_value: initial_value, + trainable: trainable, + collections: collections, + caching_device: caching_device, + name: name, + dtype: dtype, + shape: shape); } } + private void _init_from_args(object initial_value = null, + bool trainable = true, + List collections = null, + string caching_device = "", + string name = null, + TF_DataType dtype = TF_DataType.DtInvalid, + TensorShape shape = null) + { + var init_from_fn = initial_value.GetType().Name == "Func`1"; + if(collections == null) + collections = new List() { tf.GraphKeys.GLOBAL_VARIABLES }; + _trainable = trainable; + _graph_key = ops.get_default_graph().graph_key; + + ops.init_scope(); + _in_graph_mode = true; + tf_with(ops.name_scope(name, "Variable"), scope => + { + name = scope; + var handle_name = ops.name_from_scope_name(name); + var shared_name = handle_name; + var unique_id = shared_name; + + var attr = new AttrValue(); + attr.List = new AttrValue.Types.ListValue(); + attr.List.S.Add(ByteString.CopyFromUtf8($"loc:{handle_name}")); + tf_with(ops.name_scope("Initializer"), delegate + { + initial_value = ops.convert_to_tensor(init_from_fn ? (initial_value as Func)() : initial_value, + name: "initial_value", + dtype: dtype); + }); + _shape = shape ?? 
(initial_value as Tensor).TensorShape; + _handle = resource_variable_ops.eager_safe_variable_handle( + initial_value: _initial_value, + shape: _shape, + shared_name: shared_name, + name: name, + graph_mode: _in_graph_mode); + _unique_id = unique_id; + _initial_value = initial_value as Tensor; + _handle_name = handle_name + ":0"; + _dtype = _initial_value.dtype.as_base_dtype(); + // _constraint = constraint; + + if (_in_graph_mode) + { + tf_with(ops.name_scope("IsInitialized"), delegate + { + _is_initialized_op = gen_resource_variable_ops.var_is_initialized_op(_handle); + }); + if(initial_value != null) + { + tf_with(ops.name_scope("Assign"), scope1 => + { + string n = scope1; + _initializer_op = gen_resource_variable_ops.assign_variable_op(_handle, + variables._try_guard_against_uninitialized_dependencies(name, _initial_value), + name: n); + }); + } + } + }); + + throw new NotImplementedException(""); + } + private void _init_from_proto(VariableDef variable_def, string import_scope = null) { _in_graph_mode = true; diff --git a/src/TensorFlowNET.Core/Variables/VariableScope.cs b/src/TensorFlowNET.Core/Variables/VariableScope.cs index ad7750a1..52766e4f 100644 --- a/src/TensorFlowNET.Core/Variables/VariableScope.cs +++ b/src/TensorFlowNET.Core/Variables/VariableScope.cs @@ -71,7 +71,7 @@ namespace Tensorflow trainable: trainable, collections: collections, synchronization: synchronization, - aggregation: aggregation); + aggregation: aggregation) as RefVariable; }); } } diff --git a/src/TensorFlowNET.Core/Variables/VariableV1.cs b/src/TensorFlowNET.Core/Variables/VariableV1.cs index eb3349fd..8f873291 100644 --- a/src/TensorFlowNET.Core/Variables/VariableV1.cs +++ b/src/TensorFlowNET.Core/Variables/VariableV1.cs @@ -28,12 +28,17 @@ namespace Tensorflow /// the variable are fixed. The value can be changed using one of the assign methods. 
/// https://tensorflow.org/guide/variables /// - public class VariableV1 + public abstract class VariableV1 { public virtual string name { get; } public virtual Tensor graph_element { get; } public virtual Operation op { get; } public virtual Operation initializer { get; } + public Tensor _variable; + protected string _graph_key; + public Graph graph => _variable.graph; + + public Tensor _is_initialized_op { get; set; } public VariableV1(object initial_value = null, bool trainable = true, diff --git a/src/TensorFlowNET.Core/Variables/_VariableStore.cs b/src/TensorFlowNET.Core/Variables/_VariableStore.cs index d0fbf161..5b706a95 100644 --- a/src/TensorFlowNET.Core/Variables/_VariableStore.cs +++ b/src/TensorFlowNET.Core/Variables/_VariableStore.cs @@ -36,7 +36,7 @@ namespace Tensorflow _store_eager_variables = false; } - public RefVariable get_variable(string name, + public VariableV1 get_variable(string name, TensorShape shape = null, TF_DataType dtype = TF_DataType.TF_FLOAT, object initializer = null, // IInitializer or Tensor @@ -61,7 +61,7 @@ namespace Tensorflow aggregation: aggregation); } - private RefVariable _true_getter(string name, + private VariableV1 _true_getter(string name, TensorShape shape = null, TF_DataType dtype = TF_DataType.TF_FLOAT, object initializer = null, @@ -110,7 +110,7 @@ namespace Tensorflow } } - private RefVariable _get_single_variable(string name, + private VariableV1 _get_single_variable(string name, TensorShape shape = null, TF_DataType dtype = TF_DataType.DtInvalid, IInitializer initializer = null, @@ -136,7 +136,7 @@ namespace Tensorflow throw new NotImplementedException("_get_single_variable"); } - RefVariable v = null; + VariableV1 v = null; // Create the tensor to initialize the variable with default value. if (initializer == null) { diff --git a/src/TensorFlowNET.Core/Variables/variable_scope.py.cs b/src/TensorFlowNET.Core/Variables/variable_scope.py.cs index 4f357b12..f4a01054 100644 --- a/src/TensorFlowNET.Core/Variables/variable_scope.py.cs +++ b/src/TensorFlowNET.Core/Variables/variable_scope.py.cs @@ -172,11 +172,12 @@ namespace Tensorflow return $"{prefix}_{idx}"; } - public static RefVariable default_variable_creator(object initial_value, + public static VariableV1 default_variable_creator(object initial_value, string name = null, bool? trainable = null, List collections = null, TF_DataType dtype = TF_DataType.DtInvalid, + int[] shape = null, bool validate_shape = false, bool ? 
use_resource = null, VariableSynchronization synchronization = VariableSynchronization.Auto, @@ -193,7 +194,13 @@ namespace Tensorflow if (use_resource.Value) { - throw new NotImplementedException(); + return new ResourceVariable(initial_value, + trainable: trainable.Value, + validate_shape: validate_shape, + collections: collections, + name: name, + dtype: dtype, + shape: shape); } else { diff --git a/src/TensorFlowNET.Core/Variables/variables.py.cs b/src/TensorFlowNET.Core/Variables/variables.py.cs index 6e9d0e4c..d898a4aa 100644 --- a/src/TensorFlowNET.Core/Variables/variables.py.cs +++ b/src/TensorFlowNET.Core/Variables/variables.py.cs @@ -84,6 +84,60 @@ namespace Tensorflow return gen_control_flow_ops.no_op(name: name); } + public static Tensor _try_guard_against_uninitialized_dependencies(string name, Tensor initial_value) + { + return _safe_initial_value_from_tensor(name, initial_value, new Dictionary()); + } + + public static Tensor _safe_initial_value_from_tensor(string name, Tensor tensor, Dictionary op_cache) + { + var op = tensor.op; + Operation new_op = op_cache.ContainsKey(op.name) ? op_cache[op.name] : null; + if(new_op == null) + { + new_op = _safe_initial_value_from_op(name, op, op_cache); + op_cache[op.name] = new_op; + } + + return new_op.outputs[tensor.value_index]; + } + + /// + /// Replace dependencies on variables with their initialized values. + /// + /// + /// + /// + /// + public static Operation _safe_initial_value_from_op(string name, Operation op, Dictionary op_cache) + { + var op_type = op.node_def.Op; + if (op_type == "IsVariableInitialized" || + op_type == "VarIsInitializedOp" || + op_type == "ReadVariableOp") + return op; + + if(op_type == "Variable" || + op_type == "VariableV2" || + op_type == "VarHandleOp") + { + throw new NotImplementedException(""); + } + + // Recursively build initializer expressions for inputs. + bool modified = false; + var new_op_inputs = new List(); + foreach(Tensor op_input in op.inputs) + { + var new_op_input = _safe_initial_value_from_tensor(name, op_input, op_cache); + new_op_inputs.Add(new_op_input); + modified = modified || new_op_input != op_input; + } + + // If at least one input was modified, replace the op. 
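The default_variable_creator branch above is what the new tf.VariableV1 overload (added in tensorflow.cs further down in this diff) routes into. A hedged usage sketch: per _init_from_args earlier in this patch the ResourceVariable path still ends in NotImplementedException, so only use_resource: false is exercisable here, and the array initial value is an illustrative assumption rather than a documented input type:

    var v = tf.VariableV1(new float[,] { { 1f, 2f }, { 3f, 4f } },
                          name: "w",
                          dtype: TF_DataType.TF_FLOAT,
                          use_resource: false,   // true would reach the NotImplementedException path
                          shape: new[] { 2, 2 });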
+ return op; + } + public static Tensor global_variables_initializer() { throw new NotImplementedException(); diff --git a/src/TensorFlowNET.Core/ops.cs b/src/TensorFlowNET.Core/ops.cs index 846de1ea..d1e423c9 100644 --- a/src/TensorFlowNET.Core/ops.cs +++ b/src/TensorFlowNET.Core/ops.cs @@ -274,7 +274,7 @@ namespace Tensorflow return node_def; } - public static string _name_from_scope_name(string name) + public static string name_from_scope_name(string name) { if (name.EndsWith("/")) { diff --git a/src/TensorFlowNET.Core/tensorflow.cs b/src/TensorFlowNET.Core/tensorflow.cs index cf973864..39fd2ac9 100644 --- a/src/TensorFlowNET.Core/tensorflow.cs +++ b/src/TensorFlowNET.Core/tensorflow.cs @@ -55,7 +55,24 @@ namespace Tensorflow trainable: trainable, validate_shape: validate_shape, name: name, - dtype: dtype); + dtype: dtype) as RefVariable; + } + + public VariableV1 VariableV1(T data, + bool trainable = true, + bool validate_shape = true, + string name = null, + TF_DataType dtype = TF_DataType.DtInvalid, + bool use_resource = false, + int[] shape = null) + { + return Tensorflow.variable_scope.default_variable_creator(data, + trainable: trainable, + validate_shape: validate_shape, + name: name, + dtype: dtype, + use_resource: use_resource, + shape: shape); } public unsafe Tensor placeholder(TF_DataType dtype, TensorShape shape = null, string name = null) diff --git a/src/TensorFlowNET.Hub/TensorFlowNET.Hub.csproj b/src/TensorFlowNET.Hub/TensorFlowNET.Hub.csproj index 27b5128b..10d27a5c 100644 --- a/src/TensorFlowNET.Hub/TensorFlowNET.Hub.csproj +++ b/src/TensorFlowNET.Hub/TensorFlowNET.Hub.csproj @@ -18,6 +18,6 @@ TensorFlow.Hub - + \ No newline at end of file diff --git a/test/TensorFlowNET.UnitTest/Keras/EmbeddingTest.cs b/test/TensorFlowNET.UnitTest/Keras/EmbeddingTest.cs new file mode 100644 index 00000000..0168f22c --- /dev/null +++ b/test/TensorFlowNET.UnitTest/Keras/EmbeddingTest.cs @@ -0,0 +1,32 @@ +using Microsoft.VisualStudio.TestTools.UnitTesting; +using System; +using System.Collections.Generic; +using System.Text; +using Tensorflow.Keras.Engine; +using Tensorflow.Keras.Layers; +using NumSharp; + +namespace TensorFlowNET.UnitTest.Keras +{ + /// + /// https://www.tensorflow.org/versions/r1.14/api_docs/python/tf/keras/layers/Embedding + /// + [TestClass] + public class EmbeddingTest + { + [TestMethod] + public void Embedding() + { + var model = new Sequential(); + model.add(new Embedding(1000, 64, input_length: 10)); + // the model will take as input an integer matrix of size (batch, + // input_length). + // the largest integer (i.e. word index) in the input should be no larger + // than 999 (vocabulary size). + // now model.output_shape == (None, 10, 64), where None is the batch + // dimension. 
+ var input_array = np.random.randint(1000, size: (32, 10)); + model.compile("rmsprop", "mse"); + } + } +} diff --git a/test/TensorFlowNET.UnitTest/gradients_test/GradientsTest.cs b/test/TensorFlowNET.UnitTest/gradients_test/GradientsTest.cs index ecd69977..2fae1e5b 100644 --- a/test/TensorFlowNET.UnitTest/gradients_test/GradientsTest.cs +++ b/test/TensorFlowNET.UnitTest/gradients_test/GradientsTest.cs @@ -11,6 +11,39 @@ namespace TensorFlowNET.UnitTest.gradients_test [TestClass] public class GradientsTest : PythonTest { + [TestMethod] + public void BroadcastToGrad() + { + var graph = tf.Graph().as_default(); + + var x = tf.constant(2, dtype: dtypes.float32); + var y = tf.broadcast_to(x, (2, 4, 3)); + var grad = tf.gradients(y, x); + + using (var sess = tf.Session(graph)) + { + float result = sess.run(grad[0]); + Assert.AreEqual(result, 24.0f); + } + } + + [TestMethod] + public void CumsumGrad() + { + var graph = tf.Graph().as_default(); + + var x = tf.constant(2, dtype: dtypes.float32); + var y = tf.broadcast_to(x, (2, 4, 3)); + var z = tf.cumsum(y, axis: 1); + var grad = tf.gradients(z, x); + + using (var sess = tf.Session(graph)) + { + float result = sess.run(grad[0]); + Assert.AreEqual(result, 60.0f); + } + } + [Ignore("TODO")] [TestMethod] public void testGradients()