diff --git a/src/TensorFlowNET.Core/Python.cs b/src/TensorFlowNET.Core/Python.cs
index 1f066583..bf13c452 100644
--- a/src/TensorFlowNET.Core/Python.cs
+++ b/src/TensorFlowNET.Core/Python.cs
@@ -19,8 +19,32 @@ namespace Tensorflow
             Console.WriteLine(obj.ToString());
         }
 
-        protected int len(IEnumerable a)
-            => a.Count();
+        /// <summary>
+        /// Python-style <c>len()</c>: returns the number of elements in <paramref name="a"/>.
+        /// </summary>
+        /// <param name="a">An <see cref="Array"/>, <see cref="ICollection"/>, NDArray or other <see cref="IEnumerable"/>.</param>
+        /// <returns>The element count; for a plain enumerable this walks the whole sequence.</returns>
+        /// <exception cref="ArgumentNullException"><paramref name="a"/> is null.</exception>
+        /// <exception cref="NotImplementedException"><paramref name="a"/> is not a supported type.</exception>
+        protected int len(object a)
+        {
+            switch (a)
+            {
+                case null:
+                    throw new ArgumentNullException(nameof(a));
+                case Array arr:
+                    return arr.Length;
+                case ICollection collection: // IList derives from ICollection, so no separate case
+                    return collection.Count;
+                case NDArray ndArray:
+                    return ndArray.len;
+                case IEnumerable enumerable:
+                    // Cast, not OfType: OfType skips null elements and would under-count.
+                    return enumerable.Cast<object>().Count();
+                default:
+                    throw new NotImplementedException("len() not implemented for type: " + a.GetType());
+            }
+        }
 
         protected IEnumerable range(int end)
         {
@@ -152,8 +176,8 @@ namespace Tensorflow
                 dictionary.Add(name, obj);
             }
             return dictionary;
-        }
-
+        }
+        
         public static bool hasattr(object obj, string key)
         {
             var __type__ = (obj).GetType();
diff --git a/src/TensorFlowNET.Core/Train/Optimizer.cs b/src/TensorFlowNET.Core/Train/Optimizer.cs
index 9a88601b..2e8125f2 100644
--- a/src/TensorFlowNET.Core/Train/Optimizer.cs
+++ b/src/TensorFlowNET.Core/Train/Optimizer.cs
@@ -43,19 +43,44 @@ namespace Tensorflow
 
         /// <summary>
         /// Add operations to minimize `loss` by updating `var_list`
+        ///
+        /// This method simply combines calls `compute_gradients()` and
+        /// `apply_gradients()`. If you want to process the gradient before applying
+        /// them call `compute_gradients()` and `apply_gradients()` explicitly instead
+        /// of using this function.
         /// </summary>
-        /// <param name="loss"></param>
+        /// <param name="loss">A `Tensor` containing the value to minimize.</param>
+        /// <param name="global_step">Optional `Variable` to increment by one after the
+        /// variables have been updated.</param>
+        /// <param name="var_list">Optional list or tuple of `Variable` objects to update to
+        /// minimize `loss`. Defaults to the list of variables collected in
+        /// the graph under the key `GraphKeys.TRAINABLE_VARIABLES`.</param>
+        /// <param name="gate_gradients">
+        /// How to gate the computation of gradients. Can be
+        /// `GATE_NONE`, `GATE_OP`, or `GATE_GRAPH`.
+        /// </param>
+        /// <param name="aggregation_method">
+        /// Specifies the method used to combine gradient terms.
+        /// Valid values are defined in the class `AggregationMethod`.
+        /// </param>
+        /// <param name="colocate_gradients_with_ops"></param>
+        /// <param name="name">Optional name for the returned operation.</param>
+        /// <param name="grad_loss">Optional. A `Tensor` holding the gradient computed for `loss`.</param>
         /// <returns>
         /// An Operation that updates the variables in `var_list`. If `global_step`
         /// was not `None`, that operation also increments `global_step`.
         /// </returns>
         public Operation minimize(Tensor loss,
             RefVariable global_step = null,
+            List<RefVariable> var_list=null,
             GateGradientType gate_gradients = GateGradientType.GATE_OP,
-            bool colocate_gradients_with_ops = false)
+            int? aggregation_method=null,
+            bool colocate_gradients_with_ops = false, string name=null, Tensor grad_loss=null)
         {
-            var grads_and_vars = compute_gradients(loss,
+            // TODO: strongly type aggregation_method; grad_loss is accepted but not yet forwarded to compute_gradients
+            var grads_and_vars = compute_gradients(loss, var_list:var_list,
                 gate_gradients: gate_gradients,
+                aggregation_method:aggregation_method,
                 colocate_gradients_with_ops: colocate_gradients_with_ops);
 
             var vars_with_grad = grads_and_vars.Where(x => x.Item1 != null).Select(x => x.Item2).ToArray();
@@ -63,10 +88,25 @@ namespace Tensorflow
                 throw new ValueError($"No gradients provided for any variable, check your graph for ops" +
                     $" that do not support gradients, between variables {string.Join(",", vars_with_grad.Select(x => x.name))} and loss {loss}.");
 
-            return apply_gradients(grads_and_vars);
-        }
-
-        public Operation apply_gradients(Tuple<Tensor, RefVariable>[] grads_and_vars, Tensor global_step = null, string name = null)
+            return apply_gradients(grads_and_vars, global_step:global_step, name:name);
+        }
+
+        /// <summary>
+        /// Apply gradients to variables.
+        ///
+        /// This is the second part of `minimize()`. It returns an `Operation` that
+        /// applies gradients.
+        /// </summary>
+        /// <param name="grads_and_vars">List of (gradient, variable) pairs as returned by
+        /// `compute_gradients()`.</param>
+        /// <param name="global_step">Optional `Variable` to increment by one after the
+        /// variables have been updated.</param>
+        /// <param name="name">Optional name for the returned operation. Default to the
+        /// name passed to the `Optimizer` constructor.</param>
+        /// <returns>
+        /// An `Operation` that applies the specified gradients. If `global_step`
+        /// was not None, that operation also increments `global_step`.</returns>
+        public Operation apply_gradients(Tuple<Tensor, RefVariable>[] grads_and_vars, RefVariable global_step = null, string name = null)
         {
             // No DistributionStrategy case.
             var converted_grads_and_vars = new List>();
@@ -113,7 +153,24 @@ namespace Tensorflow
             }
             else
             {
-
+                with(ops.control_dependencies(new object[] {_finish(update_ops.ToArray(), "update")}), dep =>
+                {
+                    ops.colocate_with(global_step); // NOTE(review): result is discarded; the Python original wraps assign_add in `with ops.colocate_with(global_step)` - confirm colocation still takes effect
+                    // TODO: port this if branch once ResourceVariable has been ported!
+                    //if (global_step is ResourceVariable)
+                    //{
+                    //        # TODO(apassos): the implicit read in assign_add is slow; consider
+                    //        # making it less so.
+                    //        apply_updates = resource_variable_ops.assign_add_variable_op(
+                    //            global_step.handle,
+                    //            ops.convert_to_tensor(1, dtype = global_step.dtype),
+                    //            name = name)
+                    //}
+                    //else
+                    {
+                        apply_updates = state_ops.assign_add(global_step, tf.constant(1), name: name);
+                    }
+                });
             }
 
             if (!tf.context.executing_eagerly())
diff --git a/src/TensorFlowNET.Core/Variables/gen_state_ops.py.cs b/src/TensorFlowNET.Core/Variables/gen_state_ops.py.cs
index a4eebc67..4b4237a0 100644
--- a/src/TensorFlowNET.Core/Variables/gen_state_ops.py.cs
+++ b/src/TensorFlowNET.Core/Variables/gen_state_ops.py.cs
@@ -75,6 +75,28 @@ namespace Tensorflow
             var _op = _op_def_lib._apply_op_helper("AssignSub", name: name, args: new { @ref, value, use_locking });
 
             return _op.outputs[0];
-        }
+        }
+
+
+        // Update 'ref' by adding 'value' to it.
+        // This operation outputs "ref" after the update is done.
+        // This makes it easier to chain operations that need to use the reset value.
+        // Args:
+        //   ref: A mutable `Tensor`. Must be one of the following types: `float32`, `float64`, `int32`, `uint8`, `int16`, `int8`, `complex64`, `int64`, `qint8`, `quint8`, `qint32`, `bfloat16`, `uint16`, `complex128`, `half`, `uint32`, `uint64`.
+        //     Should be from a `Variable` node.
+        //   value: A `Tensor`. Must have the same type as `ref`.
+        //     The value to be added to the variable.
+        //   use_locking: An optional `bool`. Defaults to `False`.
+        //     If True, the addition will be protected by a lock;
+        //     otherwise the behavior is undefined, but may exhibit less contention.
+        //   name: A name for the operation (optional).
+        // Returns:
+        //   A mutable `Tensor`. Has the same type as `ref`.
+        public static Tensor assign_add(RefVariable @ref, Tensor value, bool use_locking = false, string name = null)
+        {
+            var _op = _op_def_lib._apply_op_helper("AssignAdd", name: name, args: new { @ref, value, use_locking });
+            return _op.outputs[0];
+        }
+
     }
 }
diff --git a/src/TensorFlowNET.Core/Variables/state_ops.cs b/src/TensorFlowNET.Core/Variables/state_ops.cs
index bfecd8f7..aaa27e85 100644
--- a/src/TensorFlowNET.Core/Variables/state_ops.cs
+++ b/src/TensorFlowNET.Core/Variables/state_ops.cs
@@ -47,5 +47,30 @@ namespace Tensorflow
                 value,
                 use_locking: use_locking,
                 name: name);
+
+        // Update 'ref' by adding 'value' to it.
+        //
+        // This operation outputs "ref" after the update is done.
+        // This makes it easier to chain operations that need to use the reset value.
+        //
+        // Args:
+        //   ref: A mutable `Tensor`. Must be one of the following types:
+        //     `float32`, `float64`, `int64`, `int32`, `uint8`, `uint16`, `int16`,
+        //     `int8`, `complex64`, `complex128`, `qint8`, `quint8`, `qint32`, `half`.
+        //     Should be from a `Variable` node.
+        //   value: A `Tensor`. Must have the same type as `ref`.
+        //     The value to be added to the variable.
+        //   use_locking: An optional `bool`. Defaults to `False`.
+        //     If True, the addition will be protected by a lock;
+        //     otherwise the behavior is undefined, but may exhibit less contention.
+        //   name: A name for the operation (optional).
+        //
+        // Returns:
+        //   Same as "ref". Returned as a convenience for operations that want
+        //   to use the new value after the variable has been updated.
+        public static Tensor assign_add(RefVariable @ref,
+            Tensor value,
+            bool use_locking = false,
+            string name = null) => gen_state_ops.assign_add(@ref, value, use_locking: use_locking, name: name);
     }
 }
diff --git a/test/TensorFlowNET.Examples/NeuralNetXor.cs b/test/TensorFlowNET.Examples/NeuralNetXor.cs
index dca0aa54..404bb542 100644
--- a/test/TensorFlowNET.Examples/NeuralNetXor.cs
+++ b/test/TensorFlowNET.Examples/NeuralNetXor.cs
@@ -43,7 +43,7 @@ namespace TensorFlowNET.Examples
             var predictions = tf.sigmoid(tf.squeeze(logits));
             var loss = tf.reduce_mean(tf.square(predictions - tf.cast(labels, tf.float32)), name:"loss");
 
-            var gs = tf.Variable(0, trainable: false);
+            var gs = tf.Variable(0, trainable: false, name: "global_step");
             var train_op = tf.train.GradientDescentOptimizer(0.2f).minimize(loss, global_step: gs);
 
             return (train_op, loss, gs);
@@ -91,7 +91,7 @@ namespace TensorFlowNET.Examples
                     // )
                     var result = sess.run(new ITensorOrOperation[] { train_op, global_step, loss }, new FeedItem(features, data), new FeedItem(labels, y_));
                     loss_value = result[2];
-                    step++;
+                    step = result[1];
                     if (step % 1000 == 0)
                         Console.WriteLine($"Step {step} loss: {loss_value}");
                 }
@@ -124,8 +124,7 @@ namespace TensorFlowNET.Examples
                 {
                     var result = sess.run(new ITensorOrOperation[] { train_op, gs, loss }, new FeedItem(features, data), new FeedItem(labels, y_));
                     loss_value = result[2];
-                    //step = result[1];
-                    step++;
+                    step = result[1];
                     if (step % 1000 == 0)
                         Console.WriteLine($"Step {step} loss: {loss_value}");
                 }