
Optimizer: fixed global_step which was not incremented until now

tags/v0.9
Author: Meinrad Recheis, 6 years ago
Commit: de5a3cd66b
5 changed files with 138 additions and 17 deletions

  1. src/TensorFlowNET.Core/Python.cs (+22 -4)
  2. src/TensorFlowNET.Core/Train/Optimizer.cs (+65 -8)
  3. src/TensorFlowNET.Core/Variables/gen_state_ops.py.cs (+23 -1)
  4. src/TensorFlowNET.Core/Variables/state_ops.cs (+25 -0)
  5. test/TensorFlowNET.Examples/NeuralNetXor.cs (+3 -4)

src/TensorFlowNET.Core/Python.cs (+22 -4)

@@ -19,8 +19,26 @@ namespace Tensorflow
             Console.WriteLine(obj.ToString());
         }
 
-        protected int len<T>(IEnumerable<T> a)
-            => a.Count();
+        //protected int len<T>(IEnumerable<T> a)
+        //    => a.Count();
+
+        protected int len(object a)
+        {
+            switch (a)
+            {
+                case Array arr:
+                    return arr.Length;
+                case IList arr:
+                    return arr.Count;
+                case ICollection arr:
+                    return arr.Count;
+                case NDArray ndArray:
+                    return ndArray.len;
+                case IEnumerable enumerable:
+                    return enumerable.OfType<object>().Count();
+            }
+            throw new NotImplementedException("len() not implemented for type: " + a.GetType());
+        }
 
         protected IEnumerable<int> range(int end)
         {
@@ -152,8 +170,8 @@ namespace Tensorflow
                 dictionary.Add(name, obj);
             }
             return dictionary;
-            }
+        }
         public static bool hasattr(object obj, string key)
         {
             var __type__ = (obj).GetType();
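
The new `len(object)` overload replaces the generic `IEnumerable<T>` version so a single method can dispatch on the runtime type, mirroring Python's built-in `len()`. A minimal sketch of the dispatch, assuming a demo class derived from the `Python` base class (`len` is protected); the sample values are illustrative:

    using System;
    using System.Collections.Generic;
    using Tensorflow;

    class LenDemo : Python
    {
        public void Run()
        {
            Console.WriteLine(len(new[] { 1, 2, 3 }));              // Array -> 3
            Console.WriteLine(len(new List<string> { "a", "b" }));  // IList -> 2
            // An NDArray returns its .len property; any other IEnumerable
            // falls through to OfType<object>().Count(); anything else throws
            // NotImplementedException, so unsupported types fail loudly.
        }
    }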


src/TensorFlowNET.Core/Train/Optimizer.cs (+65 -8)

@@ -43,19 +43,44 @@ namespace Tensorflow
 
         /// <summary>
         /// Add operations to minimize `loss` by updating `var_list`.
+        ///
+        /// This method simply combines calls to `compute_gradients()` and
+        /// `apply_gradients()`. If you want to process the gradient before applying
+        /// them, call `compute_gradients()` and `apply_gradients()` explicitly instead
+        /// of using this function.
         /// </summary>
-        /// <param name="loss"></param>
+        /// <param name="loss">A `Tensor` containing the value to minimize.</param>
+        /// <param name="global_step">Optional `Variable` to increment by one after the
+        /// variables have been updated.</param>
+        /// <param name="var_list">Optional list or tuple of `Variable` objects to update to
+        /// minimize `loss`. Defaults to the list of variables collected in
+        /// the graph under the key `GraphKeys.TRAINABLE_VARIABLES`.</param>
+        /// <param name="gate_gradients">
+        /// How to gate the computation of gradients. Can be
+        /// `GATE_NONE`, `GATE_OP`, or `GATE_GRAPH`.
+        /// </param>
+        /// <param name="aggregation_method">
+        /// Specifies the method used to combine gradient terms.
+        /// Valid values are defined in the class `AggregationMethod`.
+        /// </param>
+        /// <param name="colocate_gradients_with_ops"></param>
+        /// <param name="name">Optional name for the returned operation.</param>
+        /// <param name="grad_loss">Optional. A `Tensor` holding the gradient computed for `loss`.</param>
+        /// <returns>
+        /// An Operation that updates the variables in `var_list`. If `global_step`
+        /// was not `None`, that operation also increments `global_step`.
+        /// </returns>
         public Operation minimize(Tensor loss,
             RefVariable global_step = null,
             List<RefVariable> var_list = null,
             GateGradientType gate_gradients = GateGradientType.GATE_OP,
-            bool colocate_gradients_with_ops = false)
+            int? aggregation_method = null,
+            bool colocate_gradients_with_ops = false, string name = null, Tensor grad_loss = null)
         {
-            var grads_and_vars = compute_gradients(loss,
+            // TODO: strongly type aggregation_method
+            var grads_and_vars = compute_gradients(loss, var_list: var_list,
                 gate_gradients: gate_gradients,
+                aggregation_method: aggregation_method,
                 colocate_gradients_with_ops: colocate_gradients_with_ops);
 
             var vars_with_grad = grads_and_vars.Where(x => x.Item1 != null).Select(x => x.Item2).ToArray();
@@ -63,10 +88,25 @@ namespace Tensorflow
                 throw new ValueError($"No gradients provided for any variable, check your graph for ops" +
                     $" that do not support gradients, between variables {string.Join(",", vars_with_grad.Select(x => x.name))} and loss {loss}.");
 
-            return apply_gradients(grads_and_vars);
-        }
-
-        public Operation apply_gradients(Tuple<Tensor, RefVariable>[] grads_and_vars, Tensor global_step = null, string name = null)
+            return apply_gradients(grads_and_vars, global_step: global_step, name: name);
+        }
+
+        /// <summary>
+        /// Apply gradients to variables.
+        ///
+        /// This is the second part of `minimize()`. It returns an `Operation` that
+        /// applies gradients.
+        /// </summary>
+        /// <param name="grads_and_vars">List of (gradient, variable) pairs as returned by
+        /// `compute_gradients()`.</param>
+        /// <param name="global_step">Optional `Variable` to increment by one after the
+        /// variables have been updated.</param>
+        /// <param name="name">Optional name for the returned operation. Defaults to the
+        /// name passed to the `Optimizer` constructor.</param>
+        /// <returns>
+        /// An `Operation` that applies the specified gradients. If `global_step`
+        /// was not None, that operation also increments `global_step`.</returns>
+        public Operation apply_gradients(Tuple<Tensor, RefVariable>[] grads_and_vars, RefVariable global_step = null, string name = null)
         {
             // No DistributionStrategy case.
             var converted_grads_and_vars = new List<Tuple<Tensor, RefVariable, _OptimizableVariable>>();
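
The expanded `minimize()` docs above call out the two-step path for callers who want to transform gradients before applying them. A hedged sketch of that pattern against the signatures in this diff; the optimizer setup mirrors NeuralNetXor.cs below, `loss` is assumed already built, and the inspection step is a placeholder:

    var gs = tf.Variable(0, trainable: false, name: "global_step");
    var optimizer = tf.train.GradientDescentOptimizer(0.2f);

    // Step 1: build (gradient, variable) pairs without applying them.
    var grads_and_vars = optimizer.compute_gradients(loss);

    // ... inspect or rescale the gradients here ...

    // Step 2: apply them; passing global_step makes the returned op
    // also increment the counter, which is the behavior this commit fixes.
    var train_op = optimizer.apply_gradients(grads_and_vars, global_step: gs, name: "train");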
@@ -113,7 +153,24 @@ namespace Tensorflow
             }
             else
             {
-
+                with(ops.control_dependencies(new object[] { _finish(update_ops.ToArray(), "update") }), dep =>
+                {
+                    ops.colocate_with(global_step);
+                    // TODO: port this if branch once ResourceVariable has been ported!
+                    //if (global_step is ResourceVariable)
+                    //{
+                    //    # TODO(apassos): the implicit read in assign_add is slow; consider
+                    //    # making it less so.
+                    //    apply_updates = resource_variable_ops.assign_add_variable_op(
+                    //        global_step.handle,
+                    //        ops.convert_to_tensor(1, dtype = global_step.dtype),
+                    //        name = name)
+                    //}
+                    //else
+                    {
+                        apply_updates = state_ops.assign_add(global_step, tf.constant(1), name: name);
+                    }
+                });
             }
 
             if (!tf.context.executing_eagerly())
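
The new `else` branch is where the fix lands: the `global_step` increment is gated behind `_finish(update_ops, "update")` by a control dependency, so the counter only advances after every gradient update has run. A toy end-to-end sketch of the user-visible behavior; the one-variable loss and names are illustrative, it assumes `tf.square` accepts the variable directly (it may need an explicit read in this version), and session plumbing follows NeuralNetXor.cs below:

    var x = tf.Variable(1.0f, name: "x");    // toy parameter (illustrative)
    var loss = tf.square(x);                 // toy loss: x^2
    var gs = tf.Variable(0, trainable: false, name: "global_step");
    var train_op = tf.train.GradientDescentOptimizer(0.2f)
        .minimize(loss, global_step: gs);

    // Each sess.run of train_op now also runs the gated assign_add on gs,
    // so reading gs back yields 1, 2, 3, ... instead of staying at 0.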


src/TensorFlowNET.Core/Variables/gen_state_ops.py.cs (+23 -1)

@@ -75,6 +75,28 @@ namespace Tensorflow
             var _op = _op_def_lib._apply_op_helper("AssignSub", name: name, args: new { @ref, value, use_locking });
 
             return _op.outputs[0];
         }
-    }
+
+        // Update 'ref' by adding 'value' to it.
+        // This operation outputs "ref" after the update is done.
+        // This makes it easier to chain operations that need to use the reset value.
+        // Args:
+        //     ref: A mutable `Tensor`. Must be one of the following types: `float32`, `float64`, `int32`, `uint8`, `int16`, `int8`, `complex64`, `int64`, `qint8`, `quint8`, `qint32`, `bfloat16`, `uint16`, `complex128`, `half`, `uint32`, `uint64`.
+        //         Should be from a `Variable` node.
+        //     value: A `Tensor`. Must have the same type as `ref`.
+        //         The value to be added to the variable.
+        //     use_locking: An optional `bool`. Defaults to `False`.
+        //         If True, the addition will be protected by a lock;
+        //         otherwise the behavior is undefined, but may exhibit less contention.
+        //     name: A name for the operation (optional).
+        // Returns:
+        //     A mutable `Tensor`. Has the same type as `ref`.
+        public static Tensor assign_add(RefVariable @ref, Tensor value, bool use_locking = false, string name = null)
+        {
+            var _op = _op_def_lib._apply_op_helper("AssignAdd", name: name, args: new { @ref, value, use_locking });
+            return _op.outputs[0];
+        }
+
+    }
 }
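
`gen_state_ops.assign_add` is the thin generated wrapper: it registers one `AssignAdd` node through `_op_def_lib._apply_op_helper` and returns the op's first output, which is `ref` after the update, which is exactly what makes the op chainable. A small sketch with illustrative names:

    var counter = tf.Variable(0, name: "counter");
    // Emits a single AssignAdd node; `incremented` is the post-update
    // value of `counter`, not its old value.
    Tensor incremented = gen_state_ops.assign_add(counter, tf.constant(1), name: "increment");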

src/TensorFlowNET.Core/Variables/state_ops.cs (+25 -0)

@@ -47,5 +47,30 @@ namespace Tensorflow
                 value,
                 use_locking: use_locking,
                 name: name);
+
+        //"""Update 'ref' by adding 'value' to it.
+        //
+        //    This operation outputs "ref" after the update is done.
+        //    This makes it easier to chain operations that need to use the reset value.
+        //
+        //    Args:
+        //        ref: A mutable `Tensor`. Must be one of the following types:
+        //            `float32`, `float64`, `int64`, `int32`, `uint8`, `uint16`, `int16`,
+        //            `int8`, `complex64`, `complex128`, `qint8`, `quint8`, `qint32`, `half`.
+        //            Should be from a `Variable` node.
+        //        value: A `Tensor`. Must have the same type as `ref`.
+        //            The value to be added to the variable.
+        //        use_locking: An optional `bool`. Defaults to `False`.
+        //            If True, the addition will be protected by a lock;
+        //            otherwise the behavior is undefined, but may exhibit less contention.
+        //        name: A name for the operation (optional).
+        //
+        //    Returns:
+        //        Same as "ref". Returned as a convenience for operations that want
+        //        to use the new value after the variable has been updated.
+        public static Tensor assign_add(RefVariable @ref,
+            Tensor value,
+            bool use_locking = false,
+            string name = null) => gen_state_ops.assign_add(@ref, value, use_locking: use_locking, name: name);
     }
 }
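
`state_ops.assign_add` is the public pass-through to that generated op, added so callers such as `Optimizer.apply_gradients` above don't reach into `gen_state_ops` directly. The "chain operations that need to use the reset value" remark in the comments means the returned tensor can feed downstream ops directly; a sketch, assuming a `tf.multiply` helper exists in this version (illustrative):

    var v = tf.Variable(0, name: "v");
    var stepped = state_ops.assign_add(v, tf.constant(1), name: "step");
    // `stepped` already reflects the increment, so any consumer of it
    // is ordered after the update by construction.
    var doubled = tf.multiply(stepped, tf.constant(2));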

test/TensorFlowNET.Examples/NeuralNetXor.cs (+3 -4)

@@ -43,7 +43,7 @@ namespace TensorFlowNET.Examples
             var predictions = tf.sigmoid(tf.squeeze(logits));
             var loss = tf.reduce_mean(tf.square(predictions - tf.cast(labels, tf.float32)), name: "loss");
-            var gs = tf.Variable(0, trainable: false);
+            var gs = tf.Variable(0, trainable: false, name: "global_step");
             var train_op = tf.train.GradientDescentOptimizer(0.2f).minimize(loss, global_step: gs);
             return (train_op, loss, gs);
@@ -91,7 +91,7 @@ namespace TensorFlowNET.Examples
             //     )
             var result = sess.run(new ITensorOrOperation[] { train_op, global_step, loss }, new FeedItem(features, data), new FeedItem(labels, y_));
             loss_value = result[2];
-            step++;
+            step = result[1];
             if (step % 1000 == 0)
                 Console.WriteLine($"Step {step} loss: {loss_value}");
         }
@@ -124,8 +124,7 @@ namespace TensorFlowNET.Examples
             {
                 var result = sess.run(new ITensorOrOperation[] { train_op, gs, loss }, new FeedItem(features, data), new FeedItem(labels, y_));
                 loss_value = result[2];
-                //step = result[1];
-                step++;
+                step = result[1];
                 if (step % 1000 == 0)
                     Console.WriteLine($"Step {step} loss: {loss_value}");
             }

