
Optimizer: fixed global_step which was not incremented until now

tags/v0.9
Author: Meinrad Recheis, 6 years ago
Commit: de5a3cd66b
5 changed files with 138 additions and 17 deletions

  1. src/TensorFlowNET.Core/Python.cs (+22 -4)
  2. src/TensorFlowNET.Core/Train/Optimizer.cs (+65 -8)
  3. src/TensorFlowNET.Core/Variables/gen_state_ops.py.cs (+23 -1)
  4. src/TensorFlowNET.Core/Variables/state_ops.cs (+25 -0)
  5. test/TensorFlowNET.Examples/NeuralNetXor.cs (+3 -4)

src/TensorFlowNET.Core/Python.cs (+22 -4)

@@ -19,8 +19,26 @@ namespace Tensorflow
             Console.WriteLine(obj.ToString());
         }
 
-        protected int len<T>(IEnumerable<T> a)
-            => a.Count();
+        //protected int len<T>(IEnumerable<T> a)
+        //    => a.Count();
+
+        protected int len(object a)
+        {
+            switch (a)
+            {
+                case Array arr:
+                    return arr.Length;
+                case IList arr:
+                    return arr.Count;
+                case ICollection arr:
+                    return arr.Count;
+                case NDArray ndArray:
+                    return ndArray.len;
+                case IEnumerable enumerable:
+                    return enumerable.OfType<object>().Count();
+            }
+            throw new NotImplementedException("len() not implemented for type: " + a.GetType());
+        }
 
         protected IEnumerable<int> range(int end)
         {
@@ -152,8 +170,8 @@ namespace Tensorflow
                 dictionary.Add(name, obj);
             }
             return dictionary;
-            }
+        }
         public static bool hasattr(object obj, string key)
         {
             var __type__ = (obj).GetType();
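
The new `len(object)` overload replaces the generic `IEnumerable<T>` version so a single method can dispatch on the runtime type, mirroring Python's built-in `len()`. A minimal sketch of the dispatch, assuming a demo class derived from the `Python` base class (`len` is protected); the sample values are illustrative:

    using System;
    using System.Collections.Generic;
    using Tensorflow;

    class LenDemo : Python
    {
        public void Run()
        {
            Console.WriteLine(len(new[] { 1, 2, 3 }));              // Array -> 3
            Console.WriteLine(len(new List<string> { "a", "b" }));  // IList -> 2
            // An NDArray returns its .len property; any other IEnumerable
            // falls through to OfType<object>().Count(); anything else throws
            // NotImplementedException, so unsupported types fail loudly.
        }
    }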


src/TensorFlowNET.Core/Train/Optimizer.cs (+65 -8)

@@ -43,19 +43,44 @@ namespace Tensorflow
 
         /// <summary>
         /// Add operations to minimize `loss` by updating `var_list`.
+        ///
+        /// This method simply combines calls to `compute_gradients()` and
+        /// `apply_gradients()`. If you want to process the gradient before applying
+        /// them, call `compute_gradients()` and `apply_gradients()` explicitly instead
+        /// of using this function.
         /// </summary>
-        /// <param name="loss"></param>
+        /// <param name="loss">A `Tensor` containing the value to minimize.</param>
+        /// <param name="global_step">Optional `Variable` to increment by one after the
+        /// variables have been updated.</param>
+        /// <param name="var_list">Optional list or tuple of `Variable` objects to update to
+        /// minimize `loss`. Defaults to the list of variables collected in
+        /// the graph under the key `GraphKeys.TRAINABLE_VARIABLES`.</param>
+        /// <param name="gate_gradients">
+        /// How to gate the computation of gradients. Can be
+        /// `GATE_NONE`, `GATE_OP`, or `GATE_GRAPH`.
+        /// </param>
+        /// <param name="aggregation_method">
+        /// Specifies the method used to combine gradient terms.
+        /// Valid values are defined in the class `AggregationMethod`.
+        /// </param>
+        /// <param name="colocate_gradients_with_ops"></param>
+        /// <param name="name">Optional name for the returned operation.</param>
+        /// <param name="grad_loss">Optional. A `Tensor` holding the gradient computed for `loss`.</param>
+        /// <returns>
+        /// An Operation that updates the variables in `var_list`. If `global_step`
+        /// was not `None`, that operation also increments `global_step`.
+        /// </returns>
         public Operation minimize(Tensor loss,
             RefVariable global_step = null,
             List<RefVariable> var_list = null,
             GateGradientType gate_gradients = GateGradientType.GATE_OP,
-            bool colocate_gradients_with_ops = false)
+            int? aggregation_method = null,
+            bool colocate_gradients_with_ops = false, string name = null, Tensor grad_loss = null)
         {
-            var grads_and_vars = compute_gradients(loss,
+            // TODO: strongly type aggregation_method
+            var grads_and_vars = compute_gradients(loss, var_list: var_list,
                 gate_gradients: gate_gradients,
+                aggregation_method: aggregation_method,
                 colocate_gradients_with_ops: colocate_gradients_with_ops);
 
             var vars_with_grad = grads_and_vars.Where(x => x.Item1 != null).Select(x => x.Item2).ToArray();
@@ -63,10 +88,25 @@ namespace Tensorflow
                 throw new ValueError($"No gradients provided for any variable, check your graph for ops" +
                     $" that do not support gradients, between variables {string.Join(",", vars_with_grad.Select(x => x.name))} and loss {loss}.");
 
-            return apply_gradients(grads_and_vars);
-        }
-
-        public Operation apply_gradients(Tuple<Tensor, RefVariable>[] grads_and_vars, Tensor global_step = null, string name = null)
+            return apply_gradients(grads_and_vars, global_step: global_step, name: name);
+        }
+
+        /// <summary>
+        /// Apply gradients to variables.
+        ///
+        /// This is the second part of `minimize()`. It returns an `Operation` that
+        /// applies gradients.
+        /// </summary>
+        /// <param name="grads_and_vars">List of (gradient, variable) pairs as returned by
+        /// `compute_gradients()`.</param>
+        /// <param name="global_step">Optional `Variable` to increment by one after the
+        /// variables have been updated.</param>
+        /// <param name="name">Optional name for the returned operation. Defaults to the
+        /// name passed to the `Optimizer` constructor.</param>
+        /// <returns>
+        /// An `Operation` that applies the specified gradients. If `global_step`
+        /// was not None, that operation also increments `global_step`.</returns>
+        public Operation apply_gradients(Tuple<Tensor, RefVariable>[] grads_and_vars, RefVariable global_step = null, string name = null)
         {
             // No DistributionStrategy case.
             var converted_grads_and_vars = new List<Tuple<Tensor, RefVariable, _OptimizableVariable>>();
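
The expanded `minimize()` docs above call out the two-step path for callers who want to transform gradients before applying them. A hedged sketch of that pattern against the signatures in this diff; the optimizer setup mirrors NeuralNetXor.cs below, `loss` is assumed already built, and the inspection step is a placeholder:

    var gs = tf.Variable(0, trainable: false, name: "global_step");
    var optimizer = tf.train.GradientDescentOptimizer(0.2f);

    // Step 1: build (gradient, variable) pairs without applying them.
    var grads_and_vars = optimizer.compute_gradients(loss);

    // ... inspect or rescale the gradients here ...

    // Step 2: apply them; passing global_step makes the returned op
    // also increment the counter, which is the behavior this commit fixes.
    var train_op = optimizer.apply_gradients(grads_and_vars, global_step: gs, name: "train");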
@@ -113,7 +153,24 @@ namespace Tensorflow
             }
             else
             {
-
+                with(ops.control_dependencies(new object[] { _finish(update_ops.ToArray(), "update") }), dep =>
+                {
+                    ops.colocate_with(global_step);
+                    // TODO: port this if branch once ResourceVariable has been ported!
+                    //if (global_step is ResourceVariable)
+                    //{
+                    //    # TODO(apassos): the implicit read in assign_add is slow; consider
+                    //    # making it less so.
+                    //    apply_updates = resource_variable_ops.assign_add_variable_op(
+                    //        global_step.handle,
+                    //        ops.convert_to_tensor(1, dtype = global_step.dtype),
+                    //        name = name)
+                    //}
+                    //else
+                    {
+                        apply_updates = state_ops.assign_add(global_step, tf.constant(1), name: name);
+                    }
+                });
             }
 
             if (!tf.context.executing_eagerly())
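
The new `else` branch is where the fix lands: the `global_step` increment is gated behind `_finish(update_ops, "update")` by a control dependency, so the counter only advances after every gradient update has run. A toy end-to-end sketch of the user-visible behavior; the one-variable loss and names are illustrative, it assumes `tf.square` accepts the variable directly (it may need an explicit read in this version), and session plumbing follows NeuralNetXor.cs below:

    var x = tf.Variable(1.0f, name: "x");    // toy parameter (illustrative)
    var loss = tf.square(x);                 // toy loss: x^2
    var gs = tf.Variable(0, trainable: false, name: "global_step");
    var train_op = tf.train.GradientDescentOptimizer(0.2f)
        .minimize(loss, global_step: gs);

    // Each sess.run of train_op now also runs the gated assign_add on gs,
    // so reading gs back yields 1, 2, 3, ... instead of staying at 0.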


src/TensorFlowNET.Core/Variables/gen_state_ops.py.cs (+23 -1)

@@ -75,6 +75,28 @@ namespace Tensorflow
             var _op = _op_def_lib._apply_op_helper("AssignSub", name: name, args: new { @ref, value, use_locking });
 
             return _op.outputs[0];
         }
-    }
+
+        // Update 'ref' by adding 'value' to it.
+        // This operation outputs "ref" after the update is done.
+        // This makes it easier to chain operations that need to use the reset value.
+        // Args:
+        //     ref: A mutable `Tensor`. Must be one of the following types: `float32`, `float64`, `int32`, `uint8`, `int16`, `int8`, `complex64`, `int64`, `qint8`, `quint8`, `qint32`, `bfloat16`, `uint16`, `complex128`, `half`, `uint32`, `uint64`.
+        //         Should be from a `Variable` node.
+        //     value: A `Tensor`. Must have the same type as `ref`.
+        //         The value to be added to the variable.
+        //     use_locking: An optional `bool`. Defaults to `False`.
+        //         If True, the addition will be protected by a lock;
+        //         otherwise the behavior is undefined, but may exhibit less contention.
+        //     name: A name for the operation (optional).
+        // Returns:
+        //     A mutable `Tensor`. Has the same type as `ref`.
+        public static Tensor assign_add(RefVariable @ref, Tensor value, bool use_locking = false, string name = null)
+        {
+            var _op = _op_def_lib._apply_op_helper("AssignAdd", name: name, args: new { @ref, value, use_locking });
+            return _op.outputs[0];
+        }
+
+    }
 }
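
`gen_state_ops.assign_add` is the thin generated wrapper: it registers one `AssignAdd` node through `_op_def_lib._apply_op_helper` and returns the op's first output, which is `ref` after the update, which is exactly what makes the op chainable. A small sketch with illustrative names:

    var counter = tf.Variable(0, name: "counter");
    // Emits a single AssignAdd node; `incremented` is the post-update
    // value of `counter`, not its old value.
    Tensor incremented = gen_state_ops.assign_add(counter, tf.constant(1), name: "increment");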

src/TensorFlowNET.Core/Variables/state_ops.cs (+25 -0)

@@ -47,5 +47,30 @@ namespace Tensorflow
                 value,
                 use_locking: use_locking,
                 name: name);
+
+        //"""Update 'ref' by adding 'value' to it.
+        //
+        //    This operation outputs "ref" after the update is done.
+        //    This makes it easier to chain operations that need to use the reset value.
+        //
+        //    Args:
+        //        ref: A mutable `Tensor`. Must be one of the following types:
+        //            `float32`, `float64`, `int64`, `int32`, `uint8`, `uint16`, `int16`,
+        //            `int8`, `complex64`, `complex128`, `qint8`, `quint8`, `qint32`, `half`.
+        //            Should be from a `Variable` node.
+        //        value: A `Tensor`. Must have the same type as `ref`.
+        //            The value to be added to the variable.
+        //        use_locking: An optional `bool`. Defaults to `False`.
+        //            If True, the addition will be protected by a lock;
+        //            otherwise the behavior is undefined, but may exhibit less contention.
+        //        name: A name for the operation (optional).
+        //
+        //    Returns:
+        //        Same as "ref". Returned as a convenience for operations that want
+        //        to use the new value after the variable has been updated.
+        public static Tensor assign_add(RefVariable @ref,
+            Tensor value,
+            bool use_locking = false,
+            string name = null) => gen_state_ops.assign_add(@ref, value, use_locking: use_locking, name: name);
     }
 }
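
`state_ops.assign_add` is the public pass-through to that generated op, added so callers such as `Optimizer.apply_gradients` above don't reach into `gen_state_ops` directly. The "chain operations that need to use the reset value" remark in the comments means the returned tensor can feed downstream ops directly; a sketch, assuming a `tf.multiply` helper exists in this version (illustrative):

    var v = tf.Variable(0, name: "v");
    var stepped = state_ops.assign_add(v, tf.constant(1), name: "step");
    // `stepped` already reflects the increment, so any consumer of it
    // is ordered after the update by construction.
    var doubled = tf.multiply(stepped, tf.constant(2));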

test/TensorFlowNET.Examples/NeuralNetXor.cs (+3 -4)

@@ -43,7 +43,7 @@ namespace TensorFlowNET.Examples
             var predictions = tf.sigmoid(tf.squeeze(logits));
             var loss = tf.reduce_mean(tf.square(predictions - tf.cast(labels, tf.float32)), name: "loss");
-            var gs = tf.Variable(0, trainable: false);
+            var gs = tf.Variable(0, trainable: false, name: "global_step");
             var train_op = tf.train.GradientDescentOptimizer(0.2f).minimize(loss, global_step: gs);
             return (train_op, loss, gs);
@@ -91,7 +91,7 @@ namespace TensorFlowNET.Examples
             //     )
             var result = sess.run(new ITensorOrOperation[] { train_op, global_step, loss }, new FeedItem(features, data), new FeedItem(labels, y_));
             loss_value = result[2];
-            step++;
+            step = result[1];
             if (step % 1000 == 0)
                 Console.WriteLine($"Step {step} loss: {loss_value}");
         }
@@ -124,8 +124,7 @@ namespace TensorFlowNET.Examples
             {
                 var result = sess.run(new ITensorOrOperation[] { train_op, gs, loss }, new FeedItem(features, data), new FeedItem(labels, y_));
                 loss_value = result[2];
-                //step = result[1];
-                step++;
+                step = result[1];
                 if (step % 1000 == 0)
                     Console.WriteLine($"Step {step} loss: {loss_value}");
             }

