
fixed #170

tags/v0.8.0
Oceania2018 6 years ago
commit 8408dcd61f
16 changed files with 281 additions and 50 deletions
  1. +4   -4   src/TensorFlowNET.Core/Gradients/gradients_impl.py.cs
  2. +2   -2   src/TensorFlowNET.Core/Gradients/math_grad.py.cs
  3. +1   -1   src/TensorFlowNET.Core/Graphs/Graph.Gradient.cs.cs
  4. +1   -1   src/TensorFlowNET.Core/Operations/OpDefLibrary.cs
  5. +69  -15  src/TensorFlowNET.Core/Operations/control_flow_ops.py.cs
  6. +9   -0   src/TensorFlowNET.Core/Operations/math_ops.py.cs
  7. +1   -1   src/TensorFlowNET.Core/Python.cs
  8. +6   -0   src/TensorFlowNET.Core/Train/GradientDescentOptimizer.cs
  9. +105 -5   src/TensorFlowNET.Core/Train/Optimizer.cs
 10. +1   -1   src/TensorFlowNET.Core/Train/_OptimizableVariable.cs
 11. +24  -0   src/TensorFlowNET.Core/Train/gen_training_ops.py.cs
 12. +4   -2   src/TensorFlowNET.Core/Train/optimizer.py.cs
 13. +13  -0   src/TensorFlowNET.Core/Variables/RefVariable.cs
 14. +1   -1   src/TensorFlowNET.Core/Variables/variables.py.cs
 15. +38  -15  src/TensorFlowNET.Core/ops.py.cs
 16. +2   -2   test/TensorFlowNET.Examples/LinearRegression.cs

+ 4 - 4   src/TensorFlowNET.Core/Gradients/gradients_impl.py.cs

@@ -140,7 +140,8 @@ namespace Tensorflow
             if (gate_gradients && in_grads.Count(x => x != null) > 1)
             {
-
+                ops._colocate_with_for_gradient(null, gradient_uid, ignore_existing: true);
+                in_grads = control_flow_ops.tuple(in_grads);
             }
         });
     }
@@ -223,10 +224,9 @@ namespace Tensorflow
             $"inputs {op.inputs._inputs.Count()}");
         }

-        private static Tensor[] _MaybeCompile(string scope, Operation op, Tensor out_grads, Action func, Func<Operation, Tensor, (Tensor, Tensor)> grad_fn)
+        private static Tensor[] _MaybeCompile(string scope, Operation op, Tensor out_grads, Action func, Func<Operation, Tensor, Tensor[]> grad_fn)
         {
-            var in_grads = grad_fn(op, out_grads);
-            return new Tensor[] { in_grads.Item1, in_grads.Item2 };
+            return grad_fn(op, out_grads);
         }

         private static bool _IsPartitionedCall(Operation op)
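Note on the _MaybeCompile change: gradient functions move from a fixed (Tensor, Tensor) tuple to Tensor[], so an op can report exactly one gradient per input regardless of arity, and _MaybeCompile can simply forward grad_fn's result. A minimal sketch of the new convention (op and out_grad stand for whatever node the backprop loop is visiting):

    // Sketch: one gradient per op input, whatever the op's arity.
    Func<Operation, Tensor, Tensor[]> grad_fn = ops.get_gradient_function(op);
    Tensor[] in_grads = grad_fn(op, out_grad);
    // "Identity" yields a 1-element array; "Add", "Mul", "Sub", ... yield 2 elements.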


+ 2 - 2   src/TensorFlowNET.Core/Gradients/math_grad.py.cs

@@ -27,9 +27,9 @@ namespace Tensorflow
             return (r1, r2);
         }

-        public static (Tensor, Tensor) _IdGrad(Operation op, Tensor grad)
+        public static Tensor _IdGrad(Operation op, Tensor grad)
         {
-            return (grad, null);
+            return grad;
         }

         public static (Tensor, Tensor) _MulGrad(Operation op, Tensor grad)


+ 1 - 1   src/TensorFlowNET.Core/Graphs/Graph.Gradient.cs.cs

@@ -6,7 +6,7 @@ namespace Tensorflow
 {
     public partial class Graph
     {
-        public void _colocate_with_for_gradient(Operation op, int? gradient_uid, bool ignore_existing = false)
+        public void _colocate_with_for_gradient(Operation op, string gradient_uid, bool ignore_existing = false)
         {

         }


+ 1 - 1   src/TensorFlowNET.Core/Operations/OpDefLibrary.cs

@@ -106,7 +106,7 @@ namespace Tensorflow
             }
             else
             {
-                keywords[input_name] = ops.internal_convert_to_tensor(values, name: input_name);
+                keywords[input_name] = ops.internal_convert_to_tensor(values, name: input_name, as_ref: input_arg.IsRef);
             }

             if (!String.IsNullOrEmpty(input_arg.TypeAttr))


+ 69 - 15   src/TensorFlowNET.Core/Operations/control_flow_ops.py.cs

@@ -7,17 +7,20 @@ namespace Tensorflow
 {
     public class control_flow_ops
     {
-        public static Operation group(List<Operation> inputs, string name = "")
+        public static Operation group(Operation[] inputs, string name = "")
         {
-            using (var namescope = new ops.name_scope(name, "group_deps", inputs))
+            return Python.with<ops.name_scope, Operation>(new ops.name_scope(name, "group_deps", inputs), scope =>
             {
-                name = namescope;
+                name = scope;

                 // Sorts *inputs according to their devices.
-                var ops_on_device = new Dictionary<string, Operation[]>();
+                var ops_on_device = new Dictionary<string, List<Operation>>();
                 foreach (var inp in inputs)
                 {
-                    ops_on_device[inp.Device] = new Operation[] { inp };
+                    if (ops_on_device.ContainsKey(inp.Device))
+                        ops_on_device[inp.Device].Add(inp);
+                    else
+                        ops_on_device[inp.Device] = new List<Operation> { inp };
                 }

                 // 1-level tree. The root node is the returned NoOp node.
@@ -25,32 +28,28 @@ namespace Tensorflow
                 {
                     var dev = ops_on_device.Keys.First();
                     var deps = ops_on_device.Values.First();
-                    return _GroupControlDeps(dev, deps, name);
+                    return _GroupControlDeps(dev, deps.ToArray(), name);
                 }

                 // 2-level tree. The root node is the returned NoOp node.
                 // deps contains 1 NoOp node for each device.
                 return null;
-            }
+            });
         }

         private static Operation _GroupControlDeps(string dev, Operation[] deps, string name = "")
         {
-            Operation result = null;
-
-            Python.with(ops.control_dependencies(deps), delegate
+            return Python.with<_ControlDependenciesController, Operation>(ops.control_dependencies(deps), ctl =>
             {
-                if (string.IsNullOrEmpty(dev))
+                if (dev == null)
                 {
-                    result = gen_control_flow_ops.no_op(name);
+                    return gen_control_flow_ops.no_op(name);
                 }
                 else
                 {
-                    result = gen_control_flow_ops.no_op(name);
+                    return gen_control_flow_ops.no_op(name);
                 }
             });
-
-            return result;
         }

         /// <summary>
@@ -81,5 +80,60 @@ namespace Tensorflow
         {
             return op.OpType == "Exit" || op.OpType == "RefExit";
         }
+
+        public static Tensor[] tuple(Tensor[] tensors, string name = "", Operation[] control_inputs = null)
+        {
+            return Python.with<ops.name_scope, Tensor[]>(new ops.name_scope(name, "tuple", tensors), scope =>
+            {
+                name = scope;
+                var gating_ops = tensors.Select(x => x.op).ToList();
+
+                if (control_inputs != null)
+                {
+                    foreach (var c in control_inputs)
+                        gating_ops.Add(c);
+                }
+
+                // Note that in order to ensure ordering in the pbtxt, we must take care to
+                // ensure the order here.
+                gating_ops = gating_ops.OrderBy(x => x._id).ToList();
+                var gate = group(gating_ops.ToArray());
+
+                var tpl = new List<Tensor>();
+                foreach (var t in tensors)
+                {
+                    tpl.Add(with_dependencies(new Operation[] { gate }, t));
+                }
+
+                return tpl.ToArray();
+            });
+        }
+
+        public static Tensor with_dependencies(Operation[] dependencies, Tensor output_tensor, string name = "")
+        {
+            var values = new List<object>();
+            values.AddRange(dependencies);
+            values.Add(output_tensor);
+
+            return Python.with<ops.name_scope, Tensor>(new ops.name_scope(name, "control_dependency", values), scope =>
+            {
+                name = scope;
+
+                return Python.with<_ControlDependenciesController, Tensor>(ops.control_dependencies(dependencies), ctl =>
+                {
+                    output_tensor = ops.convert_to_tensor_or_composite(output_tensor);
+                    return _Identity(output_tensor, name: name);
+                });
+            });
+        }
+
+        public static Tensor _Identity(Tensor data, string name = "")
+        {
+            data = ops.internal_convert_to_tensor_or_composite(data, as_ref: true);
+            if ((int)data.dtype > 100)
+                throw new NotImplementedException("_Identity");
+            else
+                return gen_array_ops.identity(data, name: name);
+        }
     }
 }
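Taken together, the new tuple() and with_dependencies() implement the gradient gating used by gradients_impl above: group() joins all producing ops under a single NoOp, and every returned tensor is an Identity carrying a control dependency on that NoOp, so none of the gated tensors can be consumed before all of them are computed. A hedged usage sketch (g1 and g2 are placeholder tensors, not names from this commit):

    // Sketch: neither gated[0] nor gated[1] can run before both g1.op and g2.op.
    Tensor[] gated = control_flow_ops.tuple(new Tensor[] { g1, g2 });

    // Per-tensor form: an Identity of g1 that also waits on g2's producer.
    Tensor gated1 = control_flow_ops.with_dependencies(new Operation[] { g2.op }, g1);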

+ 9 - 0   src/TensorFlowNET.Core/Operations/math_ops.py.cs

@@ -6,6 +6,15 @@ namespace Tensorflow
 {
     public class math_ops
     {
+        public static Tensor cast(Tensor x, TF_DataType dtype = TF_DataType.DtInvalid, string name = "")
+        {
+            var base_type = dtype.as_base_dtype();
+            if (base_type == x.dtype)
+                return x;
+
+            throw new NotImplementedException("math_ops.cast");
+        }
+
         /// <summary>
         /// Helper function for reduction ops.
         /// </summary>
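As written, cast is only a same-dtype pass-through; an actual conversion still throws NotImplementedException. That is enough for _apply_dense below, where the learning-rate tensor is expected to already match the variable's base dtype.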


+ 1 - 1   src/TensorFlowNET.Core/Python.cs

@@ -78,7 +78,7 @@ namespace Tensorflow
                 yield return (t1.Data<T>(index), t2.Data<T>(index));
         }

-        public static IEnumerable<(T, T)> zip<T>(IList<T> t1, IList<T> t2)
+        public static IEnumerable<(T1, T2)> zip<T1, T2>(IList<T1> t1, IList<T2> t2)
         {
             for (int i = 0; i < t1.Count; i++)
                 yield return (t1[i], t2[i]);
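The second type parameter is what lets compute_gradients pair a list of Tensor gradients with a list of RefVariable targets. A self-contained illustration with plain CLR types (the values are made up):

    // Sketch: heterogeneous zip, impossible with the old single-T overload.
    var names = new List<string> { "W", "b" };
    var values = new List<double> { 0.5, -1.0 };
    foreach (var (n, v) in Python.zip(names, values))
        Console.WriteLine($"{n} = {v}");   // pairs are (string, double)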


+ 6 - 0   src/TensorFlowNET.Core/Train/GradientDescentOptimizer.cs

@@ -12,5 +12,11 @@ namespace Tensorflow
             LearningRate = learning_rate;
             LearningRateTensor = null;
         }
+
+        public override void _prepare()
+        {
+            LearningRate = _call_if_callable(LearningRate);
+            LearningRateTensor = ops.convert_to_tensor(LearningRate, name: "learning_rate");
+        }
     }
 }

+ 105 - 5   src/TensorFlowNET.Core/Train/Optimizer.cs

@@ -56,7 +56,99 @@ namespace Tensorflow
                 gate_gradients: gate_gradients,
                 colocate_gradients_with_ops: colocate_gradients_with_ops);

-            return null;
+            var vars_with_grad = grads_and_vars.Where(x => x.Item1 != null).Select(x => x.Item2).ToArray();
+            if (vars_with_grad.Length == 0)
+                throw new ValueError($"No gradients provided for any variable, check your graph for ops" +
+                    $" that do not support gradients, between variables {string.Join(",", vars_with_grad.Select(x => x.name))} and loss {loss}.");
+
+            return apply_gradients(grads_and_vars);
+        }
+
+        public Operation apply_gradients(Tuple<Tensor, RefVariable>[] grads_and_vars, Tensor global_step = null, string name = "")
+        {
+            // No DistributionStrategy case.
+            var converted_grads_and_vars = new List<Tuple<Tensor, RefVariable, _OptimizableVariable>>();
+            foreach (var (g, v) in grads_and_vars)
+            {
+                if (g != null)
+                {
+                    // Convert the grad to Tensor or IndexedSlices if necessary.
+                    var gR = ops.convert_to_tensor_or_indexed_slices(g);
+                    var p = _get_processor(v);
+                    converted_grads_and_vars.Add(new Tuple<Tensor, RefVariable, _OptimizableVariable>(gR, v, p));
+                }
+            }
+
+            var var_list = converted_grads_and_vars.Where(x => x.Item1 != null).Select(x => x.Item2).ToArray();
+            if (var_list.Length == 0)
+                throw new ValueError($"No gradients provided for any variable");
+
+            ops.init_scope();
+            _create_slots(var_list);
+
+            var update_ops = new List<Operation>();
+            return Python.with<ops.name_scope, Operation>(new ops.name_scope(name, Name), scope =>
+            {
+                name = scope;
+                _prepare();
+
+                foreach (var (grad, var, processor) in converted_grads_and_vars)
+                {
+                    if (grad == null)
+                        continue;
+
+                    var scope_name = var.op.Name;
+                    Python.with<ops.name_scope>(new ops.name_scope("update_" + scope_name), scope2 =>
+                    {
+                        update_ops.Add(processor.update_op(this, grad));
+                    });
+                }
+
+                Operation apply_updates = null;
+                if (global_step == null)
+                {
+                    apply_updates = _finish(update_ops.ToArray(), name);
+                }
+                else
+                {
+
+                }
+
+                return apply_updates;
+            });
+        }
+
+        private void _create_slots(RefVariable[] var_list)
+        {
+
+        }
+
+        public virtual Operation _finish(Operation[] update_ops, string name_scope)
+        {
+            return control_flow_ops.group(update_ops, name_scope);
+        }
+
+        public virtual Operation _apply_dense(Tensor grad, RefVariable var)
+        {
+            var alpha = math_ops.cast(LearningRateTensor, var.dtype.as_base_dtype());
+            return gen_training_ops.apply_gradient_descent(var, alpha, grad, use_locking: _use_locking).op;
+        }
+
+        public virtual void _prepare()
+        {
+
+        }
+
+        private _OptimizableVariable _get_processor(RefVariable v)
+        {
+            if (v is RefVariable)
+            {
+                return new _RefVariableProcessor(v);
+            }
+            else
+            {
+                throw new NotImplementedException("_get_processor");
+            }
         }

         /// <summary>
@@ -68,7 +160,7 @@ namespace Tensorflow
         /// A list of (gradient, variable) pairs. Variable is always present, but
         /// gradient can be `None`.
         /// </returns>
-        public List<KeyValuePair<Tensor, RefVariable>> compute_gradients(Tensor loss,
+        public Tuple<Tensor, RefVariable>[] compute_gradients(Tensor loss,
             List<RefVariable> var_list = null,
             int? aggregation_method = null,
             GateGradientType gate_gradients = GateGradientType.GATE_OP,
@@ -97,11 +189,19 @@ namespace Tensorflow
                 aggregation_method: aggregation_method,
                 colocate_gradients_with_ops: colocate_gradients_with_ops);

-            //if ((int)gate_gradients == Optimizer.GATE_GRAPH)
-            //grads = control_flow_ops.tuple(grads);
+            if ((int)gate_gradients == Optimizer.GATE_GRAPH)
+                grads = control_flow_ops.tuple(grads);
+
+            var grads_and_vars = Python.zip(grads, var_list)
+                .Select(x => new Tuple<Tensor, RefVariable>(x.Item1, x.Item2))
+                .ToArray();

-            return null;
+            return grads_and_vars;
+        }
+
+        protected T _call_if_callable<T>(T param)
+        {
+            return param;
         }
     }
 }
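End to end, this reproduces the tf.train.Optimizer pipeline: minimize() calls compute_gradients(), rejects the graph if every gradient is null, and hands the (gradient, variable) pairs to apply_gradients(), which runs _prepare(), builds one update op per variable under an "update_<var>" scope, and joins them with _finish()/group(). A hedged caller sketch (loss is a placeholder tensor; model construction is omitted, and the exact minimize() overload may differ):

    // Sketch: one SGD training op after this commit.
    var optimizer = new GradientDescentOptimizer(0.01f);
    Operation train_op = optimizer.minimize(loss);
    // Running train_op executes one ApplyGradientDescent per trainable variable.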

+ 1 - 1   src/TensorFlowNET.Core/Train/_OptimizableVariable.cs

@@ -7,6 +7,6 @@ namespace Tensorflow
     public interface _OptimizableVariable
     {
         Tensor target();
-        void update_op(Graph g);
+        Operation update_op(Optimizer optimizer, Tensor g);
     }
 }

+ 24 - 0   src/TensorFlowNET.Core/Train/gen_training_ops.py.cs

@@ -0,0 +1,24 @@
+using System;
+using System.Collections.Generic;
+using System.Text;
+
+namespace Tensorflow
+{
+    public class gen_training_ops
+    {
+        public static OpDefLibrary _op_def_lib = new OpDefLibrary();
+
+        public static Tensor apply_gradient_descent(RefVariable var, Tensor alpha, Tensor delta, bool use_locking = false, string name = "")
+        {
+            var _op = _op_def_lib._apply_op_helper("ApplyGradientDescent", name, new
+            {
+                var,
+                alpha,
+                delta,
+                use_locking
+            });
+
+            return _op.outputs[0];
+        }
+    }
+}
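ApplyGradientDescent is the primitive behind Optimizer._apply_dense above: the TensorFlow kernel updates the variable in place as var <- var - alpha * delta, where alpha is the learning-rate tensor and delta the gradient. A worked one-dimensional step: with var = 2.0, alpha = 0.1 and delta = 4.0, the update leaves var = 2.0 - 0.1 * 4.0 = 1.6.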

+ 4 - 2   src/TensorFlowNET.Core/Train/optimizer.py.cs

@@ -26,9 +26,11 @@ namespace Tensorflow
             return _v._ref();
         }

-        public void update_op(Graph g)
+        public Operation update_op(Optimizer optimizer, Tensor g)
         {
-
+            var update_op = optimizer._apply_dense(g, _v);
+
+            return update_op;
         }
     }
 }

+ 13 - 0   src/TensorFlowNET.Core/Variables/RefVariable.cs

@@ -114,11 +114,24 @@ namespace Tensorflow
             return _variable;
         }

+        public Tensor value()
+        {
+            return _snapshot;
+        }
+
         public Tensor _AsTensor()
         {
             return _snapshot;
         }

+        public Tensor _TensorConversionFunction(bool as_ref = false)
+        {
+            if (as_ref)
+                return _ref();
+            else
+                return value();
+        }
+
         /// <summary>
         /// Attempt to guard against dependencies on uninitialized variables.
         /// </summary>
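_TensorConversionFunction is the hook that internal_convert_to_tensor dispatches to for RefVariable (see the ops.py.cs hunk below): with as_ref: true it returns the mutable reference tensor that stateful ops such as ApplyGradientDescent write through, while the default returns the read-only snapshot. A small sketch (someVar stands for any existing RefVariable):

    Tensor read = someVar._TensorConversionFunction();               // snapshot, for reads
    Tensor write = someVar._TensorConversionFunction(as_ref: true);  // _ref(), for in-place updates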


+ 1 - 1   src/TensorFlowNET.Core/Variables/variables.py.cs

@@ -42,7 +42,7 @@ namespace Tensorflow
         /// <returns>An Op that run the initializers of all the specified variables.</returns>
         public static Operation variables_initializer(RefVariable[] var_list, string name = "init")
         {
-            return control_flow_ops.group(var_list.Select(x => x.initializer).ToList(), name);
+            return control_flow_ops.group(var_list.Select(x => x.initializer).ToArray(), name);
         }
     }
 }

+ 38 - 15   src/TensorFlowNET.Core/ops.py.cs

@@ -80,6 +80,16 @@ namespace Tensorflow
             }
         }

+        public static Tensor convert_to_tensor_or_composite(Tensor value, TF_DataType dtype = TF_DataType.DtInvalid, string name = "")
+        {
+            return internal_convert_to_tensor_or_composite(value: value, dtype: dtype, name: name, as_ref: false);
+        }
+
+        public static Tensor internal_convert_to_tensor_or_composite(Tensor value, TF_DataType dtype = TF_DataType.DtInvalid, string name = "", bool as_ref = false)
+        {
+            return internal_convert_to_tensor<Tensor>(value, dtype: dtype.as_datatype_enum(), name: name, as_ref: as_ref);
+        }
+
         /// <summary>
         /// Wrapper for `Graph.control_dependencies()` using the default graph.
         /// </summary>
@@ -222,7 +232,7 @@ namespace Tensorflow
             _colocate_with_for_gradient(tensor.op, null, ignore_existing);
         }

-        private static void _colocate_with_for_gradient(Operation op, int? gradient_uid, bool ignore_existing = false)
+        public static void _colocate_with_for_gradient(Operation op, string gradient_uid, bool ignore_existing = false)
         {
             var default_graph = get_default_graph();
             default_graph._colocate_with_for_gradient(op, gradient_uid, ignore_existing);
@@ -282,7 +292,7 @@ namespace Tensorflow
             return tf.Session();
         }

-        public static Func<Operation, Tensor, (Tensor, Tensor)> get_gradient_function(Operation op)
+        public static Func<Operation, Tensor, Tensor[]> get_gradient_function(Operation op)
         {
             if (op.inputs == null) return null;

@@ -293,31 +303,42 @@ namespace Tensorflow
                 switch (oper.type)
                 {
                     case "Add":
-                        return math_grad._AddGrad(oper, out_grads);
+                        var add = math_grad._AddGrad(oper, out_grads);
+                        return new Tensor[] { add.Item1, add.Item2 };
                     case "Identity":
-                        return math_grad._IdGrad(oper, out_grads);
+                        var id = math_grad._IdGrad(oper, out_grads);
+                        return new Tensor[] { id };
                     case "Mul":
-                        return math_grad._MulGrad(oper, out_grads);
+                        var mul = math_grad._MulGrad(oper, out_grads);
+                        return new Tensor[] { mul.Item1, mul.Item2 };
                     case "Sum":
-                        return math_grad._SumGrad(oper, out_grads);
+                        var sum = math_grad._SumGrad(oper, out_grads);
+                        return new Tensor[] { sum.Item1, sum.Item2 };
                     case "Sub":
-                        return math_grad._SubGrad(oper, out_grads);
+                        var sub = math_grad._SubGrad(oper, out_grads);
+                        return new Tensor[] { sub.Item1, sub.Item2 };
                     case "Pow":
-                        return math_grad._PowGrad(oper, out_grads);
+                        var pow = math_grad._PowGrad(oper, out_grads);
+                        return new Tensor[] { pow.Item1, pow.Item2 };
                     case "RealDiv":
-                        return math_grad._RealDivGrad(oper, out_grads);
+                        var realdiv = math_grad._RealDivGrad(oper, out_grads);
+                        return new Tensor[] { realdiv.Item1, realdiv.Item2 };
                     default:
                         throw new NotImplementedException($"get_gradient_function {oper.type}");
                 }
-                /*var result = typeof(math_grad).GetMethod($"_{op.type}Grad").Invoke(null, new object[] { op, out_grads });
-                var p1 = result.GetType().GetProperty("Item1");
-                var p2 = result.GetType().GetProperty("Item2");
-
-                return (p1.GetValue(result, null) as Tensor, p2.GetValue(result, null) as Tensor);*/
             };
         }

+        public static Tensor convert_to_tensor_or_indexed_slices(Tensor value, TF_DataType dtype = TF_DataType.DtInvalid, string name = "")
+        {
+            return internal_convert_to_tensor_or_indexed_slices(value: value, dtype: dtype, name: name, as_ref: false);
+        }
+
+        public static Tensor internal_convert_to_tensor_or_indexed_slices(Tensor value, TF_DataType dtype = TF_DataType.DtInvalid, string name = "", bool as_ref = false)
+        {
+            return value;
+        }
+
         public static Tensor[] internal_convert_n_to_tensor<T>(T[] values, DataType dtype = DataType.DtInvalid,
             string name = "", DataType preferred_dtype = DataType.DtInvalid,
             bool as_ref = false)
@@ -345,6 +366,8 @@ namespace Tensorflow
                     return constant_op.constant(Convert.ToInt32(value), name);
                 case "Double":
                     return constant_op.constant(Convert.ToDouble(value), name);
+                case "RefVariable":
+                    return (value as RefVariable)._TensorConversionFunction(as_ref: as_ref);
                 default:
                     throw new NotImplementedException($"internal_convert_to_tensor: Can't convert {typeof(T).Name} to Tensor");
             }

+ 2 - 2   test/TensorFlowNET.Examples/LinearRegression.cs

@@ -63,11 +63,11 @@ namespace TensorFlowNET.Examples
             // Fit all training data
             for (int i = 0; i < training_epochs; i++)
             {
-                foreach ((double x, double y) in Python.zip<double>(train_X, train_Y))
+                foreach (var (x, y) in Python.zip<double>(train_X, train_Y))
                 {
                     var feed_dict = new Dictionary<Tensor, NDArray>();

-                    //sess.run(optimizer, feed_dict);
+                    // sess.run(optimizer, feed_dict);
                 }
             }
         });

