
fixed #170

tags/v0.8.0
Oceania2018 · 6 years ago · commit 8408dcd61f
16 changed files with 281 additions and 50 deletions
  1. src/TensorFlowNET.Core/Gradients/gradients_impl.py.cs (+4 -4)
  2. src/TensorFlowNET.Core/Gradients/math_grad.py.cs (+2 -2)
  3. src/TensorFlowNET.Core/Graphs/Graph.Gradient.cs.cs (+1 -1)
  4. src/TensorFlowNET.Core/Operations/OpDefLibrary.cs (+1 -1)
  5. src/TensorFlowNET.Core/Operations/control_flow_ops.py.cs (+69 -15)
  6. src/TensorFlowNET.Core/Operations/math_ops.py.cs (+9 -0)
  7. src/TensorFlowNET.Core/Python.cs (+1 -1)
  8. src/TensorFlowNET.Core/Train/GradientDescentOptimizer.cs (+6 -0)
  9. src/TensorFlowNET.Core/Train/Optimizer.cs (+105 -5)
  10. src/TensorFlowNET.Core/Train/_OptimizableVariable.cs (+1 -1)
  11. src/TensorFlowNET.Core/Train/gen_training_ops.py.cs (+24 -0)
  12. src/TensorFlowNET.Core/Train/optimizer.py.cs (+4 -2)
  13. src/TensorFlowNET.Core/Variables/RefVariable.cs (+13 -0)
  14. src/TensorFlowNET.Core/Variables/variables.py.cs (+1 -1)
  15. src/TensorFlowNET.Core/ops.py.cs (+38 -15)
  16. test/TensorFlowNET.Examples/LinearRegression.cs (+2 -2)

src/TensorFlowNET.Core/Gradients/gradients_impl.py.cs (+4 -4)

@@ -140,7 +140,8 @@ namespace Tensorflow
 if (gate_gradients && in_grads.Count(x => x != null) > 1)
 {
-
+    ops._colocate_with_for_gradient(null, gradient_uid, ignore_existing: true);
+    in_grads = control_flow_ops.tuple(in_grads);
 }
 });
 }
@@ -223,10 +224,9 @@ namespace Tensorflow
 $"inputs {op.inputs._inputs.Count()}");
 }

-private static Tensor[] _MaybeCompile(string scope, Operation op, Tensor out_grads, Action func, Func<Operation, Tensor, (Tensor, Tensor)> grad_fn)
+private static Tensor[] _MaybeCompile(string scope, Operation op, Tensor out_grads, Action func, Func<Operation, Tensor, Tensor[]> grad_fn)
 {
-    var in_grads = grad_fn(op, out_grads);
-    return new Tensor[] { in_grads.Item1, in_grads.Item2 };
+    return grad_fn(op, out_grads);
 }

 private static bool _IsPartitionedCall(Operation op)
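Context for the first hunk: when gate_gradients is set and an op produced more than one non-null input gradient, the gradients are now routed through control_flow_ops.tuple (added in this commit), so no single gradient can be consumed before all of them exist. A minimal sketch of the intent, assuming two hypothetical, already-computed gradient tensors gradA and gradB and a using System.Linq directive:

    // Gate two gradients behind one shared NoOp.
    var in_grads = new Tensor[] { gradA, gradB };
    if (in_grads.Count(x => x != null) > 1)
        in_grads = control_flow_ops.tuple(in_grads);
    // Each element of in_grads now carries a control dependency on the gate.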


src/TensorFlowNET.Core/Gradients/math_grad.py.cs (+2 -2)

@@ -27,9 +27,9 @@ namespace Tensorflow
     return (r1, r2);
 }

-public static (Tensor, Tensor) _IdGrad(Operation op, Tensor grad)
+public static Tensor _IdGrad(Operation op, Tensor grad)
 {
-    return (grad, null);
+    return grad;
 }

 public static (Tensor, Tensor) _MulGrad(Operation op, Tensor grad)


src/TensorFlowNET.Core/Graphs/Graph.Gradient.cs.cs (+1 -1)

@@ -6,7 +6,7 @@ namespace Tensorflow
 {
 public partial class Graph
 {
-    public void _colocate_with_for_gradient(Operation op, int? gradient_uid, bool ignore_existing = false)
+    public void _colocate_with_for_gradient(Operation op, string gradient_uid, bool ignore_existing = false)
     {

     }


src/TensorFlowNET.Core/Operations/OpDefLibrary.cs (+1 -1)

@@ -106,7 +106,7 @@ namespace Tensorflow
 }
 else
 {
-    keywords[input_name] = ops.internal_convert_to_tensor(values, name: input_name);
+    keywords[input_name] = ops.internal_convert_to_tensor(values, name: input_name, as_ref: input_arg.IsRef);
 }

 if (!String.IsNullOrEmpty(input_arg.TypeAttr))
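Why this one-argument change matters: inputs whose OpDef marks them as ref-typed, such as the var input of the ApplyGradientDescent op added below, must be converted through the ref path; otherwise the optimizer would hand a read-only snapshot to an in-place update kernel. Sketched as comments against the new call:

    // input_arg.IsRef == true  -> ref conversion (a RefVariable keeps its mutable ref)
    // input_arg.IsRef == false -> plain value conversion, unchanged behavior
    keywords[input_name] = ops.internal_convert_to_tensor(values, name: input_name, as_ref: input_arg.IsRef);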


src/TensorFlowNET.Core/Operations/control_flow_ops.py.cs (+69 -15)

@@ -7,17 +7,20 @@ namespace Tensorflow
 {
 public class control_flow_ops
 {
-    public static Operation group(List<Operation> inputs, string name = "")
+    public static Operation group(Operation[] inputs, string name = "")
     {
-        using(var namescope = new ops.name_scope(name, "group_deps", inputs))
+        return Python.with<ops.name_scope, Operation>(new ops.name_scope(name, "group_deps", inputs), scope =>
         {
-            name = namescope;
+            name = scope;

             // Sorts *inputs according to their devices.
-            var ops_on_device = new Dictionary<string, Operation[]>();
+            var ops_on_device = new Dictionary<string, List<Operation>>();
             foreach (var inp in inputs)
             {
-                ops_on_device[inp.Device] = new Operation[] { inp };
+                if (ops_on_device.ContainsKey(inp.Device))
+                    ops_on_device[inp.Device].Add(inp);
+                else
+                    ops_on_device[inp.Device] = new List<Operation> { inp };
             }

             // 1-level tree. The root node is the returned NoOp node.
@@ -25,32 +28,28 @@ namespace Tensorflow
             {
                 var dev = ops_on_device.Keys.First();
                 var deps = ops_on_device.Values.First();
-                return _GroupControlDeps(dev, deps, name);
+                return _GroupControlDeps(dev, deps.ToArray(), name);
             }

             // 2-level tree. The root node is the returned NoOp node.
             // deps contains 1 NoOp node for each device.
             return null;
-        }
+        });
     }

     private static Operation _GroupControlDeps(string dev, Operation[] deps, string name = "")
     {
-        Operation result = null;
-
-        Python.with(ops.control_dependencies(deps), delegate
+        return Python.with<_ControlDependenciesController, Operation>(ops.control_dependencies(deps), ctl =>
         {
-            if (string.IsNullOrEmpty(dev))
+            if (dev == null)
             {
-                result = gen_control_flow_ops.no_op(name);
+                return gen_control_flow_ops.no_op(name);
             }
             else
             {
-                result = gen_control_flow_ops.no_op(name);
+                return gen_control_flow_ops.no_op(name);
             }
         });
-
-        return result;
     }

 /// <summary>
@@ -81,5 +80,60 @@ namespace Tensorflow
     {
         return op.OpType == "Exit" || op.OpType == "RefExit";
     }
+
+    public static Tensor[] tuple(Tensor[] tensors, string name = "", Operation[] control_inputs = null)
+    {
+        return Python.with<ops.name_scope, Tensor[]>(new ops.name_scope(name, "tuple", tensors), scope =>
+        {
+            name = scope;
+            var gating_ops = tensors.Select(x => x.op).ToList();
+
+            if(control_inputs != null)
+            {
+                foreach (var c in control_inputs)
+                    gating_ops.Add(c);
+            }
+
+            // Note that in order to ensure ordering in the pbtxt, we must take care to
+            // ensure the order here.
+            gating_ops = gating_ops.OrderBy(x => x._id).ToList();
+            var gate = group(gating_ops.ToArray());
+
+            var tpl = new List<Tensor>();
+            foreach(var t in tensors)
+            {
+                tpl.Add(with_dependencies(new Operation[] { gate }, t));
+            }
+
+            return tpl.ToArray();
+        });
+    }
+
+    public static Tensor with_dependencies(Operation[] dependencies, Tensor output_tensor, string name = "")
+    {
+        var values = new List<object>();
+        values.AddRange(dependencies);
+        values.Add(output_tensor);
+
+        return Python.with<ops.name_scope, Tensor>(new ops.name_scope(name, "control_dependency", values), scope =>
+        {
+            name = scope;

+            return Python.with<_ControlDependenciesController, Tensor>(ops.control_dependencies(dependencies), ctl =>
+            {
+                output_tensor = ops.convert_to_tensor_or_composite(output_tensor);
+                return _Identity(output_tensor, name: name);
+            });
+        });
+    }
+
+    public static Tensor _Identity(Tensor data, string name = "")
+    {
+        data = ops.internal_convert_to_tensor_or_composite(data, as_ref: true);
+        if ((int)data.dtype > 100)
+            throw new NotImplementedException("_Identity");
+        else
+            return gen_array_ops.identity(data, name: name);
+    }
 }
 }
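Taken together, group, tuple, and with_dependencies implement the standard control-dependency gating pattern: group builds one NoOp that depends on every gating op, and tuple re-emits each tensor as an Identity that is control-dependent on that NoOp. A hedged usage sketch, assuming t1 and t2 are tensors already built in the default graph:

    // gated[0] and gated[1] compute the same values as t1 and t2, but
    // neither can run until both t1.op and t2.op have finished.
    Tensor[] gated = control_flow_ops.tuple(new Tensor[] { t1, t2 });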

src/TensorFlowNET.Core/Operations/math_ops.py.cs (+9 -0)

@@ -6,6 +6,15 @@ namespace Tensorflow
 {
 public class math_ops
 {
+    public static Tensor cast(Tensor x, TF_DataType dtype = TF_DataType.DtInvalid, string name = "")
+    {
+        var base_type = dtype.as_base_dtype();
+        if(base_type == x.dtype)
+            return x;
+
+        throw new NotImplementedException("math_ops.cast");
+    }
+
     /// <summary>
     /// Helper function for reduction ops.
     /// </summary>
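The new cast is deliberately a stub: it succeeds only when the requested base dtype already equals the tensor's dtype, which is exactly the case Optimizer._apply_dense hits when the learning-rate tensor and the variable share a dtype; any real conversion still throws. The supported path, assuming x is an existing plain (non-ref) float tensor:

    // No-op cast: dtypes already match, so the same tensor is returned.
    Tensor y = math_ops.cast(x, x.dtype.as_base_dtype());
    // A genuine conversion (e.g. float -> double) currently throws
    // NotImplementedException("math_ops.cast").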


src/TensorFlowNET.Core/Python.cs (+1 -1)

@@ -78,7 +78,7 @@ namespace Tensorflow
     yield return(t1.Data<T>(index), t2.Data<T>(index));
 }

-public static IEnumerable<(T, T)> zip<T>(IList<T> t1, IList<T> t2)
+public static IEnumerable<(T1, T2)> zip<T1, T2>(IList<T1> t1, IList<T2> t2)
 {
     for (int i = 0; i < t1.Count; i++)
         yield return (t1[i], t2[i]);
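Generalizing zip to two independent type parameters is what lets compute_gradients pair each Tensor gradient with its RefVariable. A small self-contained example of the new overload, assuming the usual using System and using System.Collections.Generic directives:

    // Heterogeneous zip: pairs elements of different types positionally.
    IList<string> names = new List<string> { "W", "b" };
    IList<int> sizes = new List<int> { 784, 10 };
    foreach (var (n, s) in Python.zip(names, sizes))
        Console.WriteLine($"{n}: {s}");   // prints "W: 784" then "b: 10"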


src/TensorFlowNET.Core/Train/GradientDescentOptimizer.cs (+6 -0)

@@ -12,5 +12,11 @@ namespace Tensorflow
     LearningRate = learning_rate;
     LearningRateTensor = null;
 }
+
+public override void _prepare()
+{
+    LearningRate = _call_if_callable(LearningRate);
+    LearningRateTensor = ops.convert_to_tensor(LearningRate, name: "learning_rate");
+}
 }
 }
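_prepare runs once per apply_gradients call, before any update op is built: it materializes the scalar learning rate as a graph tensor named "learning_rate" so that _apply_dense can cast it to each variable's dtype. A hedged sketch (the float constructor argument is an assumption based on the assignment above):

    var opt = new GradientDescentOptimizer(0.01f);
    opt._prepare();
    // opt.LearningRateTensor now refers to a constant tensor named "learning_rate".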

src/TensorFlowNET.Core/Train/Optimizer.cs (+105 -5)

@@ -56,7 +56,99 @@ namespace Tensorflow
     gate_gradients: gate_gradients,
     colocate_gradients_with_ops: colocate_gradients_with_ops);

-    return null;
+    var vars_with_grad = grads_and_vars.Where(x => x.Item1 != null).Select(x => x.Item2).ToArray();
+    if (vars_with_grad.Length == 0)
+        throw new ValueError($"No gradients provided for any variable, check your graph for ops" +
+            $" that do not support gradients, between variables {string.Join(",", vars_with_grad.Select(x => x.name))} and loss {loss}.");
+
+    return apply_gradients(grads_and_vars);
 }
+
+public Operation apply_gradients(Tuple<Tensor, RefVariable>[] grads_and_vars, Tensor global_step = null, string name = "")
+{
+    // No DistributionStrategy case.
+    var converted_grads_and_vars = new List<Tuple<Tensor, RefVariable, _OptimizableVariable>>();
+    foreach (var (g, v) in grads_and_vars)
+    {
+        if(g != null)
+        {
+            // Convert the grad to Tensor or IndexedSlices if necessary.
+            var gR = ops.convert_to_tensor_or_indexed_slices(g);
+            var p = _get_processor(v);
+            converted_grads_and_vars.Add(new Tuple<Tensor, RefVariable, _OptimizableVariable>(gR, v, p));
+        }
+    }
+
+    var var_list = converted_grads_and_vars.Where(x => x.Item1 != null).Select(x => x.Item2).ToArray();
+    if (var_list.Length == 0)
+        throw new ValueError($"No gradients provided for any variable");
+
+    ops.init_scope();
+    _create_slots(var_list);
+
+    var update_ops = new List<Operation>();
+    return Python.with<ops.name_scope, Operation>(new ops.name_scope(name, Name), scope =>
+    {
+        name = scope;
+        _prepare();
+
+        foreach(var (grad, var, processor) in converted_grads_and_vars)
+        {
+            if (grad == null)
+                continue;
+
+            var scope_name = var.op.Name;
+            Python.with<ops.name_scope>(new ops.name_scope("update_" + scope_name), scope2 =>
+            {
+                update_ops.Add(processor.update_op(this, grad));
+            });
+        }
+
+        Operation apply_updates = null;
+        if (global_step == null)
+        {
+            apply_updates = _finish(update_ops.ToArray(), name);
+        }
+        else
+        {
+
+        }
+
+        return apply_updates;
+    });
+}
+
+private void _create_slots(RefVariable[] var_list)
+{
+
+}
+
+public virtual Operation _finish(Operation[] update_ops, string name_scope)
+{
+    return control_flow_ops.group(update_ops, name_scope);
+}
+
+public virtual Operation _apply_dense(Tensor grad, RefVariable var)
+{
+    var alpha = math_ops.cast(LearningRateTensor, var.dtype.as_base_dtype());
+    return gen_training_ops.apply_gradient_descent(var, alpha, grad, use_locking: _use_locking).op;
+}
+
+public virtual void _prepare()
+{
+
+}
+
+private _OptimizableVariable _get_processor(RefVariable v)
+{
+    if(v is RefVariable)
+    {
+        return new _RefVariableProcessor(v);
+    }
+    else
+    {
+        throw new NotImplementedException("_get_processor");
+    }
+}

 /// <summary>
@@ -68,7 +160,7 @@ namespace Tensorflow
 /// A list of (gradient, variable) pairs. Variable is always present, but
 /// gradient can be `None`.
 /// </returns>
-public List<KeyValuePair<Tensor, RefVariable>> compute_gradients(Tensor loss,
+public Tuple<Tensor, RefVariable>[] compute_gradients(Tensor loss,
     List<RefVariable> var_list = null,
     int? aggregation_method = null,
     GateGradientType gate_gradients = GateGradientType.GATE_OP,
@@ -97,11 +189,19 @@ namespace Tensorflow
     aggregation_method: aggregation_method,
     colocate_gradients_with_ops: colocate_gradients_with_ops);

-    //if ((int)gate_gradients == Optimizer.GATE_GRAPH)
-    //grads = control_flow_ops.tuple(grads);
+    if ((int)gate_gradients == Optimizer.GATE_GRAPH)
+        grads = control_flow_ops.tuple(grads);

+    var grads_and_vars = Python.zip(grads, var_list)
+        .Select(x => new Tuple<Tensor, RefVariable>(x.Item1, x.Item2))
+        .ToArray();
+
+    return grads_and_vars;
 }

-return null;
+protected T _call_if_callable<T>(T param)
+{
+    return param;
+}
 }
 }
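This file is the heart of the fix for #170: minimize now runs end to end instead of returning null. The flow is compute_gradients, zip into (gradient, variable) tuples, apply_gradients, one update op per variable via its processor, and finally _finish, which groups everything into a single training op. A hedged sketch of the resulting API, assuming a scalar loss tensor built as in the LinearRegression example and the constructor shape assumed above:

    // The explicit two-step form of what minimize(loss) does internally:
    var optimizer = new GradientDescentOptimizer(0.01f);
    Tuple<Tensor, RefVariable>[] grads_and_vars = optimizer.compute_gradients(loss);
    Operation train_op = optimizer.apply_gradients(grads_and_vars);
    // train_op is a grouped NoOp; running it fires one ApplyGradientDescent
    // update per variable.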

src/TensorFlowNET.Core/Train/_OptimizableVariable.cs (+1 -1)

@@ -7,6 +7,6 @@ namespace Tensorflow
 public interface _OptimizableVariable
 {
     Tensor target();
-    void update_op(Graph g);
+    Operation update_op(Optimizer optimizer, Tensor g);
 }
 }

src/TensorFlowNET.Core/Train/gen_training_ops.py.cs (+24 -0)

@@ -0,0 +1,24 @@
+using System;
+using System.Collections.Generic;
+using System.Text;
+
+namespace Tensorflow
+{
+    public class gen_training_ops
+    {
+        public static OpDefLibrary _op_def_lib = new OpDefLibrary();
+
+        public static Tensor apply_gradient_descent(RefVariable var, Tensor alpha, Tensor delta, bool use_locking = false, string name = "")
+        {
+            var _op = _op_def_lib._apply_op_helper("ApplyGradientDescent", name, new
+            {
+                var,
+                alpha,
+                delta,
+                use_locking
+            });
+
+            return _op.outputs[0];
+        }
+    }
+}
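ApplyGradientDescent is the kernel that performs the in-place update var = var - alpha * delta and returns the variable ref, which is why _apply_dense takes .op on its result. The arithmetic it encapsulates, written out on a single scalar weight:

    float w = 0.5f, alpha = 0.01f, grad = 2.0f;
    w = w - alpha * grad;   // one SGD step: w == 0.48f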

src/TensorFlowNET.Core/Train/optimizer.py.cs (+4 -2)

@@ -26,9 +26,11 @@ namespace Tensorflow
     return _v._ref();
 }

-public void update_op(Graph g)
+public Operation update_op(Optimizer optimizer, Tensor g)
 {
-
+    var update_op = optimizer._apply_dense(g, _v);
+
+    return update_op;
 }
 }
 }

src/TensorFlowNET.Core/Variables/RefVariable.cs (+13 -0)

@@ -114,11 +114,24 @@ namespace Tensorflow
     return _variable;
 }

+public Tensor value()
+{
+    return _snapshot;
+}
+
 public Tensor _AsTensor()
 {
     return _snapshot;
 }

+public Tensor _TensorConversionFunction(bool as_ref = false)
+{
+    if (as_ref)
+        return _ref();
+    else
+        return value();
+}
+
 /// <summary>
 /// Attempt to guard against dependencies on uninitialized variables.
 /// </summary>
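_TensorConversionFunction is the hook that ops.internal_convert_to_tensor now calls for RefVariable (see the ops.py.cs change below): as_ref: true returns the mutable ref that in-place kernels such as ApplyGradientDescent require, while as_ref: false returns the read-only snapshot. Sketch, assuming v is an initialized RefVariable:

    Tensor read_value = v._TensorConversionFunction(as_ref: false); // snapshot (_snapshot)
    Tensor ref_value  = v._TensorConversionFunction(as_ref: true);  // mutable ref (_ref())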


src/TensorFlowNET.Core/Variables/variables.py.cs (+1 -1)

@@ -42,7 +42,7 @@ namespace Tensorflow
 /// <returns>An Op that run the initializers of all the specified variables.</returns>
 public static Operation variables_initializer(RefVariable[] var_list, string name = "init")
 {
-    return control_flow_ops.group(var_list.Select(x => x.initializer).ToList(), name);
+    return control_flow_ops.group(var_list.Select(x => x.initializer).ToArray(), name);
 }
 }
 }

src/TensorFlowNET.Core/ops.py.cs (+38 -15)

@@ -80,6 +80,16 @@ namespace Tensorflow
 }
 }

+public static Tensor convert_to_tensor_or_composite(Tensor value, TF_DataType dtype = TF_DataType.DtInvalid, string name = "")
+{
+    return internal_convert_to_tensor_or_composite(value: value, dtype: dtype, name: name, as_ref: false);
+}
+
+public static Tensor internal_convert_to_tensor_or_composite(Tensor value, TF_DataType dtype = TF_DataType.DtInvalid, string name = "", bool as_ref = false)
+{
+    return internal_convert_to_tensor<Tensor>(value, dtype: dtype.as_datatype_enum(), name: name, as_ref: as_ref);
+}
+
 /// <summary>
 /// Wrapper for `Graph.control_dependencies()` using the default graph.
 /// </summary>
@@ -222,7 +232,7 @@ namespace Tensorflow
     _colocate_with_for_gradient(tensor.op, null, ignore_existing);
 }

-private static void _colocate_with_for_gradient(Operation op, int? gradient_uid, bool ignore_existing = false)
+public static void _colocate_with_for_gradient(Operation op, string gradient_uid, bool ignore_existing = false)
 {
     var default_graph = get_default_graph();
     default_graph._colocate_with_for_gradient(op, gradient_uid, ignore_existing);
@@ -282,7 +292,7 @@ namespace Tensorflow
     return tf.Session();
 }

-public static Func<Operation, Tensor, (Tensor, Tensor)> get_gradient_function(Operation op)
+public static Func<Operation, Tensor, Tensor[]> get_gradient_function(Operation op)
 {
     if (op.inputs == null) return null;

@@ -293,31 +303,42 @@ namespace Tensorflow
     switch (oper.type)
     {
         case "Add":
-            return math_grad._AddGrad(oper, out_grads);
+            var add = math_grad._AddGrad(oper, out_grads);
+            return new Tensor[] { add.Item1, add.Item2 };
         case "Identity":
-            return math_grad._IdGrad(oper, out_grads);
+            var id = math_grad._IdGrad(oper, out_grads);
+            return new Tensor[] { id };
         case "Mul":
-            return math_grad._MulGrad(oper, out_grads);
+            var mul = math_grad._MulGrad(oper, out_grads);
+            return new Tensor[] { mul.Item1, mul.Item2 };
         case "Sum":
-            return math_grad._SumGrad(oper, out_grads);
+            var sum = math_grad._SumGrad(oper, out_grads);
+            return new Tensor[] { sum.Item1, sum.Item2 };
         case "Sub":
-            return math_grad._SubGrad(oper, out_grads);
+            var sub = math_grad._SubGrad(oper, out_grads);
+            return new Tensor[] { sub.Item1, sub.Item2 };
         case "Pow":
-            return math_grad._PowGrad(oper, out_grads);
+            var pow = math_grad._PowGrad(oper, out_grads);
+            return new Tensor[] { pow.Item1, pow.Item2 };
         case "RealDiv":
-            return math_grad._RealDivGrad(oper, out_grads);
+            var realdiv = math_grad._RealDivGrad(oper, out_grads);
+            return new Tensor[] { realdiv.Item1, realdiv.Item2 };
         default:
             throw new NotImplementedException($"get_gradient_function {oper.type}");
     }
     /*var result = typeof(math_grad).GetMethod($"_{op.type}Grad").Invoke(null, new object[] { op, out_grads });
     var p1 = result.GetType().GetProperty("Item1");
     var p2 = result.GetType().GetProperty("Item2");

     return (p1.GetValue(result, null) as Tensor, p2.GetValue(result, null) as Tensor);*/
 };
 }

+public static Tensor convert_to_tensor_or_indexed_slices(Tensor value, TF_DataType dtype = TF_DataType.DtInvalid, string name = "")
+{
+    return internal_convert_to_tensor_or_indexed_slices(value: value, dtype: dtype, name: name, as_ref: false);
+}
+
+public static Tensor internal_convert_to_tensor_or_indexed_slices(Tensor value, TF_DataType dtype = TF_DataType.DtInvalid, string name = "", bool as_ref = false)
+{
+    return value;
+}
+
 public static Tensor[] internal_convert_n_to_tensor<T>(T[] values, DataType dtype = DataType.DtInvalid,
     string name = "", DataType preferred_dtype = DataType.DtInvalid,
     bool as_ref = false)
@@ -345,6 +366,8 @@ namespace Tensorflow
     return constant_op.constant(Convert.ToInt32(value), name);
 case "Double":
     return constant_op.constant(Convert.ToDouble(value), name);
+case "RefVariable":
+    return (value as RefVariable)._TensorConversionFunction(as_ref: as_ref);
 default:
     throw new NotImplementedException($"internal_convert_to_tensor: Can't convert {typeof(T).Name} to Tensor");


test/TensorFlowNET.Examples/LinearRegression.cs (+2 -2)

@@ -63,11 +63,11 @@ namespace TensorFlowNET.Examples
 // Fit all training data
 for (int i = 0; i < training_epochs; i++)
 {
-    foreach((double x, double y) in Python.zip<double>(train_X, train_Y))
+    foreach(var (x, y) in Python.zip<double>(train_X, train_Y))
     {
         var feed_dict = new Dictionary<Tensor, NDArray>();

-        //sess.run(optimizer, feed_dict);
+        // sess.run(optimizer, feed_dict);
     }
 }
 });

