
implement _SwitchGrad when merge_grad is not null.

tags/v0.12
Oceania2018 · 6 years ago · commit ad250d0c79
3 changed files with 67 additions and 68 deletions:
  1. src/TensorFlowNET.Core/Gradients/control_flow_grad.cs (+8 -11)
  2. src/TensorFlowNET.Core/Gradients/gradients_util.cs (+58 -53)
  3. src/TensorFlowNET.Core/Operations/Operation.cs (+1 -4)

src/TensorFlowNET.Core/Gradients/control_flow_grad.cs (+8 -11)

@@ -48,7 +48,12 @@ namespace Tensorflow.Gradients
             {
                 var merge_grad = grad_ctxt.grad_state.switch_map.get(op);
                 if (merge_grad != null)
-                    throw new NotImplementedException("_SwitchGrad merge_grad != null");
+                {
+                    if (grads[1] != null)
+                        control_flow_ops._AddNextAndBackEdge(merge_grad, grads[1],
+                            enforce_shape_invariant: false);
+                    return new Tensor[] { null, null };
+                }
                 else if (grads[0] != null)
                 {
                     merge_grad = merge(new[] { grads[0], grads[0] }, name: "b_switch")[0];
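The new branch mirrors TensorFlow's Python _SwitchGrad: the gradient of a loop Switch is a Merge that is created on the first visit (from the Exit side) and patched on the second visit by wiring the loop-body gradient in through a NextIteration back edge. Below is a minimal sketch of that two-visit protocol using hypothetical stand-in types, not the library's own API:

    using System;
    using System.Collections.Generic;

    // Stand-in for a graph tensor; illustrative only.
    class Tensor
    {
        public string Name;
        public Tensor(string name) { Name = name; }
    }

    static class SwitchGradSketch
    {
        // switch_map: forward Switch op -> the Merge built for its gradient.
        static readonly Dictionary<string, Tensor> switch_map = new Dictionary<string, Tensor>();

        // grads[0] arrives from the Exit branch, grads[1] from the loop body.
        static Tensor[] SwitchGrad(string op, Tensor[] grads)
        {
            if (switch_map.TryGetValue(op, out var merge_grad))
            {
                // Second visit: the Merge already exists, so feed the body
                // gradient back into it through a NextIteration back edge.
                if (grads[1] != null)
                    AddNextAndBackEdge(merge_grad, grads[1]);
                return new Tensor[] { null, null };
            }
            if (grads[0] != null)
            {
                // First visit: build the Merge with a placeholder second
                // input that the next visit will patch.
                merge_grad = new Tensor($"b_switch({grads[0].Name})");
                switch_map[op] = merge_grad;
                return new[] { merge_grad, null };
            }
            return new Tensor[] { null, null };
        }

        static void AddNextAndBackEdge(Tensor merge, Tensor grad)
            => Console.WriteLine($"back edge: {grad.Name} -> {merge.Name}");

        static void Main()
        {
            var first = SwitchGrad("while/Switch", new[] { new Tensor("dExit"), null });
            Console.WriteLine(first[0].Name);                                       // b_switch(dExit)
            SwitchGrad("while/Switch", new Tensor[] { null, new Tensor("dBody") }); // patches the Merge
        }
    }

Returning { null, null } on the second visit is deliberate: the gradient has already been routed through the back edge, so nothing further flows out of this op.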
@@ -233,17 +238,9 @@ namespace Tensorflow.Gradients
                 return grads;
             if (op.get_attr<bool>("is_constant"))
             {
-                throw new NotImplementedException("_EnterGrad is_constant");
-                // Add a gradient accumulator for each loop invariant.
-                //if isinstance(grad, ops.Tensor) :
-                //    result = grad_ctxt.AddBackpropAccumulator(op, grad)
-                //elif isinstance(grad, ops.IndexedSlices) :
-                //    result = grad_ctxt.AddBackpropIndexedSlicesAccumulator(op, grad)
-                //else:
-                //    # TODO(yuanbyu, lukasr): Add support for SparseTensor.
-                //    raise TypeError("Type %s not supported" % type(grad))
-
+                // Add a gradient accumulator for each loop invariant.
+                result = grad_ctxt.AddBackpropAccumulator(op, grad);
             }
             else
             {
                 result = control_flow_ops.exit(grad);
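The deleted Python comments describe what this commit ports for the dense-Tensor case only (the IndexedSlices and SparseTensor branches remain unported): a loop-invariant tensor enters every iteration unchanged, so its total gradient is the accumulated sum of the per-iteration gradients. A toy illustration of that summation rule, not the graph-building AddBackpropAccumulator itself:

    using System;
    using System.Linq;

    static class BackpropAccumulatorSketch
    {
        // A loop invariant is read unchanged by every iteration, so its total
        // gradient accumulates the gradient contributed by each iteration.
        static double AccumulateInvariantGrad(double[] perIterationGrads)
            => perIterationGrads.Sum();

        static void Main()
        {
            // Three iterations contribute 0.5, 1.0 and 2.0; d(cost)/d(invariant) = 3.5.
            Console.WriteLine(AccumulateInvariantGrad(new[] { 0.5, 1.0, 2.0 }));
        }
    }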


src/TensorFlowNET.Core/Gradients/gradients_util.cs (+58 -53)

@@ -123,10 +123,7 @@ namespace Tensorflow
                 {
                     // generate gradient subgraph for op.
                     var op = queue.Dequeue();
-                    if(op.name == "rnn/while/Exit")
-                    {
-
-                    }
+
                     _maybe_colocate_with(op, gradient_uid, colocate_gradients_with_ops);
                     {
                         if (loop_state != null)
@@ -136,6 +133,7 @@ namespace Tensorflow
                             loop_state.ExitGradWhileContext(op, before: true);

                         Tensor[] in_grads = null;
+                        Func<Operation, Tensor[], Tensor[]> grad_fn = null;
                         var is_partitioned_call = _IsPartitionedCall(op);
                         var is_func_call = false;
                         var has_out_grads = out_grads.Exists(x => x != null);
@@ -143,8 +141,6 @@ namespace Tensorflow
                         {
                             // A grad_fn must be defined, either as a function or as None
                             // for ops that do not have gradients.
-
-                            Func<Operation, Tensor[], Tensor[]> grad_fn = null;
                            try
                            {
                                grad_fn = ops.get_gradient_function(op);
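Hoisting the grad_fn declaration out of this block (previous two hunks) keeps it in scope after the lookup closes. The lookup itself resolves a gradient function by op type; if none is registered, the code below raises LookupError. A minimal sketch of such an op-type registry, with illustrative names and a simplified scalar signature rather than the library's internal shape:

    using System;
    using System.Collections.Generic;

    static class GradRegistrySketch
    {
        // op type -> gradient function; args = { forward output y, upstream grad }.
        static readonly Dictionary<string, Func<double[], double[]>> registry =
            new Dictionary<string, Func<double[], double[]>>
            {
                // d(tanh x)/dx = 1 - y^2 where y = tanh(x).
                ["Tanh"] = args => new[] { args[1] * (1 - args[0] * args[0]) },
            };

        static Func<double[], double[]> GetGradientFunction(string opType)
        {
            if (!registry.TryGetValue(opType, out var fn))
                throw new KeyNotFoundException($"No gradient defined for op type: {opType}");
            return fn;
        }

        static void Main()
        {
            var tanhGrad = GetGradientFunction("Tanh");
            // y = 0.5, upstream = 2.0 -> 2.0 * (1 - 0.25) = 1.5
            Console.WriteLine(tanhGrad(new[] { 0.5, 2.0 })[0]);
        }
    }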
@@ -167,61 +163,57 @@ namespace Tensorflow
                                    throw new LookupError($"No gradient defined for operation '{op.name}' (op type: {op.type})");
                                }
                            }
+                        }

-                            if (loop_state != null)
-                                loop_state.EnterGradWhileContext(op, before: false);
-
-                            if ((is_func_call || grad_fn != null) && has_out_grads)
-                            {
-                                // NOTE: If _AggregatedGrads didn't compute a value for the i'th
-                                // output, it means that the cost does not depend on output[i],
-                                // therefore dC/doutput[i] is 0.
-                                foreach (var (i, out_grad) in enumerate(out_grads))
-                                {
-                                    if (out_grad == null &&
-                                        (grad_fn == null || _IsTrainable(op.outputs[i])))
-                                    {
-                                        // Only trainable outputs or outputs for a function call that
-                                        // will use SymbolicGradient get a zero gradient. Gradient
-                                        // functions should ignore the gradient for other outputs.
-                                        if (loop_state != null)
-                                            out_grads[i] = new List<Tensor> { loop_state.ZerosLike(op, i) };
-                                        else
-                                            out_grads[i] = new List<Tensor> { control_flow_ops.ZerosLikeOutsideLoop(op, i) };
-                                    }
-                                }
-
-                                tf_with(ops.name_scope(op.name + "_grad"), scope1 =>
-                                {
-                                    if (grad_fn != null)
-                                    {
-                                        in_grads = _MaybeCompile(grad_scope,
-                                            op,
-                                            out_grads.Where(x => x != null).Select(x => x[0]).ToArray(),
-                                            null,
-                                            grad_fn);
-                                    }
-                                    else
-                                    {
-                                        throw new NotImplementedException("lambda: _SymGrad(op, out_grads)");
-                                    }
-                                    _VerifyGeneratedGradients(in_grads, op);
-                                    if (gate_gradients && in_grads.Count(x => x != null) > 1)
-                                    {
-                                        ops._colocate_with_for_gradient(null, gradient_uid, ignore_existing: true);
-                                        in_grads = control_flow_ops.tuple(in_grads);
-                                    }
-                                });
-                            }
-                            else
-                            {
-                                // If no grad_fn is defined or none of out_grads is available,
-                                // just propagate a list of None backwards.
-                                in_grads = new Tensor[_NonEagerInputs(op, xs).Count()];
-                            }
+                        if (loop_state != null)
+                            loop_state.EnterGradWhileContext(op, before: false);
+
+                        if ((is_func_call || grad_fn != null) && has_out_grads)
+                        {
+                            // NOTE: If _AggregatedGrads didn't compute a value for the i'th
+                            // output, it means that the cost does not depend on output[i],
+                            // therefore dC/doutput[i] is 0.
+                            foreach (var (i, out_grad) in enumerate(out_grads))
+                            {
+                                if (out_grad == null &&
+                                    (grad_fn == null || _IsTrainable(op.outputs[i])))
+                                {
+                                    // Only trainable outputs or outputs for a function call that
+                                    // will use SymbolicGradient get a zero gradient. Gradient
+                                    // functions should ignore the gradient for other outputs.
+                                    if (loop_state != null)
+                                        out_grads[i] = new List<Tensor> { loop_state.ZerosLike(op, i) };
+                                    else
+                                        out_grads[i] = new List<Tensor> { control_flow_ops.ZerosLikeOutsideLoop(op, i) };
+                                }
+                            }
+
+                            tf_with(ops.name_scope(op.name + "_grad"), scope1 =>
+                            {
+                                if (grad_fn != null)
+                                {
+                                    in_grads = _MaybeCompile(grad_scope,
+                                        op,
+                                        out_grads.Where(x => x != null).Select(x => x[0]).ToArray(),
+                                        null,
+                                        grad_fn);
+                                }
+                                else
+                                {
+                                    throw new NotImplementedException("lambda: _SymGrad(op, out_grads)");
+                                }
+                                _VerifyGeneratedGradients(in_grads, op);
+                                if (gate_gradients && in_grads.Count(x => x != null) > 1)
+                                {
+                                    ops._colocate_with_for_gradient(null, gradient_uid, ignore_existing: true);
+                                    in_grads = control_flow_ops.tuple(in_grads);
+                                }
+                            });
+                        }
+                        else
+                        {
+                            // If no grad_fn is defined or none of out_grads is available,
+                            // just propagate a list of None backwards.
+                            in_grads = new Tensor[_NonEagerInputs(op, xs).Count()];
+                        }
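Most of this hunk is a re-nesting: the gradient-application code moves out of the lookup block so it can see the hoisted grad_fn. The zero-fill rule itself is unchanged: if _AggregatedGrads produced nothing for output i, the cost does not depend on it, so dC/d(output[i]) is taken as zero before grad_fn runs. A scalar-level sketch of that rule (hypothetical, one value per output):

    using System;
    using System.Linq;

    static class ZeroFillSketch
    {
        // A missing upstream gradient means "cost does not depend on this
        // output", so it is replaced by zero before calling the grad function.
        static double[] FillMissing(double?[] outGrads)
            => outGrads.Select(g => g ?? 0.0).ToArray();

        static void Main()
        {
            var filled = FillMissing(new double?[] { 2.0, null, -1.0 });
            Console.WriteLine(string.Join(", ", filled)); // 2, 0, -1
        }
    }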

@@ -370,7 +362,16 @@ namespace Tensorflow
                grads[op.name] = op_grads;
            }
            var t_grads = op_grads[t.value_index];
-            t_grads.Add(grad);
+            if (t_grads.Count == 0)
+                t_grads.Add(grad);
+            else
+                op_grads[t.value_index][0] = grad;
+
+            /*if (control_flow_util.IsLoopSwitch(op) &&
+                t_grads[0] == null)
+                op_grads[t.value_index] = new List<Tensor> { grad };
+            else
+                t_grads.Add(grad);*/
        }

        private static IEnumerable<Tensor> _NonEagerInputs(Operation op, Tensor[] xs)
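_SetGrad now overwrites the single stored gradient for an output instead of appending a second entry; the commented-out block preserves the Python variant, which special-cases loop switches whose placeholder gradient is still null. A sketch of the overwrite-not-append slot, with simplified keys and string gradients in place of the real tensor types:

    using System;
    using System.Collections.Generic;

    static class SetGradSketch
    {
        // Gradient slot keyed by (op name, output index); one entry per output.
        static readonly Dictionary<(string op, int index), List<string>> grads =
            new Dictionary<(string op, int index), List<string>>();

        static void SetGrad(string op, int index, string grad)
        {
            if (!grads.TryGetValue((op, index), out var slot))
                grads[(op, index)] = slot = new List<string>();
            if (slot.Count == 0)
                slot.Add(grad);   // first gradient recorded for this output
            else
                slot[0] = grad;   // overwrite the placeholder, do not append
        }

        static void Main()
        {
            SetGrad("while/Switch", 0, "placeholder");
            SetGrad("while/Switch", 0, "real_grad");
            Console.WriteLine(grads[("while/Switch", 0)].Count); // 1
            Console.WriteLine(grads[("while/Switch", 0)][0]);    // real_grad
        }
    }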
@@ -379,7 +380,8 @@ namespace Tensorflow
                yield return op.inputs[i];
        }

-        private static List<List<Tensor>> _AggregatedGrads(Dictionary<string, List<List<Tensor>>> grads, Operation op, string gradient_uid, object loop_state, int aggregation_method = 0)
+        private static List<List<Tensor>> _AggregatedGrads(Dictionary<string, List<List<Tensor>>> grads, Operation op, string gradient_uid,
+            ControlFlowState loop_state, int aggregation_method = 0)
        {
            var out_grads = _GetGrads(grads, op);
@@ -387,7 +389,10 @@ namespace Tensorflow
            {
                if (loop_state != null)
                {
-
+                    if (out_grads.Count > 1 &&
+                        out_grads[1].Count > 0 &&
+                        control_flow_util.IsLoopSwitch(op))
+                        continue;
                }

                // Aggregate multiple gradients, and convert [] to None.
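The new guard skips aggregation for the second output of a loop Switch when inside a gradient while-context: that output's gradient is delivered through the NextIteration back edge added in _SwitchGrad, so summing it here would double-count it. A predicate-level sketch of the condition, with illustrative names:

    using System;

    static class LoopSwitchSkipSketch
    {
        // Inside a while-loop gradient, output 1 of a loop Switch gets its
        // gradient via the back edge, so aggregation must leave it alone.
        static bool SkipAggregation(bool inGradLoopState, bool isLoopSwitch, int pendingGradsOnOutput1)
            => inGradLoopState && isLoopSwitch && pendingGradsOnOutput1 > 0;

        static void Main()
        {
            Console.WriteLine(SkipAggregation(true, true, 1));  // True: handled by the back edge
            Console.WriteLine(SkipAggregation(false, true, 1)); // False: no loop state, aggregate normally
        }
    }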


src/TensorFlowNET.Core/Operations/Operation.cs (+1 -4)

@@ -182,10 +182,7 @@ namespace Tensorflow
            // This will be set by self.inputs.
            if (op_def == null)
                op_def = g.GetOpDef(node_def.Op);
-            if(node_def.Name == "gradients/rnn/while/basic_rnn_cell/Tanh_grad/TanhGrad/f_acc")
-            {
-            }
-
+
            var grouped_inputs = _reconstruct_sequence_inputs(op_def, inputs, node_def.Attr);
            _handle = ops._create_c_op(g, node_def, grouped_inputs, control_input_ops.ToArray());
            _is_stateful = op_def.IsStateful;
