
_TopKGrad, _SoftmaxCrossEntropyWithLogitsGrad

tags/v0.8.0
haiping008 committed 6 years ago · commit bad610d533
10 changed files with 174 additions and 18 deletions
 1. src/TensorFlowNET.Core/Gradients/gradients_impl.py.cs (+19, -9)
 2. src/TensorFlowNET.Core/Gradients/nn_grad.py.cs (+55, -4)
 3. src/TensorFlowNET.Core/Gradients/ops.gradient_function_mapping.cs (+10, -1)
 4. src/TensorFlowNET.Core/Operations/NnOps/gen_nn_ops.cs (+10, -0)
 5. src/TensorFlowNET.Core/Operations/Operation.cs (+4, -1)
 6. src/TensorFlowNET.Core/Operations/array_ops.py.cs (+33, -3)
 7. src/TensorFlowNET.Core/Operations/control_flow_ops.py.cs (+14, -0)
 8. src/TensorFlowNET.Core/Operations/control_flow_util.py.cs (+10, -0)
 9. src/TensorFlowNET.Core/Operations/nn_ops.cs (+17, -0)
10. src/TensorFlowNET.Core/ops.py.cs (+2, -0)

src/TensorFlowNET.Core/Gradients/gradients_impl.py.cs (+19, -9)

@@ -131,6 +131,17 @@ namespace Tensorflow
             // for ops that do not have gradients.
             var grad_fn = ops.get_gradient_function(op);
 
+            foreach (var (i, out_grad) in enumerate(out_grads))
+            {
+                if (out_grad == null)
+                {
+                    if (loop_state != null)
+                        ;
+                    else
+                        out_grads[i] = control_flow_ops.ZerosLikeOutsideLoop(op, i);
+                }
+            }
+
             with(ops.name_scope(op.name + "_grad"), scope1 =>
             {
                 string name1 = scope1;
@@ -240,28 +251,27 @@ namespace Tensorflow
         private static Tensor[] _AggregatedGrads(Dictionary<string, Tensor[][]> grads, Operation op, string gradient_uid, object loop_state, int aggregation_method = 0)
         {
             var out_grads = _GetGrads(grads, op);
-            for(int i = 0; i < out_grads.Length; i++)
+            var return_grads = new Tensor[out_grads.Length];
+
+            foreach (var (i, out_grad) in enumerate(out_grads))
             {
-                var out_grad = out_grads[i];
-                if(loop_state != null)
+                if (loop_state != null)
                 {
 
                 }
 
                 // Grads have to be Tensors or IndexedSlices
 
                 // Aggregate multiple gradients, and convert [] to None.
-                if(out_grad != null)
+                if (out_grad != null)
                 {
-                    if(out_grad.Length < 2)
+                    if (out_grad.Length < 2)
                     {
-                        string used = "nop";
-                        return new Tensor[] { out_grad[0] };
+                        return_grads[i] = out_grad[0];
                     }
                 }
             }
 
-            return null;
+            return return_grads;
         }
 
         /// <summary>
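
Note: both hunks feed the same invariant — `grad_fn` must always see a complete gradient list. The first backfills any missing upstream gradient with zeros (via the new `control_flow_ops.ZerosLikeOutsideLoop`, later in this commit); the second makes `_AggregatedGrads` return one gradient slot per op output instead of returning the first match and `null` otherwise. A minimal standalone sketch of the aggregation pattern, with plain arrays in place of `Tensor`; the element-wise sum branch is an assumption about the eventual behavior, since the commit only implements the single-contribution path:

    using System.Collections.Generic;

    static class GradAggregationSketch
    {
        // outGrads[i] holds every gradient contribution for op output i;
        // null or empty means "no gradient flowed back" and becomes zeros.
        public static float[][] Aggregate(List<float[]>[] outGrads, int width)
        {
            var result = new float[outGrads.Length][];
            for (int i = 0; i < outGrads.Length; i++)
            {
                var contributions = outGrads[i];
                if (contributions == null || contributions.Count == 0)
                    result[i] = new float[width];               // zeros-like fallback
                else if (contributions.Count < 2)
                    result[i] = contributions[0];               // single contribution: pass through
                else
                    result[i] = SumElementwise(contributions);  // assumed next step, not in this commit
            }
            return result;
        }

        static float[] SumElementwise(List<float[]> xs)
        {
            var acc = new float[xs[0].Length];
            foreach (var x in xs)
                for (int j = 0; j < x.Length; j++)
                    acc[j] += x[j];
            return acc;
        }
    }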


src/TensorFlowNET.Core/Gradients/nn_grad.py.cs (+55, -4)

@@ -1,5 +1,6 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using Tensorflow.Operations;

@@ -13,16 +14,17 @@ namespace Tensorflow.Gradients
         /// <param name="op"></param>
         /// <param name="grad"></param>
         /// <returns></returns>
-        public static Tensor[] _BiasAddGrad(Operation op, Tensor grad)
+        public static Tensor[] _BiasAddGrad(Operation op, Tensor[] grads)
         {
+            var grad = grads[0];
             string data_format = op.get_attr("data_format")?.ToString();
             var bias_add_grad = gen_nn_ops.bias_add_grad(out_backprop: grad, data_format: data_format);
             return new Tensor[] { grad, bias_add_grad };
         }
 
-        public static Tensor[] _ReluGrad(Operation op, Tensor grad)
+        public static Tensor[] _ReluGrad(Operation op, Tensor[] grads)
         {
-            return new Tensor[] { gen_nn_ops.relu_grad(grad, op.outputs[0]) };
+            return new Tensor[] { gen_nn_ops.relu_grad(grads[0], op.outputs[0]) };
         }
 
         /// <summary>
/// <summary>
@@ -37,8 +39,57 @@ namespace Tensorflow.Gradients
             var grad_loss = grads[0];
             var grad_grad = grads[1];
             var softmax_grad = op.outputs[1];
+            var grad = _BroadcastMul(grad_loss, softmax_grad);
 
-            throw new NotImplementedException("_SoftmaxCrossEntropyWithLogitsGrad");
+            var logits = op.inputs[0];
+            if (grad_grad != null && !IsZero(grad_grad))
+            {
+                throw new NotImplementedException("_SoftmaxCrossEntropyWithLogitsGrad");
+            }
+
+            return new Tensor[]
+            {
+                grad,
+                _BroadcastMul(grad_loss, -nn_ops.log_softmax(logits))
+            };
         }
+
+        private static bool IsZero(Tensor g)
+        {
+            if (new string[] { "ZerosLike", "Zeros" }.Contains(g.op.type))
+                return true;
+
+            throw new NotImplementedException("IsZero");
+        }
+
+        private static Tensor _BroadcastMul(Tensor vec, Tensor mat)
+        {
+            vec = array_ops.expand_dims(vec, -1);
+            return vec * mat;
+        }
+
+        /// <summary>
+        /// Return the gradients for TopK.
+        /// </summary>
+        /// <param name="op"></param>
+        /// <param name="grads"></param>
+        /// <returns></returns>
+        public static Tensor[] _TopKGrad(Operation op, Tensor[] grads)
+        {
+            var grad = grads[0];
+            var _ = grads[1];
+
+            var in_shape = array_ops.shape(op.inputs[0]);
+            var ind_shape = array_ops.shape(op.outputs[1]);
+
+            // int32 is not supported on GPU hence up-casting
+            var ind_lastdim = array_ops.gather(math_ops.cast(
+                ind_shape, TF_DataType.TF_INT64), array_ops.size(ind_shape) - 1);
+
+            // Flatten indices to 2D.
+            var ind_2d = array_ops.reshape(op.outputs[1], array_ops.stack(new object[] { -1, ind_lastdim }));
+
+            throw new NotImplementedException("nn_grad._TopKGrad");
+        }
     }
 }
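
Note: for the fused `SoftmaxCrossEntropyWithLogits` op, `op.outputs[1]` already holds the backprop term `softmax(logits) - labels`, so the gradient with respect to the logits is just the incoming loss gradient broadcast against it, and the gradient with respect to the labels is `-grad_loss * log_softmax(logits)`. `_BroadcastMul` expands `grad_loss` from `[batch]` to `[batch, 1]` so it scales each row of a `[batch, classes]` matrix. `_TopKGrad` stops after flattening the indices to 2-D; scattering `grad` back into `in_shape` is still a `NotImplementedException`. A plain-array sketch of the broadcast (illustrative only, not the library's kernel):

    static class BroadcastMulSketch
    {
        // vec: [batch] loss gradients; mat: [batch, classes] per-class values.
        // Expanding vec to [batch, 1] makes the multiply scale each row of mat,
        // which is what expand_dims(vec, -1) * mat does in the diff above.
        public static float[,] BroadcastMul(float[] vec, float[,] mat)
        {
            int batch = mat.GetLength(0), classes = mat.GetLength(1);
            var result = new float[batch, classes];
            for (int b = 0; b < batch; b++)
                for (int c = 0; c < classes; c++)
                    result[b, c] = vec[b] * mat[b, c];
            return result;
        }
    }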

src/TensorFlowNET.Core/Gradients/ops.gradient_function_mapping.cs (+10, -1)

@@ -14,14 +14,18 @@ namespace Tensorflow
             // map tensorflow\python\ops\math_grad.py
             return (oper, out_grads) =>
             {
-                Console.WriteLine($"get_gradient_function: {oper.type} '{oper.name}'");
+                // Console.WriteLine($"get_gradient_function: {oper.type} '{oper.name}'");
 
                 switch (oper.type)
                 {
                     case "Add":
                         return math_grad._AddGrad(oper, out_grads);
+                    case "BiasAdd":
+                        return nn_grad._BiasAddGrad(oper, out_grads);
                     case "Identity":
                         return math_grad._IdGrad(oper, out_grads);
+                    case "MatMul":
+                        return math_grad._MatMulGrad(oper, out_grads);
                     case "Mul":
                         return math_grad._MulGrad(oper, out_grads);
                     case "Mean":
@@ -36,8 +40,13 @@ namespace Tensorflow
                         return math_grad._RealDivGrad(oper, out_grads);
                     case "Reshape":
                         return array_grad._ReshapeGrad(oper, out_grads);
                     case "Relu":
                         return nn_grad._ReluGrad(oper, out_grads);
+                    case "SoftmaxCrossEntropyWithLogits":
+                        return nn_grad._SoftmaxCrossEntropyWithLogitsGrad(oper, out_grads);
+                    case "TopK":
+                    case "TopKV2":
+                        return nn_grad._TopKGrad(oper, out_grads);
                     default:
                         throw new NotImplementedException($"get_gradient_function {oper.type}");
                 }
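
Note: the lambda's shape is `(Operation, Tensor[]) -> Tensor[]`, which is why `_BiasAddGrad` and `_ReluGrad` were reworked above to take `Tensor[] grads`. As the switch grows, a dictionary registry is one possible refactoring; a sketch reusing this repo's types (not what the library does today):

    using System;
    using System.Collections.Generic;
    using Tensorflow;
    using Tensorflow.Gradients;

    static class GradientRegistrySketch
    {
        // Each op type maps to a gradient function with the same shape as the
        // lambda above: (op, incoming output gradients) -> input gradients.
        static readonly Dictionary<string, Func<Operation, Tensor[], Tensor[]>> fns =
            new Dictionary<string, Func<Operation, Tensor[], Tensor[]>>
            {
                ["BiasAdd"] = nn_grad._BiasAddGrad,
                ["Relu"] = nn_grad._ReluGrad,
                ["TopK"] = nn_grad._TopKGrad,
                ["TopKV2"] = nn_grad._TopKGrad,   // TopK and TopKV2 share one gradient
            };

        public static Func<Operation, Tensor[], Tensor[]> Lookup(string opType)
            => fns.TryGetValue(opType, out var fn)
                ? fn
                : throw new NotImplementedException($"get_gradient_function {opType}");
    }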


src/TensorFlowNET.Core/Operations/NnOps/gen_nn_ops.cs (+10, -0)

@@ -94,6 +94,16 @@ namespace Tensorflow.Operations
             return _op.outputs;
         }
 
+        public static Tensor log_softmax(Tensor logits, string name = null)
+        {
+            var _op = _op_def_lib._apply_op_helper("LogSoftmax", name: name, args: new
+            {
+                logits
+            });
+
+            return _op.outputs[0];
+        }
+
         public static Tensor max_pool(Tensor input,
             int[] ksize,
             int[] strides,
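
Note: this follows the repo's generated-op pattern: `_apply_op_helper` resolves the native op definition by name ("LogSoftmax"), adds a graph node whose inputs come from the anonymous-object args, and the wrapper returns the node's single output. The same shape, sketched for a hypothetical `softplus` wrapper that this commit does not add ("Softplus" is a standard TensorFlow unary op whose input is named `features`):

    // Hypothetical wrapper, same pattern as log_softmax above.
    public static Tensor softplus(Tensor features, string name = null)
    {
        var _op = _op_def_lib._apply_op_helper("Softplus", name: name, args: new
        {
            features
        });

        return _op.outputs[0];
    }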


src/TensorFlowNET.Core/Operations/Operation.cs (+4, -1)

@@ -185,7 +185,10 @@ namespace Tensorflow
             if (oneof_value == "type")
                 return x.Type;
 
-            return x.GetType().GetProperty(oneof_value).GetValue(x);
+            object result = x.GetType().GetProperty(oneof_value).GetValue(x);
+            if (result is Google.Protobuf.ByteString byteString)
+                return byteString.ToStringUtf8();
+            return result;
         }
 
         public TF_AttrMetadata GetAttributeMetadata(string attr_name, Status s)
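
Note: string-typed attr values travel through the `AttrValue` proto as `Google.Protobuf.ByteString`, so without this decode, `op.get_attr("data_format")?.ToString()` in `_BiasAddGrad` would not yield the attr's text. A tiny illustration (the value is made up):

    using Google.Protobuf;

    // A proto "s" (string) attr surfaces as a ByteString; ToStringUtf8
    // recovers the original text, e.g. the "NHWC" data_format in _BiasAddGrad.
    ByteString raw = ByteString.CopyFromUtf8("NHWC");
    object attr = raw;
    string text = attr is ByteString bs ? bs.ToStringUtf8() : attr?.ToString();
    // text == "NHWC"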


src/TensorFlowNET.Core/Operations/array_ops.py.cs (+33, -3)

@@ -46,10 +46,10 @@ namespace Tensorflow
             }
         }
 
-        public static Tensor _autopacking_helper(Tensor[] list_or_tuple, TF_DataType dtype, string name)
+        public static Tensor _autopacking_helper(object[] list_or_tuple, TF_DataType dtype, string name)
         {
             var must_pack = false;
-            var converted_elems = new List<Tensor>();
+            var converted_elems = new List<object>();
             return with(ops.name_scope(name), scope =>
             {
                 foreach (var (i, elem) in enumerate(list_or_tuple))
@@ -58,7 +58,27 @@ namespace Tensorflow
                     must_pack = true;
                 }
 
-                return gen_array_ops.pack(converted_elems.ToArray(), name: scope);
+                if (must_pack)
+                {
+                    var elems_as_tensors = new List<Tensor>();
+                    foreach (var (i, elem) in enumerate(converted_elems))
+                    {
+                        if (elem is Tensor tensor)
+                            elems_as_tensors.Add(tensor);
+                        else
+                        {
+                            var elem_tensor = constant_op.constant(elem, dtype: dtype, name: i.ToString());
+                            elems_as_tensors.Add(elem_tensor);
+                        }
+                    }
+                    return gen_array_ops.pack(elems_as_tensors.ToArray(), name: scope);
+                }
+                else
+                {
+                    // return converted_elems.ToArray();
+                    throw new NotImplementedException("_autopacking_helper.converted_elems");
+                }
             });
         }
@@ -355,5 +375,15 @@ namespace Tensorflow
         public static Tensor slice<Tb, Ts>(Tensor input, Tb[] begin, Ts[] size, string name = null)
             => gen_array_ops.slice(input, begin, size, name: name);
 
+        public static Tensor stack(object values, int axis = 0, string name = "stack")
+        {
+            if (axis == 0)
+                // If the input is a constant list, it can be converted to a constant op
+                return ops.convert_to_tensor(values, name: name);
+
+            throw new NotImplementedException("array_ops.stack");
+        }
     }
 }
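
Note: `_autopacking_helper` now accepts a heterogeneous `object[]`: elements that are already `Tensor`s pass through, and everything else is wrapped with `constant_op.constant` (named by its list position) before the whole list goes to the native `Pack` op. That is what lets `_TopKGrad` mix a literal with a tensor in a single shape expression, as in the nn_grad diff above:

    // From _TopKGrad: -1 is a plain int, ind_lastdim is a Tensor computed
    // from the indices' shape; stack() packs both into one 1-D shape tensor.
    var ind_2d = array_ops.reshape(
        op.outputs[1],
        array_ops.stack(new object[] { -1, ind_lastdim }));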

src/TensorFlowNET.Core/Operations/control_flow_ops.py.cs (+14, -0)

@@ -3,6 +3,7 @@ using System.Collections.Generic;
 using System.Linq;
 using System.Text;
 using Tensorflow.Operations;
+using util = Tensorflow.control_flow_util;
 
 namespace Tensorflow
 {
@@ -226,5 +227,18 @@ namespace Tensorflow
                 return gen_control_flow_ops.@switch(data, pred, name: name);
             });
         }
+
+        public static Tensor ZerosLikeOutsideLoop(Operation op, int index)
+        {
+            var val = op.outputs[index];
+            if (!util.IsSwitch(op))
+            {
+                if (val.dtype == TF_DataType.TF_RESOURCE)
+                    throw new NotImplementedException("ZerosLikeOutsideLoop");
+
+                return array_ops.zeros_like(val, optimize: false);
+            }
+
+            throw new NotImplementedException("ZerosLikeOutsideLoop");
+        }
     }
 }
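
Note: outside of control flow, a missing gradient is simply a zeros tensor shaped like the forward output. `Switch` outputs (the dead branch of a `cond`) and `TF_RESOURCE` tensors would need special handling and stay unimplemented. `optimize: false` mirrors upstream TensorFlow's `ZerosLikeOutsideLoop`, where the flag disables folding `zeros_like` into a statically shaped constant. It is consumed by the gradient loop in the first file of this commit:

    // In _GradientsHelper: outputs with no incoming gradient are backfilled
    // before grad_fn runs (loop_state handling is still a stub).
    if (out_grad == null && loop_state == null)
        out_grads[i] = control_flow_ops.ZerosLikeOutsideLoop(op, i);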

src/TensorFlowNET.Core/Operations/control_flow_util.py.cs (+10, -0)

@@ -15,5 +15,15 @@ namespace Tensorflow
         {
             return op.type == "Exit" || op.type == "RefExit";
         }
+
+        /// <summary>
+        /// Return true if `op` is a Switch.
+        /// </summary>
+        /// <param name="op"></param>
+        /// <returns></returns>
+        public static bool IsSwitch(Operation op)
+        {
+            return op.type == "Switch" || op.type == "RefSwitch";
+        }
     }
 }

src/TensorFlowNET.Core/Operations/nn_ops.cs (+17, -0)

@@ -42,6 +42,23 @@ namespace Tensorflow
             });
         }
 
+        public static Tensor log_softmax(Tensor logits, int axis = -1, string name = null)
+        {
+            return _softmax(logits, gen_nn_ops.log_softmax, axis, name);
+        }
+
+        public static Tensor _softmax(Tensor logits, Func<Tensor, string, Tensor> compute_op, int dim = -1, string name = null)
+        {
+            logits = ops.convert_to_tensor(logits);
+
+            var shape = logits.shape;
+            bool is_last_dim = dim == -1 || dim == shape.Length - 1;
+            if (is_last_dim)
+                return compute_op(logits, name);
+
+            throw new NotImplementedException("_softmax helper");
+        }
+
         public static Tensor softmax_cross_entropy_with_logits_v2_helper(Tensor labels,
             Tensor logits,
             int axis = -1,
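
Note: `_softmax` factors out the axis bookkeeping shared by softmax-style wrappers: the native kernels only operate on the last dimension, so any other `dim` would need a transpose to the end and back, a branch left unimplemented here. For reference, `log_softmax(x)_i = x_i - log(sum_j exp(x_j))`; a standalone, numerically stable version for the last axis (illustrative, not the kernel this wrapper calls):

    using System;
    using System.Linq;

    static class LogSoftmaxSketch
    {
        // Stable form: log_softmax(x)_i = (x_i - m) - log(sum_j exp(x_j - m)),
        // with m = max_j x_j, so exp never overflows.
        public static double[] LogSoftmax(double[] x)
        {
            double m = x.Max();
            double logSum = Math.Log(x.Sum(v => Math.Exp(v - m)));
            return x.Select(v => v - m - logSum).ToArray();
        }
    }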


src/TensorFlowNET.Core/ops.py.cs (+2, -0)

@@ -426,6 +426,8 @@ namespace Tensorflow
                     return constant_op.constant(doubleVal, dtype: dtype, name: name);
                 case RefVariable varVal:
                     return varVal._TensorConversionFunction(as_ref: as_ref);
+                case object[] objects:
+                    return array_ops._autopacking_helper(objects, dtype: dtype, name: name);
                 default:
                     throw new NotImplementedException($"internal_convert_to_tensor: Can't convert {value.GetType().Name} to Tensor");
             }
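
Note: this `case` closes the loop for the new `array_ops.stack`; the full chain for a mixed list is:

    // array_ops.stack(new object[] { -1, ind_lastdim }, axis: 0)
    //   -> ops.convert_to_tensor(values)           // hits the object[] case above
    //   -> array_ops._autopacking_helper(objects)  // constant-wraps non-Tensor elements
    //   -> gen_array_ops.pack(elems_as_tensors)    // emits a single "Pack" node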

