From bad610d5336bff0e22e283ba99a2f9c1aaf8c18b Mon Sep 17 00:00:00 2001 From: haiping008 Date: Fri, 15 Mar 2019 12:10:26 -0500 Subject: [PATCH] _TopKGrad, _SoftmaxCrossEntropyWithLogitsGrad --- .../Gradients/gradients_impl.py.cs | 28 ++++++--- .../Gradients/nn_grad.py.cs | 59 +++++++++++++++++-- .../ops.gradient_function_mapping.cs | 11 +++- .../Operations/NnOps/gen_nn_ops.cs | 10 ++++ .../Operations/Operation.cs | 5 +- .../Operations/array_ops.py.cs | 36 ++++++++++- .../Operations/control_flow_ops.py.cs | 14 +++++ .../Operations/control_flow_util.py.cs | 10 ++++ src/TensorFlowNET.Core/Operations/nn_ops.cs | 17 ++++++ src/TensorFlowNET.Core/ops.py.cs | 2 + 10 files changed, 174 insertions(+), 18 deletions(-) diff --git a/src/TensorFlowNET.Core/Gradients/gradients_impl.py.cs b/src/TensorFlowNET.Core/Gradients/gradients_impl.py.cs index f07312df..028516bf 100644 --- a/src/TensorFlowNET.Core/Gradients/gradients_impl.py.cs +++ b/src/TensorFlowNET.Core/Gradients/gradients_impl.py.cs @@ -131,6 +131,17 @@ namespace Tensorflow // for ops that do not have gradients. var grad_fn = ops.get_gradient_function(op); + foreach(var (i, out_grad) in enumerate(out_grads)) + { + if(out_grad == null) + { + if (loop_state != null) + ; + else + out_grads[i] = control_flow_ops.ZerosLikeOutsideLoop(op, i); + } + } + with(ops.name_scope(op.name + "_grad"), scope1 => { string name1 = scope1; @@ -240,28 +251,27 @@ namespace Tensorflow private static Tensor[] _AggregatedGrads(Dictionary grads, Operation op, string gradient_uid, object loop_state, int aggregation_method = 0) { var out_grads = _GetGrads(grads, op); - for(int i = 0; i < out_grads.Length; i++) + var return_grads = new Tensor[out_grads.Length]; + + foreach(var (i, out_grad) in enumerate(out_grads)) { - var out_grad = out_grads[i]; - if(loop_state != null) + if (loop_state != null) { } - // Grads have to be Tensors or IndexedSlices - // Aggregate multiple gradients, and convert [] to None. - if(out_grad != null) + if (out_grad != null) { - if(out_grad.Length < 2) + if (out_grad.Length < 2) { string used = "nop"; - return new Tensor[] { out_grad[0] }; + return_grads[i] = out_grad[0]; } } } - return null; + return return_grads; } /// diff --git a/src/TensorFlowNET.Core/Gradients/nn_grad.py.cs b/src/TensorFlowNET.Core/Gradients/nn_grad.py.cs index 6bf3c960..60255687 100644 --- a/src/TensorFlowNET.Core/Gradients/nn_grad.py.cs +++ b/src/TensorFlowNET.Core/Gradients/nn_grad.py.cs @@ -1,5 +1,6 @@ using System; using System.Collections.Generic; +using System.Linq; using System.Text; using Tensorflow.Operations; @@ -13,16 +14,17 @@ namespace Tensorflow.Gradients /// /// /// - public static Tensor[] _BiasAddGrad(Operation op, Tensor grad) + public static Tensor[] _BiasAddGrad(Operation op, Tensor[] grads) { + var grad = grads[0]; string data_format = op.get_attr("data_format")?.ToString(); var bias_add_grad = gen_nn_ops.bias_add_grad(out_backprop: grad, data_format: data_format); return new Tensor[] { grad, bias_add_grad }; } - public static Tensor[] _ReluGrad(Operation op, Tensor grad) + public static Tensor[] _ReluGrad(Operation op, Tensor[] grads) { - return new Tensor[] { gen_nn_ops.relu_grad(grad, op.outputs[0]) }; + return new Tensor[] { gen_nn_ops.relu_grad(grads[0], op.outputs[0]) }; } /// @@ -37,8 +39,57 @@ namespace Tensorflow.Gradients var grad_loss = grads[0]; var grad_grad = grads[1]; var softmax_grad = op.outputs[1]; + var grad = _BroadcastMul(grad_loss, softmax_grad); - throw new NotImplementedException("_SoftmaxCrossEntropyWithLogitsGrad"); + var logits = op.inputs[0]; + if(grad_grad != null && !IsZero(grad_grad)) + { + throw new NotImplementedException("_SoftmaxCrossEntropyWithLogitsGrad"); + } + + return new Tensor[] + { + grad, + _BroadcastMul(grad_loss, -nn_ops.log_softmax(logits)) + }; + } + + private static bool IsZero(Tensor g) + { + if (new string[] { "ZerosLike", "Zeros" }.Contains(g.op.type)) + return true; + + throw new NotImplementedException("IsZero"); + } + + private static Tensor _BroadcastMul(Tensor vec, Tensor mat) + { + vec = array_ops.expand_dims(vec, -1); + return vec * mat; + } + + /// + /// Return the gradients for TopK. + /// + /// + /// + /// + public static Tensor[] _TopKGrad(Operation op, Tensor[] grads) + { + var grad = grads[0]; + var _ = grads[1]; + + var in_shape = array_ops.shape(op.inputs[0]); + var ind_shape = array_ops.shape(op.outputs[1]); + + // int32 is not supported on GPU hence up-casting + var ind_lastdim = array_ops.gather(math_ops.cast( + ind_shape, TF_DataType.TF_INT64), array_ops.size(ind_shape) - 1); + + // Flatten indices to 2D. + var ind_2d = array_ops.reshape(op.outputs[1], array_ops.stack(new object[] { -1, ind_lastdim })); + + throw new NotImplementedException("nn_grad._TopKGrad"); } } } diff --git a/src/TensorFlowNET.Core/Gradients/ops.gradient_function_mapping.cs b/src/TensorFlowNET.Core/Gradients/ops.gradient_function_mapping.cs index 8f34e544..a19e0db2 100644 --- a/src/TensorFlowNET.Core/Gradients/ops.gradient_function_mapping.cs +++ b/src/TensorFlowNET.Core/Gradients/ops.gradient_function_mapping.cs @@ -14,14 +14,18 @@ namespace Tensorflow // map tensorflow\python\ops\math_grad.py return (oper, out_grads) => { - Console.WriteLine($"get_gradient_function: {oper.type} '{oper.name}'"); + // Console.WriteLine($"get_gradient_function: {oper.type} '{oper.name}'"); switch (oper.type) { case "Add": return math_grad._AddGrad(oper, out_grads); + case "BiasAdd": + return nn_grad._BiasAddGrad(oper, out_grads); case "Identity": return math_grad._IdGrad(oper, out_grads); + case "MatMul": + return math_grad._MatMulGrad(oper, out_grads); case "Mul": return math_grad._MulGrad(oper, out_grads); case "Mean": @@ -36,8 +40,13 @@ namespace Tensorflow return math_grad._RealDivGrad(oper, out_grads); case "Reshape": return array_grad._ReshapeGrad(oper, out_grads); + case "Relu": + return nn_grad._ReluGrad(oper, out_grads); case "SoftmaxCrossEntropyWithLogits": return nn_grad._SoftmaxCrossEntropyWithLogitsGrad(oper, out_grads); + case "TopK": + case "TopKV2": + return nn_grad._TopKGrad(oper, out_grads); default: throw new NotImplementedException($"get_gradient_function {oper.type}"); } diff --git a/src/TensorFlowNET.Core/Operations/NnOps/gen_nn_ops.cs b/src/TensorFlowNET.Core/Operations/NnOps/gen_nn_ops.cs index 9d53ab74..dd99a1ff 100644 --- a/src/TensorFlowNET.Core/Operations/NnOps/gen_nn_ops.cs +++ b/src/TensorFlowNET.Core/Operations/NnOps/gen_nn_ops.cs @@ -94,6 +94,16 @@ namespace Tensorflow.Operations return _op.outputs; } + public static Tensor log_softmax(Tensor logits, string name = null) + { + var _op = _op_def_lib._apply_op_helper("LogSoftmax", name: name, args: new + { + logits + }); + + return _op.outputs[0]; + } + public static Tensor max_pool(Tensor input, int[] ksize, int[] strides, diff --git a/src/TensorFlowNET.Core/Operations/Operation.cs b/src/TensorFlowNET.Core/Operations/Operation.cs index d6fb63c1..9df75321 100644 --- a/src/TensorFlowNET.Core/Operations/Operation.cs +++ b/src/TensorFlowNET.Core/Operations/Operation.cs @@ -185,7 +185,10 @@ namespace Tensorflow if (oneof_value == "type") return x.Type; - return x.GetType().GetProperty(oneof_value).GetValue(x); + object result = x.GetType().GetProperty(oneof_value).GetValue(x); + if (result is Google.Protobuf.ByteString byteString) + return byteString.ToStringUtf8(); + return result; } public TF_AttrMetadata GetAttributeMetadata(string attr_name, Status s) diff --git a/src/TensorFlowNET.Core/Operations/array_ops.py.cs b/src/TensorFlowNET.Core/Operations/array_ops.py.cs index a5ae2559..100732b1 100644 --- a/src/TensorFlowNET.Core/Operations/array_ops.py.cs +++ b/src/TensorFlowNET.Core/Operations/array_ops.py.cs @@ -46,10 +46,10 @@ namespace Tensorflow } } - public static Tensor _autopacking_helper(Tensor[] list_or_tuple, TF_DataType dtype, string name) + public static Tensor _autopacking_helper(object[] list_or_tuple, TF_DataType dtype, string name) { var must_pack = false; - var converted_elems = new List(); + var converted_elems = new List(); return with(ops.name_scope(name), scope => { foreach (var (i, elem) in enumerate(list_or_tuple)) @@ -58,7 +58,27 @@ namespace Tensorflow must_pack = true; } - return gen_array_ops.pack(converted_elems.ToArray(), name: scope); + if(must_pack) + { + var elems_as_tensors = new List(); + foreach (var (i, elem) in enumerate(converted_elems)) + { + if (elem is Tensor tensor) + elems_as_tensors.Add(tensor); + else + { + var elem_tensor = constant_op.constant(elem, dtype: dtype, name: i.ToString()); + elems_as_tensors.Add(elem_tensor); + } + } + + return gen_array_ops.pack(elems_as_tensors.ToArray(), name: scope); + } + else + { + // return converted_elems.ToArray(); + throw new NotImplementedException("_autopacking_helper.converted_elems"); + } }); } @@ -355,5 +375,15 @@ namespace Tensorflow public static Tensor slice(Tensor input, Tb[] begin, Ts[] size, string name = null) => gen_array_ops.slice(input, begin, size, name: name); + + public static Tensor stack(object values, int axis = 0, string name = "stack") + { + if (axis == 0) + // If the input is a constant list, it can be converted to a constant op + return ops.convert_to_tensor(values, name: name); + + throw new NotImplementedException("array_ops.stack"); + } + } } diff --git a/src/TensorFlowNET.Core/Operations/control_flow_ops.py.cs b/src/TensorFlowNET.Core/Operations/control_flow_ops.py.cs index 01af43b7..4ea21ee6 100644 --- a/src/TensorFlowNET.Core/Operations/control_flow_ops.py.cs +++ b/src/TensorFlowNET.Core/Operations/control_flow_ops.py.cs @@ -3,6 +3,7 @@ using System.Collections.Generic; using System.Linq; using System.Text; using Tensorflow.Operations; +using util = Tensorflow.control_flow_util; namespace Tensorflow { @@ -226,5 +227,18 @@ namespace Tensorflow return gen_control_flow_ops.@switch(data, pred, name: name); }); } + + public static Tensor ZerosLikeOutsideLoop(Operation op, int index) + { + var val = op.outputs[index]; + if (!util.IsSwitch(op)) + { + if (val.dtype == TF_DataType.TF_RESOURCE) + throw new NotImplementedException("ZerosLikeOutsideLoop"); + return array_ops.zeros_like(val, optimize: false); + } + + throw new NotImplementedException("ZerosLikeOutsideLoop"); + } } } diff --git a/src/TensorFlowNET.Core/Operations/control_flow_util.py.cs b/src/TensorFlowNET.Core/Operations/control_flow_util.py.cs index 4654261e..1b8a304c 100644 --- a/src/TensorFlowNET.Core/Operations/control_flow_util.py.cs +++ b/src/TensorFlowNET.Core/Operations/control_flow_util.py.cs @@ -15,5 +15,15 @@ namespace Tensorflow { return op.type == "Exit" || op.type == "RefExit"; } + + /// + /// Return true if `op` is a Switch. + /// + /// + /// + public static bool IsSwitch(Operation op) + { + return op.type == "Switch" || op.type == "RefSwitch"; + } } } diff --git a/src/TensorFlowNET.Core/Operations/nn_ops.cs b/src/TensorFlowNET.Core/Operations/nn_ops.cs index 74e76bda..e54caf66 100644 --- a/src/TensorFlowNET.Core/Operations/nn_ops.cs +++ b/src/TensorFlowNET.Core/Operations/nn_ops.cs @@ -42,6 +42,23 @@ namespace Tensorflow }); } + public static Tensor log_softmax(Tensor logits, int axis = -1, string name = null) + { + return _softmax(logits, gen_nn_ops.log_softmax, axis, name); + } + + public static Tensor _softmax(Tensor logits, Func compute_op, int dim = -1, string name = null) + { + logits = ops.convert_to_tensor(logits); + + var shape = logits.shape; + bool is_last_dim = dim == -1 || dim == shape.Length - 1; + if (is_last_dim) + return compute_op(logits, name); + + throw new NotImplementedException("_softmax helper"); + } + public static Tensor softmax_cross_entropy_with_logits_v2_helper(Tensor labels, Tensor logits, int axis = -1, diff --git a/src/TensorFlowNET.Core/ops.py.cs b/src/TensorFlowNET.Core/ops.py.cs index 65f7789e..3708c540 100644 --- a/src/TensorFlowNET.Core/ops.py.cs +++ b/src/TensorFlowNET.Core/ops.py.cs @@ -426,6 +426,8 @@ namespace Tensorflow return constant_op.constant(doubleVal, dtype: dtype, name: name); case RefVariable varVal: return varVal._TensorConversionFunction(as_ref: as_ref); + case object[] objects: + return array_ops._autopacking_helper(objects, dtype: dtype, name: name); default: throw new NotImplementedException($"internal_convert_to_tensor: Can't convert {value.GetType().Name} to Tensor"); }