diff --git a/src/TensorFlowNET.Core/Gradients/math_grad.py.cs b/src/TensorFlowNET.Core/Gradients/math_grad.cs
similarity index 87%
rename from src/TensorFlowNET.Core/Gradients/math_grad.py.cs
rename to src/TensorFlowNET.Core/Gradients/math_grad.cs
index 5d02033c..4380a7fa 100644
--- a/src/TensorFlowNET.Core/Gradients/math_grad.py.cs
+++ b/src/TensorFlowNET.Core/Gradients/math_grad.cs
@@ -8,7 +8,7 @@ namespace Tensorflow.Gradients
     /// <summary>
     /// Gradients for operators defined in math_ops.py.
     /// </summary>
-    public class math_grad
+    public class math_grad : Python
     {
         public static Tensor[] _AddGrad(Operation op, Tensor[] grads)
         {
@@ -33,6 +33,16 @@ namespace Tensorflow.Gradients
             return new Tensor[] { grads[0] };
         }

+        public static Tensor[] _LogGrad(Operation op, Tensor[] grads)
+        {
+            var grad = grads[0];
+            var x = op.inputs[0];
+            return with(ops.control_dependencies(new Operation[] { grad }), dp => {
+                x = math_ops.conj(x);
+                return new Tensor[] { grad * math_ops.reciprocal(x) };
+            });
+        }
+
         public static Tensor[] _MulGrad(Operation op, Tensor[] grads)
         {
             var x = op.inputs[0];
@@ -106,6 +116,11 @@ namespace Tensorflow.Gradients
             return new Tensor[] { math_ops.truediv(sum_grad, math_ops.cast(factor, sum_grad.dtype)), null };
         }

+        public static Tensor[] _NegGrad(Operation op, Tensor[] grads)
+        {
+            return new Tensor[] { -grads[0] };
+        }
+
         private static Tensor _safe_shape_div(Tensor x, Tensor y)
         {
             return math_ops.floordiv(x, gen_math_ops.maximum(y, 1));
@@ -145,13 +160,16 @@ namespace Tensorflow.Gradients
                 var axes = tensor_util.constant_value(op.inputs[1]);
                 if(!(axes is null))
                 {
-                    var rank = axes.shape.Rank;
-                    grad = array_ops.reshape(grad, new int[] { 1 });
-                    if (!input_0_shape.Contains(-1))
-                        input_shape = constant_op.constant(input_0_shape);
-                    else
-                        input_shape = array_ops.shape(op.inputs[0]);
-                    return new Tensor[] { gen_array_ops.tile(grad, input_shape), null };
+                    var rank = input_0_shape.Length;
+                    if (Enumerable.SequenceEqual(Enumerable.Range(0, rank), axes.Data<int>()))
+                    {
+                        grad = array_ops.reshape(grad, new int[] { 1 });
+                        if (!input_0_shape.Contains(-1))
+                            input_shape = constant_op.constant(input_0_shape);
+                        else
+                            input_shape = array_ops.shape(op.inputs[0]);
+                        return new Tensor[] { gen_array_ops.tile(grad, input_shape), null };
+                    }
                 }
             }
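A quick note on the two new gradients above: `_LogGrad` relies on the identity d/dx log(x) = 1/x (with `conj` applied first so it also holds for complex inputs), and `_NegGrad` on d/dx (-x) = -1. The `with(ops.control_dependencies(...))` wrapper mirrors the upstream Python implementation, ensuring the incoming gradient is computed before `x` is used. Below is a minimal stand-alone C# sketch, not part of this diff and with made-up names, that checks the `1/x` factor against a central finite difference:

```csharp
using System;

// Illustrative finite-difference check of the math behind _LogGrad.
class LogGradCheck
{
    static void Main()
    {
        double eps = 1e-6;
        foreach (var x in new[] { 0.5, 1.0, 3.7 })
        {
            // Central difference approximation of d/dx log(x).
            double numeric = (Math.Log(x + eps) - Math.Log(x - eps)) / (2 * eps);
            // The factor _LogGrad multiplies into the upstream gradient.
            double analytic = 1.0 / x;
            Console.WriteLine($"x={x}: numeric={numeric:F6} analytic={analytic:F6}");
        }
    }
}
```

Each printed pair should agree to roughly six decimal places.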
diff --git a/src/TensorFlowNET.Core/Gradients/nn_grad.py.cs b/src/TensorFlowNET.Core/Gradients/nn_grad.py.cs
index a4840fd7..6740bdbf 100644
--- a/src/TensorFlowNET.Core/Gradients/nn_grad.py.cs
+++ b/src/TensorFlowNET.Core/Gradients/nn_grad.py.cs
@@ -27,6 +27,21 @@ namespace Tensorflow.Gradients
             return new Tensor[] { gen_nn_ops.relu_grad(grads[0], op.outputs[0]) };
         }

+        /// <summary>
+        /// The derivative of the softmax nonlinearity.
+        /// </summary>
+        /// <param name="op"></param>
+        /// <param name="grads"></param>
+        /// <returns></returns>
+        public static Tensor[] _SoftmaxGrad(Operation op, Tensor[] grads)
+        {
+            var grad_softmax = grads[0];
+
+            var softmax = op.outputs[0];
+            var sum_channels = math_ops.reduce_sum(grad_softmax * softmax, -1, keepdims: true);
+            return new Tensor[] { (grad_softmax - sum_channels) * softmax };
+        }
+
         /// <summary>
         /// Gradient function for SoftmaxCrossEntropyWithLogits.
         /// </summary>
diff --git a/src/TensorFlowNET.Core/Gradients/ops.gradient_function_mapping.cs b/src/TensorFlowNET.Core/Gradients/ops.gradient_function_mapping.cs
index 9601077b..7b650d00 100644
--- a/src/TensorFlowNET.Core/Gradients/ops.gradient_function_mapping.cs
+++ b/src/TensorFlowNET.Core/Gradients/ops.gradient_function_mapping.cs
@@ -24,6 +24,8 @@ namespace Tensorflow
                     return nn_grad._BiasAddGrad(oper, out_grads);
                 case "Identity":
                     return math_grad._IdGrad(oper, out_grads);
+                case "Log":
+                    return math_grad._LogGrad(oper, out_grads);
                 case "MatMul":
                     return math_grad._MatMulGrad(oper, out_grads);
                 case "Merge":
@@ -32,6 +34,8 @@ namespace Tensorflow
                     return math_grad._MulGrad(oper, out_grads);
                 case "Mean":
                     return math_grad._MeanGrad(oper, out_grads);
+                case "Neg":
+                    return math_grad._NegGrad(oper, out_grads);
                 case "Sum":
                     return math_grad._SumGrad(oper, out_grads);
                 case "Sub":
@@ -46,6 +50,8 @@ namespace Tensorflow
                     return nn_grad._ReluGrad(oper, out_grads);
                 case "Squeeze":
                     return array_grad._SqueezeGrad(oper, out_grads);
+                case "Softmax":
+                    return nn_grad._SoftmaxGrad(oper, out_grads);
                 case "SoftmaxCrossEntropyWithLogits":
                     return nn_grad._SoftmaxCrossEntropyWithLogitsGrad(oper, out_grads);
                 case "Transpose":
diff --git a/src/TensorFlowNET.Core/Operations/gen_math_ops.cs b/src/TensorFlowNET.Core/Operations/gen_math_ops.cs
index 1ccb0e96..4cfdc7cf 100644
--- a/src/TensorFlowNET.Core/Operations/gen_math_ops.cs
+++ b/src/TensorFlowNET.Core/Operations/gen_math_ops.cs
@@ -142,6 +142,13 @@ namespace Tensorflow
             return _op.outputs[0];
         }

+        public static Tensor reciprocal(Tensor x, string name = null)
+        {
+            var _op = _op_def_lib._apply_op_helper("Reciprocal", name, args: new { x });
+
+            return _op.outputs[0];
+        }
+
         public static Tensor floor_mod(Tensor x, Tensor y, string name = null)
         {
             var _op = _op_def_lib._apply_op_helper("FloorMod", name, args: new { x, y });
diff --git a/src/TensorFlowNET.Core/Operations/math_ops.py.cs b/src/TensorFlowNET.Core/Operations/math_ops.py.cs
index 3c6ea594..c8e3f98f 100644
--- a/src/TensorFlowNET.Core/Operations/math_ops.py.cs
+++ b/src/TensorFlowNET.Core/Operations/math_ops.py.cs
@@ -126,6 +126,16 @@ namespace Tensorflow
             return gen_data_flow_ops.dynamic_stitch(a1, a2);
         }

+        /// <summary>
+        /// Computes the reciprocal of x element-wise.
+        /// </summary>
+        /// <param name="x"></param>
+        /// <param name="name"></param>
+        /// <returns></returns>
+        public static Tensor reciprocal(Tensor x, string name = null)
+            => gen_math_ops.reciprocal(x, name: name);
+
+
         /// <summary>
         /// Computes log(sum(exp(elements across dimensions of a tensor))).
         /// Reduces `input_tensor` along the dimensions given in `axis`.
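For anyone reviewing `_SoftmaxGrad`: with s = softmax(x) and upstream gradient g, the vector-Jacobian product works out to s * (g - sum_j g_j * s_j), which is exactly the `(grad_softmax - sum_channels) * softmax` expression in the diff; the `keepdims: true` on `sum_channels` keeps the per-row sum broadcastable across the channel axis. Here is a self-contained C# sketch, hypothetical names and not part of the diff, comparing that formula to finite differences:

```csharp
using System;
using System.Linq;

// Illustrative check of the softmax vector-Jacobian product used by _SoftmaxGrad.
class SoftmaxGradCheck
{
    static double[] Softmax(double[] x)
    {
        var m = x.Max();                          // subtract max for numerical stability
        var e = x.Select(v => Math.Exp(v - m)).ToArray();
        var sum = e.Sum();
        return e.Select(v => v / sum).ToArray();
    }

    static void Main()
    {
        var x = new[] { 0.2, -1.3, 2.0 };
        var g = new[] { 1.0, 0.5, -0.7 };         // arbitrary upstream gradient

        var s = Softmax(x);

        // Analytic form from _SoftmaxGrad: (g - sum(g * s)) * s, element-wise.
        var dot = g.Zip(s, (gi, si) => gi * si).Sum();
        var analytic = g.Zip(s, (gi, si) => (gi - dot) * si).ToArray();

        // Finite-difference check: perturb each x[i] and see how sum(g * softmax(x)) moves.
        double eps = 1e-6;
        for (int i = 0; i < x.Length; i++)
        {
            var xp = (double[])x.Clone(); xp[i] += eps;
            var xm = (double[])x.Clone(); xm[i] -= eps;
            double fp = g.Zip(Softmax(xp), (gi, si) => gi * si).Sum();
            double fm = g.Zip(Softmax(xm), (gi, si) => gi * si).Sum();
            double numeric = (fp - fm) / (2 * eps);
            Console.WriteLine($"i={i}: numeric={numeric:F6} analytic={analytic[i]:F6}");
        }
    }
}
```

The two columns should match to about six decimal places, which is a quick way to convince yourself the reduction-and-broadcast formulation is the correct Jacobian product without materializing the full softmax Jacobian.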