@@ -11,7 +11,7 @@ namespace Tensorflow
         public static extern void TFE_RegisterGradientFunction(_gradient_function_callback callbackPointer);

         [UnmanagedFunctionPointer(CallingConvention.StdCall)]
-        public delegate IntPtr _gradient_function_callback(string op_name, int num_inputs, IntPtr[] op_inputs, int num_attrs, IntPtr[] output_grads);
+        public delegate IntPtr _gradient_function_callback(string op_name, int num_inputs, IntPtr op_inputs, int num_attrs, int num_outputs, IntPtr output_grads);

         [DllImport(TensorFlowLibName)]
         public static extern IntPtr VSpace_Handle(VSpace_callback_Ones ones, VSpace_callback_AggregateGrads aggregate_grads);
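With the callback now receiving raw `IntPtr` blocks instead of marshalled `IntPtr[]` arrays (plus an explicit `num_outputs` count), the managed side has to read each handle out of unmanaged memory itself. A minimal sketch of that access pattern, using the safe `Marshal` equivalent of the unsafe pointer arithmetic this changeset uses elsewhere (requires `System.Runtime.InteropServices`):

    // Read the i-th TFE_TensorHandle out of an unmanaged pointer block.
    // Safe-code equivalent of *((IntPtr*)block + i); assumes the C side
    // passes a contiguous array of native handle pointers.
    static IntPtr ReadHandle(IntPtr block, int i)
        => Marshal.ReadIntPtr(block, i * IntPtr.Size);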
@@ -14,6 +14,7 @@
    limitations under the License.
 ******************************************************************************/

 using NumSharp;
+using System;
 using System.Linq;
 using Tensorflow.Operations;
@@ -438,8 +439,18 @@ namespace Tensorflow.Gradients
                 var rank = input_0_shape.Length;
                 if (Enumerable.SequenceEqual(Enumerable.Range(0, rank), axes.Data<int>()))
                 {
-                    var new_shape = range(rank).Select(x => 1).ToArray();
-                    grad = array_ops.reshape(grad, new_shape);
+                    if (tf.context.executing_eagerly())
+                    {
+                        // TODO: cache this constant per rank, like Python's context.ones_rank_cache()
+                        // shape of `rank` ones, i.e. Python's `[1] * rank`
+                        var new_shape_tensor = constant_op.constant(np.array(Enumerable.Repeat(1, rank).ToArray()), dtype: TF_DataType.TF_INT32);
+                        grad = array_ops.reshape(grad, new_shape_tensor);
+                    }
+                    else
+                    {
+                        var new_shape = range(rank).Select(x => 1).ToArray();
+                        grad = array_ops.reshape(grad, new_shape);
+                    }

                     // If shape is not fully defined (but rank is), we use Shape.
                     if (!input_0_shape.Contains(-1))
                         input_shape = constant_op.constant(input_0_shape);
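The TODO mirrors Python's `_SumGrad`, which memoizes the all-ones shape constant per rank via `context.ones_rank_cache()` so repeated reductions don't rebuild it. A minimal sketch of such a cache, under the assumption that `OnesRankCache`/`Get` are hypothetical names not yet in this codebase (needs `System.Collections.Generic` and `System.Linq`):

    // Hypothetical per-rank cache for the all-ones shape constant,
    // analogous to Python's context.ones_rank_cache().
    class OnesRankCache
    {
        readonly Dictionary<int, Tensor> cache = new Dictionary<int, Tensor>();

        public Tensor Get(int rank)
        {
            if (!cache.TryGetValue(rank, out var t))
            {
                t = constant_op.constant(np.array(Enumerable.Repeat(1, rank).ToArray()),
                    dtype: TF_DataType.TF_INT32);
                cache[rank] = t;
            }
            return t;
        }
    }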
@@ -605,6 +616,18 @@ namespace Tensorflow.Gradients
             var grad = grads[0];
             var x = op.inputs[0];
             var y = op.inputs[1];
+
+            if (tf.context.executing_eagerly())
+            {
+                x = math_ops.conj(x);
+                y = math_ops.conj(y);
+                return new Tensor[]
+                {
+                    grad * y * math_ops.pow(x, y - 1),
+                    null
+                };
+            }
+
             var z = op.outputs[0];
             var (sx, sy) = SmartBroadcastGradientArgs(x, y);
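This eager fast path only produces the gradient w.r.t. `x`, following d(x^y)/dx = y * x^(y-1); the `null` in the second slot signals "no gradient" for `y`. For reference, graph mode (and Python's `_PowGrad`) also computes the `y` term. A hedged sketch of that missing piece, assuming `math_ops.log` is available here and that `x > 0` (the Python implementation masks `log(x)` where `x <= 0`):

    // Sketch only: d(x^y)/dy = x^y * ln(x) = z * ln(x).
    // Assumes x > 0; a robust version substitutes zeros where x <= 0.
    var z = math_ops.pow(x, y);              // or reuse op.outputs[0]
    var grad_y = grad * z * math_ops.log(x);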
@@ -0,0 +1,32 @@
+/*****************************************************************************
+   Copyright 2020 The TensorFlow.NET Authors. All Rights Reserved.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+******************************************************************************/
+
+using System;
+using System.Collections.Generic;
+using System.Text;
+
+namespace Tensorflow.Gradients
+{
+    [RegisterGradient("resource_variable_grad")]
+    public class resource_variable_grad
+    {
+        [RegisterGradient("ReadVariableOp")]
+        public static Tensor[] _ReadGrad(Operation op, Tensor[] grads)
+        {
+            return new Tensor[] { grads[0] };
+        }
+    }
+}
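`ReadVariableOp`'s gradient is the identity: reading a resource variable contributes nothing of its own to the chain rule, so the incoming gradient passes straight through to the variable handle. A usage sketch, assuming a `tf.Variable`/`tf.GradientTape` API along TensorFlow's lines (the names and overloads here are assumptions, not taken from this changeset):

    // Sketch: gradients flow through a variable read unchanged.
    var v = tf.Variable(3.0f);
    using var tape = tf.GradientTape();
    var loss = v * 2.0f;              // implicit ReadVariableOp on v
    var dv = tape.gradient(loss, v);  // 2, delivered via _ReadGrad's pass-through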
@@ -291,7 +291,7 @@ namespace Tensorflow
         /// <param name="s1">A `Tensor`. Must have the same type as `s0`.</param>
         /// <param name="name">A name for the operation (optional).</param>
         /// <returns>A tuple of `Tensor` objects (r0, r1).</returns>
-        public static (Tensor, Tensor) broadcast_gradient_args(Tensor s0, Tensor s1, string name = "")
+        public unsafe static (Tensor, Tensor) broadcast_gradient_args(Tensor s0, Tensor s1, string name = "")
         {
             if (tf.context.executing_eagerly())
             {
@@ -303,6 +303,7 @@ namespace Tensorflow
                     s1 as EagerTensor
                 }, 2, null, status);
                 status.Check(true);
+                return (new EagerTensor(*(IntPtr*)_result), new EagerTensor(*((IntPtr*)_result + 1)));
             }

             var _op = _op_def_lib._apply_op_helper("BroadcastGradientArgs", name, new { s0, s1 });
@@ -318,6 +319,19 @@ namespace Tensorflow
         public static Tensor reshape<T1, T2>(T1 tensor, T2 shape, string name = null)
         {
+            if (tf.context.executing_eagerly())
+            {
+                using var status = new Status();
+                EagerTensorHandle _result = c_api.TFE_FastPathExecute(tf.context, tf.context.device_name,
+                    "Reshape", name, new IntPtr[]
+                    {
+                        (object)tensor as EagerTensor,
+                        (object)shape as EagerTensor
+                    }, 2, null, status);
+                status.Check(true);
+                return _result;
+            }
+
             var _op = _op_def_lib._apply_op_helper("Reshape", name, new { tensor, shape });
             return _op.output;
         }
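`Reshape` here, and `Tile` and `RealDiv` below, repeat the same fast-path boilerplate: pack the eager inputs into an `IntPtr` block, call `TFE_FastPathExecute`, check the status, and let `EagerTensorHandle` convert back to `Tensor`. (The generic arguments are routed through `object` before the `as EagerTensor` cast because C# only permits `as` on type parameters constrained to reference types.) A hedged sketch of a shared helper, assuming the implicit `EagerTensor` to `IntPtr` conversion the initializers above already rely on (`ExecuteFastPath` is a hypothetical name):

    // Hypothetical consolidation of the eager fast-path pattern.
    static Tensor ExecuteFastPath(string op_name, string name, params EagerTensor[] inputs)
    {
        using var status = new Status();
        var handles = inputs.Select(t => (IntPtr)t).ToArray();
        EagerTensorHandle result = c_api.TFE_FastPathExecute(tf.context, tf.context.device_name,
            op_name, name, handles, handles.Length, null, status);
        status.Check(true);
        return result;
    }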
@@ -455,6 +469,19 @@ namespace Tensorflow
         public static Tensor tile<T>(Tensor input, T multiples, string name = null)
         {
+            if (tf.context.executing_eagerly())
+            {
+                using var status = new Status();
+                EagerTensorHandle tensor = c_api.TFE_FastPathExecute(tf.context, tf.context.device_name,
+                    "Tile", name, new IntPtr[]
+                    {
+                        input as EagerTensor,
+                        (object)multiples as EagerTensor
+                    }, 2, null, status);
+                status.Check(true);
+                return tensor;
+            }
+
             var _op = _op_def_lib._apply_op_helper("Tile", name, new { input, multiples });
             return _op.outputs[0];
         }
@@ -843,14 +843,14 @@ namespace Tensorflow
             if (tf.context.executing_eagerly())
             {
                 using var status = new Status();
-                var _result = c_api.TFE_FastPathExecute(tf.context, tf.context.device_name,
+                EagerTensorHandle tensor = c_api.TFE_FastPathExecute(tf.context, tf.context.device_name,
                     "RealDiv", name, new IntPtr[]
                     {
                         x as EagerTensor,
                         y as EagerTensor
                     }, 2, null, status);
                 status.Check(true);
-                return new EagerTensor(_result);
+                return tensor;
             }

             var _op = _op_def_lib._apply_op_helper("RealDiv", name, args: new { x, y });
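Returning the `EagerTensorHandle` directly instead of wrapping it in `new EagerTensor(_result)` funnels the handle-to-tensor conversion through one place. That presumably rests on an implicit conversion operator; a sketch of the shape such an operator would take (an assumption about this codebase, not the actual declaration):

    // Assumed conversion that lets `return tensor;` satisfy a Tensor return type,
    // so each native handle gets exactly one managed wrapper.
    public static implicit operator Tensor(EagerTensorHandle handle)
        => new EagerTensor(handle.Handle); // .Handle: the raw TFE_TensorHandle IntPtr (assumed)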
@@ -19,6 +19,7 @@ using System;
 using System.Linq;
 using NumSharp.Utilities;
 using System.Text;
+using Tensorflow.Eager;

 namespace Tensorflow
 {
@@ -39,6 +40,9 @@ namespace Tensorflow
         /// <returns></returns>
         public static NDArray constant_value(Tensor tensor, bool partial = false)
         {
+            if (tensor is EagerTensor)
+                return tensor.numpy();
+
             NDArray ret = _ConstantValue(tensor, partial);
             if (!(ret is null))
                 tensor.graph.prevent_feeding(tensor);
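Eager tensors already hold a concrete buffer, so constant folding degenerates to materializing it; bailing out early also avoids touching `tensor.graph`, which an `EagerTensor` does not have. A quick usage sketch (assuming `tf.constant` produces an `EagerTensor` in eager mode):

    // In eager mode the value is immediately available:
    var t = tf.constant(new[] { 1, 2, 3 });
    NDArray v = tensor_util.constant_value(t); // just t.numpy(), no graph walk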
@@ -62,13 +62,15 @@ namespace Tensorflow
             });

             ops.RegisterFromAssembly();

-            c_api.TFE_RegisterGradientFunction((op_name, num_inputs, op_inputs, num_attrs, output_grads) =>
+            c_api.TFE_RegisterGradientFunction((op_name, num_inputs, op_inputs, num_attrs, num_outputs, output_grads) =>
             {
-                var output_grad_tensors = output_grads.Select(x => new EagerTensor(x)).ToArray();
                 var input_tensors = new EagerTensor[num_inputs];
                 for (int i = 0; i < num_inputs; i++)
-                    input_tensors[i] = new EagerTensor(op_inputs[op_inputs.Length == 1 ? 0 : i]);
+                    input_tensors[i] = new EagerTensor(*((IntPtr*)op_inputs + i));
+
+                var output_grad_tensors = new EagerTensor[num_outputs];
+                for (int i = 0; i < num_outputs; i++)
+                    output_grad_tensors[i] = new EagerTensor(*((IntPtr*)output_grads + i));

                 var gradients = ops.gradientFunctions[op_name](new EagerOperation
                 {
@@ -77,7 +79,7 @@ namespace Tensorflow
                 }, output_grad_tensors);

                 var ret_tensors = Marshal.AllocHGlobal(sizeof(IntPtr) * num_inputs);
-                Marshal.Copy(gradients.Select(x => (x as EagerTensor).EagerTensorHandle).ToArray(), 0, ret_tensors, 2);
+                Marshal.Copy(gradients.Select(x => x == null ? IntPtr.Zero : (x as EagerTensor).EagerTensorHandle).ToArray(), 0, ret_tensors, num_inputs);
                 // Marshal.FreeHGlobal(ret_tensors);
                 return ret_tensors;
             });
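An `IntPtr.Zero` slot in the returned block marks "no gradient" for that input (the `null` the Pow gradient returns above), and the copy length has to match the allocation of one slot per op input rather than a fixed 2. The invariants of this marshalling step, restated as a sketch (the ownership comment is an assumption based on the disabled `FreeHGlobal`):

    // One slot per op input; null gradients become IntPtr.Zero.
    var handles = gradients
        .Select(g => g == null ? IntPtr.Zero : (g as EagerTensor).EagerTensorHandle)
        .ToArray();                                  // handles.Length == num_inputs
    var block = Marshal.AllocHGlobal(IntPtr.Size * handles.Length);
    Marshal.Copy(handles, 0, block, handles.Length); // copy every slot, not a fixed 2
    // Presumably the native side takes ownership of `block`, which is why
    // Marshal.FreeHGlobal is commented out above.
    return block;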