@@ -12,7 +12,7 @@ namespace Tensorflow
     [UnmanagedFunctionPointer(CallingConvention.StdCall)]
     public delegate IntPtr _gradient_function_callback(string op_name,
-        BindingArray op_inputs,
+        IntPtr op_inputs,
         BindingArray op_outputs,
         int num_attrs,
         BindingArray output_grads,
@@ -0,0 +1,25 @@
+using System;
+using System.Collections.Generic;
+using System.Text;
+
+namespace Tensorflow.Keras.Optimizers
+{
+    public class DeviceDType : IEqualityComparer<DeviceDType>
+    {
+        public string Device { get; set; }
+        public TF_DataType DType { get; set; }
+
+        public bool Equals(DeviceDType x, DeviceDType y)
+        {
+            return x.ToString() == y.ToString();
+        }
+
+        // Hash the same string Equals compares, so equal keys hash identically
+        // (a constant hash would force every dictionary lookup through Equals).
+        public int GetHashCode(DeviceDType obj)
+        {
+            return obj.ToString().GetHashCode();
+        }
+
+        public override string ToString()
+            => $"{Device}, {DType}";
+    }
+}
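Side note on the comparer: because an instance of `DeviceDType` itself is passed to `Distinct` as the `IEqualityComparer`, both equality and hashing key off `ToString()`. A minimal standalone sketch of that deduplication behavior (the enum here is a stand-in, not the real `TF_DataType`):

```csharp
using System;
using System.Collections.Generic;
using System.Linq;

// Stand-in for Tensorflow.TF_DataType, just for this sketch.
enum TF_DataType { TF_FLOAT, TF_INT64 }

class DeviceDType : IEqualityComparer<DeviceDType>
{
    public string Device { get; set; }
    public TF_DataType DType { get; set; }
    public bool Equals(DeviceDType x, DeviceDType y) => x.ToString() == y.ToString();
    public int GetHashCode(DeviceDType obj) => obj.ToString().GetHashCode();
    public override string ToString() => $"{Device}, {DType}";
}

class Demo
{
    static void Main()
    {
        var keys = new[]
        {
            new DeviceDType { Device = "CPU:0", DType = TF_DataType.TF_FLOAT },
            new DeviceDType { Device = "CPU:0", DType = TF_DataType.TF_FLOAT }, // duplicate
            new DeviceDType { Device = "CPU:0", DType = TF_DataType.TF_INT64 },
        }.Distinct(new DeviceDType()).ToArray(); // comparer instance passed to Distinct

        Console.WriteLine(keys.Length); // 2
    }
}
```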
@@ -5,6 +5,8 @@ using System.Text;
 using Tensorflow.Keras.Utils;
 using Tensorflow.Train;
 using static Tensorflow.Binding;
+using Tensorflow;
+using Tensorflow.Eager;

 namespace Tensorflow.Keras.Optimizers
 {
@@ -17,18 +19,32 @@ namespace Tensorflow.Keras.Optimizers
         protected virtual string _name { get; }
         ResourceVariable _iterations;
-        List<ResourceVariable> _weight = new List<ResourceVariable>();
-        Dictionary<string, float> _hyper = new Dictionary<string, float>();
-        Dictionary<string, ResourceVariable> _hyper_variables = new Dictionary<string, ResourceVariable>();
+        List<ResourceVariable> _weight;
+        Dictionary<string, float> _hyper;
+        Dictionary<string, ResourceVariable> _hyper_variables;
         protected bool _momentum;
         protected float _initial_decay = 0.0f;
         protected bool _use_locking = true;
+        Dictionary<DeviceDType, Dictionary<string, Tensor>> apply_state;
+
+        public OptimizerV2() : base()
+        {
+            _weight = new List<ResourceVariable>();
+            _hyper = new Dictionary<string, float>();
+            _hyper_variables = new Dictionary<string, ResourceVariable>();
+            apply_state = new Dictionary<DeviceDType, Dictionary<string, Tensor>>();
+        }

-        public void apply_gradients(IEnumerable<(Tensor, ResourceVariable)> grads_and_vars)
+        /// <summary>
+        /// Apply gradients to variables.
+        /// </summary>
+        /// <param name="grads_and_vars">Pairs of (gradient, variable) to update.</param>
+        /// <param name="name">Optional name scope for the update operations.</param>
+        /// <param name="experimental_aggregate_gradients">Whether to aggregate gradients before applying them.</param>
+        public void apply_gradients(IEnumerable<(Tensor, ResourceVariable)> grads_and_vars,
+            string name = null,
+            bool experimental_aggregate_gradients = true)
         {
             var var_list = grads_and_vars.Select(x => x.Item2).ToArray();
             tf_with(ops.name_scope(_name), delegate
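For reference, a call-site sketch of the new signature; `grads`, `vars`, and the `SGD` constructor argument are illustrative assumptions, not part of this diff:

```csharp
using System.Linq;
using Tensorflow.Keras.Optimizers;

// Illustrative: pair each gradient with its variable and apply one step.
// Assumes Tensor[] grads and ResourceVariable[] vars already exist.
var optimizer = new SGD(0.01f);
var grads_and_vars = grads.Zip(vars, (g, v) => (g, v));
optimizer.apply_gradients(grads_and_vars,
    name: "SGD",
    experimental_aggregate_gradients: true);
```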
@@ -38,49 +54,91 @@ namespace Tensorflow.Keras.Optimizers
                 if (grads_and_vars == null || grads_and_vars.Count() == 0)
                     return control_flow_ops.no_op();

-                //var apply_state =
-                _prepare(var_list);
-                _aggregate_gradients(grads_and_vars);
+                apply_state = _prepare(var_list);
+                if (experimental_aggregate_gradients)
+                {
+                    // var reduced_grads = _aggregate_gradients(grads_and_vars);
+                    _distributed_apply(grads_and_vars, name, apply_state);
+                }

                 return null;
             });
         }

-        void _aggregate_gradients(IEnumerable<(Tensor, ResourceVariable)> grads_and_vars)
+        void apply_grad_to_update_var(ResourceVariable var, EagerTensor grad)
+        {
+            _resource_apply_dense(var, grad, apply_state);
+        }
+
+        protected virtual Operation _resource_apply_dense(ResourceVariable var,
+            EagerTensor grad,
+            Dictionary<DeviceDType, Dictionary<string, Tensor>> _apply_state)
+        {
+            throw new NotImplementedException("_resource_apply_dense");
+        }
+
+        void _distributed_apply(IEnumerable<(Tensor, ResourceVariable)> grads_and_vars,
+            string name,
+            Dictionary<DeviceDType, Dictionary<string, Tensor>> _apply_state)
+        {
+            tf_with(ops.name_scope(name, "", new { skip_on_eager = true }), delegate
+            {
+                foreach (var (grad, var) in grads_and_vars)
+                {
+                    tf_with(ops.name_scope("update"), delegate
+                    {
+                        apply_grad_to_update_var(var, grad as EagerTensor);
+                    });
+                }
+
+                _iterations.assign_add(ops.convert_to_tensor(1, dtype: _iterations.dtype));
+            });
+        }
+
+        Tensor[] _aggregate_gradients(IEnumerable<(Tensor, ResourceVariable)> grads_and_vars)
         {
+            return grads_and_vars.Select(x => x.Item1).ToArray();
+        }
+
+        Dictionary<DeviceDType, Dictionary<string, Tensor>> _prepare(ResourceVariable[] var_list)
+        {
-            var lr_t = _hyper_variables["learning_rate"];
-            foreach (var grad_and_var in grads_and_vars)
+            var _apply_state = new Dictionary<DeviceDType, Dictionary<string, Tensor>>();
+            var keys = var_list.Select(x => new DeviceDType
             {
-                var grad = grad_and_var.Item1;
-                var variable = grad_and_var.Item2;
-                // variable.Handle - grad * lr_t.Handle;
+                Device = x.Device,
+                DType = x.dtype.as_base_dtype()
+            }).Distinct(new DeviceDType()).ToArray();
+
+            foreach (var device_dtype in keys)
+            {
+                _apply_state[device_dtype] = new Dictionary<string, Tensor>();
+                _prepare_local(device_dtype, _apply_state);
             }
+
+            return _apply_state;
         }

-        void _prepare(ResourceVariable[] var_list)
+        protected virtual void _prepare_local(DeviceDType device_dtype,
+            Dictionary<DeviceDType, Dictionary<string, Tensor>> _apply_state)
         {
-            var keys = new HashSet<(string, TF_DataType)>();
-            foreach (var variable in var_list)
+            if (_hyper.ContainsKey("learning_rate"))
             {
-                var lr_t = _prepare_local(variable.Device, variable.dtype.as_base_dtype());
-                var momentum = _get_hyper("momentum", variable.dtype);
-                array_ops.identity(momentum);
+                var lr_t = array_ops.identity(_decayed_lr(device_dtype.DType));
+                _apply_state[device_dtype]["lr_t"] = lr_t;
             }
         }

-        ResourceVariable _prepare_local(string var_device, TF_DataType var_dtype)
+        Tensor _decayed_lr(TF_DataType var_dtype)
         {
             var lr_t = _get_hyper("learning_rate", var_dtype);
-            if (_initial_decay > 0)
+            if (_initial_decay > 0.0f)
             {
                 throw new NotImplementedException("");
             }
             return lr_t;
         }

-        ResourceVariable _get_hyper(string name, TF_DataType dtype = TF_DataType.DtInvalid)
+        protected ResourceVariable _get_hyper(string name, TF_DataType dtype = TF_DataType.DtInvalid)
         {
             var value = _hyper_variables[name];
             return math_ops.cast(value, dtype);
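The `_prepare`/`_prepare_local` split builds a per-(device, dtype) cache of derived hyper-parameter tensors, so the decayed learning rate is computed once per distinct key rather than once per variable. A minimal standalone sketch of that caching pattern (plain floats and string tuples stand in for tensors and `DeviceDType`):

```csharp
using System;
using System.Collections.Generic;
using System.Linq;

class PrepareSketch
{
    // Stand-in hyper-parameters; the real code reads _hyper_variables.
    static readonly Dictionary<string, float> hyper =
        new Dictionary<string, float> { ["learning_rate"] = 0.01f };

    static void Main()
    {
        var varKeys = new[] { ("CPU:0", "float32"), ("CPU:0", "float32"), ("CPU:0", "int64") };
        var applyState = new Dictionary<(string, string), Dictionary<string, float>>();

        // One cache entry per distinct (device, dtype), mirroring _prepare.
        foreach (var key in varKeys.Distinct())
        {
            applyState[key] = new Dictionary<string, float>();
            if (hyper.ContainsKey("learning_rate"))
                applyState[key]["lr_t"] = hyper["learning_rate"]; // _decayed_lr would apply decay here
        }

        Console.WriteLine(applyState.Count); // 2 entries, not 3
    }
}
```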
@@ -1,6 +1,8 @@
 using System;
 using System.Collections.Generic;
+using System.Linq;
 using System.Text;
+using Tensorflow.Eager;

 namespace Tensorflow.Keras.Optimizers
 {
@@ -24,5 +26,28 @@ namespace Tensorflow.Keras.Optimizers
             this.nesterov = nesterov; // qualified with `this.` so the parameter is actually stored
         }
+
+        protected override void _prepare_local(DeviceDType device_dtype,
+            Dictionary<DeviceDType, Dictionary<string, Tensor>> _apply_state)
+        {
+            base._prepare_local(device_dtype, _apply_state);
+            _apply_state[device_dtype]["momentum"] = array_ops.identity(
+                _get_hyper("momentum", device_dtype.DType));
+        }
+
+        protected override Operation _resource_apply_dense(ResourceVariable var,
+            EagerTensor grad,
+            Dictionary<DeviceDType, Dictionary<string, Tensor>> _apply_state)
+        {
+            if (_momentum)
+            {
+                throw new NotImplementedException("_resource_apply_dense");
+            }
+
+            var device_dtype = _apply_state.Keys.FirstOrDefault(x => x.Device == var.Device
+                && x.DType == var.dtype.as_base_dtype());
+
+            return gen_training_ops.resource_apply_gradient_descent(var.Handle as EagerTensor,
+                _apply_state[device_dtype]["lr_t"] as EagerTensor,
+                grad,
+                use_locking: _use_locking);
+        }
     }
 }
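The non-momentum branch delegates to `ResourceApplyGradientDescent`, which performs the plain SGD update `var ← var − lr_t · grad`. A self-contained numeric sketch of that update rule:

```csharp
using System;

class SgdStepSketch
{
    static void Main()
    {
        float lr = 0.01f;
        float[] w = { 1.0f, -2.0f };
        float[] grad = { 0.5f, 0.25f };

        // ResourceApplyGradientDescent: w[i] -= lr * grad[i]
        for (int i = 0; i < w.Length; i++)
            w[i] -= lr * grad[i];

        Console.WriteLine($"{w[0]}, {w[1]}"); // 0.995, -2.0025
    }
}
```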
@@ -894,6 +894,19 @@ namespace Tensorflow
         public static Tensor floor_mod(Tensor x, Tensor y, string name = null)
         {
+            if (tf.context.executing_eagerly())
+            {
+                using var status = new Status();
+                EagerTensorHandle tensor = c_api.TFE_FastPathExecute(tf.context, tf.context.device_name,
+                    "FloorMod", name, new IntPtr[]
+                    {
+                        x as EagerTensor,
+                        y as EagerTensor
+                    }, 2, null, status);
+                status.Check(true);
+                return tensor;
+            }
+
             var _op = _op_def_lib._apply_op_helper("FloorMod", name, args: new { x, y });

             return _op.outputs[0];
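As a semantic note, FloorMod returns the remainder with the sign of the divisor (x − floor(x / y) · y), unlike C#'s `%`, which follows the dividend. A quick standalone check of the difference:

```csharp
using System;

class FloorModSketch
{
    static void Main()
    {
        int x = -7, y = 3;
        // FloorMod semantics: result takes the sign of y.
        int floorMod = ((x % y) + y) % y;
        Console.WriteLine(floorMod); // 2
        Console.WriteLine(x % y);    // -1 (C# truncated remainder)
    }
}
```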
@@ -44,6 +44,32 @@ namespace Tensorflow
             return null;
         }

+        /// <summary>
+        /// Adds a value to the current value of a variable.
+        /// </summary>
+        /// <param name="resource">Handle of the resource variable to update.</param>
+        /// <param name="value">Value to add to the variable.</param>
+        /// <param name="name">Optional name for the operation.</param>
+        /// <returns>The assign-add operation.</returns>
+        public static Operation assign_add_variable_op(Tensor resource, Tensor value, string name = null)
+        {
+            if (tf.context.executing_eagerly())
+            {
+                using var status = new Status();
+                var tensor = c_api.TFE_FastPathExecute(tf.context, tf.context.device_name,
+                    "AssignAddVariableOp", name,
+                    new IntPtr[]
+                    {
+                        resource as EagerTensor,
+                        value as EagerTensor
+                    }, 2, null, status);
+                status.Check(true);
+                return tensor;
+            }
+
+            return null;
+        }
+
         public static Operation assign_variable_op(Tensor resource, Tensor value, string name = null)
         {
             if (tf.context.executing_eagerly())
@@ -163,6 +163,9 @@ namespace Tensorflow
                 case TF_DataType.TF_INT32:
                     storage = new UnmanagedStorage(NPTypeCode.Int32);
                     break;
+                case TF_DataType.TF_INT64:
+                    storage = new UnmanagedStorage(NPTypeCode.Int64);
+                    break;
                 case TF_DataType.TF_FLOAT:
                     storage = new UnmanagedStorage(NPTypeCode.Float);
                     break;
@@ -124,6 +124,9 @@ namespace Tensorflow
                 case TF_DataType.TF_FLOAT:
                     value = Convert.ToSingle(value);
                     break;
+                case TF_DataType.TF_INT64:
+                    value = Convert.ToInt64(value);
+                    break;
                 default:
                     break;
             }
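These two cases extend dtype coverage so that long values round-trip: a scalar fed as a boxed numeric is normalized with `Convert.ToInt64` before landing in `Int64` unmanaged storage. A tiny illustration of that normalization step:

```csharp
using System;

class DtypeCoercionSketch
{
    static void Main()
    {
        object value = 42;                  // boxed int, as a constant op might receive
        value = Convert.ToInt64(value);     // normalized for TF_INT64, as in the switch above
        Console.WriteLine(value.GetType()); // System.Int64
    }
}
```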
@@ -14,6 +14,10 @@
    limitations under the License.
 ******************************************************************************/

 using System;
+using Tensorflow.Eager;
+using static Tensorflow.Binding;

 namespace Tensorflow
 {
     public class gen_training_ops
@@ -55,5 +59,34 @@ namespace Tensorflow
             return _op.outputs[0];
         }
+
+        public static Operation resource_apply_gradient_descent(EagerTensor var, EagerTensor alpha, EagerTensor delta,
+            bool use_locking = false, string name = null)
+        {
+            if (tf.context.executing_eagerly())
+            {
+                using var status = new Status();
+                var tensor = c_api.TFE_FastPathExecute(tf.context, tf.context.device_name,
+                    "ResourceApplyGradientDescent", name, new IntPtr[]
+                    {
+                        var,
+                        alpha,
+                        delta
+                    }, 3,
+                    op => wrap_tfe_src.SetOpAttrs(op, "use_locking", use_locking),
+                    status);
+                status.Check(true);
+                return tensor;
+            }
+
+            var _op = _op_def_lib._apply_op_helper("ResourceApplyGradientDescent", name, new
+            {
+                var,
+                alpha,
+                delta,
+                use_locking
+            });
+
+            return _op.outputs[0];
+        }
     }
 }
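Like the other wrappers touched by this PR, the method follows a two-path shape: an eager fast path through `TFE_FastPathExecute` guarded by `status.Check`, otherwise the graph-mode op builder. A condensed runnable sketch of that dispatch pattern, with hypothetical stand-ins (`RunEagerly`, `BuildGraphOp`) rather than the real API:

```csharp
using System;

class DualDispatchSketch
{
    // Hypothetical stand-ins for the eager and graph paths.
    static string RunEagerly(string op) => $"eager:{op}";
    static string BuildGraphOp(string op) => $"graph:{op}";

    static string Dispatch(string op, bool executingEagerly)
        => executingEagerly ? RunEagerly(op) : BuildGraphOp(op);

    static void Main()
    {
        Console.WriteLine(Dispatch("ResourceApplyGradientDescent", true));  // eager:...
        Console.WriteLine(Dispatch("ResourceApplyGradientDescent", false)); // graph:...
    }
}
```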
@@ -33,5 +33,17 @@ namespace Tensorflow
         {
             gen_resource_variable_ops.assign_sub_variable_op(handle, delta, name: name);
         }
+
+        /// <summary>
+        /// Adds a value to this variable.
+        /// </summary>
+        /// <param name="delta">The value to add to this variable.</param>
+        /// <param name="use_locking">If true, use locking during the update (currently unused).</param>
+        /// <param name="name">Optional name for the operation.</param>
+        /// <param name="read_value">Whether to return the new value of the variable (currently unused).</param>
+        public void assign_add(Tensor delta, bool use_locking = false, string name = null, bool read_value = true)
+        {
+            gen_resource_variable_ops.assign_add_variable_op(handle, delta, name: name);
+        }
     }
 }
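Usage-wise, this mirrors the optimizer's own global-step bump earlier in the diff. A sketch against an existing `ResourceVariable` (the `variable` local is assumed, eager mode):

```csharp
// Illustrative: bump a counter variable by one, as _distributed_apply does with
// _iterations.assign_add(ops.convert_to_tensor(1, dtype: _iterations.dtype));
variable.assign_add(ops.convert_to_tensor(1, dtype: variable.dtype));
```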
@@ -57,21 +57,28 @@ namespace Tensorflow
                 for (int i = 0; i < num_grads; i++)
                     input_grads[i] = new EagerTensor(*((IntPtr*)gradients + i));

-                var add_n = gen_math_ops.add_n(input_grads);
-                return (add_n as EagerTensor).EagerTensorHandle;
+                var add_n = gen_math_ops.add_n(input_grads) as EagerTensor;
+                return add_n.EagerTensorHandle;
             });

             ops.RegisterFromAssembly();

-            c_api.TFE_RegisterGradientFunction((op_name, op_inputs, op_outputs, num_attrs, output_grads, skip_input_indices) =>
+            c_api.TFE_RegisterGradientFunction((op_name, op_inputs_handle, op_outputs, num_attrs, output_grads, skip_input_indices) =>
             {
+                // Marshal the raw pointer into a BindingArray before indexing its elements.
+                var op_inputs = Marshal.PtrToStructure<BindingArray>(op_inputs_handle);
+
                 var input_tensors = new EagerTensor[op_inputs.length];
                 for (int i = 0; i < op_inputs.length; i++)
                     input_tensors[i] = new EagerTensor(*((IntPtr*)op_inputs.array + i));

                 var output_tensors = new EagerTensor[op_outputs.length];
                 for (int i = 0; i < op_outputs.length; i++)
-                    output_tensors[i] = new EagerTensor(*((IntPtr*)op_outputs.array + i));
+                    if (op_outputs.array != IntPtr.Zero)
+                        output_tensors[i] = new EagerTensor(*((IntPtr*)op_outputs.array + i));

                 var output_grad_tensors = new EagerTensor[output_grads.length];
                 for (int i = 0; i < output_grads.length; i++)
@@ -85,6 +92,7 @@ namespace Tensorflow
                 {
                     NumInputs = input_tensors.Length,
                     Inputs = input_tensors,
+                    NumOutputs = output_tensors.Length,
                     Outputs = output_tensors,
                     SkipInputIndices = skip_input_indices_param
                 }, output_grad_tensors);
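The key fix here is marshaling `op_inputs_handle` into a `BindingArray` before reading the tensor handles with pointer arithmetic. A standalone sketch of that element-read pattern (compile with unsafe code enabled):

```csharp
using System;
using System.Runtime.InteropServices;

class PtrArraySketch
{
    static unsafe void Main()
    {
        // Build a native array of 3 fake "handles" to read back.
        IntPtr block = Marshal.AllocHGlobal(3 * IntPtr.Size);
        for (int i = 0; i < 3; i++)
            Marshal.WriteIntPtr(block, i * IntPtr.Size, new IntPtr(100 + i));

        // Same access pattern as the gradient callback: *((IntPtr*)array + i)
        for (int i = 0; i < 3; i++)
            Console.WriteLine(*((IntPtr*)block + i)); // 100, 101, 102

        Marshal.FreeHGlobal(block);
    }
}
```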