From 86618e49c9289ea41fd82baefa2f51484f53f0bc Mon Sep 17 00:00:00 2001
From: Oceania2018
Date: Sat, 23 May 2020 06:47:43 -0500
Subject: [PATCH] SGD works.

---
 src/TensorFlowNET.Core/Eager/c_api.eager.cs   |   2 +-
 .../Keras/Optimizers/DeviceDType.cs           |  25 ++++
 .../Keras/Optimizers/OptimizerV2.cs           | 110 +++++++++++++-----
 .../Keras/Optimizers/SGD.cs                   |  25 ++++
 .../Operations/gen_math_ops.cs                |  13 +++
 .../Operations/gen_resource_variable_ops.cs   |  26 +++++
 .../Tensors/Tensor.Value.cs                   |   3 +
 src/TensorFlowNET.Core/Tensors/constant_op.cs |   3 +
 .../Training/gen_training_ops.py.cs           |  33 ++++++
 .../Variables/ResourceVariable.Functions.cs   |  12 ++
 src/TensorFlowNET.Core/tensorflow.cs          |  18 ++-
 11 files changed, 238 insertions(+), 32 deletions(-)
 create mode 100644 src/TensorFlowNET.Core/Keras/Optimizers/DeviceDType.cs

diff --git a/src/TensorFlowNET.Core/Eager/c_api.eager.cs b/src/TensorFlowNET.Core/Eager/c_api.eager.cs
index 148790c0..8946808c 100644
--- a/src/TensorFlowNET.Core/Eager/c_api.eager.cs
+++ b/src/TensorFlowNET.Core/Eager/c_api.eager.cs
@@ -12,7 +12,7 @@ namespace Tensorflow
     [UnmanagedFunctionPointer(CallingConvention.StdCall)]
     public delegate IntPtr _gradient_function_callback(string op_name,
-        BindingArray op_inputs,
+        IntPtr op_inputs,
         BindingArray op_outputs,
         int num_attrs,
         BindingArray output_grads,
diff --git a/src/TensorFlowNET.Core/Keras/Optimizers/DeviceDType.cs b/src/TensorFlowNET.Core/Keras/Optimizers/DeviceDType.cs
new file mode 100644
index 00000000..d3aa5590
--- /dev/null
+++ b/src/TensorFlowNET.Core/Keras/Optimizers/DeviceDType.cs
@@ -0,0 +1,25 @@
+using System;
+using System.Collections.Generic;
+using System.Text;
+
+namespace Tensorflow.Keras.Optimizers
+{
+    public class DeviceDType : IEqualityComparer<DeviceDType>
+    {
+        public string Device { get; set; }
+        public TF_DataType DType { get; set; }
+
+        public bool Equals(DeviceDType x, DeviceDType y)
+        {
+            return x.ToString() == y.ToString();
+        }
+
+        public int GetHashCode(DeviceDType obj)
+        {
+            return 0;
+        }
+
+        public override string ToString()
+            => $"{Device}, {DType}";
+    }
+}
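DeviceDType doubles as both element type and comparer: `_prepare` in the OptimizerV2.cs hunk below projects each variable to a (device, dtype) key and runs `Distinct(new DeviceDType())`, so per-group constants such as the decayed learning rate are built once per placement rather than once per variable. Returning a constant from `GetHashCode` is legal (it merely forces `Equals` for every candidate pair) and is cheap for the handful of placements an optimizer sees. A minimal, self-contained sketch of the same comparer-plus-`Distinct` pattern; `Placement` is an illustrative stand-in, not a type from this patch:

```csharp
using System;
using System.Collections.Generic;
using System.Linq;

// Composite key that is its own IEqualityComparer, like DeviceDType above.
public class Placement : IEqualityComparer<Placement>
{
    public string Device { get; set; }
    public string DType { get; set; }

    public bool Equals(Placement x, Placement y) => x.ToString() == y.ToString();

    // Constant hash: every pair falls through to Equals; fine for tiny key sets.
    public int GetHashCode(Placement obj) => 0;

    public override string ToString() => $"{Device}, {DType}";
}

public static class PlacementDemo
{
    public static void Main()
    {
        var keys = new[]
        {
            new Placement { Device = "/cpu:0", DType = "float32" },
            new Placement { Device = "/cpu:0", DType = "float32" },
            new Placement { Device = "/gpu:0", DType = "float32" },
        }.Distinct(new Placement()).ToArray();

        Console.WriteLine(keys.Length); // 2: duplicates collapse to one key per placement
    }
}
```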
diff --git a/src/TensorFlowNET.Core/Keras/Optimizers/OptimizerV2.cs b/src/TensorFlowNET.Core/Keras/Optimizers/OptimizerV2.cs
index 1beae7cd..e2c4808d 100644
--- a/src/TensorFlowNET.Core/Keras/Optimizers/OptimizerV2.cs
+++ b/src/TensorFlowNET.Core/Keras/Optimizers/OptimizerV2.cs
@@ -5,6 +5,8 @@ using System.Text;
 using Tensorflow.Keras.Utils;
 using Tensorflow.Train;
 using static Tensorflow.Binding;
+using Tensorflow;
+using Tensorflow.Eager;
 
 namespace Tensorflow.Keras.Optimizers
 {
@@ -17,18 +19,32 @@ namespace Tensorflow.Keras.Optimizers
         protected virtual string _name { get; }
 
         ResourceVariable _iterations;
-        List<ResourceVariable> _weight = new List<ResourceVariable>();
-        Dictionary<string, float> _hyper = new Dictionary<string, float>();
-        Dictionary<string, ResourceVariable> _hyper_variables = new Dictionary<string, ResourceVariable>();
+        List<ResourceVariable> _weight;
+        Dictionary<string, float> _hyper;
+        Dictionary<string, ResourceVariable> _hyper_variables;
         protected bool _momentum;
         protected float _initial_decay = 0.0f;
+        protected bool _use_locking = true;
+
+        Dictionary<DeviceDType, Dictionary<string, Tensor>> apply_state;
 
         public OptimizerV2() : base()
         {
-
+            _weight = new List<ResourceVariable>();
+            _hyper = new Dictionary<string, float>();
+            _hyper_variables = new Dictionary<string, ResourceVariable>();
+            apply_state = new Dictionary<DeviceDType, Dictionary<string, Tensor>>();
         }
 
-        public void apply_gradients(IEnumerable<(Tensor, ResourceVariable)> grads_and_vars)
+        /// <summary>
+        /// Apply gradients to variables.
+        /// </summary>
+        /// <param name="grads_and_vars"></param>
+        /// <param name="name"></param>
+        /// <param name="experimental_aggregate_gradients"></param>
+        public void apply_gradients(IEnumerable<(Tensor, ResourceVariable)> grads_and_vars,
+            string name = null,
+            bool experimental_aggregate_gradients = true)
         {
             var var_list = grads_and_vars.Select(x => x.Item2).ToArray();
             tf_with(ops.name_scope(_name), delegate
@@ -38,49 +54,91 @@ namespace Tensorflow.Keras.Optimizers
                 if (grads_and_vars == null || grads_and_vars.Count() == 0)
                     return control_flow_ops.no_op();
 
-                //var apply_state =
-                _prepare(var_list);
-
-                _aggregate_gradients(grads_and_vars);
+                apply_state = _prepare(var_list);
+                if(experimental_aggregate_gradients)
+                {
+                    // var reduced_grads = _aggregate_gradients(grads_and_vars);
+                    _distributed_apply(grads_and_vars, name, apply_state);
+                }
 
                 return null;
             });
         }
 
-        void _aggregate_gradients(IEnumerable<(Tensor, ResourceVariable)> grads_and_vars)
+        void apply_grad_to_update_var(ResourceVariable var, EagerTensor grad)
+        {
+            _resource_apply_dense(var, grad, apply_state);
+        }
+
+        protected virtual Operation _resource_apply_dense(ResourceVariable var,
+            EagerTensor grad,
+            Dictionary<DeviceDType, Dictionary<string, Tensor>> _apply_state)
+        {
+            throw new NotImplementedException("_resource_apply_dense");
+        }
+
+        void _distributed_apply(IEnumerable<(Tensor, ResourceVariable)> grads_and_vars,
+            string name,
+            Dictionary<DeviceDType, Dictionary<string, Tensor>> _apply_state)
+        {
+            tf_with(ops.name_scope(name, "", new { skip_on_eager = true }), delegate
+            {
+                foreach(var (grad, var) in grads_and_vars)
+                {
+                    tf_with(ops.name_scope("update"), delegate
+                    {
+                        apply_grad_to_update_var(var, grad as EagerTensor);
+                    });
+                }
+
+                _iterations.assign_add(ops.convert_to_tensor(1, dtype: _iterations.dtype));
+            });
+        }
+
+        Tensor[] _aggregate_gradients(IEnumerable<(Tensor, ResourceVariable)> grads_and_vars)
+        {
+            return grads_and_vars.Select(x => x.Item1).ToArray();
+        }
+
+        Dictionary<DeviceDType, Dictionary<string, Tensor>> _prepare(ResourceVariable[] var_list)
         {
-            var lr_t = _hyper_variables["learning_rate"];
-            foreach (var grad_and_var in grads_and_vars)
+            var _apply_state = new Dictionary<DeviceDType, Dictionary<string, Tensor>>();
+            var keys = var_list.Select(x => new DeviceDType
             {
-                var grad = grad_and_var.Item1;
-                var variable = grad_and_var.Item2;
-                // variable.Handle - grad * lr_t.Handle;
+                Device = x.Device,
+                DType = x.dtype.as_base_dtype()
+            }).Distinct(new DeviceDType()).ToArray();
+
+            foreach(var device_dtype in keys)
+            {
+                _apply_state[device_dtype] = new Dictionary<string, Tensor>();
+                _prepare_local(device_dtype, _apply_state);
             }
+
+            return _apply_state;
         }
 
-        void _prepare(ResourceVariable[] var_list)
+        protected virtual void _prepare_local(DeviceDType device_dtype,
+            Dictionary<DeviceDType, Dictionary<string, Tensor>> _apply_state)
         {
-            var keys = new HashSet<(string, TF_DataType)>();
-            foreach(var variable in var_list)
+            if (_hyper.ContainsKey("learning_rate"))
             {
-                var lr_t = _prepare_local(variable.Device, variable.dtype.as_base_dtype());
-                var momentum = _get_hyper("momentum", variable.dtype);
-                array_ops.identity(momentum);
+                var lr_t = array_ops.identity(_decayed_lr(device_dtype.DType));
+                _apply_state[device_dtype]["lr_t"] = lr_t;
             }
         }
 
-        ResourceVariable _prepare_local(string var_device, TF_DataType var_dtype)
+        Tensor _decayed_lr(TF_DataType var_dtype)
        {
             var lr_t = _get_hyper("learning_rate", var_dtype);
-            if(_initial_decay > 0)
+            if(_initial_decay > 0.0f)
             {
-
+                throw new NotImplementedException("");
             }
-
             return lr_t;
         }
 
-        ResourceVariable _get_hyper(string name, TF_DataType dtype = TF_DataType.DtInvalid)
+        protected ResourceVariable _get_hyper(string name, TF_DataType dtype = TF_DataType.DtInvalid)
         {
             var value = _hyper_variables[name];
             return math_ops.cast(value, dtype);
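`apply_gradients` now follows the template-method shape of Keras' Python OptimizerV2: `_prepare` materializes per-(device, dtype) constants into `apply_state`, `_distributed_apply` walks the (grad, var) pairs under an "update" name scope and hands each one to the virtual `_resource_apply_dense`, and `_iterations.assign_add(1)` bumps the step counter. A framework-free sketch of that control flow; every type below is a stand-in (plain float arrays instead of tensors), not the TensorFlow.NET API:

```csharp
using System;
using System.Collections.Generic;

abstract class OptimizerSketch
{
    readonly Dictionary<string, float> applyState = new Dictionary<string, float>();
    long iterations;

    // Mirrors apply_gradients: build shared state once, update each variable, bump the step.
    public void Apply(IEnumerable<(float[] grad, float[] variable)> gradsAndVars)
    {
        PrepareLocal(applyState);                   // e.g. applyState["lr_t"] = decayed lr
        foreach (var (grad, variable) in gradsAndVars)
            ApplyDense(variable, grad, applyState); // virtual per-variable kernel
        iterations++;                               // mirrors _iterations.assign_add(1)
    }

    protected abstract void PrepareLocal(Dictionary<string, float> state);
    protected abstract void ApplyDense(float[] variable, float[] grad,
        Dictionary<string, float> state);
}

class SgdSketch : OptimizerSketch
{
    readonly float learningRate;
    public SgdSketch(float learningRate) => this.learningRate = learningRate;

    protected override void PrepareLocal(Dictionary<string, float> state)
        => state["lr_t"] = learningRate; // no decay, matching _decayed_lr's fast path

    // The ResourceApplyGradientDescent update: variable[i] -= lr_t * grad[i].
    protected override void ApplyDense(float[] variable, float[] grad,
        Dictionary<string, float> state)
    {
        for (int i = 0; i < variable.Length; i++)
            variable[i] -= state["lr_t"] * grad[i];
    }
}
```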
diff --git a/src/TensorFlowNET.Core/Keras/Optimizers/SGD.cs b/src/TensorFlowNET.Core/Keras/Optimizers/SGD.cs
index 975854a6..03be366e 100644
--- a/src/TensorFlowNET.Core/Keras/Optimizers/SGD.cs
+++ b/src/TensorFlowNET.Core/Keras/Optimizers/SGD.cs
@@ -1,6 +1,8 @@
 using System;
 using System.Collections.Generic;
+using System.Linq;
 using System.Text;
+using Tensorflow.Eager;
 
 namespace Tensorflow.Keras.Optimizers
 {
@@ -24,5 +26,28 @@ namespace Tensorflow.Keras.Optimizers
 
             nesterov = nesterov;
         }
+
+        protected override void _prepare_local(DeviceDType device_dtype,
+            Dictionary<DeviceDType, Dictionary<string, Tensor>> _apply_state)
+        {
+            base._prepare_local(device_dtype, _apply_state);
+
+            _apply_state[device_dtype]["momentum"] = array_ops.identity(
+                _get_hyper("momentum", device_dtype.DType));
+        }
+
+        protected override Operation _resource_apply_dense(ResourceVariable var, EagerTensor grad, Dictionary<DeviceDType, Dictionary<string, Tensor>> _apply_state)
+        {
+            if (_momentum)
+            {
+                throw new NotImplementedException("_resource_apply_dense");
+            }
+            var device_dtype = _apply_state.Keys.FirstOrDefault(x => x.Device == var.Device && x.DType == var.dtype.as_base_dtype());
+
+            return gen_training_ops.resource_apply_gradient_descent(var.Handle as EagerTensor,
+                _apply_state[device_dtype]["lr_t"] as EagerTensor,
+                grad,
+                use_locking: _use_locking);
+        }
     }
 }
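The `_momentum` branch still throws, so this commit covers only vanilla SGD: `_resource_apply_dense` resolves the cached `lr_t` by the variable's (device, dtype) key and dispatches straight to the `ResourceApplyGradientDescent` kernel, i.e. `var <- var - lr_t * grad`. For reference, the classical momentum rule the branch would eventually need is sketched below; this is the textbook Keras-style formulation (velocity update plus optional Nesterov look-ahead), not code from this patch:

```csharp
static class MomentumSketch
{
    // Classical momentum SGD:
    //   v   <- momentum * v - lr * grad
    //   var <- var + v            (Nesterov: var <- var + momentum * v - lr * grad)
    public static void MomentumStep(float[] variable, float[] grad, float[] velocity,
                                    float lr, float momentum, bool nesterov)
    {
        for (int i = 0; i < variable.Length; i++)
        {
            velocity[i] = momentum * velocity[i] - lr * grad[i];
            variable[i] += nesterov
                ? momentum * velocity[i] - lr * grad[i]
                : velocity[i];
        }
    }
}
```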
diff --git a/src/TensorFlowNET.Core/Operations/gen_math_ops.cs b/src/TensorFlowNET.Core/Operations/gen_math_ops.cs
index 9d2f556c..2a37d290 100644
--- a/src/TensorFlowNET.Core/Operations/gen_math_ops.cs
+++ b/src/TensorFlowNET.Core/Operations/gen_math_ops.cs
@@ -894,6 +894,19 @@ namespace Tensorflow
 
         public static Tensor floor_mod(Tensor x, Tensor y, string name = null)
         {
+            if (tf.context.executing_eagerly())
+            {
+                using var status = new Status();
+                EagerTensorHandle tensor = c_api.TFE_FastPathExecute(tf.context, tf.context.device_name,
+                    "FloorMod", name, new IntPtr[]
+                    {
+                        x as EagerTensor,
+                        y as EagerTensor
+                    }, 2, null, status);
+                status.Check(true);
+                return tensor;
+            }
+
             var _op = _op_def_lib._apply_op_helper("FloorMod", name, args: new { x, y });
 
             return _op.outputs[0];
diff --git a/src/TensorFlowNET.Core/Operations/gen_resource_variable_ops.cs b/src/TensorFlowNET.Core/Operations/gen_resource_variable_ops.cs
index b7b9fcd2..9a224e5f 100644
--- a/src/TensorFlowNET.Core/Operations/gen_resource_variable_ops.cs
+++ b/src/TensorFlowNET.Core/Operations/gen_resource_variable_ops.cs
@@ -44,6 +44,32 @@ namespace Tensorflow
             return null;
         }
 
+        /// <summary>
+        /// Adds a value to the current value of a variable.
+        /// </summary>
+        /// <param name="resource"></param>
+        /// <param name="value"></param>
+        /// <param name="name"></param>
+        /// <returns></returns>
+        public static Operation assign_add_variable_op(Tensor resource, Tensor value, string name = null)
+        {
+            if (tf.context.executing_eagerly())
+            {
+                using var status = new Status();
+                var tensor = c_api.TFE_FastPathExecute(tf.context, tf.context.device_name,
+                    "AssignAddVariableOp", name,
+                    new IntPtr[]
+                    {
+                        resource as EagerTensor,
+                        value as EagerTensor
+                    }, 2, null, status);
+                status.Check(true);
+                return tensor;
+            }
+
+            return null;
+        }
+
         public static Operation assign_variable_op(Tensor resource, Tensor value, string name = null)
         {
             if (tf.context.executing_eagerly())
diff --git a/src/TensorFlowNET.Core/Tensors/Tensor.Value.cs b/src/TensorFlowNET.Core/Tensors/Tensor.Value.cs
index 3fdb3bb9..440fd086 100644
--- a/src/TensorFlowNET.Core/Tensors/Tensor.Value.cs
+++ b/src/TensorFlowNET.Core/Tensors/Tensor.Value.cs
@@ -163,6 +163,9 @@ namespace Tensorflow
                 case TF_DataType.TF_INT32:
                     storage = new UnmanagedStorage(NPTypeCode.Int32);
                     break;
+                case TF_DataType.TF_INT64:
+                    storage = new UnmanagedStorage(NPTypeCode.Int64);
+                    break;
                 case TF_DataType.TF_FLOAT:
                     storage = new UnmanagedStorage(NPTypeCode.Float);
                     break;
diff --git a/src/TensorFlowNET.Core/Tensors/constant_op.cs b/src/TensorFlowNET.Core/Tensors/constant_op.cs
index 6c684dc5..c8ad5fb0 100644
--- a/src/TensorFlowNET.Core/Tensors/constant_op.cs
+++ b/src/TensorFlowNET.Core/Tensors/constant_op.cs
@@ -124,6 +124,9 @@ namespace Tensorflow
                 case TF_DataType.TF_FLOAT:
                     value = Convert.ToSingle(value);
                     break;
+                case TF_DataType.TF_INT64:
+                    value = Convert.ToInt64(value);
+                    break;
                 default:
                     break;
             }
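The two `TF_INT64` cases exist for the same reason as `assign_add_variable_op`: every `apply_gradients` call ends in `_iterations.assign_add(1)`, which builds an int64 constant and routes it through the `AssignAddVariableOp` kernel, a read-modify-write against the variable's resource handle. A toy model of the resource-variable semantics involved; `ToyResourceVariable` is illustrative only, not TensorFlow.NET's class:

```csharp
using System;

// The three ops over a resource handle, collapsed onto one managed field.
class ToyResourceVariable
{
    long value;
    public ToyResourceVariable(long initial) => value = initial;

    public void AssignAdd(long delta) => value += delta; // AssignAddVariableOp
    public void Assign(long v) => value = v;             // AssignVariableOp
    public long Read() => value;                         // ReadVariableOp
}

static class StepCounterDemo
{
    static void Main()
    {
        var iterations = new ToyResourceVariable(0);
        iterations.AssignAdd(1); // what each apply_gradients call performs
        Console.WriteLine(iterations.Read()); // 1
    }
}
```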
diff --git a/src/TensorFlowNET.Core/Training/gen_training_ops.py.cs b/src/TensorFlowNET.Core/Training/gen_training_ops.py.cs
index 7235ce7b..dc162865 100644
--- a/src/TensorFlowNET.Core/Training/gen_training_ops.py.cs
+++ b/src/TensorFlowNET.Core/Training/gen_training_ops.py.cs
@@ -14,6 +14,10 @@ limitations under the License.
 ******************************************************************************/
 
+using System;
+using Tensorflow.Eager;
+using static Tensorflow.Binding;
+
 namespace Tensorflow
 {
     public class gen_training_ops
@@ -55,5 +59,34 @@ namespace Tensorflow
 
             return _op.outputs[0];
         }
+
+        public static Operation resource_apply_gradient_descent(EagerTensor var, EagerTensor alpha, EagerTensor delta, bool use_locking = false, string name = null)
+        {
+            if (tf.context.executing_eagerly())
+            {
+                using var status = new Status();
+                var tensor = c_api.TFE_FastPathExecute(tf.context, tf.context.device_name,
+                    "ResourceApplyGradientDescent", name, new IntPtr[]
+                    {
+                        var,
+                        alpha,
+                        delta
+                    }, 3,
+                    op => wrap_tfe_src.SetOpAttrs(op, "use_locking", use_locking),
+                    status);
+                status.Check(true);
+                return tensor;
+            }
+
+            var _op = _op_def_lib._apply_op_helper("ResourceApplyGradientDescent", name, new
+            {
+                var,
+                alpha,
+                delta,
+                use_locking
+            });
+
+            return _op.outputs[0];
+        }
     }
 }
diff --git a/src/TensorFlowNET.Core/Variables/ResourceVariable.Functions.cs b/src/TensorFlowNET.Core/Variables/ResourceVariable.Functions.cs
index 7b5e3232..1978d60a 100644
--- a/src/TensorFlowNET.Core/Variables/ResourceVariable.Functions.cs
+++ b/src/TensorFlowNET.Core/Variables/ResourceVariable.Functions.cs
@@ -33,5 +33,17 @@ namespace Tensorflow
         {
             gen_resource_variable_ops.assign_sub_variable_op(handle, delta, name: name);
         }
+
+        /// <summary>
+        /// Adds a value to this variable.
+        /// </summary>
+        /// <param name="delta"></param>
+        /// <param name="use_locking"></param>
+        /// <param name="name"></param>
+        /// <param name="read_value"></param>
+        public void assign_add(Tensor delta, bool use_locking = false, string name = null, bool read_value = true)
+        {
+            gen_resource_variable_ops.assign_add_variable_op(handle, delta, name: name);
+        }
     }
 }
diff --git a/src/TensorFlowNET.Core/tensorflow.cs b/src/TensorFlowNET.Core/tensorflow.cs
index 732ab264..de2fe450 100644
--- a/src/TensorFlowNET.Core/tensorflow.cs
+++ b/src/TensorFlowNET.Core/tensorflow.cs
@@ -57,21 +57,28 @@ namespace Tensorflow
                 for (int i = 0; i < num_grads; i++)
                     input_grads[i] = new EagerTensor(*((IntPtr*)gradients + i));
 
-                var add_n = gen_math_ops.add_n(input_grads);
-                return (add_n as EagerTensor).EagerTensorHandle;
+                var add_n = gen_math_ops.add_n(input_grads) as EagerTensor;
+                return add_n.EagerTensorHandle;
             });
 
             ops.RegisterFromAssembly();
 
-            c_api.TFE_RegisterGradientFunction((op_name, op_inputs, op_outputs, num_attrs, output_grads, skip_input_indices) =>
+            c_api.TFE_RegisterGradientFunction((op_name, op_inputs_handle, op_outputs, num_attrs, output_grads, skip_input_indices) =>
             {
+                var op_inputs = Marshal.PtrToStructure<BindingArray>(op_inputs_handle);
                 var input_tensors = new EagerTensor[op_inputs.length];
                 for (int i = 0; i < op_inputs.length; i++)
+                {
+                    // Console.WriteLine($"debug 4: {op_name} op_inputs=" + (*(IntPtr*)op_inputs_handle).ToString("x16").ToUpper() + $" op_inputs[{i}]=" + (*((IntPtr*)op_inputs.array + i)).ToString("x16").ToUpper());
+                    if((*((IntPtr*)op_inputs.array + i)).ToString("x16").ToUpper().StartsWith("FFFFF"))
+                    {
+
+                    }
                     input_tensors[i] = new EagerTensor(*((IntPtr*)op_inputs.array + i));
+                }
 
                 var output_tensors = new EagerTensor[op_outputs.length];
                 for (int i = 0; i < op_outputs.length; i++)
-                    if (op_outputs.array != IntPtr.Zero)
-                        output_tensors[i] = new EagerTensor(*((IntPtr*)op_outputs.array + i));
+                    output_tensors[i] = new EagerTensor(*((IntPtr*)op_outputs.array + i));
 
                 var output_grad_tensors = new EagerTensor[output_grads.length];
                 for (int i = 0; i < output_grads.length; i++)
@@ -85,6 +92,7 @@
                 {
                     NumInputs = input_tensors.Length,
                     Inputs = input_tensors,
+                    NumOutputs = output_tensors.Length,
                     Outputs = output_tensors,
                     SkipInputIndices = skip_input_indices_param
                 }, output_grad_tensors);
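The callback change pairs with the `c_api.eager.cs` hunk at the top of the patch: the delegate now receives `op_inputs` as a raw `IntPtr`, so the managed side rebuilds the array struct itself with `Marshal.PtrToStructure<BindingArray>` before walking the handles, and `NumOutputs` is filled in alongside `Outputs` so the op record handed to the gradient function is complete. A minimal sketch of that marshaling pattern; the `NativeArray` layout mirrors how `BindingArray` is used here (`array` pointer plus `length`) and is an assumption, not the actual declaration:

```csharp
using System;
using System.Runtime.InteropServices;

[StructLayout(LayoutKind.Sequential)]
struct NativeArray
{
    public IntPtr array;  // contiguous block of IntPtr-sized tensor handles
    public int length;
}

static class MarshalDemo
{
    // Rebuild the struct from a raw pointer, then read each handle (no unsafe code needed).
    static IntPtr[] ReadHandles(IntPtr nativeArrayPtr)
    {
        var arr = Marshal.PtrToStructure<NativeArray>(nativeArrayPtr);
        var handles = new IntPtr[arr.length];
        for (int i = 0; i < arr.length; i++)
            handles[i] = Marshal.ReadIntPtr(arr.array, i * IntPtr.Size);
        return handles;
    }

    static void Main()
    {
        // Simulate the native side: two fake handles behind a NativeArray*.
        var block = Marshal.AllocHGlobal(IntPtr.Size * 2);
        Marshal.WriteIntPtr(block, 0, new IntPtr(0x1111));
        Marshal.WriteIntPtr(block, IntPtr.Size, new IntPtr(0x2222));

        var structPtr = Marshal.AllocHGlobal(Marshal.SizeOf<NativeArray>());
        Marshal.StructureToPtr(new NativeArray { array = block, length = 2 }, structPtr, false);

        foreach (var handle in ReadHandles(structPtr))
            Console.WriteLine($"0x{handle.ToInt64():x}");

        Marshal.FreeHGlobal(structPtr);
        Marshal.FreeHGlobal(block);
    }
}
```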