
SGD works.
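This commit wires up an eager-mode SGD path: OptimizerV2 gains the apply_gradients / _prepare / _distributed_apply plumbing, SGD overrides _resource_apply_dense to call the new ResourceApplyGradientDescent binding, and the supporting eager ops (AssignAddVariableOp, FloorMod) are added. A minimal usage sketch against the API in this diff — the gradient source and the SGD constructor's learning_rate parameter are assumptions, not shown in the commit:

    using System.Linq;
    using Tensorflow.Keras.Optimizers;

    // grads: Tensor[] from an existing gradient source; vars: ResourceVariable[].
    var optimizer = new SGD(learning_rate: 0.01f);          // assumed ctor parameter
    var grads_and_vars = grads.Zip(vars, (g, v) => (g, v)); // pair grad with its variable
    // Applies var <- var - lr * grad per variable, then bumps the step counter.
    optimizer.apply_gradients(grads_and_vars);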

Oceania2018 committed 5 years ago (tags/v0.20)
commit 86618e49c9
11 changed files with 238 additions and 32 deletions
 1. +1  -1   src/TensorFlowNET.Core/Eager/c_api.eager.cs
 2. +25 -0   src/TensorFlowNET.Core/Keras/Optimizers/DeviceDType.cs
 3. +84 -26  src/TensorFlowNET.Core/Keras/Optimizers/OptimizerV2.cs
 4. +25 -0   src/TensorFlowNET.Core/Keras/Optimizers/SGD.cs
 5. +13 -0   src/TensorFlowNET.Core/Operations/gen_math_ops.cs
 6. +26 -0   src/TensorFlowNET.Core/Operations/gen_resource_variable_ops.cs
 7. +3  -0   src/TensorFlowNET.Core/Tensors/Tensor.Value.cs
 8. +3  -0   src/TensorFlowNET.Core/Tensors/constant_op.cs
 9. +33 -0   src/TensorFlowNET.Core/Training/gen_training_ops.py.cs
10. +12 -0   src/TensorFlowNET.Core/Variables/ResourceVariable.Functions.cs
11. +13 -5   src/TensorFlowNET.Core/tensorflow.cs

src/TensorFlowNET.Core/Eager/c_api.eager.cs  +1 -1

@@ -12,7 +12,7 @@ namespace Tensorflow

     [UnmanagedFunctionPointer(CallingConvention.StdCall)]
     public delegate IntPtr _gradient_function_callback(string op_name,
-        BindingArray op_inputs,
+        IntPtr op_inputs,
         BindingArray op_outputs,
         int num_attrs,
         BindingArray output_grads,


src/TensorFlowNET.Core/Keras/Optimizers/DeviceDType.cs  +25 -0

@@ -0,0 +1,25 @@
+using System;
+using System.Collections.Generic;
+using System.Text;
+
+namespace Tensorflow.Keras.Optimizers
+{
+    public class DeviceDType : IEqualityComparer<DeviceDType>
+    {
+        public string Device { get; set; }
+        public TF_DataType DType { get; set; }
+
+        public bool Equals(DeviceDType x, DeviceDType y)
+        {
+            return x.ToString() == y.ToString();
+        }
+
+        public int GetHashCode(DeviceDType obj)
+        {
+            return 0;
+        }
+
+        public override string ToString()
+            => $"{Device}, {DType}";
+    }
+}
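Aside (not part of the commit): returning 0 from GetHashCode is legal here — equal objects trivially share a hash — and it forces Distinct to fall back on Equals for every comparison. A hash consistent with the ToString()-based Equals would be:

    public int GetHashCode(DeviceDType obj) => obj.ToString().GetHashCode();

That trades the degenerate single-bucket behavior for normal hashing; for the handful of (device, dtype) pairs a model typically has, either works.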

src/TensorFlowNET.Core/Keras/Optimizers/OptimizerV2.cs  +84 -26

@@ -5,6 +5,8 @@ using System.Text;
 using Tensorflow.Keras.Utils;
 using Tensorflow.Train;
 using static Tensorflow.Binding;
+using Tensorflow;
+using Tensorflow.Eager;

 namespace Tensorflow.Keras.Optimizers
 {
@@ -17,18 +19,32 @@ namespace Tensorflow.Keras.Optimizers
         protected virtual string _name { get; }

         ResourceVariable _iterations;
-        List<ResourceVariable> _weight = new List<ResourceVariable>();
-        Dictionary<string, float> _hyper = new Dictionary<string, float>();
-        Dictionary<string, ResourceVariable> _hyper_variables = new Dictionary<string, ResourceVariable>();
+        List<ResourceVariable> _weight;
+        Dictionary<string, float> _hyper;
+        Dictionary<string, ResourceVariable> _hyper_variables;
         protected bool _momentum;
         protected float _initial_decay = 0.0f;
+        protected bool _use_locking = true;
+
+        Dictionary<DeviceDType, Dictionary<string, Tensor>> apply_state;

         public OptimizerV2() : base()
         {
-
+            _weight = new List<ResourceVariable>();
+            _hyper = new Dictionary<string, float>();
+            _hyper_variables = new Dictionary<string, ResourceVariable>();
+            apply_state = new Dictionary<DeviceDType, Dictionary<string, Tensor>>();
         }

-        public void apply_gradients(IEnumerable<(Tensor, ResourceVariable)> grads_and_vars)
+        /// <summary>
+        /// Apply gradients to variables.
+        /// </summary>
+        /// <param name="grads_and_vars"></param>
+        /// <param name="name"></param>
+        /// <param name="experimental_aggregate_gradients"></param>
+        public void apply_gradients(IEnumerable<(Tensor, ResourceVariable)> grads_and_vars,
+            string name = null,
+            bool experimental_aggregate_gradients = true)
         {
             var var_list = grads_and_vars.Select(x => x.Item2).ToArray();
             tf_with(ops.name_scope(_name), delegate
@@ -38,49 +54,91 @@ namespace Tensorflow.Keras.Optimizers
                 if (grads_and_vars == null || grads_and_vars.Count() == 0)
                     return control_flow_ops.no_op();

-                //var apply_state =
-                _prepare(var_list);
-
-                _aggregate_gradients(grads_and_vars);
+                apply_state = _prepare(var_list);
+                if(experimental_aggregate_gradients)
+                {
+                    // var reduced_grads = _aggregate_gradients(grads_and_vars);
+                    _distributed_apply(grads_and_vars, name, apply_state);
+                }

                 return null;
             });
         }

-        void _aggregate_gradients(IEnumerable<(Tensor, ResourceVariable)> grads_and_vars)
-        {
-            var lr_t = _hyper_variables["learning_rate"];
-            foreach (var grad_and_var in grads_and_vars)
-            {
-                var grad = grad_and_var.Item1;
-                var variable = grad_and_var.Item2;
-                // variable.Handle - grad * lr_t.Handle;
-            }
-        }
+        void apply_grad_to_update_var(ResourceVariable var, EagerTensor grad)
+        {
+            _resource_apply_dense(var, grad, apply_state);
+        }
+
+        protected virtual Operation _resource_apply_dense(ResourceVariable var,
+            EagerTensor grad,
+            Dictionary<DeviceDType, Dictionary<string, Tensor>> _apply_state)
+        {
+            throw new NotImplementedException("_resource_apply_dense");
+        }
+
+        void _distributed_apply(IEnumerable<(Tensor, ResourceVariable)> grads_and_vars,
+            string name,
+            Dictionary<DeviceDType, Dictionary<string, Tensor>> _apply_state)
+        {
+            tf_with(ops.name_scope(name, "", new { skip_on_eager = true }), delegate
+            {
+                foreach(var (grad, var) in grads_and_vars)
+                {
+                    tf_with(ops.name_scope("update"), delegate
+                    {
+                        apply_grad_to_update_var(var, grad as EagerTensor);
+                    });
+                }
+
+                _iterations.assign_add(ops.convert_to_tensor(1, dtype: _iterations.dtype));
+            });
+        }
+
+        Tensor[] _aggregate_gradients(IEnumerable<(Tensor, ResourceVariable)> grads_and_vars)
+        {
+            return grads_and_vars.Select(x => x.Item1).ToArray();
+        }

-        void _prepare(ResourceVariable[] var_list)
-        {
-            var keys = new HashSet<(string, TF_DataType)>();
-            foreach(var variable in var_list)
-            {
-                var lr_t = _prepare_local(variable.Device, variable.dtype.as_base_dtype());
-                var momentum = _get_hyper("momentum", variable.dtype);
-                array_ops.identity(momentum);
-            }
-        }
+        Dictionary<DeviceDType, Dictionary<string, Tensor>> _prepare(ResourceVariable[] var_list)
+        {
+            var _apply_state = new Dictionary<DeviceDType, Dictionary<string, Tensor>>();
+            var keys = var_list.Select(x => new DeviceDType
+            {
+                Device = x.Device,
+                DType = x.dtype.as_base_dtype()
+            }).Distinct(new DeviceDType()).ToArray();
+
+            foreach(var device_dtype in keys)
+            {
+                _apply_state[device_dtype] = new Dictionary<string, Tensor>();
+                _prepare_local(device_dtype, _apply_state);
+            }
+
+            return _apply_state;
+        }
+
+        protected virtual void _prepare_local(DeviceDType device_dtype,
+            Dictionary<DeviceDType, Dictionary<string, Tensor>> _apply_state)
+        {
+            if (_hyper.ContainsKey("learning_rate"))
+            {
+                var lr_t = array_ops.identity(_decayed_lr(device_dtype.DType));
+                _apply_state[device_dtype]["lr_t"] = lr_t;
+            }
+        }

-        ResourceVariable _prepare_local(string var_device, TF_DataType var_dtype)
+        Tensor _decayed_lr(TF_DataType var_dtype)
         {
             var lr_t = _get_hyper("learning_rate", var_dtype);
-            if(_initial_decay > 0)
+            if(_initial_decay > 0.0f)
             {
-
+                throw new NotImplementedException("");
             }

             return lr_t;
         }

-        ResourceVariable _get_hyper(string name, TF_DataType dtype = TF_DataType.DtInvalid)
+        protected ResourceVariable _get_hyper(string name, TF_DataType dtype = TF_DataType.DtInvalid)
         {
             var value = _hyper_variables[name];
             return math_ops.cast(value, dtype);
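
Reading guide for the hunk above, using only members that appear in it: apply_gradients drives the whole update, and subclasses plug in at _prepare_local and _resource_apply_dense.

    optimizer.apply_gradients(grads_and_vars);
    //  -> apply_state = _prepare(var_list)        // one cache entry per distinct (device, dtype)
    //       -> _prepare_local(...)                // fills e.g. apply_state[key]["lr_t"]
    //  -> _distributed_apply(grads_and_vars, ...) // name_scope("update") per (grad, var) pair
    //       -> apply_grad_to_update_var(var, grad)
    //            -> _resource_apply_dense(var, grad, apply_state)  // subclass hook
    //  -> _iterations.assign_add(1)               // step counter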


src/TensorFlowNET.Core/Keras/Optimizers/SGD.cs  +25 -0

@@ -1,6 +1,8 @@
 using System;
 using System.Collections.Generic;
+using System.Linq;
 using System.Text;
+using Tensorflow.Eager;

 namespace Tensorflow.Keras.Optimizers
 {
@@ -24,5 +26,28 @@

             nesterov = nesterov;
         }
+
+        protected override void _prepare_local(DeviceDType device_dtype,
+            Dictionary<DeviceDType, Dictionary<string, Tensor>> _apply_state)
+        {
+            base._prepare_local(device_dtype, _apply_state);
+
+            _apply_state[device_dtype]["momentum"] = array_ops.identity(
+                _get_hyper("momentum", device_dtype.DType));
+        }
+
+        protected override Operation _resource_apply_dense(ResourceVariable var, EagerTensor grad, Dictionary<DeviceDType, Dictionary<string, Tensor>> _apply_state)
+        {
+            if (_momentum)
+            {
+                throw new NotImplementedException("_resource_apply_dense");
+            }
+            var device_dtype = _apply_state.Keys.FirstOrDefault(x => x.Device == var.Device && x.DType == var.dtype.as_base_dtype());
+
+            return gen_training_ops.resource_apply_gradient_descent(var.Handle as EagerTensor,
+                _apply_state[device_dtype]["lr_t"] as EagerTensor,
+                grad,
+                use_locking: _use_locking);
+        }
     }
 }
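The non-momentum path above lowers to a single ResourceApplyGradientDescent call, i.e. var <- var - lr_t * grad applied in place on the variable handle. A quick numeric sanity check of that rule (hypothetical values, plain C#):

    float w = 1.0f, lr = 0.1f, grad = 0.5f;
    w -= lr * grad;              // 1.0f - 0.1f * 0.5f
    Console.WriteLine(w);        // prints 0.95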

src/TensorFlowNET.Core/Operations/gen_math_ops.cs  +13 -0

@@ -894,6 +894,19 @@ namespace Tensorflow

         public static Tensor floor_mod(Tensor x, Tensor y, string name = null)
         {
+            if (tf.context.executing_eagerly())
+            {
+                using var status = new Status();
+                EagerTensorHandle tensor = c_api.TFE_FastPathExecute(tf.context, tf.context.device_name,
+                    "FloorMod", name, new IntPtr[]
+                    {
+                        x as EagerTensor,
+                        y as EagerTensor
+                    }, 2, null, status);
+                status.Check(true);
+                return tensor;
+            }
+
             var _op = _op_def_lib._apply_op_helper("FloorMod", name, args: new { x, y });

             return _op.outputs[0];
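
This is the eager/graph dispatch pattern repeated throughout the commit: when executing eagerly, TFE_FastPathExecute runs the named op directly on an IntPtr[] of tensor handles; otherwise _apply_op_helper records a graph node. Note that FloorMod uses floor-division modulo (the result takes the sign of the divisor), unlike C#'s % operator. A plain C# illustration of the difference, not code from the commit:

    static int FloorMod(int x, int y) => ((x % y) + y) % y;
    Console.WriteLine(-7 % 3);           // -1 (C# remainder)
    Console.WriteLine(FloorMod(-7, 3));  //  2 (FloorMod semantics)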


src/TensorFlowNET.Core/Operations/gen_resource_variable_ops.cs  +26 -0

@@ -44,6 +44,32 @@
             return null;
         }

+        /// <summary>
+        /// Adds a value to the current value of a variable.
+        /// </summary>
+        /// <param name="resource"></param>
+        /// <param name="value"></param>
+        /// <param name="name"></param>
+        /// <returns></returns>
+        public static Operation assign_add_variable_op(Tensor resource, Tensor value, string name = null)
+        {
+            if (tf.context.executing_eagerly())
+            {
+                using var status = new Status();
+                var tensor = c_api.TFE_FastPathExecute(tf.context, tf.context.device_name,
+                    "AssignAddVariableOp", name,
+                    new IntPtr[]
+                    {
+                        resource as EagerTensor,
+                        value as EagerTensor
+                    }, 2, null, status);
+                status.Check(true);
+                return tensor;
+            }
+
+            return null;
+        }
+
         public static Operation assign_variable_op(Tensor resource, Tensor value, string name = null)
         {
             if (tf.context.executing_eagerly())


src/TensorFlowNET.Core/Tensors/Tensor.Value.cs  +3 -0

@@ -163,6 +163,9 @@ namespace Tensorflow
                 case TF_DataType.TF_INT32:
                     storage = new UnmanagedStorage(NPTypeCode.Int32);
                     break;
+                case TF_DataType.TF_INT64:
+                    storage = new UnmanagedStorage(NPTypeCode.Int64);
+                    break;
                 case TF_DataType.TF_FLOAT:
                     storage = new UnmanagedStorage(NPTypeCode.Float);
                     break;


src/TensorFlowNET.Core/Tensors/constant_op.cs  +3 -0

@@ -124,6 +124,9 @@ namespace Tensorflow
                 case TF_DataType.TF_FLOAT:
                     value = Convert.ToSingle(value);
                     break;
+                case TF_DataType.TF_INT64:
+                    value = Convert.ToInt64(value);
+                    break;
                 default:
                     break;
             }
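These two three-line additions support the optimizer's step counter: _iterations is presumably an int64 variable, so creating the increment constant and unpacking values both need a TF_INT64 arm. The triggering call from the OptimizerV2 hunk:

    _iterations.assign_add(ops.convert_to_tensor(1, dtype: _iterations.dtype));
    // convert_to_tensor routes the boxed int through the new Convert.ToInt64 arm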


src/TensorFlowNET.Core/Training/gen_training_ops.py.cs  +33 -0

@@ -14,6 +14,10 @@
    limitations under the License.
 ******************************************************************************/

+using System;
+using Tensorflow.Eager;
+using static Tensorflow.Binding;
+
 namespace Tensorflow
 {
     public class gen_training_ops
@@ -55,5 +59,34 @@ namespace Tensorflow

             return _op.outputs[0];
         }
+
+        public static Operation resource_apply_gradient_descent(EagerTensor var, EagerTensor alpha, EagerTensor delta, bool use_locking = false, string name = null)
+        {
+            if (tf.context.executing_eagerly())
+            {
+                using var status = new Status();
+                var tensor = c_api.TFE_FastPathExecute(tf.context, tf.context.device_name,
+                    "ResourceApplyGradientDescent", name, new IntPtr[]
+                    {
+                        var,
+                        alpha,
+                        delta
+                    }, 3,
+                    op => wrap_tfe_src.SetOpAttrs(op, "use_locking", use_locking),
+                    status);
+                status.Check(true);
+                return tensor;
+            }
+
+            var _op = _op_def_lib._apply_op_helper("ResourceApplyGradientDescent", name, new
+            {
+                var,
+                alpha,
+                delta,
+                use_locking
+            });
+
+            return _op.outputs[0];
+        }
     }
 }

src/TensorFlowNET.Core/Variables/ResourceVariable.Functions.cs  +12 -0

@@ -33,5 +33,17 @@ namespace Tensorflow
         {
             gen_resource_variable_ops.assign_sub_variable_op(handle, delta, name: name);
         }
+
+        /// <summary>
+        /// Adds a value to this variable.
+        /// </summary>
+        /// <param name="delta"></param>
+        /// <param name="use_locking"></param>
+        /// <param name="name"></param>
+        /// <param name="read_value"></param>
+        public void assign_add(Tensor delta, bool use_locking = false, string name = null, bool read_value = true)
+        {
+            gen_resource_variable_ops.assign_add_variable_op(handle, delta, name: name);
+        }
     }
 }
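With the new assign_add in place, incrementing a resource variable in eager mode becomes a one-liner. A sketch — the variable-construction call and the constant_op.constant helper are assumed, and note that use_locking and read_value are accepted but not yet forwarded to the op:

    ResourceVariable v = ...;                 // however the variable is created
    v.assign_add(constant_op.constant(2));    // v <- v + 2 via AssignAddVariableOp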

src/TensorFlowNET.Core/tensorflow.cs  +13 -5

@@ -57,21 +57,28 @@ namespace Tensorflow
                 for (int i = 0; i < num_grads; i++)
                     input_grads[i] = new EagerTensor(*((IntPtr*)gradients + i));

-                var add_n = gen_math_ops.add_n(input_grads);
-                return (add_n as EagerTensor).EagerTensorHandle;
+                var add_n = gen_math_ops.add_n(input_grads) as EagerTensor;
+                return add_n.EagerTensorHandle;
             });

             ops.RegisterFromAssembly();
-            c_api.TFE_RegisterGradientFunction((op_name, op_inputs, op_outputs, num_attrs, output_grads, skip_input_indices) =>
+            c_api.TFE_RegisterGradientFunction((op_name, op_inputs_handle, op_outputs, num_attrs, output_grads, skip_input_indices) =>
             {
+                var op_inputs = Marshal.PtrToStructure<BindingArray>(op_inputs_handle);
                 var input_tensors = new EagerTensor[op_inputs.length];
                 for (int i = 0; i < op_inputs.length; i++)
+                {
+                    // Console.WriteLine($"debug 4: {op_name} op_inputs=" + (*(IntPtr*)op_inputs_handle).ToString("x16").ToUpper() + $" op_inputs[{i}]=" + (*((IntPtr*)op_inputs.array + i)).ToString("x16").ToUpper());
+                    if((*((IntPtr*)op_inputs.array + i)).ToString("x16").ToUpper().StartsWith("FFFFF"))
+                    {
+
+                    }
                     input_tensors[i] = new EagerTensor(*((IntPtr*)op_inputs.array + i));
+                }

                 var output_tensors = new EagerTensor[op_outputs.length];
                 for (int i = 0; i < op_outputs.length; i++)
-                    output_tensors[i] = new EagerTensor(*((IntPtr*)op_outputs.array + i));
+                    if (op_outputs.array != IntPtr.Zero)
+                        output_tensors[i] = new EagerTensor(*((IntPtr*)op_outputs.array + i));

                 var output_grad_tensors = new EagerTensor[output_grads.length];
                 for (int i = 0; i < output_grads.length; i++)
@@ -85,6 +92,7 @@ namespace Tensorflow
                 {
                     NumInputs = input_tensors.Length,
                     Inputs = input_tensors,
+                    NumOutputs = output_tensors.Length,
                     Outputs = output_tensors,
                     SkipInputIndices = skip_input_indices_param
                 }, output_grad_tensors);
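
Background on the callback change, which pairs with the c_api.eager.cs hunk at the top: the native side now hands over a raw IntPtr, and the managed side rehydrates the BindingArray itself before walking the handle array. The general marshaling pattern as a standalone sketch (the field order of BindingArray is assumed from its usage here):

    using System;
    using System.Runtime.InteropServices;

    [StructLayout(LayoutKind.Sequential)]
    struct BindingArray
    {
        public IntPtr array;    // pointer to an unmanaged array of handles
        public int length;      // number of handles
    }

    static IntPtr[] ReadHandles(IntPtr raw)
    {
        var arr = Marshal.PtrToStructure<BindingArray>(raw);
        var handles = new IntPtr[arr.length];
        for (int i = 0; i < arr.length; i++)
            handles[i] = Marshal.ReadIntPtr(arr.array, i * IntPtr.Size);
        return handles;
    }

The guarded output loop (op_outputs.array != IntPtr.Zero) covers ops whose forward outputs were not recorded, and the empty StartsWith("FFFFF") block reads as a leftover breakpoint anchor for spotting invalid handle values.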

