
AdamOptimizer, reduce_prod

tags/v0.8.0
Oceania2018 6 years ago
commit adae3aa6ff
15 changed files with 155 additions and 27 deletions
  1. +5  -0   src/TensorFlowNET.Core/Framework/common_shapes.py.cs
  2. +34 -8   src/TensorFlowNET.Core/Gradients/math_grad.py.cs
  3. +1  -1   src/TensorFlowNET.Core/Keras/Layers/Dense.cs
  4. +1  -1   src/TensorFlowNET.Core/Operations/array_ops.py.cs
  5. +1  -1   src/TensorFlowNET.Core/Operations/gen_array_ops.cs
  6. +4  -4   src/TensorFlowNET.Core/Operations/gen_math_ops.cs
  7. +59 -7   src/TensorFlowNET.Core/Operations/math_ops.py.cs
  8. +2  -1   src/TensorFlowNET.Core/Tensors/Tensor.cs
  9. +9  -0   src/TensorFlowNET.Core/Tensors/tensor_util.cs
  10. +25 -0  src/TensorFlowNET.Core/Train/AdamOptimizer.cs
  11. +1  -1  src/TensorFlowNET.Core/Train/GradientDescentOptimizer.cs
  12. +2  -0  src/TensorFlowNET.Core/Train/Optimizer.cs
  13. +3  -0  src/TensorFlowNET.Core/Train/tf.optimizers.cs
  14. +4  -0  src/TensorFlowNET.Core/ops.py.cs
  15. +4  -3  test/TensorFlowNET.Examples/TextClassification/cnn_models/VdCnn.cs

+5 -0  src/TensorFlowNET.Core/Framework/common_shapes.py.cs

@@ -29,5 +29,10 @@ namespace Tensorflow.Framework
{
throw new NotFiniteNumberException();
}

public static int? rank(Tensor tensor)
{
return tensor.rank;
}
}
}

+34 -8  src/TensorFlowNET.Core/Gradients/math_grad.py.cs

@@ -57,6 +57,24 @@ namespace Tensorflow
return (reshape1, reshape2);
}

public static (Tensor, Tensor) _MeanGrad(Operation op, Tensor grad)
{
var sum_grad = _SumGrad(op, grad).Item1;
var input_shape = op.inputs[0]._shape_tuple();
var output_shape = op.outputs[0]._shape_tuple();

var input_shape_tensor = array_ops.shape(op.inputs[0]);
var output_shape_tensor = array_ops.shape(op.outputs[0]);
var factor = _safe_shape_div(math_ops.reduce_prod(input_shape_tensor), math_ops.reduce_prod(output_shape_tensor));

return (math_ops.truediv(sum_grad, math_ops.cast(factor, sum_grad.dtype)), null);
}

private static Tensor _safe_shape_div(Tensor x, Tensor y)
{
return math_ops.floordiv(x, gen_math_ops.maximum(y, 1));
}

public static (Tensor, Tensor) _SubGrad(Operation op, Tensor grad)
{
var x = op.inputs[0];
@@ -81,12 +99,25 @@ namespace Tensorflow

public static (Tensor, Tensor) _SumGrad(Operation op, Tensor grad)
{
- if (op.inputs[0].NDims > -1)
- {
var input_0_shape = op.inputs[0]._shape_tuple();
Tensor input_shape = null;

if (input_0_shape != null)
{
var axes = tensor_util.constant_value(op.inputs[1]);
if(!(axes is null))
{
var rank = axes.shape.Rank;
grad = array_ops.reshape(grad, new int[] { 1 });
if (!input_0_shape.Contains(-1))
input_shape = constant_op.constant(input_0_shape);
else
input_shape = array_ops.shape(op.inputs[0]);
return (gen_array_ops.tile(grad, input_shape), null);
}
}

- var input_shape = array_ops.shape(op.inputs[0]);
+ input_shape = array_ops.shape(op.inputs[0]);
ops.colocate_with(input_shape);
var output_shape_kept_dims = math_ops.reduced_shape(input_shape, op.inputs[1]);
var tile_scaling = _safe_shape_div(input_shape, output_shape_kept_dims);
@@ -95,11 +126,6 @@ namespace Tensorflow
return (gen_array_ops.tile(grad, tile_scaling), null);
}

- public static Tensor _safe_shape_div(Tensor x, Tensor y)
- {
- return math_ops.floordiv(x, gen_math_ops.maximum(y, 1));
- }

public static (Tensor, Tensor) _RealDivGrad(Operation op, Tensor grad)
{
var x = op.inputs[0];


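For orientation (not part of the commit): _MeanGrad scales the summed gradient by factor = reduce_prod(input_shape) / reduce_prod(output_shape), i.e. the number of elements averaged over. A minimal plain-C# sketch of that arithmetic, with illustrative shapes:

using System;
using System.Linq;

class MeanGradFactorSketch
{
    static void Main()
    {
        // Reduce a [2, 3] input over axis 1 -> output shape [2].
        int[] inputShape = { 2, 3 };
        int[] outputShape = { 2 };

        // Mirrors _safe_shape_div(reduce_prod(input_shape), reduce_prod(output_shape)).
        int factor = inputShape.Aggregate(1, (a, b) => a * b)
                   / Math.Max(outputShape.Aggregate(1, (a, b) => a * b), 1);

        Console.WriteLine(factor); // 3: each input element receives upstream grad / 3
    }
}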
+1 -1  src/TensorFlowNET.Core/Keras/Layers/Dense.cs

@@ -63,7 +63,7 @@ namespace Tensorflow.Keras.Layers
var rank = inputs.rank;
if(rank > 2)
{
- throw new NotImplementedException("");
+ throw new NotImplementedException("call rank > 2");
}
else
{


+1 -1  src/TensorFlowNET.Core/Operations/array_ops.py.cs

@@ -82,7 +82,7 @@ namespace Tensorflow
public static Tensor ones_like<T>(T tensor, TF_DataType dtype = TF_DataType.DtInvalid, string name = null, bool optimize = true)
=> ones_like_impl(tensor, dtype, name, optimize);
- public static Tensor reshape(Tensor tensor, Tensor shape, string name = null)
+ public static Tensor reshape<T1, T2>(T1 tensor, T2 shape, string name = null)
{
return gen_array_ops.reshape(tensor, shape, null);
}


+1 -1  src/TensorFlowNET.Core/Operations/gen_array_ops.cs

@@ -116,7 +116,7 @@ namespace Tensorflow
return (_op.outputs[0], _op.outputs[1]);
}

- public static Tensor reshape(Tensor tensor, Tensor shape, string name = null)
+ public static Tensor reshape<T1, T2>(T1 tensor, T2 shape, string name = null)
{
var _op = _op_def_lib._apply_op_helper("Reshape", name, new { tensor, shape });
return _op.outputs[0];


+4 -4  src/TensorFlowNET.Core/Operations/gen_math_ops.cs

@@ -20,16 +20,16 @@ namespace Tensorflow
/// <param name="keep_dims"> An optional `bool`. Defaults to `False`. If true, retain reduced dimensions with length 1.</param>
/// <param name="name"> A name for the operation (optional).</param>
/// <returns> A `Tensor`. Has the same type as `input`.</returns>
- public static Tensor mean(Tensor input, Tensor axis, bool keep_dims= false, string name = null)
+ public static Tensor mean<T1, T2>(T1 input, T2 axis, bool keep_dims= false, string name = null)
{
var _op = _op_def_lib._apply_op_helper("Mean", name, args: new { input, reduction_indices = axis, keep_dims = keep_dims });
return _op.outputs[0];
}
- public static Tensor mean(Tensor input, int[] axis, bool keep_dims = false, string name = null)
+ public static Tensor prod<T1, T2>(T1 input, T2 axis, bool keep_dims = false, string name = null)
{
- var _op = _op_def_lib._apply_op_helper("Mean", name, args: new { input, reduction_indices = axis, keep_dims = keep_dims, name });
+ var _op = _op_def_lib._apply_op_helper("Prod", name, args: new { input, reduction_indices = axis, keep_dims });
return _op.outputs[0];
}
@@ -186,7 +186,7 @@ namespace Tensorflow
return _op.outputs[0];
}
- public static Tensor _max(Tensor input, int[] axis, bool keep_dims=false, string name = null)
+ public static Tensor _max<Tx, Ty>(Tx input, Ty axis, bool keep_dims=false, string name = null)
{
var _op = _op_def_lib._apply_op_helper("Max", name, new { input, reduction_indices = axis, keep_dims });


+59 -7  src/TensorFlowNET.Core/Operations/math_ops.py.cs

@@ -2,6 +2,7 @@
using System;
using System.Collections.Generic;
using System.Text;
using Tensorflow.Framework;

namespace Tensorflow
{
@@ -39,9 +40,41 @@ namespace Tensorflow
public static Tensor reduce_mean(Tensor input_tensor, int[] axis = null, bool keepdims = false, string name = null)
{
var r = _ReductionDims(input_tensor, axis);
- var m = gen_math_ops.mean(input_tensor, (int[]) r, keepdims, name);
- return _may_reduce_to_scalar(keepdims,axis, m);
+ if (axis == null)
+ {
+ var m = gen_math_ops.mean(input_tensor, r, keepdims, name);
+ return _may_reduce_to_scalar(keepdims, axis, m);
+ }
+ else
+ {
+ var m = gen_math_ops.mean(input_tensor, axis, keepdims, name);
+ return _may_reduce_to_scalar(keepdims, axis, m);
+ }
}

/// <summary>
/// Computes the product of elements across dimensions of a tensor.
/// </summary>
/// <param name="input_tensor"></param>
/// <param name="axis"></param>
/// <param name="keepdims"></param>
/// <param name="name"></param>
/// <returns></returns>
public static Tensor reduce_prod(Tensor input_tensor, int[] axis = null, bool keepdims = false, string name = null)
{
var r = _ReductionDims(input_tensor, axis);
if (axis == null)
{
var m = gen_math_ops.prod(input_tensor, r, keepdims, name);
return _may_reduce_to_scalar(keepdims, axis, m);
}
else
{
var m = gen_math_ops.prod(input_tensor, axis, keepdims, name);
return _may_reduce_to_scalar(keepdims, axis, m);
}
}
/// <summary>
/// Returns (x - y)(x - y) element-wise.
/// </summary>
@@ -134,7 +167,10 @@ namespace Tensorflow

public static Tensor reduce_max(Tensor input_tensor, int[] axis = null, bool keepdims = false, string name = null)
{
- return _may_reduce_to_scalar(keepdims, axis, gen_math_ops._max(input_tensor, (int[])_ReductionDims(input_tensor, axis), keepdims, name));
+ var r = _ReductionDims(input_tensor, axis);
+ var max = (axis != null) ? gen_math_ops._max(input_tensor, axis, keepdims, name) :
+ gen_math_ops._max(input_tensor, r, keepdims, name);
+ return _may_reduce_to_scalar(keepdims, axis, max);
}

/// <summary>
@@ -197,18 +233,19 @@ namespace Tensorflow
}
}
- private static object _ReductionDims(Tensor x, int[] axis)
+ private static Tensor _ReductionDims(Tensor x, int[] axis)
{
if (axis != null)
{
- return axis;
+ // should return axis. or check before.
+ return null;
}
else
{
- var rank = array_ops.rank(x);
+ var rank = common_shapes.rank(x);
if (rank != null)
{
- return constant_op.constant(np.arange(rank), TF_DataType.TF_INT32);
+ return constant_op.constant(np.arange(rank.Value), TF_DataType.TF_INT32);
}
return range(0, rank, 1);
}
@@ -303,5 +340,20 @@ namespace Tensorflow
return x;
});
}

public static Tensor truediv(Tensor x, Tensor y, string name = null)
=> _truediv_python3(x, y, name);

public static Tensor _truediv_python3(Tensor x, Tensor y, string name = null)
{
return with(ops.name_scope(name, "truediv", new { x, y }), scope =>
{
name = scope;
var x_dtype = x.dtype.as_base_dtype();
var y_dtype = y.dtype.as_base_dtype();

return gen_math_ops.real_div(x, y, name: name);
});
}
}
}

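As a reminder of the semantics being wired up here (illustrative values, plain C#, no TF.NET calls): reduce_prod multiplies elements over the given axes, or over all elements when axis is null, and _ReductionDims supplies [0..rank) for the null case.

using System;
using System.Linq;

class ReduceProdSketch
{
    static void Main()
    {
        int[,] x = { { 1, 2, 3 }, { 4, 5, 6 } };

        // axis == null: reduce over every dimension -> scalar 720.
        int all = x.Cast<int>().Aggregate(1, (a, b) => a * b);

        // axis == { 0 }: product down each column -> { 4, 10, 18 }.
        var cols = Enumerable.Range(0, x.GetLength(1))
                             .Select(j => x[0, j] * x[1, j])
                             .ToArray();

        Console.WriteLine(all);                    // 720
        Console.WriteLine(string.Join(",", cols)); // 4,10,18
    }
}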
+2 -1  src/TensorFlowNET.Core/Tensors/Tensor.cs

@@ -74,7 +74,8 @@ namespace Tensorflow

public int[] _shape_tuple()
{
- return null;
+ if (shape == null) return null;
+ return shape.Select(x => (int)x).ToArray();
}

public TensorShape getShape()


+9 -0  src/TensorFlowNET.Core/Tensors/tensor_util.cs

@@ -51,6 +51,15 @@ namespace Tensorflow
if (tensor.TensorContent.Length > 0)
return np.frombuffer(tensor.TensorContent.ToByteArray(), tensor_dtype)
.reshape(shape);
else if (tensor.Dtype == DataType.DtHalf || tensor.Dtype == DataType.DtBfloat16)
;
else if (tensor.Dtype == DataType.DtFloat)
;
else if (new DataType[] { DataType.DtInt32, DataType.DtUint8 }.Contains(tensor.Dtype))
if (tensor.IntVal.Count == 1)
return np.repeat(np.array(tensor.IntVal[0]), Convert.ToInt32(num_elements))
.reshape(shape);

throw new NotImplementedException("MakeNdarray");
}



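The new int32/uint8 branch appears to handle the proto case where a tensor of num_elements entries is stored as a single repeated value, broadcasting IntVal[0] over the full shape. A rough plain-C# equivalent with hypothetical values:

using System;
using System.Linq;

class ScalarBroadcastSketch
{
    static void Main()
    {
        // A TensorProto for shape [2, 3] may carry IntVal = { 7 } only.
        int scalar = 7;
        int numElements = 2 * 3;

        // Mirrors np.repeat(np.array(IntVal[0]), num_elements).reshape(shape).
        int[] flat = Enumerable.Repeat(scalar, numElements).ToArray();

        Console.WriteLine(string.Join(",", flat)); // 7,7,7,7,7,7
    }
}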
+25 -0  src/TensorFlowNET.Core/Train/AdamOptimizer.cs

@@ -0,0 +1,25 @@
using System;
using System.Collections.Generic;
using System.Text;

namespace Tensorflow.Train
{
/// <summary>
/// Optimizer that implements the Adam algorithm.
/// http://arxiv.org/abs/1412.6980
/// </summary>
public class AdamOptimizer : Optimizer
{
private float _beta1;
private float _beta2;
private float _epsilon;

public AdamOptimizer(float learning_rate, float beta1 = 0.9f, float beta2 = 0.999f, float epsilon = 1e-8f, bool use_locking = false, string name = "Adam")
: base(learning_rate, use_locking, name)
{
_beta1 = beta1;
_beta2 = beta2;
_epsilon = epsilon;
}
}
}

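So far the new class only wires up the hyperparameters; no apply logic is added in this commit. For reference, one Adam step on a single scalar parameter (per Kingma & Ba, the paper linked above) looks roughly like this plain-C# sketch, not the optimizer's own code:

using System;

class AdamStepSketch
{
    static void Main()
    {
        float lr = 0.001f, beta1 = 0.9f, beta2 = 0.999f, epsilon = 1e-8f;
        float theta = 1.0f, m = 0f, v = 0f;
        float grad = 0.5f;     // gradient of the loss w.r.t. theta
        int t = 1;             // time step

        m = beta1 * m + (1 - beta1) * grad;                // 1st-moment estimate
        v = beta2 * v + (1 - beta2) * grad * grad;         // 2nd-moment estimate
        float mHat = m / (1 - (float)Math.Pow(beta1, t));  // bias correction
        float vHat = v / (1 - (float)Math.Pow(beta2, t));
        theta -= lr * mHat / ((float)Math.Sqrt(vHat) + epsilon);

        Console.WriteLine(theta); // ~0.999
    }
}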
+1 -1  src/TensorFlowNET.Core/Train/GradientDescentOptimizer.cs

@@ -2,7 +2,7 @@
using System.Collections.Generic;
using System.Text;

- namespace Tensorflow
+ namespace Tensorflow.Train
{
public class GradientDescentOptimizer : Optimizer
{


+2 -0  src/TensorFlowNET.Core/Train/Optimizer.cs

@@ -34,6 +34,7 @@ namespace Tensorflow

Name = name;
_use_locking = use_locking;
LearningRate = learning_rate;
// Dictionary of slots.
_slots = new Dictionary<string, object>();
_non_slot_dict = new Dictionary<string, object>();
@@ -49,6 +50,7 @@ namespace Tensorflow
/// was not `None`, that operation also increments `global_step`.
/// </returns>
public Operation minimize(Tensor loss,
RefVariable global_step = null,
GateGradientType gate_gradients = GateGradientType.GATE_OP,
bool colocate_gradients_with_ops = false)
{


+3 -0  src/TensorFlowNET.Core/Train/tf.optimizers.cs

@@ -2,6 +2,7 @@
using System.Collections.Generic;
using System.IO;
using System.Text;
using Tensorflow.Train;

namespace Tensorflow
{
@@ -11,6 +12,8 @@ namespace Tensorflow
{
public static Optimizer GradientDescentOptimizer(float learning_rate) => new GradientDescentOptimizer(learning_rate);

public static Optimizer AdamOptimizer(float learning_rate) => new AdamOptimizer(learning_rate);

public static Saver Saver() => new Saver();

public static string write_graph(Graph graph, string logdir, string name, bool as_text = true) => graph_io.write_graph(graph, logdir, name, as_text);


+4 -0  src/TensorFlowNET.Core/ops.py.cs

@@ -349,6 +349,7 @@ namespace Tensorflow
{
if (op.inputs == null) return null;

// map tensorflow\python\ops\math_grad.py
return (oper, out_grads) =>
{
// Console.WriteLine($"get_gradient_function: {oper.type} '{oper.name}'");
@@ -364,6 +365,9 @@ namespace Tensorflow
case "Mul":
var mul = math_grad._MulGrad(oper, out_grads);
return new Tensor[] { mul.Item1, mul.Item2 };
case "Mean":
var mean = math_grad._MeanGrad(oper, out_grads);
return new Tensor[] { mean.Item1, mean.Item2 };
case "Sum":
var sum = math_grad._SumGrad(oper, out_grads);
return new Tensor[] { sum.Item1, sum.Item2 };


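The switch above is a hand-rolled version of TensorFlow's gradient registry: the op type string selects the matching *Grad function, and each returns one gradient per op input. A stripped-down sketch of that dispatch pattern, with hypothetical types rather than the project's own:

using System;
using System.Collections.Generic;

class GradientDispatchSketch
{
    // Each entry maps an op type to a function producing per-input gradients.
    static readonly Dictionary<string, Func<double[], double[]>> Registry =
        new Dictionary<string, Func<double[], double[]>>
        {
            // For Mean over 3 elements, every input slot gets upstream / 3.
            ["Mean"] = up => new[] { up[0] / 3.0, up[0] / 3.0, up[0] / 3.0 },
            // For Sum, the upstream gradient is simply tiled to each input slot.
            ["Sum"]  = up => new[] { up[0], up[0], up[0] },
        };

    static void Main()
    {
        var grads = Registry["Mean"](new[] { 1.0 });
        Console.WriteLine(string.Join(",", grads)); // three values of 1/3
    }
}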
+4 -3  test/TensorFlowNET.Examples/TextClassification/cnn_models/VdCnn.cs

@@ -119,10 +119,11 @@ namespace TensorFlowNET.Examples.TextClassification
var y_one_hot = tf.one_hot(y, num_class);
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits: logits, labels: y_one_hot));

- var update_ops = tf.get_collection(ops.GraphKeys.UPDATE_OPS) as List<Operation>;
- with(tf.control_dependencies(update_ops.ToArray()), delegate
+ var update_ops = tf.get_collection(ops.GraphKeys.UPDATE_OPS) as List<object>;
+ with(tf.control_dependencies(update_ops.Select(x => (Operation)x).ToArray()), delegate
{

var adam = tf.train.AdamOptimizer(learning_rate);
adam.minimize(loss, global_step: global_step);
});
});
}


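The test exercises the new public surface from tf.optimizers.cs together with Optimizer.minimize. A minimal call-site fragment, not a complete program: loss, global_step and learning_rate are assumed to be defined as in VdCnn.cs.

// Factory added in this commit; returns the new Tensorflow.Train.AdamOptimizer.
var adam = tf.train.AdamOptimizer(learning_rate);

// minimize is inherited from the Optimizer base class (see the Optimizer.cs hunk above);
// global_step defaults to null in its signature, so it may be omitted.
var train_op = adam.minimize(loss, global_step: global_step);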