diff --git a/src/TensorFlowNET.Core/Framework/common_shapes.py.cs b/src/TensorFlowNET.Core/Framework/common_shapes.py.cs index 3fa9f6bf..87b083d5 100644 --- a/src/TensorFlowNET.Core/Framework/common_shapes.py.cs +++ b/src/TensorFlowNET.Core/Framework/common_shapes.py.cs @@ -29,5 +29,10 @@ namespace Tensorflow.Framework { throw new NotFiniteNumberException(); } + + public static int? rank(Tensor tensor) + { + return tensor.rank; + } } } diff --git a/src/TensorFlowNET.Core/Gradients/math_grad.py.cs b/src/TensorFlowNET.Core/Gradients/math_grad.py.cs index 00caf73d..93f43802 100644 --- a/src/TensorFlowNET.Core/Gradients/math_grad.py.cs +++ b/src/TensorFlowNET.Core/Gradients/math_grad.py.cs @@ -57,6 +57,24 @@ namespace Tensorflow return (reshape1, reshape2); } + public static (Tensor, Tensor) _MeanGrad(Operation op, Tensor grad) + { + var sum_grad = _SumGrad(op, grad).Item1; + var input_shape = op.inputs[0]._shape_tuple(); + var output_shape = op.outputs[0]._shape_tuple(); + + var input_shape_tensor = array_ops.shape(op.inputs[0]); + var output_shape_tensor = array_ops.shape(op.outputs[0]); + var factor = _safe_shape_div(math_ops.reduce_prod(input_shape_tensor), math_ops.reduce_prod(output_shape_tensor)); + + return (math_ops.truediv(sum_grad, math_ops.cast(factor, sum_grad.dtype)), null); + } + + private static Tensor _safe_shape_div(Tensor x, Tensor y) + { + return math_ops.floordiv(x, gen_math_ops.maximum(y, 1)); + } + public static (Tensor, Tensor) _SubGrad(Operation op, Tensor grad) { var x = op.inputs[0]; @@ -81,12 +99,25 @@ namespace Tensorflow public static (Tensor, Tensor) _SumGrad(Operation op, Tensor grad) { - if (op.inputs[0].NDims > -1) - { + var input_0_shape = op.inputs[0]._shape_tuple(); + Tensor input_shape = null; + if (input_0_shape != null) + { + var axes = tensor_util.constant_value(op.inputs[1]); + if(!(axes is null)) + { + var rank = axes.shape.Rank; + grad = array_ops.reshape(grad, new int[] { 1 }); + if (!input_0_shape.Contains(-1)) + input_shape = constant_op.constant(input_0_shape); + else + input_shape = array_ops.shape(op.inputs[0]); + return (gen_array_ops.tile(grad, input_shape), null); + } } - var input_shape = array_ops.shape(op.inputs[0]); + input_shape = array_ops.shape(op.inputs[0]); ops.colocate_with(input_shape); var output_shape_kept_dims = math_ops.reduced_shape(input_shape, op.inputs[1]); var tile_scaling = _safe_shape_div(input_shape, output_shape_kept_dims); @@ -95,11 +126,6 @@ namespace Tensorflow return (gen_array_ops.tile(grad, tile_scaling), null); } - public static Tensor _safe_shape_div(Tensor x, Tensor y) - { - return math_ops.floordiv(x, gen_math_ops.maximum(y, 1)); - } - public static (Tensor, Tensor) _RealDivGrad(Operation op, Tensor grad) { var x = op.inputs[0]; diff --git a/src/TensorFlowNET.Core/Keras/Layers/Dense.cs b/src/TensorFlowNET.Core/Keras/Layers/Dense.cs index 323b6658..9a3b45ba 100644 --- a/src/TensorFlowNET.Core/Keras/Layers/Dense.cs +++ b/src/TensorFlowNET.Core/Keras/Layers/Dense.cs @@ -63,7 +63,7 @@ namespace Tensorflow.Keras.Layers var rank = inputs.rank; if(rank > 2) { - throw new NotImplementedException(""); + throw new NotImplementedException("call rank > 2"); } else { diff --git a/src/TensorFlowNET.Core/Operations/array_ops.py.cs b/src/TensorFlowNET.Core/Operations/array_ops.py.cs index 2a972c9c..a5ae2559 100644 --- a/src/TensorFlowNET.Core/Operations/array_ops.py.cs +++ b/src/TensorFlowNET.Core/Operations/array_ops.py.cs @@ -82,7 +82,7 @@ namespace Tensorflow public static Tensor ones_like(T tensor, TF_DataType dtype = TF_DataType.DtInvalid, string name = null, bool optimize = true) => ones_like_impl(tensor, dtype, name, optimize); - public static Tensor reshape(Tensor tensor, Tensor shape, string name = null) + public static Tensor reshape(T1 tensor, T2 shape, string name = null) { return gen_array_ops.reshape(tensor, shape, null); } diff --git a/src/TensorFlowNET.Core/Operations/gen_array_ops.cs b/src/TensorFlowNET.Core/Operations/gen_array_ops.cs index 393b10fa..ae6e1f09 100644 --- a/src/TensorFlowNET.Core/Operations/gen_array_ops.cs +++ b/src/TensorFlowNET.Core/Operations/gen_array_ops.cs @@ -116,7 +116,7 @@ namespace Tensorflow return (_op.outputs[0], _op.outputs[1]); } - public static Tensor reshape(Tensor tensor, Tensor shape, string name = null) + public static Tensor reshape(T1 tensor, T2 shape, string name = null) { var _op = _op_def_lib._apply_op_helper("Reshape", name, new { tensor, shape }); return _op.outputs[0]; diff --git a/src/TensorFlowNET.Core/Operations/gen_math_ops.cs b/src/TensorFlowNET.Core/Operations/gen_math_ops.cs index 1033f76c..26513051 100644 --- a/src/TensorFlowNET.Core/Operations/gen_math_ops.cs +++ b/src/TensorFlowNET.Core/Operations/gen_math_ops.cs @@ -20,16 +20,16 @@ namespace Tensorflow /// An optional `bool`. Defaults to `False`. If true, retain reduced dimensions with length 1. /// A name for the operation (optional). /// A `Tensor`. Has the same type as `input`. - public static Tensor mean(Tensor input, Tensor axis, bool keep_dims= false, string name = null) + public static Tensor mean(T1 input, T2 axis, bool keep_dims= false, string name = null) { var _op = _op_def_lib._apply_op_helper("Mean", name, args: new { input, reduction_indices = axis, keep_dims = keep_dims }); return _op.outputs[0]; } - public static Tensor mean(Tensor input, int[] axis, bool keep_dims = false, string name = null) + public static Tensor prod(T1 input, T2 axis, bool keep_dims = false, string name = null) { - var _op = _op_def_lib._apply_op_helper("Mean", name, args: new { input, reduction_indices = axis, keep_dims = keep_dims, name }); + var _op = _op_def_lib._apply_op_helper("Prod", name, args: new { input, reduction_indices = axis, keep_dims }); return _op.outputs[0]; } @@ -186,7 +186,7 @@ namespace Tensorflow return _op.outputs[0]; } - public static Tensor _max(Tensor input, int[] axis, bool keep_dims=false, string name = null) + public static Tensor _max(Tx input, Ty axis, bool keep_dims=false, string name = null) { var _op = _op_def_lib._apply_op_helper("Max", name, new { input, reduction_indices = axis, keep_dims }); diff --git a/src/TensorFlowNET.Core/Operations/math_ops.py.cs b/src/TensorFlowNET.Core/Operations/math_ops.py.cs index f2abad66..f73164d8 100644 --- a/src/TensorFlowNET.Core/Operations/math_ops.py.cs +++ b/src/TensorFlowNET.Core/Operations/math_ops.py.cs @@ -2,6 +2,7 @@ using System; using System.Collections.Generic; using System.Text; +using Tensorflow.Framework; namespace Tensorflow { @@ -39,9 +40,41 @@ namespace Tensorflow public static Tensor reduce_mean(Tensor input_tensor, int[] axis = null, bool keepdims = false, string name = null) { var r = _ReductionDims(input_tensor, axis); - var m = gen_math_ops.mean(input_tensor, (int[]) r, keepdims, name); - return _may_reduce_to_scalar(keepdims,axis, m); + if (axis == null) + { + var m = gen_math_ops.mean(input_tensor, r, keepdims, name); + return _may_reduce_to_scalar(keepdims, axis, m); + } + else + { + var m = gen_math_ops.mean(input_tensor, axis, keepdims, name); + return _may_reduce_to_scalar(keepdims, axis, m); + } + } + + /// + /// Computes the product of elements across dimensions of a tensor. + /// + /// + /// + /// + /// + /// + public static Tensor reduce_prod(Tensor input_tensor, int[] axis = null, bool keepdims = false, string name = null) + { + var r = _ReductionDims(input_tensor, axis); + if (axis == null) + { + var m = gen_math_ops.prod(input_tensor, r, keepdims, name); + return _may_reduce_to_scalar(keepdims, axis, m); + } + else + { + var m = gen_math_ops.prod(input_tensor, axis, keepdims, name); + return _may_reduce_to_scalar(keepdims, axis, m); + } } + /// /// Returns (x - y)(x - y) element-wise. /// @@ -134,7 +167,10 @@ namespace Tensorflow public static Tensor reduce_max(Tensor input_tensor, int[] axis = null, bool keepdims = false, string name = null) { - return _may_reduce_to_scalar(keepdims, axis, gen_math_ops._max(input_tensor, (int[])_ReductionDims(input_tensor, axis), keepdims, name)); + var r = _ReductionDims(input_tensor, axis); + var max = (axis != null) ? gen_math_ops._max(input_tensor, axis, keepdims, name) : + gen_math_ops._max(input_tensor, r, keepdims, name); + return _may_reduce_to_scalar(keepdims, axis, max); } /// @@ -197,18 +233,19 @@ namespace Tensorflow } } - private static object _ReductionDims(Tensor x, int[] axis) + private static Tensor _ReductionDims(Tensor x, int[] axis) { if (axis != null) { - return axis; + // should return axis. or check before. + return null; } else { - var rank = array_ops.rank(x); + var rank = common_shapes.rank(x); if (rank != null) { - return constant_op.constant(np.arange(rank), TF_DataType.TF_INT32); + return constant_op.constant(np.arange(rank.Value), TF_DataType.TF_INT32); } return range(0, rank, 1); } @@ -303,5 +340,20 @@ namespace Tensorflow return x; }); } + + public static Tensor truediv(Tensor x, Tensor y, string name = null) + => _truediv_python3(x, y, name); + + public static Tensor _truediv_python3(Tensor x, Tensor y, string name = null) + { + return with(ops.name_scope(name, "truediv", new { x, y }), scope => + { + name = scope; + var x_dtype = x.dtype.as_base_dtype(); + var y_dtype = y.dtype.as_base_dtype(); + + return gen_math_ops.real_div(x, y, name: name); + }); + } } } diff --git a/src/TensorFlowNET.Core/Tensors/Tensor.cs b/src/TensorFlowNET.Core/Tensors/Tensor.cs index 2b3db534..af432f15 100644 --- a/src/TensorFlowNET.Core/Tensors/Tensor.cs +++ b/src/TensorFlowNET.Core/Tensors/Tensor.cs @@ -74,7 +74,8 @@ namespace Tensorflow public int[] _shape_tuple() { - return null; + if (shape == null) return null; + return shape.Select(x => (int)x).ToArray(); } public TensorShape getShape() diff --git a/src/TensorFlowNET.Core/Tensors/tensor_util.cs b/src/TensorFlowNET.Core/Tensors/tensor_util.cs index 567f5c9f..a3eabfaa 100644 --- a/src/TensorFlowNET.Core/Tensors/tensor_util.cs +++ b/src/TensorFlowNET.Core/Tensors/tensor_util.cs @@ -51,6 +51,15 @@ namespace Tensorflow if (tensor.TensorContent.Length > 0) return np.frombuffer(tensor.TensorContent.ToByteArray(), tensor_dtype) .reshape(shape); + else if (tensor.Dtype == DataType.DtHalf || tensor.Dtype == DataType.DtBfloat16) + ; + else if (tensor.Dtype == DataType.DtFloat) + ; + else if (new DataType[] { DataType.DtInt32, DataType.DtUint8 }.Contains(tensor.Dtype)) + if (tensor.IntVal.Count == 1) + return np.repeat(np.array(tensor.IntVal[0]), Convert.ToInt32(num_elements)) + .reshape(shape); + throw new NotImplementedException("MakeNdarray"); } diff --git a/src/TensorFlowNET.Core/Train/AdamOptimizer.cs b/src/TensorFlowNET.Core/Train/AdamOptimizer.cs new file mode 100644 index 00000000..b6063234 --- /dev/null +++ b/src/TensorFlowNET.Core/Train/AdamOptimizer.cs @@ -0,0 +1,25 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.Train +{ + /// + /// Optimizer that implements the Adam algorithm. + /// http://arxiv.org/abs/1412.6980 + /// + public class AdamOptimizer : Optimizer + { + private float _beta1; + private float _beta2; + private float _epsilon; + + public AdamOptimizer(float learning_rate, float beta1 = 0.9f, float beta2 = 0.999f, float epsilon = 1e-8f, bool use_locking = false, string name = "Adam") + : base(learning_rate, use_locking, name) + { + _beta1 = beta1; + _beta2 = beta2; + _epsilon = epsilon; + } + } +} diff --git a/src/TensorFlowNET.Core/Train/GradientDescentOptimizer.cs b/src/TensorFlowNET.Core/Train/GradientDescentOptimizer.cs index 8473d819..ecdb22f6 100644 --- a/src/TensorFlowNET.Core/Train/GradientDescentOptimizer.cs +++ b/src/TensorFlowNET.Core/Train/GradientDescentOptimizer.cs @@ -2,7 +2,7 @@ using System.Collections.Generic; using System.Text; -namespace Tensorflow +namespace Tensorflow.Train { public class GradientDescentOptimizer : Optimizer { diff --git a/src/TensorFlowNET.Core/Train/Optimizer.cs b/src/TensorFlowNET.Core/Train/Optimizer.cs index fc76f7b5..9a88601b 100644 --- a/src/TensorFlowNET.Core/Train/Optimizer.cs +++ b/src/TensorFlowNET.Core/Train/Optimizer.cs @@ -34,6 +34,7 @@ namespace Tensorflow Name = name; _use_locking = use_locking; + LearningRate = learning_rate; // Dictionary of slots. _slots = new Dictionary(); _non_slot_dict = new Dictionary(); @@ -49,6 +50,7 @@ namespace Tensorflow /// was not `None`, that operation also increments `global_step`. /// public Operation minimize(Tensor loss, + RefVariable global_step = null, GateGradientType gate_gradients = GateGradientType.GATE_OP, bool colocate_gradients_with_ops = false) { diff --git a/src/TensorFlowNET.Core/Train/tf.optimizers.cs b/src/TensorFlowNET.Core/Train/tf.optimizers.cs index 5c41dacd..a7a3a39b 100644 --- a/src/TensorFlowNET.Core/Train/tf.optimizers.cs +++ b/src/TensorFlowNET.Core/Train/tf.optimizers.cs @@ -2,6 +2,7 @@ using System.Collections.Generic; using System.IO; using System.Text; +using Tensorflow.Train; namespace Tensorflow { @@ -11,6 +12,8 @@ namespace Tensorflow { public static Optimizer GradientDescentOptimizer(float learning_rate) => new GradientDescentOptimizer(learning_rate); + public static Optimizer AdamOptimizer(float learning_rate) => new AdamOptimizer(learning_rate); + public static Saver Saver() => new Saver(); public static string write_graph(Graph graph, string logdir, string name, bool as_text = true) => graph_io.write_graph(graph, logdir, name, as_text); diff --git a/src/TensorFlowNET.Core/ops.py.cs b/src/TensorFlowNET.Core/ops.py.cs index aeef11f3..731c7f10 100644 --- a/src/TensorFlowNET.Core/ops.py.cs +++ b/src/TensorFlowNET.Core/ops.py.cs @@ -349,6 +349,7 @@ namespace Tensorflow { if (op.inputs == null) return null; + // map tensorflow\python\ops\math_grad.py return (oper, out_grads) => { // Console.WriteLine($"get_gradient_function: {oper.type} '{oper.name}'"); @@ -364,6 +365,9 @@ namespace Tensorflow case "Mul": var mul = math_grad._MulGrad(oper, out_grads); return new Tensor[] { mul.Item1, mul.Item2 }; + case "Mean": + var mean = math_grad._MeanGrad(oper, out_grads); + return new Tensor[] { mean.Item1, mean.Item2 }; case "Sum": var sum = math_grad._SumGrad(oper, out_grads); return new Tensor[] { sum.Item1, sum.Item2 }; diff --git a/test/TensorFlowNET.Examples/TextClassification/cnn_models/VdCnn.cs b/test/TensorFlowNET.Examples/TextClassification/cnn_models/VdCnn.cs index 2f24d87e..d01b458d 100644 --- a/test/TensorFlowNET.Examples/TextClassification/cnn_models/VdCnn.cs +++ b/test/TensorFlowNET.Examples/TextClassification/cnn_models/VdCnn.cs @@ -119,10 +119,11 @@ namespace TensorFlowNET.Examples.TextClassification var y_one_hot = tf.one_hot(y, num_class); loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits: logits, labels: y_one_hot)); - var update_ops = tf.get_collection(ops.GraphKeys.UPDATE_OPS) as List; - with(tf.control_dependencies(update_ops.ToArray()), delegate + var update_ops = tf.get_collection(ops.GraphKeys.UPDATE_OPS) as List; + with(tf.control_dependencies(update_ops.Select(x => (Operation)x).ToArray()), delegate { - + var adam = tf.train.AdamOptimizer(learning_rate); + adam.minimize(loss, global_step: global_step); }); }); }