diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Preprocessing/CategoryEncodingArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Preprocessing/CategoryEncodingArgs.cs
new file mode 100644
index 00000000..c282afd8
--- /dev/null
+++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Preprocessing/CategoryEncodingArgs.cs
@@ -0,0 +1,16 @@
+using Newtonsoft.Json;
+using Tensorflow.NumPy;
+
+namespace Tensorflow.Keras.ArgsDefinition
+{
+ public class CategoryEncodingArgs : AutoSerializeLayerArgs // Options consumed by the CategoryEncoding layer.
+ {
+ [JsonProperty("num_tokens")]
+ public int NumTokens { get; set; } // Total number of tokens the layer should support.
+ [JsonProperty("output_mode")]
+ public string OutputMode { get; set; } // Output specification, e.g. "one_hot", "multi_hot" or "count".
+ [JsonProperty("sparse")]
+ public bool Sparse { get; set; } // NOTE(review): forwarded to the layer but not consulted when encoding — confirm intent.
+ public NDArray CountWeights { get; set; } // Optional bincount weights; has no JsonProperty attribute, unlike the other options.
+ }
+}
diff --git a/src/TensorFlowNET.Core/Keras/Layers/ILayersApi.cs b/src/TensorFlowNET.Core/Keras/Layers/ILayersApi.cs
index f1860da1..9fcd0d70 100644
--- a/src/TensorFlowNET.Core/Keras/Layers/ILayersApi.cs
+++ b/src/TensorFlowNET.Core/Keras/Layers/ILayersApi.cs
@@ -1,4 +1,5 @@
using System;
+using Tensorflow.NumPy;
using static Google.Protobuf.Reflection.FieldDescriptorProto.Types;
namespace Tensorflow.Keras.Layers
@@ -28,6 +29,17 @@ namespace Tensorflow.Keras.Layers
bool renorm = false,
float renorm_momentum = 0.99f);
+ /// <summary>
+ /// A preprocessing layer which encodes integer features.
+ /// </summary>
+ /// <param name="num_tokens">The total number of tokens the layer should support.</param>
+ /// <param name="output_mode">Specification for the output of the layer.</param>
+ /// <returns>The configured category-encoding layer.</returns>
+ public ILayer CategoryEncoding(int num_tokens,
+ string output_mode = "one_hot",
+ bool sparse = false,
+ NDArray count_weights = null);
+
public ILayer Conv1D(int filters,
Shape kernel_size,
int strides = 1,
diff --git a/src/TensorFlowNET.Core/Operations/math_ops.cs b/src/TensorFlowNET.Core/Operations/math_ops.cs
index 861dba18..9542f643 100644
--- a/src/TensorFlowNET.Core/Operations/math_ops.cs
+++ b/src/TensorFlowNET.Core/Operations/math_ops.cs
@@ -839,10 +839,24 @@ namespace Tensorflow
output_size = math_ops.maximum(minlength, output_size);
if (maxlength != null)
output_size = math_ops.minimum(maxlength, output_size);
- var weights = constant_op.constant(new long[0], dtype: dtype);
+ weights = weights ?? constant_op.constant(new int[0], dtype: dtype);
return tf.Context.ExecuteOp("Bincount", name, new ExecuteOpArgs(arr, output_size, weights));
}
+ else
+ {
+ var array_is_nonempty = math_ops.reduce_prod(array_ops.shape(arr)) > 0;
+ var output_size = math_ops.cast(array_is_nonempty, arr.dtype) * (math_ops.reduce_max(arr) + 1);
+ if (minlength != null)
+ output_size = math_ops.maximum(minlength, output_size);
+ if (maxlength != null)
+ output_size = math_ops.minimum(maxlength, output_size);
+ weights = weights ?? array_ops.constant(new int[0], dtype: dtype);
+ return tf.Context.ExecuteOp("DenseBincount", name,
+ new ExecuteOpArgs(arr, output_size, weights, binary_output)
+ .SetAttributes(new { binary_output }));
+ }
+
throw new NotImplementedException("");
});
diff --git a/src/TensorFlowNET.Core/Tensors/constant_op.cs b/src/TensorFlowNET.Core/Tensors/constant_op.cs
index 2c903517..1a825e0c 100644
--- a/src/TensorFlowNET.Core/Tensors/constant_op.cs
+++ b/src/TensorFlowNET.Core/Tensors/constant_op.cs
@@ -153,6 +153,10 @@ namespace Tensorflow
bool allow_broadcast)
{
var t = convert_to_eager_tensor(value, tf.Context, dtype: dtype);
+ if (dtype != TF_DataType.DtInvalid && dtype != t.dtype)
+ {
+ t = math_ops.cast(t, dtype);
+ }
if (shape is null || shape.IsNull)
return t;
diff --git a/src/TensorFlowNET.Keras/Layers/LayersApi.cs b/src/TensorFlowNET.Keras/Layers/LayersApi.cs
index 76634918..0d71b271 100644
--- a/src/TensorFlowNET.Keras/Layers/LayersApi.cs
+++ b/src/TensorFlowNET.Keras/Layers/LayersApi.cs
@@ -4,6 +4,7 @@ using Tensorflow.Keras.ArgsDefinition.Core;
using Tensorflow.Keras.ArgsDefinition.Rnn;
using Tensorflow.Keras.Engine;
using Tensorflow.Keras.Layers.Rnn;
+using Tensorflow.NumPy;
using static Tensorflow.Binding;
using static Tensorflow.KerasApi;
@@ -829,5 +830,14 @@ namespace Tensorflow.Keras.Layers
"orthogonal" => tf.orthogonal_initializer,
_ => tf.glorot_uniform_initializer
};
+
+        /// <summary>
+        /// Creates a CategoryEncoding preprocessing layer configured from the supplied options.
+        /// </summary>
+        public ILayer CategoryEncoding(int num_tokens, string output_mode = "one_hot", bool sparse = false, NDArray count_weights = null)
+        {
+            var layerArgs = new CategoryEncodingArgs { NumTokens = num_tokens, OutputMode = output_mode, Sparse = sparse, CountWeights = count_weights };
+            return new CategoryEncoding(layerArgs);
+        }
}
}
diff --git a/src/TensorFlowNET.Keras/Layers/Preprocessing/CategoryEncoding.cs b/src/TensorFlowNET.Keras/Layers/Preprocessing/CategoryEncoding.cs
new file mode 100644
index 00000000..5620a916
--- /dev/null
+++ b/src/TensorFlowNET.Keras/Layers/Preprocessing/CategoryEncoding.cs
@@ -0,0 +1,75 @@
+using Tensorflow.Keras.ArgsDefinition;
+using Tensorflow.Keras.Engine;
+
+namespace Tensorflow.Keras.Layers
+{
+    /// <summary>
+    /// This layer provides options for condensing data into a categorical encoding when the total number of tokens are known in advance.
+    /// </summary>
+    public class CategoryEncoding : Layer
+    {
+        CategoryEncodingArgs args;
+
+        /// <summary>Creates the layer and keeps <paramref name="args"/> for use when the layer is called.</summary>
+        public CategoryEncoding(CategoryEncodingArgs args) : base(args)
+        {
+            this.args = args;
+        }
+
+        /// <summary>
+        /// Encodes <paramref name="inputs"/> with the configured output mode, token count and optional count weights.
+        /// </summary>
+        /// <exception cref="System.NotImplementedException">The output mode is "tf_idf", which is not implemented.</exception>
+        protected override Tensors Call(Tensors inputs, Tensor state = null, bool? training = null)
+        {
+            // "tf_idf" has no encoding here; fail loudly rather than hand the raw inputs back as if encoded.
+            if (args.OutputMode == "tf_idf")
+            {
+                throw new System.NotImplementedException("CategoryEncoding does not implement output_mode \"tf_idf\".");
+            }
+
+            return encode_categorical_inputs(inputs, args.OutputMode, args.NumTokens, args.DType,
+                sparse: args.Sparse,
+                count_weights: args.CountWeights);
+        }
+
+        /// <summary>Returns <paramref name="input_shape"/> unchanged.</summary>
+        public override Shape ComputeOutputShape(Shape input_shape)
+        {
+            // NOTE(review): Call produces a last dimension of num_tokens, so echoing the input shape looks inaccurate — confirm.
+            return input_shape;
+        }
+
+        /// <summary>
+        /// Counts token occurrences along the last axis with tf.math.bincount, pinning the output length to <paramref name="depth"/>.
+        /// </summary>
+        /// <param name="inputs">Integer token indices to encode.</param>
+        /// <param name="output_mode">"one_hot" and "multi_hot" request binary output; any other value yields counts.</param>
+        /// <param name="depth">Used as both minlength and maxlength of the bincount.</param>
+        /// <param name="dtype">Data type forwarded to the bincount.</param>
+        /// <param name="sparse">Accepted for API parity; currently not used by this method.</param>
+        /// <param name="count_weights">Optional weights forwarded to the bincount.</param>
+        Tensors encode_categorical_inputs(Tensor inputs, string output_mode, int depth,
+            TF_DataType dtype = TF_DataType.TF_FLOAT,
+            bool sparse = false,
+            Tensor count_weights = null)
+        {
+            bool binary_output = output_mode == "one_hot" || output_mode == "multi_hot";
+
+            // One-hot encodes each element separately, so give the inputs a trailing axis of size 1 if they lack one.
+            if (output_mode == "one_hot" && inputs.shape[-1] != 1)
+            {
+                inputs = tf.expand_dims(inputs, -1);
+            }
+
+            var depth_tensor = constant_op.constant(depth);
+            return tf.math.bincount(inputs,
+                weights: count_weights,
+                minlength: depth_tensor,
+                maxlength: depth_tensor,
+                dtype: dtype,
+                axis: -1,
+                binary_output: binary_output);
+        }
+    }
+}
diff --git a/test/TensorFlowNET.Keras.UnitTest/Layers/LayersTest.cs b/test/TensorFlowNET.Keras.UnitTest/Layers/LayersTest.cs
index 029592c3..f8a6174d 100644
--- a/test/TensorFlowNET.Keras.UnitTest/Layers/LayersTest.cs
+++ b/test/TensorFlowNET.Keras.UnitTest/Layers/LayersTest.cs
@@ -177,5 +177,60 @@ namespace TensorFlowNET.Keras.UnitTest
Assert.AreEqual((5, 2), output.shape);
Assert.IsTrue(output[0].numpy().Equals(new[] { -0.99998f, 0.99998f }));
}
+
+        /// <summary>
+        /// https://www.tensorflow.org/api_docs/python/tf/keras/layers/CategoryEncoding
+        /// </summary>
+        [TestMethod]
+        public void CategoryEncoding()
+        {
+            // Scenario 1: default output mode ("one_hot") over a vector of indices.
+            var oneHotInputs = np.array(new[] { 3, 2, 0, 1 });
+            var oneHotLayer = tf.keras.layers.CategoryEncoding(4);
+
+            Tensor oneHot = oneHotLayer.Apply(oneHotInputs);
+            Assert.AreEqual((4, 4), oneHot.shape);
+            Assert.IsTrue(oneHot[0].numpy().Equals(new[] { 0, 0, 0, 1f }));
+            Assert.IsTrue(oneHot[1].numpy().Equals(new[] { 0, 0, 1, 0f }));
+            Assert.IsTrue(oneHot[2].numpy().Equals(new[] { 1, 0, 0, 0f }));
+            Assert.IsTrue(oneHot[3].numpy().Equals(new[] { 0, 1, 0, 0f }));
+
+            // Scenario 2: "multi_hot" marks every token that appears in a row.
+            var multiHotInputs = np.array(new[,]
+            {
+                { 0, 1 },
+                { 0, 0 },
+                { 1, 2 },
+                { 3, 1 }
+            });
+            var multiHotLayer = tf.keras.layers.CategoryEncoding(4, output_mode: "multi_hot");
+            Tensor multiHot = multiHotLayer.Apply(multiHotInputs);
+            Assert.IsTrue(multiHot[0].numpy().Equals(new[] { 1, 1, 0, 0f }));
+            Assert.IsTrue(multiHot[1].numpy().Equals(new[] { 1, 0, 0, 0f }));
+            Assert.IsTrue(multiHot[2].numpy().Equals(new[] { 0, 1, 1, 0f }));
+            Assert.IsTrue(multiHot[3].numpy().Equals(new[] { 0, 1, 0, 1f }));
+
+            // Scenario 3: "count" mode, where each occurrence contributes its weight.
+            var countInputs = np.array(new[,]
+            {
+                { 0, 1 },
+                { 0, 0 },
+                { 1, 2 },
+                { 3, 1 }
+            });
+            var countWeights = np.array(new[,]
+            {
+                { 0.1f, 0.2f },
+                { 0.1f, 0.1f },
+                { 0.2f, 0.3f },
+                { 0.4f, 0.2f }
+            });
+            var countLayer = tf.keras.layers.CategoryEncoding(4, output_mode: "count", count_weights: countWeights);
+            Tensor weighted = countLayer.Apply(countInputs);
+            Assert.IsTrue(weighted[0].numpy().Equals(new[] { 0.1f, 0.2f, 0f, 0f }));
+            Assert.IsTrue(weighted[1].numpy().Equals(new[] { 0.2f, 0f, 0f, 0f }));
+            Assert.IsTrue(weighted[2].numpy().Equals(new[] { 0f, 0.2f, 0.3f, 0f }));
+            Assert.IsTrue(weighted[3].numpy().Equals(new[] { 0f, 0.2f, 0f, 0.4f }));
+        }
}
}
diff --git a/test/TensorFlowNET.Keras.UnitTest/Losses/LossesTest.cs b/test/TensorFlowNET.Keras.UnitTest/Losses/LossesTest.cs
index dad46c55..b19f0203 100644
--- a/test/TensorFlowNET.Keras.UnitTest/Losses/LossesTest.cs
+++ b/test/TensorFlowNET.Keras.UnitTest/Losses/LossesTest.cs
@@ -4,11 +4,12 @@ using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
+using Tensorflow;
using TensorFlowNET.Keras.UnitTest;
using static Tensorflow.Binding;
using static Tensorflow.KerasApi;
-namespace Tensorflow.Keras.UnitTest.Losses;
+namespace TensorFlowNET.Keras.UnitTest;
[TestClass]
public class LossesTest : EagerModeTestBase