diff --git a/libraries/message/src/descriptor.rs b/libraries/message/src/descriptor.rs index 1e640eb9..34ddc925 100644 --- a/libraries/message/src/descriptor.rs +++ b/libraries/message/src/descriptor.rs @@ -14,18 +14,73 @@ use std::{ pub const SHELL_SOURCE: &str = "shell"; pub const DYNAMIC_SOURCE: &str = "dynamic"; -/// Dataflow description +/// # Dataflow Specification +/// +/// The main configuration structure for defining a Dora dataflow. Dataflows are +/// specified through YAML files that describe the nodes, their connections, and +/// execution parameters. +/// +/// ## Structure +/// +/// A dataflow consists of: +/// - **Nodes**: The computational units that process data +/// - **Communication**: Optional communication configuration +/// - **Deployment**: Optional deployment configuration (unstable) +/// - **Debug options**: Optional development and debugging settings (unstable) +/// +/// ## Example +/// +/// ```yaml +/// nodes: +/// - id: webcam +/// operator: +/// python: webcam.py +/// inputs: +/// tick: dora/timer/millis/100 +/// outputs: +/// - image +/// - id: plot +/// operator: +/// python: plot.py +/// inputs: +/// image: webcam/image +/// ``` #[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] #[serde(deny_unknown_fields)] #[schemars(title = "dora-rs specification")] pub struct Descriptor { + /// List of nodes in the dataflow + /// + /// This is the most important field of the dataflow specification. + /// Each node must be identified by a unique `id`: + /// + /// ## Example + /// + /// ```yaml + /// nodes: + /// - id: foo + /// path: path/to/the/executable + /// # ... (see below) + /// - id: bar + /// path: path/to/another/executable + /// # ... (see below) + /// ``` + /// + /// For each node, you need to specify the `path` of the executable or script that Dora should run when starting the node. + /// Most of the other node fields are optional, but you typically want to specify at least some `inputs` and/or `outputs`. 
+ pub nodes: Vec, + + /// Communication configuration (optional, uses defaults) #[schemars(skip)] #[serde(default)] pub communication: CommunicationConfig, + + /// Deployment configuration (optional, unstable) #[schemars(skip)] #[serde(rename = "_unstable_deploy")] pub deploy: Option, - pub nodes: Vec, + + /// Debug options (optional, unstable) #[schemars(skip)] #[serde(default, rename = "_unstable_debug")] pub debug: Debug, @@ -34,62 +89,379 @@ pub struct Descriptor { #[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] #[serde(deny_unknown_fields)] pub struct Deploy { + /// Target machine for deployment pub machine: Option, + /// Working directory for the deployment pub working_dir: Option, } #[derive(Debug, Clone, Default, Serialize, Deserialize, JsonSchema)] pub struct Debug { + /// Whether to publish all messages to Zenoh for debugging #[serde(default)] pub publish_all_messages_to_zenoh: bool, } -/// Dora Node +/// # Dora Node Configuration +/// +/// A node represents a computational unit in a Dora dataflow. Each node runs as a +/// separate process and can communicate with other nodes through inputs and outputs. #[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] #[serde(deny_unknown_fields)] pub struct Node { - /// Node identifier + /// Unique node identifier. Must not contain `/` characters. + /// + /// Node IDs can be arbitrary strings with the following limitations: + /// + /// - They must not contain any `/` characters (slashes). + /// - We do not recommend using whitespace characters (e.g. spaces) in IDs + /// + /// Each node must have an ID field. + /// + /// ## Example + /// + /// ```yaml + /// nodes: + /// - id: camera_node + /// - id: some_other_node + /// ``` pub id: NodeId, - /// Node name + + /// Human-readable node name for documentation. + /// + /// This optional field can be used to define a more descriptive name in addition to a short + /// [`id`](Self::id). 
+ /// + /// ## Example + /// + /// ```yaml + /// nodes: + /// - id: camera_node + /// name: "Camera Input Handler" + /// ``` pub name: Option, - /// Description of the node + + /// Detailed description of the node's functionality. + /// + /// ## Example + /// + /// ```yaml + /// nodes: + /// - id: camera_node + /// description: "Captures video frames from webcam" + /// ``` pub description: Option, - /// Environment variables - pub env: Option>, - /// Unstable machine deployment configuration - #[schemars(skip)] - #[serde(rename = "_unstable_deploy")] - pub deploy: Option, + /// Path to executable or script that should be run. + /// + /// Specifies the path of the executable or script that Dora should run when starting the + /// dataflow. + /// This can point to a normal executable (e.g. when using a compiled language such as Rust) or + /// a Python script. + /// + /// Dora will automatically append a `.exe` extension on Windows systems when the specified + /// file name has no extension. + /// + /// ## Example + /// + /// ```yaml + /// nodes: + /// - id: rust-example + /// path: target/release/rust-node + /// - id: python-example + /// path: ./receive_data.py + /// ``` + /// + /// ## URL as Path + /// + /// The `path` field can also point to a URL instead of a local path. + /// In this case, Dora will download the given file when starting the dataflow. + /// + /// Note that this is quite an old feature and using this functionality is **not recommended** + /// anymore. Instead, we recommend using a [`git`](Self::git) and/or [`build`](Self::build) + /// key. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub path: Option, + /// Command-line arguments passed to the executable. + /// + /// The command-line arguments that should be passed to the executable/script specified in `path`. + /// The arguments should be separated by spaces. + /// This field is optional and defaults to an empty argument list. 
+ /// + /// ## Example + /// ```yaml + /// nodes: + /// - id: example + /// path: example-node + /// args: -v --some-flag foo + /// ``` #[serde(default, skip_serializing_if = "Option::is_none")] - pub operators: Option, + pub args: Option, + + /// Environment variables for node builds and execution. + /// + /// Key-value map of environment variables that should be set for both the + /// [`build`](Self::build) operation and the node execution (i.e. when the node is spawned + /// through [`path`](Self::path)). + /// + /// Supports strings, numbers, and booleans. + /// + /// ## Example + /// + /// ```yaml + /// nodes: + /// - id: example-node + /// path: path/to/node + /// env: + /// DEBUG: true + /// PORT: 8080 + /// API_KEY: "secret-key" + /// ``` + pub env: Option>, + + /// Multiple operators running in a shared runtime process. + /// + /// Operators are an experimental, lightweight alternative to nodes. + /// Instead of running as a separate process, operators are linked into a runtime process. + /// This allows multiple operators to share a single address space (not supported for + /// Python currently). + /// + /// Operators are defined as part of the node list, as children of a runtime node. + /// A runtime node is a special node that specifies no [`path`](Self::path) field, but contains + /// an `operators` field instead. + /// + /// ## Example + /// + /// ```yaml + /// nodes: + /// - id: runtime-node + /// operators: + /// - id: processor + /// python: process.py + /// ``` #[serde(default, skip_serializing_if = "Option::is_none")] - pub custom: Option, + pub operators: Option, + + /// Single operator configuration. + /// + /// This is a convenience field for defining runtime nodes that contain only a single operator. + /// This field is an alternative to the [`operators`](Self::operators) field, which can be used + /// if there is only a single operator defined for the runtime node. 
+ /// + /// ## Example + /// + /// ```yaml + /// nodes: + /// - id: runtime-node + /// operator: + /// id: processor + /// python: script.py + /// outputs: [data] + /// ``` #[serde(default, skip_serializing_if = "Option::is_none")] pub operator: Option, + /// Legacy node configuration (deprecated). + /// + /// Please use the top-level [`path`](Self::path), [`args`](Self::args), etc. fields instead. #[serde(default, skip_serializing_if = "Option::is_none")] - pub path: Option, + pub custom: Option, + + /// Output data identifiers produced by this node. + /// + /// List of output identifiers that the node sends. + /// Must contain all `output_id` values that the node uses when sending output, e.g. through the + /// [`send_output`](https://docs.rs/dora-node-api/latest/dora_node_api/struct.DoraNode.html#method.send_output) + /// function. + /// + /// ## Example + /// + /// ```yaml + /// nodes: + /// - id: example-node + /// outputs: + /// - processed_image + /// - metadata + /// ``` + #[serde(default)] + pub outputs: BTreeSet, + + /// Input data connections from other nodes. + /// + /// Defines the inputs that this node is subscribing to. + /// + /// The `inputs` field should be a key-value map of the following format: + /// + /// `input_id: source_node_id/source_node_output_id` + /// + /// The components are defined as follows: + /// + /// - `input_id` is the local identifier that should be used for this input. + /// + /// This will map to the `id` field of + /// [`Event::Input`](https://docs.rs/dora-node-api/latest/dora_node_api/enum.Event.html#variant.Input) + /// events sent to the node event loop. 
+ /// - `source_node_id` should be the `id` field of the node that sends the output that we want + /// to subscribe to + /// - `source_node_output_id` should be the identifier of the output that we want + /// to subscribe to + /// + /// ## Example + /// + /// ```yaml + /// nodes: + /// - id: example-node + /// outputs: + /// - one + /// - two + /// - id: receiver + /// inputs: + /// my_input: example-node/two + /// ``` + #[serde(default)] + pub inputs: BTreeMap, + + /// Redirect stdout/stderr to a data output. + /// + /// This field can be used to send all stdout and stderr output of the node as a Dora output. + /// Each output line is sent as a separate message. + /// + /// + /// ## Example + /// + /// ```yaml + /// nodes: + /// - id: example + /// send_stdout_as: stdout_output + /// - id: logger + /// inputs: + /// example_output: example/stdout_output + /// ``` + #[serde(skip_serializing_if = "Option::is_none")] + pub send_stdout_as: Option, + + /// Build commands executed during `dora build`. Each line runs separately. + /// + /// The `build` key specifies the command that should be invoked for building the node. + /// The key expects a single- or multi-line string. + /// + /// Each line is run as a separate command. + /// Spaces are used to separate arguments. + /// + /// Note that all the environment variables specified in the [`env`](Self::env) field are also + /// applied to the build commands. + /// + /// ## Special treatment of `pip` + /// + /// Build lines that start with `pip` or `pip3` are treated in a special way: + /// If the `--uv` argument is passed to the `dora build` command, all `pip`/`pip3` commands are + /// run through the [`uv` package manager](https://docs.astral.sh/uv/). 
+ /// + /// ## Example + /// + /// ```yaml + /// nodes: + /// - id: build-example + /// build: cargo build -p receive_data --release + /// path: target/release/receive_data + /// - id: multi-line-example + /// build: | + /// pip install -r requirements.txt + /// pip install -e some/local/package + /// path: package + /// ``` + /// + /// In the above example, the `pip` commands will be replaced by `uv pip` when run through + /// `dora build --uv`. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub build: Option, + + /// Git repository URL for downloading nodes. + /// + /// The `git` key allows downloading nodes (i.e. their source code) from git repositories. + /// This can be especially useful for distributed dataflows. + /// + /// When a `git` key is specified, `dora build` automatically clones the specified repository + /// (or reuses an existing clone). + /// Then it checks out the specified [`branch`](Self::branch), [`tag`](Self::tag), or + /// [`rev`](Self::rev), or the default branch if none of them are specified. + /// Afterwards it runs the [`build`](Self::build) command if specified. + /// + /// Note that the git clone directory is set as the working directory for both the + /// [`build`](Self::build) command and the specified [`path`](Self::path). + /// + /// ## Example + /// + /// ```yaml + /// nodes: + /// - id: rust-node + /// git: https://github.com/dora-rs/dora.git + /// build: cargo build -p rust-dataflow-example-node + /// path: target/debug/rust-dataflow-example-node + /// ``` + /// + /// In the above example, `dora build` will first clone the specified `git` repository and then + /// run the specified `build` inside the local clone directory. + /// When `dora run` or `dora start` is invoked, the working directory will be the git clone + /// directory too. So a relative `path` will start from the clone directory. #[serde(default, skip_serializing_if = "Option::is_none")] pub git: Option, + + /// Git branch to check out after cloning. 
+ /// + /// The `branch` field is only allowed in combination with the [`git`](Self::git) field. + /// It specifies the branch that should be checked out after cloning. + /// Only one of `branch`, `tag`, or `rev` can be specified. + /// + /// ## Example + /// + /// ```yaml + /// nodes: + /// - id: rust-node + /// git: https://github.com/dora-rs/dora.git + /// branch: some-branch-name + /// ``` #[serde(default, skip_serializing_if = "Option::is_none")] pub branch: Option, + + /// Git tag to check out after cloning. + /// + /// The `tag` field is only allowed in combination with the [`git`](Self::git) field. + /// It specifies the git tag that should be checked out after cloning. + /// Only one of `branch`, `tag`, or `rev` can be specified. + /// + /// ## Example + /// + /// ```yaml + /// nodes: + /// - id: rust-node + /// git: https://github.com/dora-rs/dora.git + /// tag: v0.3.0 + /// ``` #[serde(default, skip_serializing_if = "Option::is_none")] pub tag: Option, + + /// Git revision (e.g. commit hash) to check out after cloning. + /// + /// The `rev` field is only allowed in combination with the [`git`](Self::git) field. + /// It specifies the git revision (e.g. a commit hash) that should be checked out after cloning. + /// Only one of `branch`, `tag`, or `rev` can be specified. 
+ /// + /// ## Example + /// + /// ```yaml + /// nodes: + /// - id: rust-node + /// git: https://github.com/dora-rs/dora.git + /// rev: 64ab0d7c + /// ``` #[serde(default, skip_serializing_if = "Option::is_none")] pub rev: Option, - #[serde(default, skip_serializing_if = "Option::is_none")] - pub args: Option, - #[serde(default, skip_serializing_if = "Option::is_none")] - pub build: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub send_stdout_as: Option, - #[serde(default)] - pub inputs: BTreeMap, - #[serde(default)] - pub outputs: BTreeSet, + /// Unstable machine deployment configuration + #[schemars(skip)] + #[serde(rename = "_unstable_deploy")] + pub deploy: Option, } #[derive(Debug, Clone, Serialize, Deserialize)] @@ -119,11 +491,13 @@ pub enum CoreNodeKind { #[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] #[serde(transparent)] pub struct RuntimeNode { + /// List of operators running in this runtime pub operators: Vec, } #[derive(Debug, Serialize, Deserialize, JsonSchema, Clone)] pub struct OperatorDefinition { + /// Unique operator identifier within the runtime pub id: OperatorId, #[serde(flatten)] pub config: OperatorConfig, @@ -131,7 +505,7 @@ pub struct OperatorDefinition { #[derive(Debug, Serialize, Deserialize, JsonSchema, Clone)] pub struct SingleOperatorDefinition { - /// ID is optional if there is only a single operator. + /// Operator identifier (optional for single operators) pub id: Option, #[serde(flatten)] pub config: OperatorConfig, @@ -139,19 +513,26 @@ pub struct SingleOperatorDefinition { #[derive(Debug, Serialize, Deserialize, JsonSchema, Clone)] pub struct OperatorConfig { + /// Human-readable operator name pub name: Option, + /// Detailed description of the operator pub description: Option, + /// Input data connections #[serde(default)] pub inputs: BTreeMap, + /// Output data identifiers #[serde(default)] pub outputs: BTreeSet, + /// Operator source configuration (Python, shared library, etc.) 
#[serde(flatten)] pub source: OperatorSource, + /// Build commands for this operator #[serde(default, skip_serializing_if = "Option::is_none")] pub build: Option, + /// Redirect stdout to data output #[serde(skip_serializing_if = "Option::is_none")] pub send_stdout_as: Option, }