Browse Source

doc: Node & Descriptor (#1069)

related: https://github.com/dora-rs/dora-rs.github.io/pull/40
pull/1080/head
Philipp Oppermann GitHub 6 months ago
parent
commit
5be4edb334
No known key found for this signature in database GPG Key ID: B5690EEEBB952194
1 changed files with 407 additions and 26 deletions
  1. +407
    -26
      libraries/message/src/descriptor.rs

+ 407
- 26
libraries/message/src/descriptor.rs View File

@@ -14,18 +14,73 @@ use std::{
pub const SHELL_SOURCE: &str = "shell"; pub const SHELL_SOURCE: &str = "shell";
pub const DYNAMIC_SOURCE: &str = "dynamic"; pub const DYNAMIC_SOURCE: &str = "dynamic";


/// Dataflow description
/// # Dataflow Specification
///
/// The main configuration structure for defining a Dora dataflow. Dataflows are
/// specified through YAML files that describe the nodes, their connections, and
/// execution parameters.
///
/// ## Structure
///
/// A dataflow consists of:
/// - **Nodes**: The computational units that process data
/// - **Communication**: Optional communication configuration
/// - **Deployment**: Optional deployment configuration (unstable)
/// - **Debug options**: Optional development and debugging settings (unstable)
///
/// ## Example
///
/// ```yaml
/// nodes:
/// - id: webcam
/// operator:
/// python: webcam.py
/// inputs:
/// tick: dora/timer/millis/100
/// outputs:
/// - image
/// - id: plot
/// operator:
/// python: plot.py
/// inputs:
/// image: webcam/image
/// ```
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] #[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
#[serde(deny_unknown_fields)] #[serde(deny_unknown_fields)]
#[schemars(title = "dora-rs specification")] #[schemars(title = "dora-rs specification")]
pub struct Descriptor { pub struct Descriptor {
/// List of nodes in the dataflow
///
/// This is the most important field of the dataflow specification.
/// Each node must be identified by a unique `id`:
///
/// ## Example
///
/// ```yaml
/// nodes:
/// - id: foo
/// path: path/to/the/executable
/// # ... (see below)
/// - id: bar
/// path: path/to/another/executable
/// # ... (see below)
/// ```
///
/// For each node, you need to specify the `path` of the executable or script that Dora should run when starting the node.
/// Most of the other node fields are optional, but you typically want to specify at least some `inputs` and/or `outputs`.
pub nodes: Vec<Node>,

/// Communication configuration (optional, uses defaults)
#[schemars(skip)] #[schemars(skip)]
#[serde(default)] #[serde(default)]
pub communication: CommunicationConfig, pub communication: CommunicationConfig,

/// Deployment configuration (optional, unstable)
#[schemars(skip)] #[schemars(skip)]
#[serde(rename = "_unstable_deploy")] #[serde(rename = "_unstable_deploy")]
pub deploy: Option<Deploy>, pub deploy: Option<Deploy>,
pub nodes: Vec<Node>,

/// Debug options (optional, unstable)
#[schemars(skip)] #[schemars(skip)]
#[serde(default, rename = "_unstable_debug")] #[serde(default, rename = "_unstable_debug")]
pub debug: Debug, pub debug: Debug,
@@ -34,62 +89,379 @@ pub struct Descriptor {
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] #[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
#[serde(deny_unknown_fields)] #[serde(deny_unknown_fields)]
pub struct Deploy { pub struct Deploy {
/// Target machine for deployment
pub machine: Option<String>, pub machine: Option<String>,
/// Working directory for the deployment
pub working_dir: Option<PathBuf>, pub working_dir: Option<PathBuf>,
} }


#[derive(Debug, Clone, Default, Serialize, Deserialize, JsonSchema)] #[derive(Debug, Clone, Default, Serialize, Deserialize, JsonSchema)]
pub struct Debug { pub struct Debug {
/// Whether to publish all messages to Zenoh for debugging
#[serde(default)] #[serde(default)]
pub publish_all_messages_to_zenoh: bool, pub publish_all_messages_to_zenoh: bool,
} }


/// Dora Node
/// # Dora Node Configuration
///
/// A node represents a computational unit in a Dora dataflow. Each node runs as a
/// separate process and can communicate with other nodes through inputs and outputs.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] #[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
#[serde(deny_unknown_fields)] #[serde(deny_unknown_fields)]
pub struct Node { pub struct Node {
/// Node identifier
/// Unique node identifier. Must not contain `/` characters.
///
/// Node IDs can be arbitrary strings with the following limitations:
///
/// - They must not contain any `/` characters (slashes).
/// - We do not recommend using whitespace characters (e.g. spaces) in IDs
///
/// Each node must have an ID field.
///
/// ## Example
///
/// ```yaml
/// nodes:
/// - id: camera_node
/// - id: some_other_node
/// ```
pub id: NodeId, pub id: NodeId,
/// Node name

/// Human-readable node name for documentation.
///
/// This optional field can be used to define a more descriptive name in addition to a short
/// [`id`](Self::id).
///
/// ## Example
///
/// ```yaml
/// nodes:
/// - id: camera_node
/// name: "Camera Input Handler"
/// ```
pub name: Option<String>, pub name: Option<String>,
/// Description of the node

/// Detailed description of the node's functionality.
///
/// ## Example
///
/// ```yaml
/// nodes:
/// - id: camera_node
/// description: "Captures video frames from webcam"
/// ```
pub description: Option<String>, pub description: Option<String>,
/// Environment variables
pub env: Option<BTreeMap<String, EnvValue>>,


/// Unstable machine deployment configuration
#[schemars(skip)]
#[serde(rename = "_unstable_deploy")]
pub deploy: Option<Deploy>,
/// Path to executable or script that should be run.
///
/// Specifies the path of the executable or script that Dora should run when starting the
/// dataflow.
/// This can point to a normal executable (e.g. when using a compiled language such as Rust) or
/// a Python script.
///
/// Dora will automatically append a `.exe` extension on Windows systems when the specified
/// file name has no extension.
///
/// ## Example
///
/// ```yaml
/// nodes:
/// - id: rust-example
/// path: target/release/rust-node
/// - id: python-example
/// path: ./receive_data.py
/// ```
///
/// ## URL as Path
///
/// The `path` field can also point to a URL instead of a local path.
/// In this case, Dora will download the given file when starting the dataflow.
///
/// Note that this is quite an old feature and using this functionality is **not recommended**
/// anymore. Instead, we recommend using a [`git`][Self::git] and/or [`build`](Self::build)
/// key.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub path: Option<String>,


/// Command-line arguments passed to the executable.
///
/// The command-line arguments that should be passed to the executable/script specified in `path`.
/// The arguments should be separated by spaces.
/// This field is optional and defaults to an empty argument list.
///
/// ## Example
/// ```yaml
/// nodes:
/// - id: example
/// path: example-node
/// args: -v --some-flag foo
/// ```
#[serde(default, skip_serializing_if = "Option::is_none")] #[serde(default, skip_serializing_if = "Option::is_none")]
pub operators: Option<RuntimeNode>,
pub args: Option<String>,

/// Environment variables for node builds and execution.
///
/// Key-value map of environment variables that should be set for both the
/// [`build`](Self::build) operation and the node execution (i.e. when the node is spawned
/// through [`path`](Self::path)).
///
/// Supports strings, numbers, and booleans.
///
/// ## Example
///
/// ```yaml
/// nodes:
/// - id: example-node
/// path: path/to/node
/// env:
/// DEBUG: true
/// PORT: 8080
/// API_KEY: "secret-key"
/// ```
pub env: Option<BTreeMap<String, EnvValue>>,

/// Multiple operators running in a shared runtime process.
///
/// Operators are an experimental, lightweight alternative to nodes.
/// Instead of running as a separate process, operators are linked into a runtime process.
/// This allows multiple operators to share a single address space (not supported for
/// Python currently).
///
/// Operators are defined as part of the node list, as children of a runtime node.
/// A runtime node is a special node that specifies no [`path`](Self::path) field, but contains
/// an `operators` field instead.
///
/// ## Example
///
/// ```yaml
/// nodes:
/// - id: runtime-node
/// operators:
/// - id: processor
/// python: process.py
/// ```
#[serde(default, skip_serializing_if = "Option::is_none")] #[serde(default, skip_serializing_if = "Option::is_none")]
pub custom: Option<CustomNode>,
pub operators: Option<RuntimeNode>,

/// Single operator configuration.
///
/// This is a convenience field for defining runtime nodes that contain only a single operator.
/// This field is an alternative to the [`operators`](Self::operators) field, which can be used
/// if there is only a single operator defined for the runtime node.
///
/// ## Example
///
/// ```yaml
/// nodes:
/// - id: runtime-node
/// operator:
/// id: processor
/// python: script.py
/// outputs: [data]
/// ```
#[serde(default, skip_serializing_if = "Option::is_none")] #[serde(default, skip_serializing_if = "Option::is_none")]
pub operator: Option<SingleOperatorDefinition>, pub operator: Option<SingleOperatorDefinition>,


/// Legacy node configuration (deprecated).
///
/// Please use the top-level [`path`](Self::path), [`args`](Self::args), etc. fields instead.
#[serde(default, skip_serializing_if = "Option::is_none")] #[serde(default, skip_serializing_if = "Option::is_none")]
pub path: Option<String>,
pub custom: Option<CustomNode>,

/// Output data identifiers produced by this node.
///
/// List of output identifiers that the node sends.
/// Must contain all `output_id` values that the node uses when sending output, e.g. through the
/// [`send_output`](https://docs.rs/dora-node-api/latest/dora_node_api/struct.DoraNode.html#method.send_output)
/// function.
///
/// ## Example
///
/// ```yaml
/// nodes:
/// - id: example-node
/// outputs:
/// - processed_image
/// - metadata
/// ```
#[serde(default)]
pub outputs: BTreeSet<DataId>,

/// Input data connections from other nodes.
///
/// Defines the inputs that this node is subscribing to.
///
/// The `inputs` field should be a key-value map of the following format:
///
/// `input_id: source_node_id/source_node_output_id`
///
/// The components are defined as follows:
///
/// - `input_id` is the local identifier that should be used for this input.
///
/// This will map to the `id` field of
/// [`Event::Input`](https://docs.rs/dora-node-api/latest/dora_node_api/enum.Event.html#variant.Input)
/// events sent to the node event loop.
/// - `source_node_id` should be the `id` field of the node that sends the output that we want
/// to subscribe to
/// - `source_node_output_id` should be the identifier of the output that we want
/// to subscribe to
///
/// ## Example
///
/// ```yaml
/// nodes:
/// - id: example-node
/// outputs:
/// - one
/// - two
/// - id: receiver
/// inputs:
/// my_input: example-node/two
/// ```
#[serde(default)]
pub inputs: BTreeMap<DataId, Input>,

/// Redirect stdout/stderr to a data output.
///
/// This field can be used to send all stdout and stderr output of the node as a Dora output.
/// Each output line is sent as a separate message.
///
///
/// ## Example
///
/// ```yaml
/// nodes:
/// - id: example
/// send_stdout_as: stdout_output
/// - id: logger
/// inputs:
/// example_output: example/stdout_output
/// ```
#[serde(skip_serializing_if = "Option::is_none")]
pub send_stdout_as: Option<String>,

/// Build commands executed during `dora build`. Each line runs separately.
///
/// The `build` key specifies the command that should be invoked for building the node.
/// The key expects a single- or multi-line string.
///
/// Each line is run as a separate command.
/// Spaces are used to separate arguments.
///
/// Note that all the environment variables specified in the [`env`](Self::env) field are also
/// applied to the build commands.
///
/// ## Special treatment of `pip`
///
/// Build lines that start with `pip` or `pip3` are treated in a special way:
/// If the `--uv` argument is passed to the `dora build` command, all `pip`/`pip3` commands are
/// run through the [`uv` package manager](https://docs.astral.sh/uv/).
///
/// ## Example
///
/// ```yaml
/// nodes:
/// - id: build-example
/// build: cargo build -p receive_data --release
/// path: target/release/receive_data
/// - id: multi-line-example
/// build: |
/// pip install -r requirements.txt
/// pip install -e some/local/package
/// path: package
/// ```
///
/// In the above example, the `pip` commands will be replaced by `uv pip` when run through
/// `dora build --uv`.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub build: Option<String>,

/// Git repository URL for downloading nodes.
///
/// The `git` key allows downloading nodes (i.e. their source code) from git repositories.
/// This can be especially useful for distributed dataflows.
///
/// When a `git` key is specified, `dora build` automatically clones the specified repository
/// (or reuses an existing clone).
/// Then it checks out the specified [`branch`](Self::branch), [`tag`](Self::tag), or
/// [`rev`](Self::rev), or the default branch if none of them are specified.
/// Afterwards it runs the [`build`](Self::build) command if specified.
///
/// Note that the git clone directory is set as working directory for both the
/// [`build`](Self::build) command and the specified [`path`](Self::path).
///
/// ## Example
///
/// ```yaml
/// nodes:
/// - id: rust-node
/// git: https://github.com/dora-rs/dora.git
/// build: cargo build -p rust-dataflow-example-node
/// path: target/debug/rust-dataflow-example-node
/// ```
///
/// In the above example, `dora build` will first clone the specified `git` repository and then
/// run the specified `build` inside the local clone directory.
/// When `dora run` or `dora start` is invoked, the working directory will be the git clone
/// directory too. So a relative `path` will start from the clone directory.
#[serde(default, skip_serializing_if = "Option::is_none")] #[serde(default, skip_serializing_if = "Option::is_none")]
pub git: Option<String>, pub git: Option<String>,

/// Git branch to checkout after cloning.
///
/// The `branch` field is only allowed in combination with the [`git`](Self::git) field.
/// It specifies the branch that should be checked out after cloning.
/// Only one of `branch`, `tag`, or `rev` can be specified.
///
/// ## Example
///
/// ```yaml
/// nodes:
/// - id: rust-node
/// git: https://github.com/dora-rs/dora.git
/// branch: some-branch-name
/// ```
#[serde(default, skip_serializing_if = "Option::is_none")] #[serde(default, skip_serializing_if = "Option::is_none")]
pub branch: Option<String>, pub branch: Option<String>,

/// Git tag to checkout after cloning.
///
/// The `tag` field is only allowed in combination with the [`git`](Self::git) field.
/// It specifies the git tag that should be checked out after cloning.
/// Only one of `branch`, `tag`, or `rev` can be specified.
///
/// ## Example
///
/// ```yaml
/// nodes:
/// - id: rust-node
/// git: https://github.com/dora-rs/dora.git
/// tag: v0.3.0
/// ```
#[serde(default, skip_serializing_if = "Option::is_none")] #[serde(default, skip_serializing_if = "Option::is_none")]
pub tag: Option<String>, pub tag: Option<String>,

/// Git revision (e.g. commit hash) to checkout after cloning.
///
/// The `rev` field is only allowed in combination with the [`git`](Self::git) field.
/// It specifies the git revision (e.g. a commit hash) that should be checked out after cloning.
/// Only one of `branch`, `tag`, or `rev` can be specified.
///
/// ## Example
///
/// ```yaml
/// nodes:
/// - id: rust-node
/// git: https://github.com/dora-rs/dora.git
/// rev: 64ab0d7c
/// ```
#[serde(default, skip_serializing_if = "Option::is_none")] #[serde(default, skip_serializing_if = "Option::is_none")]
pub rev: Option<String>, pub rev: Option<String>,


#[serde(default, skip_serializing_if = "Option::is_none")]
pub args: Option<String>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub build: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub send_stdout_as: Option<String>,
#[serde(default)]
pub inputs: BTreeMap<DataId, Input>,
#[serde(default)]
pub outputs: BTreeSet<DataId>,
/// Unstable machine deployment configuration
#[schemars(skip)]
#[serde(rename = "_unstable_deploy")]
pub deploy: Option<Deploy>,
} }


#[derive(Debug, Clone, Serialize, Deserialize)] #[derive(Debug, Clone, Serialize, Deserialize)]
@@ -119,11 +491,13 @@ pub enum CoreNodeKind {
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] #[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
#[serde(transparent)] #[serde(transparent)]
pub struct RuntimeNode { pub struct RuntimeNode {
/// List of operators running in this runtime
pub operators: Vec<OperatorDefinition>, pub operators: Vec<OperatorDefinition>,
} }


#[derive(Debug, Serialize, Deserialize, JsonSchema, Clone)] #[derive(Debug, Serialize, Deserialize, JsonSchema, Clone)]
pub struct OperatorDefinition { pub struct OperatorDefinition {
/// Unique operator identifier within the runtime
pub id: OperatorId, pub id: OperatorId,
#[serde(flatten)] #[serde(flatten)]
pub config: OperatorConfig, pub config: OperatorConfig,
@@ -131,7 +505,7 @@ pub struct OperatorDefinition {


#[derive(Debug, Serialize, Deserialize, JsonSchema, Clone)] #[derive(Debug, Serialize, Deserialize, JsonSchema, Clone)]
pub struct SingleOperatorDefinition { pub struct SingleOperatorDefinition {
/// ID is optional if there is only a single operator.
/// Operator identifier (optional for single operators)
pub id: Option<OperatorId>, pub id: Option<OperatorId>,
#[serde(flatten)] #[serde(flatten)]
pub config: OperatorConfig, pub config: OperatorConfig,
@@ -139,19 +513,26 @@ pub struct SingleOperatorDefinition {


#[derive(Debug, Serialize, Deserialize, JsonSchema, Clone)] #[derive(Debug, Serialize, Deserialize, JsonSchema, Clone)]
pub struct OperatorConfig { pub struct OperatorConfig {
/// Human-readable operator name
pub name: Option<String>, pub name: Option<String>,
/// Detailed description of the operator
pub description: Option<String>, pub description: Option<String>,


/// Input data connections
#[serde(default)] #[serde(default)]
pub inputs: BTreeMap<DataId, Input>, pub inputs: BTreeMap<DataId, Input>,
/// Output data identifiers
#[serde(default)] #[serde(default)]
pub outputs: BTreeSet<DataId>, pub outputs: BTreeSet<DataId>,


/// Operator source configuration (Python, shared library, etc.)
#[serde(flatten)] #[serde(flatten)]
pub source: OperatorSource, pub source: OperatorSource,


/// Build commands for this operator
#[serde(default, skip_serializing_if = "Option::is_none")] #[serde(default, skip_serializing_if = "Option::is_none")]
pub build: Option<String>, pub build: Option<String>,
/// Redirect stdout to data output
#[serde(skip_serializing_if = "Option::is_none")] #[serde(skip_serializing_if = "Option::is_none")]
pub send_stdout_as: Option<String>, pub send_stdout_as: Option<String>,
} }


Loading…
Cancel
Save