| @@ -1,13 +1,14 @@ | |||
| { | |||
| "$schema": "http://json-schema.org/draft-07/schema#", | |||
| "title": "dora-rs specification", | |||
| "description": "Dataflow description", | |||
| "description": "The main configuration structure for defining a Dora dataflow. Dataflows are specified through YAML files that describe the nodes, their connections, and execution parameters.\n\n## Structure\n\nA dataflow consists of:\n\n- **Nodes**: The computational units that process data\n- **Communication**: Optional communication configuration\n- **Deployment**: Optional deployment configuration (unstable)\n- **Debug options**: Optional development and debugging settings (unstable)\n\n## Example\n\n```yaml nodes: - id: webcam operator: python: webcam.py inputs: tick: dora/timer/millis/100 outputs: - image - id: plot operator: python: plot.py inputs: image: webcam/image ```", | |||
| "type": "object", | |||
| "required": [ | |||
| "nodes" | |||
| ], | |||
| "properties": { | |||
| "nodes": { | |||
| "description": "List of nodes in the dataflow\n\nThis is the most important field of the dataflow specification. Each node must be identified by a unique `id`:\n\n## Example\n\n```yaml nodes: - id: foo path: path/to/the/executable # ... (see below) - id: bar path: path/to/another/executable # ... (see below) ```\n\nFor each node, you need to specify the `path` of the executable or script that Dora should run when starting the node. Most of the other node fields are optional, but you typically want to specify at least some `inputs` and/or `outputs`.", | |||
| "type": "array", | |||
| "items": { | |||
| "$ref": "#/definitions/Node" | |||
| @@ -17,8 +18,10 @@ | |||
| "additionalProperties": true, | |||
| "definitions": { | |||
| "CustomNode": { | |||
| "description": "Contains the input and output configuration of the node.", | |||
| "type": "object", | |||
| "required": [ | |||
| "path", | |||
| "source" | |||
| ], | |||
| "properties": { | |||
| @@ -60,6 +63,10 @@ | |||
| }, | |||
| "uniqueItems": true | |||
| }, | |||
| "path": { | |||
| "description": "Path of the source code\n\nIf you want to use a specific `conda` environment, provide the Python path within the source.\n\nsource: /home/peter/miniconda3/bin/python\n\nargs: some_node.py\n\nSource can match any executable in PATH.", | |||
| "type": "string" | |||
| }, | |||
| "send_stdout_as": { | |||
| "description": "Send stdout and stderr to another node", | |||
| "type": [ | |||
| @@ -68,8 +75,7 @@ | |||
| ] | |||
| }, | |||
| "source": { | |||
| "description": "Path of the source code\n\nIf you want to use a specific `conda` environment. Provide the python path within the source.\n\nsource: /home/peter/miniconda3/bin/python\n\nargs: some_node.py\n\nSource can match any executable in PATH.", | |||
| "type": "string" | |||
| "$ref": "#/definitions/NodeSource" | |||
| } | |||
| } | |||
| }, | |||
| @@ -113,6 +119,46 @@ | |||
| } | |||
| ] | |||
| }, | |||
| "GitRepoRev": { | |||
| "oneOf": [ | |||
| { | |||
| "type": "object", | |||
| "required": [ | |||
| "Branch" | |||
| ], | |||
| "properties": { | |||
| "Branch": { | |||
| "type": "string" | |||
| } | |||
| }, | |||
| "additionalProperties": true | |||
| }, | |||
| { | |||
| "type": "object", | |||
| "required": [ | |||
| "Tag" | |||
| ], | |||
| "properties": { | |||
| "Tag": { | |||
| "type": "string" | |||
| } | |||
| }, | |||
| "additionalProperties": true | |||
| }, | |||
| { | |||
| "type": "object", | |||
| "required": [ | |||
| "Rev" | |||
| ], | |||
| "properties": { | |||
| "Rev": { | |||
| "type": "string" | |||
| } | |||
| }, | |||
| "additionalProperties": true | |||
| } | |||
| ] | |||
| }, | |||
| "Input": { | |||
| "type": "object", | |||
| "required": [ | |||
| @@ -169,25 +215,36 @@ | |||
| ] | |||
| }, | |||
| "Node": { | |||
| "description": "Dora Node", | |||
| "title": "Dora Node Configuration", | |||
| "description": "A node represents a computational unit in a Dora dataflow. Each node runs as a separate process and can communicate with other nodes through inputs and outputs.", | |||
| "type": "object", | |||
| "required": [ | |||
| "id" | |||
| ], | |||
| "properties": { | |||
| "args": { | |||
| "description": "Command-line arguments passed to the executable.\n\nThe command-line arguments that should be passed to the executable/script specified in `path`. The arguments should be separated by spaces. This field is optional and defaults to an empty argument list.\n\n## Example\n\n```yaml nodes: - id: example path: example-node args: -v --some-flag foo ```", | |||
| "type": [ | |||
| "string", | |||
| "null" | |||
| ] | |||
| }, | |||
| "branch": { | |||
| "description": "Git branch to checkout after cloning.\n\nThe `branch` field is only allowed in combination with the [`git`](#git) field. It specifies the branch that should be checked out after cloning. Only one of `branch`, `tag`, or `rev` can be specified.\n\n## Example\n\n```yaml nodes: - id: rust-node git: https://github.com/dora-rs/dora.git branch: some-branch-name ```", | |||
| "type": [ | |||
| "string", | |||
| "null" | |||
| ] | |||
| }, | |||
| "build": { | |||
| "description": "Build commands executed during `dora build`. Each line runs separately.\n\nThe `build` key specifies the command that should be invoked for building the node. The key expects a single- or multi-line string.\n\nEach line is run as a separate command. Spaces are used to separate arguments.\n\nNote that all the environment variables specified in the [`env`](Self::env) field are also applied to the build commands.\n\n## Special treatment of `pip`\n\nBuild lines that start with `pip` or `pip3` are treated in a special way: If the `--uv` argument is passed to the `dora build` command, all `pip`/`pip3` commands are run through the [`uv` package manager](https://docs.astral.sh/uv/).\n\n## Example\n\n```yaml nodes: - id: build-example build: cargo build -p receive_data --release path: target/release/receive_data - id: multi-line-example build: | pip install -r requirements.txt pip install -e some/local/package path: package ```\n\nIn the above example, the `pip` commands will be replaced by `uv pip` when run through `dora build --uv`.", | |||
| "type": [ | |||
| "string", | |||
| "null" | |||
| ] | |||
| }, | |||
| "custom": { | |||
| "description": "Legacy node configuration (deprecated).\n\nPlease use the top-level [`path`](Self::path), [`args`](Self::args), etc. fields instead.", | |||
| "anyOf": [ | |||
| { | |||
| "$ref": "#/definitions/CustomNode" | |||
| @@ -198,14 +255,14 @@ | |||
| ] | |||
| }, | |||
| "description": { | |||
| "description": "Description of the node", | |||
| "description": "Detailed description of the node's functionality.\n\n## Example\n\n```yaml nodes: - id: camera_node description: \"Captures video frames from webcam\" ```", | |||
| "type": [ | |||
| "string", | |||
| "null" | |||
| ] | |||
| }, | |||
| "env": { | |||
| "description": "Environment variables", | |||
| "description": "Environment variables for node builds and execution.\n\nKey-value map of environment variables that should be set for both the [`build`](Self::build) operation and the node execution (i.e. when the node is spawned through [`path`](Self::path)).\n\nSupports strings, numbers, and booleans.\n\n## Example\n\n```yaml nodes: - id: example-node path: path/to/node env: DEBUG: true PORT: 8080 API_KEY: \"secret-key\" ```", | |||
| "type": [ | |||
| "object", | |||
| "null" | |||
| @@ -214,8 +271,15 @@ | |||
| "$ref": "#/definitions/EnvValue" | |||
| } | |||
| }, | |||
| "git": { | |||
| "description": "Git repository URL for downloading nodes.\n\nThe `git` key allows downloading nodes (i.e. their source code) from git repositories. This can be especially useful for distributed dataflows.\n\nWhen a `git` key is specified, `dora build` automatically clones the specified repository (or reuses an existing clone). Then it checks out the specified [`branch`](Self::branch), [`tag`](Self::tag), or [`rev`](Self::rev), or the default branch if none of them are specified. Afterwards it runs the [`build`](Self::build) command if specified.\n\nNote that the git clone directory is set as working directory for both the [`build`](Self::build) command and the specified [`path`](Self::path).\n\n## Example\n\n```yaml nodes: - id: rust-node git: https://github.com/dora-rs/dora.git build: cargo build -p rust-dataflow-example-node path: target/debug/rust-dataflow-example-node ```\n\nIn the above example, `dora build` will first clone the specified `git` repository and then run the specified `build` inside the local clone directory. When `dora run` or `dora start` is invoked, the working directory will be the git clone directory too. So a relative `path` will start from the clone directory.", | |||
| "type": [ | |||
| "string", | |||
| "null" | |||
| ] | |||
| }, | |||
| "id": { | |||
| "description": "Node identifier", | |||
| "description": "Unique node identifier. Must not contain `/` characters.\n\nNode IDs can be arbitrary strings with the following limitations:\n\n- They must not contain any `/` characters (slashes).\n- We do not recommend using whitespace characters (e.g. spaces) in IDs.\n\nEach node must have an ID field.\n\n## Example\n\n```yaml nodes: - id: camera_node - id: some_other_node ```", | |||
| "allOf": [ | |||
| { | |||
| "$ref": "#/definitions/NodeId" | |||
| @@ -223,18 +287,20 @@ | |||
| ] | |||
| }, | |||
| "inputs": { | |||
| "description": "Input data connections from other nodes.\n\nDefines the inputs that this node is subscribing to.\n\nThe `inputs` field should be a key-value map of the following format:\n\n`input_id: source_node_id/source_node_output_id`\n\nThe components are defined as follows:\n\n- `input_id` is the local identifier that should be used for this input.\n\n  This will map to the `id` field of [`Event::Input`](https://docs.rs/dora-node-api/latest/dora_node_api/enum.Event.html#variant.Input) events sent to the node event loop.\n- `source_node_id` should be the `id` field of the node that sends the output that we want to subscribe to.\n- `source_node_output_id` should be the identifier of the output that we want to subscribe to.\n\n## Example\n\n```yaml nodes: - id: example-node outputs: - one - two - id: receiver inputs: my_input: example-node/two ```", | |||
| "default": {}, | |||
| "type": "object", | |||
| "additionalProperties": true | |||
| }, | |||
| "name": { | |||
| "description": "Node name", | |||
| "description": "Human-readable node name for documentation.\n\nThis optional field can be used to define a more descriptive name in addition to a short [`id`](Self::id).\n\n## Example\n\n```yaml nodes: - id: camera_node name: \"Camera Input Handler\" ```", | |||
| "type": [ | |||
| "string", | |||
| "null" | |||
| ] | |||
| }, | |||
| "operator": { | |||
| "description": "Single operator configuration.\n\nThis is a convenience field for defining runtime nodes that contain only a single operator. This field is an alternative to the [`operators`](Self::operators) field, which can be used if there is only a single operator defined for the runtime node.\n\n## Example\n\n```yaml nodes: - id: runtime-node operator: id: processor python: script.py outputs: [data] ```", | |||
| "anyOf": [ | |||
| { | |||
| "$ref": "#/definitions/SingleOperatorDefinition" | |||
| @@ -245,6 +311,7 @@ | |||
| ] | |||
| }, | |||
| "operators": { | |||
| "description": "Multiple operators running in a shared runtime process.\n\nOperators are an experimental, lightweight alternative to nodes. Instead of running as a separate process, operators are linked into a runtime process. This allows multiple operators to share a single address space (not supported for Python currently).\n\nOperators are defined as part of the node list, as children of a runtime node. A runtime node is a special node that specifies no [`path`](Self::path) field, but contains an `operators` field instead.\n\n## Example\n\n```yaml nodes: - id: runtime-node operators: - id: processor python: process.py ```", | |||
| "type": [ | |||
| "array", | |||
| "null" | |||
| @@ -254,6 +321,7 @@ | |||
| } | |||
| }, | |||
| "outputs": { | |||
| "description": "Output data identifiers produced by this node.\n\nList of output identifiers that the node sends. Must contain all `output_id` values that the node uses when sending output, e.g. through the [`send_output`](https://docs.rs/dora-node-api/latest/dora_node_api/struct.DoraNode.html#method.send_output) function.\n\n## Example\n\n```yaml nodes: - id: example-node outputs: - processed_image - metadata ```", | |||
| "default": [], | |||
| "type": "array", | |||
| "items": { | |||
| @@ -262,12 +330,28 @@ | |||
| "uniqueItems": true | |||
| }, | |||
| "path": { | |||
| "description": "Path to executable or script that should be run.\n\nSpecifies the path of the executable or script that Dora should run when starting the dataflow. This can point to a normal executable (e.g. when using a compiled language such as Rust) or a Python script.\n\nDora will automatically append a `.exe` extension on Windows systems when the specified file name has no extension.\n\n## Example\n\n```yaml nodes: - id: rust-example path: target/release/rust-node - id: python-example path: ./receive_data.py ```\n\n## URL as Path\n\nThe `path` field can also point to a URL instead of a local path. In this case, Dora will download the given file when starting the dataflow.\n\nNote that this is quite an old feature and using this functionality is **not recommended** anymore. Instead, we recommend using a [`git`](Self::git) and/or [`build`](Self::build) key.", | |||
| "type": [ | |||
| "string", | |||
| "null" | |||
| ] | |||
| }, | |||
| "rev": { | |||
| "description": "Git revision (e.g. commit hash) to checkout after cloning.\n\nThe `rev` field is only allowed in combination with the [`git`](#git) field. It specifies the git revision (e.g. a commit hash) that should be checked out after cloning. Only one of `branch`, `tag`, or `rev` can be specified.\n\n## Example\n\n```yaml nodes: - id: rust-node git: https://github.com/dora-rs/dora.git rev: 64ab0d7c ```", | |||
| "type": [ | |||
| "string", | |||
| "null" | |||
| ] | |||
| }, | |||
| "send_stdout_as": { | |||
| "description": "Redirect stdout/stderr to a data output.\n\nThis field can be used to send all stdout and stderr output of the node as a Dora output. Each output line is sent as a separate message.\n\n## Example\n\n```yaml nodes: - id: example send_stdout_as: stdout_output - id: logger inputs: example_output: example/stdout_output ```", | |||
| "type": [ | |||
| "string", | |||
| "null" | |||
| ] | |||
| }, | |||
| "tag": { | |||
| "description": "Git tag to checkout after cloning.\n\nThe `tag` field is only allowed in combination with the [`git`](#git) field. It specifies the git tag that should be checked out after cloning. Only one of `branch`, `tag`, or `rev` can be specified.\n\n## Example\n\n```yaml nodes: - id: rust-node git: https://github.com/dora-rs/dora.git tag: v0.3.0 ```", | |||
| "type": [ | |||
| "string", | |||
| "null" | |||
| @@ -279,6 +363,46 @@ | |||
| "NodeId": { | |||
| "type": "string" | |||
| }, | |||
| "NodeSource": { | |||
| "oneOf": [ | |||
| { | |||
| "type": "string", | |||
| "enum": [ | |||
| "Local" | |||
| ] | |||
| }, | |||
| { | |||
| "type": "object", | |||
| "required": [ | |||
| "GitBranch" | |||
| ], | |||
| "properties": { | |||
| "GitBranch": { | |||
| "type": "object", | |||
| "required": [ | |||
| "repo" | |||
| ], | |||
| "properties": { | |||
| "repo": { | |||
| "type": "string" | |||
| }, | |||
| "rev": { | |||
| "anyOf": [ | |||
| { | |||
| "$ref": "#/definitions/GitRepoRev" | |||
| }, | |||
| { | |||
| "type": "null" | |||
| } | |||
| ] | |||
| } | |||
| } | |||
| } | |||
| }, | |||
| "additionalProperties": true | |||
| } | |||
| ] | |||
| }, | |||
| "OperatorDefinition": { | |||
| "type": "object", | |||
| "oneOf": [ | |||
| @@ -310,32 +434,42 @@ | |||
| ], | |||
| "properties": { | |||
| "build": { | |||
| "description": "Build commands for this operator", | |||
| "type": [ | |||
| "string", | |||
| "null" | |||
| ] | |||
| }, | |||
| "description": { | |||
| "description": "Detailed description of the operator", | |||
| "type": [ | |||
| "string", | |||
| "null" | |||
| ] | |||
| }, | |||
| "id": { | |||
| "$ref": "#/definitions/OperatorId" | |||
| "description": "Unique operator identifier within the runtime", | |||
| "allOf": [ | |||
| { | |||
| "$ref": "#/definitions/OperatorId" | |||
| } | |||
| ] | |||
| }, | |||
| "inputs": { | |||
| "description": "Input data connections", | |||
| "default": {}, | |||
| "type": "object", | |||
| "additionalProperties": true | |||
| }, | |||
| "name": { | |||
| "description": "Human-readable operator name", | |||
| "type": [ | |||
| "string", | |||
| "null" | |||
| ] | |||
| }, | |||
| "outputs": { | |||
| "description": "Output data identifiers", | |||
| "default": [], | |||
| "type": "array", | |||
| "items": { | |||
| @@ -344,6 +478,7 @@ | |||
| "uniqueItems": true | |||
| }, | |||
| "send_stdout_as": { | |||
| "description": "Redirect stdout to data output", | |||
| "type": [ | |||
| "string", | |||
| "null" | |||
| @@ -399,19 +534,21 @@ | |||
| ], | |||
| "properties": { | |||
| "build": { | |||
| "description": "Build commands for this operator", | |||
| "type": [ | |||
| "string", | |||
| "null" | |||
| ] | |||
| }, | |||
| "description": { | |||
| "description": "Detailed description of the operator", | |||
| "type": [ | |||
| "string", | |||
| "null" | |||
| ] | |||
| }, | |||
| "id": { | |||
| "description": "ID is optional if there is only a single operator.", | |||
| "description": "Operator identifier (optional for single operators)", | |||
| "anyOf": [ | |||
| { | |||
| "$ref": "#/definitions/OperatorId" | |||
| @@ -422,17 +559,20 @@ | |||
| ] | |||
| }, | |||
| "inputs": { | |||
| "description": "Input data connections", | |||
| "default": {}, | |||
| "type": "object", | |||
| "additionalProperties": true | |||
| }, | |||
| "name": { | |||
| "description": "Human-readable operator name", | |||
| "type": [ | |||
| "string", | |||
| "null" | |||
| ] | |||
| }, | |||
| "outputs": { | |||
| "description": "Output data identifiers", | |||
| "default": [], | |||
| "type": "array", | |||
| "items": { | |||
| @@ -441,6 +581,7 @@ | |||
| "uniqueItems": true | |||
| }, | |||
| "send_stdout_as": { | |||
| "description": "Redirect stdout to data output", | |||
| "type": [ | |||
| "string", | |||
| "null" | |||